Spaces: Build error
Commit · 5a7c06e
1 Parent(s): be18558

Files changed:
- .DS_Store +0 -0
- .gradio/cached_examples/22/log.csv +2 -0
- .gradio/cached_examples/28/log.csv +2 -0
- .gradio/certificate.pem +31 -0
- LICENSE +0 -21
- app.py +163 -16
- app/__pycache__/config.cpython-310.pyc +0 -0
- app/__pycache__/model.cpython-310.pyc +0 -0
- app/__pycache__/model_architectures.cpython-310.pyc +0 -0
- app/config.py +18 -9
- app/model.py +10 -21
- app/model_architectures.py +65 -23
- app/sleep_quality_processing.py +0 -94
- app/video_processing.py +64 -56
- app_gpuzero.py +0 -64
- assets/.DS_Store +0 -0
- assets/models/FER_dynamic_LSTM.pt +3 -0
- assets/models/FER_static_ResNet50_AffectNet.pt +3 -0
- llm/mentalBERT.py +0 -73
- notebooks/pytorch-roberta-onnx.ipynb +0 -280
- onxxchatbot.py +0 -40
- tabs/FACS_analysis.py +9 -8
- tabs/__emotion_analysis.py +0 -36
- tabs/__pycache__/FACS_analysis.cpython-310.pyc +0 -0
- tabs/__pycache__/deception_detection.cpython-310.pyc +0 -0
- tabs/__pycache__/heart_rate_variability.cpython-310.pyc +0 -0
- tabs/__pycache__/speech_stress_analysis.cpython-310.pyc +0 -0
- tabs/__pycache__/speech_stress_analysis.cpython-312.pyc +0 -0
- tabs/__sentiment_analysis.py +0 -36
- tabs/deception_detection.py +601 -0
- tabs/heart_rate_variability.py +220 -0
- tabs/speech_stress_analysis.py +217 -95
- verify.py +0 -3
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ

.gradio/cached_examples/22/log.csv
ADDED
@@ -0,0 +1,2 @@
+HRV Results,PPG Signal Plot,timestamp
+Video too short. Please provide at least 10 seconds of footage.,,2024-11-11 07:13:23.866354

.gradio/cached_examples/28/log.csv
ADDED
@@ -0,0 +1,2 @@
+Summary,Analysis Plots,Detailed Metrics,Recording Information,timestamp
+Video too short. Please provide at least 10 seconds of footage.,,,,2024-11-11 07:17:26.515598

.gradio/certificate.pem
ADDED
@@ -0,0 +1,31 @@
+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----

LICENSE
DELETED
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2024 Elena Ryumina
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.

app.py
CHANGED
@@ -4,36 +4,183 @@ import gradio as gr
from tabs.speech_stress_analysis import create_voice_stress_tab
from tabs.speech_emotion_recognition import create_emotion_recognition_tab
from tabs.FACS_analysis import create_facs_analysis_tab
+from tabs.heart_rate_variability import create_heart_rate_variability_tab
+from tabs.deception_detection import create_deception_detection_tab, load_models
+import logging
+import torch
+from typing import Dict

+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)

+# Custom CSS for better styling
+CUSTOM_CSS = """
+/* Global styles */
+.gradio-container {
+    font-family: 'Arial', sans-serif;
+    max-width: 1200px;
+    margin: auto;
+    padding: 20px;
+    background-color: #f8f9fa;
+}
+
+/* Header styling */
+h1 {
+    color: #2c3e50;
+    text-align: center;
+    padding: 20px 0;
+    margin-bottom: 30px;
+    border-bottom: 2px solid #3498db;
+}
+
+/* Tab navigation styling */
+.gradio-tabs-nav {
+    background-color: #ffffff;
+    border-radius: 8px;
+    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+    margin-bottom: 20px;
+}
+
+/* Content areas */
+.content-area {
+    background: white;
+    padding: 20px;
+    border-radius: 8px;
+    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+    margin-top: 20px;
+}
+
+/* Results area */
+.results-area {
+    background-color: #ffffff;
+    padding: 20px;
+    border-radius: 8px;
+    margin-top: 20px;
+    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+}
+
+/* Disclaimer styling */
+.disclaimer {
+    background-color: #f8f9fa;
+    border-left: 4px solid #3498db;
+    padding: 15px;
+    margin-top: 30px;
+    font-size: 0.9em;
+    color: #666;
+}
+"""
+
+# HTML content
+HEADER_HTML = """
+<div style="text-align: center; padding: 20px;">
+    <h1>AI-Driven Multimodal Emotional State Analysis</h1>
+    <p style="font-size: 1.2em; color: #666;">
+        Comprehensive analysis of stress, emotion, and truthfulness through facial expressions,
+        heart rate variability, and speech patterns.
+    </p>
+</div>
+"""
+
+DISCLAIMER_HTML = """
+<div class="disclaimer">
+    <h3>Important Notice</h3>
+    <p>This application provides AI-driven analysis for:</p>
+    <ul>
+        <li>Stress and emotion detection</li>
+        <li>Heart rate variability analysis</li>
+        <li>Speech pattern analysis</li>
+        <li>Truth/deception indication</li>
+    </ul>
+    <p><strong>Disclaimer:</strong> This tool is for research and informational purposes only.
+    It should not be used as a substitute for professional medical advice, diagnosis, or treatment.
+    The deception detection feature is experimental and should not be used as definitive proof
+    of truthfulness or deception.</p>
+</div>
+"""
+
+# Tab structure
TAB_STRUCTURE = [
    ("Visual Analysis", [
+        ("FACS Analysis", create_facs_analysis_tab),
+        ("Heart Rate Variability", create_heart_rate_variability_tab),
+        ("Truth/Deception Detection", create_deception_detection_tab)  # Pass models here
    ]),
    ("Speech Analysis", [
        ("Speech Stress", create_voice_stress_tab),
+        ("Speech Emotion", create_emotion_recognition_tab)
    ])
]

+def create_demo(models: Dict[str, torch.nn.Module]):
+    """Create and configure the Gradio interface."""
+    with gr.Blocks(css=CUSTOM_CSS, title="Multimodal Emotional State Analysis") as demo:
+        # Header
+        gr.HTML(HEADER_HTML)
+
+        # Main content area with Tabs
+        with gr.Tabs():
            for main_tab, sub_tabs in TAB_STRUCTURE:
                with gr.Tab(main_tab):
+                    with gr.Column():
+                        with gr.Tabs():
+                            for sub_tab, create_fn in sub_tabs:
+                                with gr.Tab(sub_tab):
+                                    if main_tab == "Visual Analysis" and sub_tab == "Truth/Deception Detection":
+                                        # Pass loaded models to the deception detection tab
+                                        create_fn(models)
+                                    else:
+                                        create_fn()
+                        # Add help information below sub-tabs
+                        if main_tab == "Visual Analysis":
+                            gr.Markdown("""
+                            ### Visual Analysis Features
+                            - **FACS Analysis**: Facial Action Coding System for emotion detection
+                            - **Heart Rate Variability**: Stress and wellness indicators
+                            - **Truth/Deception Detection**: Physiological response analysis
+
+                            **For best results:**
+                            1. Use good lighting
+                            2. Face the camera directly
+                            3. Minimize movement during recording
+                            """)
+                        elif main_tab == "Speech Analysis":
+                            gr.Markdown("""
+                            ### Speech Analysis Features
+                            - **Speech Stress**: Voice stress analysis
+                            - **Speech Emotion**: Emotional content detection
+
+                            **For best results:**
+                            1. Use a quiet environment
+                            2. Speak clearly
+                            3. Avoid background noise
+                            """)
+
+        # Disclaimer
        gr.HTML(DISCLAIMER_HTML)
+
    return demo

+def main():
+    """Main function to run the application."""
+    # Load models once and pass them to the deception detection tab
+    models_loaded = load_models()
+    if not models_loaded:
+        logger.error("No models loaded. Exiting application.")
+        return
+
+    # Initialize Gradio interface
+    demo = create_demo(models_loaded)
+
+    # Configure and launch the interface
+    demo.queue()  # Enable queuing without specific concurrency count
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+        debug=True,
+        show_error=True
+    )

if __name__ == "__main__":
+    main()

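Editor's note: a minimal usage sketch, not part of the commit, showing how the new entry points in app.py fit together; the import lines are assumptions about how the module would be imported.

# Hypothetical driver, assuming app.py is importable and exposes create_demo:
# from app import create_demo
# from tabs.deception_detection import load_models
models = load_models()        # dict of torch.nn.Module instances, as main() expects
demo = create_demo(models)    # builds the tabbed gr.Blocks interface
demo.queue()
demo.launch(server_name="0.0.0.0", server_port=7860)
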
app/__pycache__/config.cpython-310.pyc
CHANGED
Binary files a/app/__pycache__/config.cpython-310.pyc and b/app/__pycache__/config.cpython-310.pyc differ

app/__pycache__/model.cpython-310.pyc
CHANGED
Binary files a/app/__pycache__/model.cpython-310.pyc and b/app/__pycache__/model.cpython-310.pyc differ

app/__pycache__/model_architectures.cpython-310.pyc
CHANGED
Binary files a/app/__pycache__/model_architectures.cpython-310.pyc and b/app/__pycache__/model_architectures.cpython-310.pyc differ

app/config.py
CHANGED
@@ -1,7 +1,8 @@
+# config.py
+
"""
File: config.py
-Description: Configuration file.
+Description: Configuration file for the AI-Driven Multimodal Emotional State Analysis application.
License: MIT License
"""

@@ -9,25 +10,32 @@ import toml
from typing import Dict
from types import SimpleNamespace

def flatten_dict(prefix: str, d: Dict) -> Dict:
+    """
+    Recursively flattens a nested dictionary, concatenating keys with underscores.
+    """
    result = {}
    for k, v in d.items():
        if isinstance(v, dict):
            result.update(flatten_dict(f"{prefix}{k}_", v))
        else:
            result[f"{prefix}{k}"] = v
    return result

+# Load configuration from 'config.toml' if it exists
+try:
+    config = toml.load("config.toml")
+except FileNotFoundError:
+    config = {}
+    print("Warning: 'config.toml' not found. Using default configuration.")

+# Flatten the configuration dictionary
-config_data = flatten_dict("", config)
+config_data_dict = flatten_dict("", config)

+# Convert the dictionary to a SimpleNamespace for easy attribute access
+config_data = SimpleNamespace(**config_data_dict)

+# Define emotion labels
DICT_EMO = {
    0: "Neutral",
    1: "Happiness",

@@ -38,6 +46,7 @@ DICT_EMO = {
    6: "Anger",
}

+# Define colors for plotting or UI elements
COLORS = {
    0: 'blue',
    1: 'orange',

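Editor's note: a small sketch, not part of the commit, of what the flatten_dict helper and SimpleNamespace conversion above produce; the nested dictionary is invented for illustration.

nested = {"model": {"path": "assets/models", "fps": 25}, "debug": True}
flat = flatten_dict("", nested)
# flat == {"model_path": "assets/models", "model_fps": 25, "debug": True}
cfg = SimpleNamespace(**flat)
# cfg.model_path -> "assets/models"
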
app/model.py
CHANGED
@@ -1,3 +1,5 @@
+# model.py
+
import os
import torch
import torch.nn as nn

@@ -23,7 +25,12 @@ def load_model(model_class, model_path, *args, **kwargs):
    model = model_class(*args, **kwargs).to(device)
    if os.path.exists(model_path):
        try:
+            state_dict = torch.load(model_path, map_location=device)
+            missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
+            if missing_keys:
+                logger.warning(f"Missing keys when loading model from {model_path}: {missing_keys}")
+            if unexpected_keys:
+                logger.warning(f"Unexpected keys when loading model from {model_path}: {unexpected_keys}")
            model.eval()
            logger.info(f"Model loaded successfully from {model_path}")
        except Exception as e:

@@ -40,7 +47,7 @@ pth_model_static = load_model(ResNet50, STATIC_MODEL_PATH, num_classes=7, channe
pth_model_dynamic = load_model(LSTMPyTorch, DYNAMIC_MODEL_PATH, input_size=2048, hidden_size=256, num_layers=2, num_classes=7)

# Set up GradCAM
+target_layers = [pth_model_static.layer4[-1]]  # Adjusted to match the updated model
cam = GradCAM(model=pth_model_static, target_layers=target_layers)

# Define image preprocessing

@@ -54,25 +61,7 @@ def pth_processing(img):
    img = pth_transform(img).unsqueeze(0).to(device)
    return img

-    with torch.no_grad():
-        output = pth_model_static(pth_processing(img))
-        _, predicted = torch.max(output, 1)
-        return predicted.item()
-
-def get_emotion_probabilities(img):
-    with torch.no_grad():
-        output = nn.functional.softmax(pth_model_static(pth_processing(img)), dim=1)
-        return output.squeeze().cpu().numpy()
-
-def generate_cam(img):
-    input_tensor = pth_processing(img)
-    targets = [ClassifierOutputTarget(predict_emotion(img))]
-    grayscale_cam = cam(input_tensor=input_tensor, targets=targets)
-    return grayscale_cam[0, :]
-
-# Add any other necessary functions or variables here
+# Additional utility functions...

if __name__ == "__main__":
    logger.info("Model initialization complete.")
-    # You can add some test code here to verify everything is working correctly

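Editor's note: a minimal sketch, not part of the commit, of the non-strict checkpoint-loading pattern the revised load_model uses; the function name and paths here are placeholders.

import logging
import torch

logger = logging.getLogger(__name__)

def load_weights(model, path, device="cpu"):
    # Load a state dict and tolerate architecture drift, logging any mismatches.
    state_dict = torch.load(path, map_location=device)
    missing, unexpected = model.load_state_dict(state_dict, strict=False)
    if missing:
        logger.warning(f"Missing keys: {missing}")
    if unexpected:
        logger.warning(f"Unexpected keys: {unexpected}")
    return model.eval()
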
app/model_architectures.py
CHANGED
@@ -1,32 +1,67 @@
+# model_architectures.py
+
import torch
import torch.nn as nn
import torchvision.models as models
+import logging
+
+logger = logging.getLogger(__name__)

class ResNet50(nn.Module):
    def __init__(self, num_classes=7, channels=3):
        super(ResNet50, self).__init__()
+        # Define layers directly without wrapping in 'resnet'
+        self.conv_layer_s2_same = nn.Conv2d(channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
+        self.batch_norm1 = nn.BatchNorm2d(64)
+        self.relu = nn.ReLU(inplace=True)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+        # Load pre-trained ResNet50 model
+        resnet = models.resnet50(pretrained=True)
+
+        # Extract layers
+        self.layer1 = resnet.layer1
+        self.layer2 = resnet.layer2
+        self.layer3 = resnet.layer3
+        self.layer4 = resnet.layer4
+        self.avgpool = resnet.avgpool
+
+        # Fully connected layers
+        self.fc1 = nn.Linear(resnet.fc.in_features, num_classes)
+        # If your model has additional fully connected layers, define them here
+        # Example:
+        # self.fc2 = nn.Linear(num_classes, num_classes)

    def forward(self, x):
+        x = self.conv_layer_s2_same(x)
+        x = self.batch_norm1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+
+        x = self.avgpool(x)
+        x = torch.flatten(x, 1)
+        x = self.fc1(x)
+        # If additional fully connected layers are defined, pass x through them
+        # x = self.fc2(x)
+        return x

    def extract_features(self, x):
+        x = self.conv_layer_s2_same(x)
+        x = self.batch_norm1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)

+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)

+        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return x

@@ -34,13 +69,20 @@ class LSTMPyTorch(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMPyTorch, self).__init__()
        self.hidden_size = hidden_size
+
+        # Define separate LSTM layers
+        self.lstm1 = nn.LSTM(input_size, hidden_size, num_layers=1, batch_first=True)
+        self.lstm2 = nn.LSTM(hidden_size, hidden_size, num_layers=1, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
+        h0_1 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
+        c0_1 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
+        out1, _ = self.lstm1(x, (h0_1, c0_1))
+
+        h0_2 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
+        c0_2 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
+        out2, _ = self.lstm2(out1, (h0_2, c0_2))
+
+        out = self.fc(out2[:, -1, :])
+        return out

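Editor's note: a minimal sketch, not part of the commit, showing how the two architectures above are instantiated with the sizes that app/model.py passes to load_model; the random tensors are placeholders for a preprocessed face crop and a 10-frame feature window.

import torch

static_model = ResNet50(num_classes=7, channels=3)
dynamic_model = LSTMPyTorch(input_size=2048, hidden_size=256, num_layers=2, num_classes=7)

frame = torch.randn(1, 3, 224, 224)               # one preprocessed face crop
features = static_model.extract_features(frame)   # -> shape (1, 2048)
clip = features.unsqueeze(0).repeat(1, 10, 1)     # -> shape (1, 10, 2048)
logits = dynamic_model(clip)                      # -> shape (1, 7)
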
app/sleep_quality_processing.py
DELETED
@@ -1,94 +0,0 @@
-import cv2
-import numpy as np
-import matplotlib.pyplot as plt
-import mediapipe as mp
-from app.face_utils import get_box
-
-mp_face_mesh = mp.solutions.face_mesh
-
-def preprocess_video_and_predict_sleep_quality(video):
-    cap = cv2.VideoCapture(video)
-    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    fps = np.round(cap.get(cv2.CAP_PROP_FPS))
-
-    path_save_video_original = 'result_original.mp4'
-    path_save_video_face = 'result_face.mp4'
-    path_save_video_sleep = 'result_sleep.mp4'
-
-    vid_writer_original = cv2.VideoWriter(path_save_video_original, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
-    vid_writer_face = cv2.VideoWriter(path_save_video_face, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))
-    vid_writer_sleep = cv2.VideoWriter(path_save_video_sleep, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))
-
-    frames = []
-    sleep_quality_scores = []
-    eye_bags_images = []
-
-    with mp_face_mesh.FaceMesh(
-        max_num_faces=1,
-        refine_landmarks=False,
-        min_detection_confidence=0.5,
-        min_tracking_confidence=0.5) as face_mesh:
-
-        while cap.isOpened():
-            ret, frame = cap.read()
-            if not ret:
-                break
-
-            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            results = face_mesh.process(frame_rgb)
-
-            if results.multi_face_landmarks:
-                for fl in results.multi_face_landmarks:
-                    startX, startY, endX, endY = get_box(fl, w, h)
-                    cur_face = frame_rgb[startY:endY, startX:endX]
-
-                    sleep_quality_score, eye_bags_image = analyze_sleep_quality(cur_face)
-                    sleep_quality_scores.append(sleep_quality_score)
-                    eye_bags_images.append(cv2.resize(eye_bags_image, (224, 224)))
-
-                    sleep_quality_viz = create_sleep_quality_visualization(cur_face, sleep_quality_score)
-
-                    cur_face = cv2.resize(cur_face, (224, 224))
-
-                    vid_writer_face.write(cv2.cvtColor(cur_face, cv2.COLOR_RGB2BGR))
-                    vid_writer_sleep.write(sleep_quality_viz)
-
-            vid_writer_original.write(frame)
-            frames.append(len(frames) + 1)
-
-    cap.release()
-    vid_writer_original.release()
-    vid_writer_face.release()
-    vid_writer_sleep.release()
-
-    sleep_stat = sleep_quality_statistics_plot(frames, sleep_quality_scores)
-
-    if eye_bags_images:
-        average_eye_bags_image = np.mean(np.array(eye_bags_images), axis=0).astype(np.uint8)
-    else:
-        average_eye_bags_image = np.zeros((224, 224, 3), dtype=np.uint8)
-
-    return (path_save_video_original, path_save_video_face, path_save_video_sleep,
-            average_eye_bags_image, sleep_stat)
-
-def analyze_sleep_quality(face_image):
-    # Placeholder function - implement your sleep quality analysis here
-    sleep_quality_score = np.random.random()
-    eye_bags_image = cv2.resize(face_image, (224, 224))
-    return sleep_quality_score, eye_bags_image
-
-def create_sleep_quality_visualization(face_image, sleep_quality_score):
-    viz = face_image.copy()
-    cv2.putText(viz, f"Sleep Quality: {sleep_quality_score:.2f}", (10, 30),
-                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-    return cv2.cvtColor(viz, cv2.COLOR_RGB2BGR)
-
-def sleep_quality_statistics_plot(frames, sleep_quality_scores):
-    fig, ax = plt.subplots()
-    ax.plot(frames, sleep_quality_scores)
-    ax.set_xlabel('Frame')
-    ax.set_ylabel('Sleep Quality Score')
-    ax.set_title('Sleep Quality Over Time')
-    plt.tight_layout()
-    return fig

app/video_processing.py
CHANGED
@@ -8,19 +8,28 @@ from app.face_utils import get_box, display_info
from app.config import config_data
from app.plot import statistics_plot
from .au_processing import features_to_au_intensities, au_statistics_plot
+from pytorch_grad_cam.utils.image import show_cam_on_image

mp_face_mesh = mp.solutions.face_mesh

+def preprocess_video_and_predict(video_path):
+    cap = cv2.VideoCapture(video_path)
+    if not cap.isOpened():
+        print(f"Error opening video file: {video_path}")
+        return None, None, None, None, None

+    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    if fps <= 0 or fps != fps:  # Handle NaN fps
+        fps = 30  # Default FPS

+    # Paths to save processed videos
+    path_save_video_face = 'result_face.mp4'
    path_save_video_hm = 'result_hm.mp4'
+
+    # Video writers
+    vid_writer_face = cv2.VideoWriter(path_save_video_face, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))
    vid_writer_hm = cv2.VideoWriter(path_save_video_hm, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))

    lstm_features = []

@@ -30,54 +39,58 @@ def preprocess_video_and_predict(video):
    frames = []
    au_intensities_list = []
    last_output = None
+    last_heatmap = None
    last_au_intensities = None
    cur_face = None

    with mp_face_mesh.FaceMesh(
+        max_num_faces=1,
+        refine_landmarks=False,
+        min_detection_confidence=0.5,
+        min_tracking_confidence=0.5) as face_mesh:

        while cap.isOpened():
+            ret, frame = cap.read()
+            if not ret:
+                break

+            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            results = face_mesh.process(frame_rgb)

            if results.multi_face_landmarks:
+                for face_landmarks in results.multi_face_landmarks:
+                    startX, startY, endX, endY = get_box(face_landmarks, width, height)
+                    cur_face = frame_rgb[startY:endY, startX:endX]
+
+                    if count_face % config_data.FRAME_DOWNSAMPLING == 0:
+                        cur_face_pil = Image.fromarray(cur_face)
+                        cur_face_processed = pth_processing(cur_face_pil)

                        with torch.no_grad():
+                            features = torch.nn.functional.relu(
+                                pth_model_static.extract_features(cur_face_processed)
+                            ).cpu().numpy()
+                            au_intensities = features_to_au_intensities(
+                                pth_model_static(cur_face_processed)
+                            )
+
+                        # Generate heatmap
+                        grayscale_cam = cam(input_tensor=cur_face_processed)[0, :]
+                        cur_face_resized = cv2.resize(cur_face, (224, 224), interpolation=cv2.INTER_AREA)
+                        cur_face_normalized = np.float32(cur_face_resized) / 255
+                        heatmap = show_cam_on_image(cur_face_normalized, grayscale_cam, use_rgb=False)
+
                        last_heatmap = heatmap
                        last_au_intensities = au_intensities
+
+                        if not lstm_features:
+                            lstm_features = [features] * 10
                        else:
                            lstm_features = lstm_features[1:] + [features]

+                        lstm_input = torch.from_numpy(np.vstack(lstm_features)).unsqueeze(0)
                        with torch.no_grad():
+                            output = pth_model_dynamic(lstm_input).cpu().numpy()
                        last_output = output

                        if count_face == 0:

@@ -88,38 +101,33 @@ def preprocess_video_and_predict(video):
                    output = last_output
                    heatmap = last_heatmap
                    au_intensities = last_au_intensities
+                else:
+                    output = np.full((1, 7), np.nan)
+                    au_intensities = np.full(24, np.nan)

                probs.append(output[0])
                frames.append(count_frame)
                au_intensities_list.append(au_intensities)
            else:
                if last_output is not None:
                    lstm_features = []
+                probs.append(np.full(7, np.nan))
                frames.append(count_frame)
                au_intensities_list.append(np.full(24, np.nan))

            if cur_face is not None:
+                heatmap_frame = display_info(heatmap, f'Frame: {count_frame}', box_scale=0.3)
+                cur_face_bgr = cv2.cvtColor(cur_face, cv2.COLOR_RGB2BGR)
+                cur_face_resized = cv2.resize(cur_face_bgr, (224, 224), interpolation=cv2.INTER_AREA)
+                cur_face_annotated = display_info(cur_face_resized, f'Frame: {count_frame}', box_scale=0.3)
+                vid_writer_face.write(cur_face_annotated)
+                vid_writer_hm.write(heatmap_frame)

            count_frame += 1
            if count_face != 0:
                count_face += 1

+    cap.release()
    vid_writer_face.release()
    vid_writer_hm.release()

@@ -128,5 +136,5 @@ def preprocess_video_and_predict(video):

    if not stat or not au_stat:
        return None, None, None, None, None
+
+    return video_path, path_save_video_face, path_save_video_hm, stat, au_stat

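Editor's note: a short sketch, not part of the commit, of how the revised preprocess_video_and_predict is called and what it returns; the file name is a placeholder.

result = preprocess_video_and_predict("sample_clip.mp4")
if result == (None, None, None, None, None):
    print("Video could not be processed")
else:
    video_path, face_video, heatmap_video, stat, au_stat = result
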
app_gpuzero.py
DELETED
@@ -1,64 +0,0 @@
-import gradio as gr
-from tabs.heart_rate_variability import create_hrv_tab
-from tabs.blink_detection import create_blink_tab
-from tabs.gaze_estimation import create_gaze_estimation_tab
-from tabs.speech_stress_analysis import create_voice_stress_tab
-from tabs.head_posture_detection import create_head_posture_tab
-from tabs.face_expressions import create_face_expressions_tab
-from tabs.speech_emotion_recognition import create_emotion_recognition_tab
-from tabs.sleep_quality import create_sleep_quality_tab
-from tabs.sentiment_analysis import create_sentiment_tab
-from tabs.emotion_analysis import create_emotion_tab
-from tabs.body_movement_analysis import create_body_movement_tab
-from tabs.posture_analysis import create_posture_analysis_tab
-from tabs.skin_analysis import create_skin_conductance_tab
-from tabs.FACS_analysis import create_facs_analysis_tab
-from tabs.roberta_chatbot import create_roberta_chatbot_tab
-
-# Import the UI components
-from ui_components import CUSTOM_CSS, HEADER_HTML, DISCLAIMER_HTML
-
-TAB_STRUCTURE = [
-    ("Visual Analysis", [
-        ("Emotional Face Expressions", create_face_expressions_tab),
-        ("FACS for Stress, Anxiety, Depression", create_facs_analysis_tab),
-        ("Gaze Estimation", create_gaze_estimation_tab),
-        ("Head Posture", create_head_posture_tab),
-        ("Blink Rate", create_blink_tab),
-        ("Sleep Quality", create_sleep_quality_tab),
-        ("Heart Rate Variability", create_hrv_tab),
-        ("Body Movement", create_body_movement_tab),
-        ("Posture", create_posture_analysis_tab),
-        ("Skin", create_skin_conductance_tab)
-    ]),
-    ("Speech Analysis", [
-        ("Speech Stress", create_voice_stress_tab),
-        ("Speech Emotion", create_emotion_recognition_tab)
-    ]),
-    ("Text Analysis", [
-        ("Sentiment", create_sentiment_tab),
-        ("Emotion", create_emotion_tab),
-        ("Roberta Mental Health Chatbot", create_roberta_chatbot_tab)
-    ]),
-    ("Brain Analysis (coming soon)", [
-    ])
-]
-
-def create_demo():
-    with gr.Blocks(css=CUSTOM_CSS) as demo:
-        gr.Markdown(HEADER_HTML)
-        with gr.Tabs(elem_classes=["main-tab"]):
-            for main_tab, sub_tabs in TAB_STRUCTURE:
-                with gr.Tab(main_tab):
-                    with gr.Tabs():
-                        for sub_tab, create_fn in sub_tabs:
-                            with gr.Tab(sub_tab):
-                                create_fn()
-        gr.HTML(DISCLAIMER_HTML)
-    return demo
-
-# Create the demo instance
-demo = create_demo()
-
-if __name__ == "__main__":
-    demo.queue(api_open=True).launch(share=False)

assets/.DS_Store
CHANGED
Binary files a/assets/.DS_Store and b/assets/.DS_Store differ

assets/models/FER_dynamic_LSTM.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0cd1561a72f9de26c315bb857f03e8946635db047e0dbea52bb0276610f19751
+size 11569208

assets/models/FER_static_ResNet50_AffectNet.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8274190b5be4355bd2f07b59f593fcdb294f9d7c563bfa9ac9e5ea06c10692d2
+size 98562934

llm/mentalBERT.py
DELETED
@@ -1,73 +0,0 @@
-import torch
-from transformers import RobertaTokenizer, RobertaForSequenceClassification
-import gradio as gr
-
-# Load the tokenizer and models
-tokenizer = RobertaTokenizer.from_pretrained("mental/mental-roberta-base")
-sentiment_model = RobertaForSequenceClassification.from_pretrained("mental/mental-roberta-base")
-emotion_model = RobertaForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
-
-# Define the labels
-sentiment_labels = ["negative", "positive"]
-emotion_labels = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"]
-
-def analyze_text(text):
-    try:
-        # Tokenize the input text
-        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
-
-        # Get sentiment model outputs
-        sentiment_outputs = sentiment_model(**inputs)
-        sentiment_logits = sentiment_outputs.logits
-        sentiment_probs = torch.nn.functional.softmax(sentiment_logits, dim=-1)
-
-        # Debugging: Print logits and probs shapes
-        print("Sentiment logits shape:", sentiment_logits.shape)
-        print("Sentiment logits:", sentiment_logits)
-        print("Sentiment probs shape:", sentiment_probs.shape)
-        print("Sentiment probs:", sentiment_probs)
-
-        # Get the highest probability and corresponding label for sentiment
-        max_sentiment_prob, max_sentiment_index = torch.max(sentiment_probs, dim=1)
-        sentiment = sentiment_labels[max_sentiment_index.item()]
-
-        # Get emotion model outputs
-        emotion_outputs = emotion_model(**inputs)
-        emotion_logits = emotion_outputs.logits
-        emotion_probs = torch.nn.functional.softmax(emotion_logits, dim=-1)
-
-        # Debugging: Print logits and probs shapes
-        print("Emotion logits shape:", emotion_logits.shape)
-        print("Emotion logits:", emotion_logits)
-        print("Emotion probs shape:", emotion_probs.shape)
-        print("Emotion probs:", emotion_probs)
-
-        # Get the highest probability and corresponding label for emotion
-        max_emotion_prob, max_emotion_index = torch.max(emotion_probs, dim=1)
-        emotion = emotion_labels[max_emotion_index.item()]
-
-        return sentiment, f"{max_sentiment_prob.item():.4f}", emotion, f"{max_emotion_prob.item():.4f}"
-    except Exception as e:
-        print("Error:", str(e))
-        return "Error", "N/A", "Error", "N/A"
-
-# Define the Gradio interface
-interface = gr.Interface(
-    fn=analyze_text,
-    inputs=gr.Textbox(
-        lines=5,
-        placeholder="Enter text here...",
-        value="I don’t know a lot but what I do know is, we don’t start off very big and we all try to make each other smaller."
-    ),
-    outputs=[
-        gr.Textbox(label="Detected Sentiment"),
-        gr.Textbox(label="Sentiment Confidence Score"),
-        gr.Textbox(label="Detected Emotion"),
-        gr.Textbox(label="Emotion Confidence Score")
-    ],
-    title="Sentiment and Emotion Analysis: Detecting Positive/Negative Sentiment and Specific Emotions",
-    description="Enter a piece of text to detect overall sentiment (positive or negative) and specific emotions (anger, disgust, fear, joy, neutral, sadness, surprise)."
-)
-
-# Launch the interface
-interface.launch()

notebooks/pytorch-roberta-onnx.ipynb
DELETED
@@ -1,280 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Pytorch RoBERTa to ONNX"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "This notebook documents how to export the PyTorch NLP model into ONNX format and then use it to make predictions using the ONNX runtime.\n",
-    "\n",
-    "The model uses the `simpletransformers` library which is a Python wrappers around the `transformers` library which contains PyTorch NLP transformer architectures and weights."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import torch\n",
-    "import numpy as np\n",
-    "from simpletransformers.model import TransformerModel\n",
-    "from transformers import RobertaForSequenceClassification, RobertaTokenizer\n",
-    "import onnx\n",
-    "import onnxruntime"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Step 1: Load pretrained PyTorch model"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Download the model weights from https://storage.googleapis.com/seldon-models/pytorch/moviesentiment_roberta/pytorch_model.bin"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model = TransformerModel('roberta', 'roberta-base', args=({'fp16': False}))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<All keys matched successfully>"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.model.load_state_dict(torch.load('pytorch_model.bin'))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Step 2: Export as ONNX"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "PyTorch supports exporting to ONNX, you just need to specify a valid input tensor for the model."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tokenizer = RobertaTokenizer.from_pretrained('roberta-base')\n",
-    "input_ids = torch.tensor(tokenizer.encode(\"This film is so bad\", add_special_tokens=True)).unsqueeze(0)  # Batch size 1"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([[   0,  713,  822,   16,   98, 1099,    2]])"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "input_ids"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Export as ONNX, we specify dynamic axes for batch dimension and sequence length as sentences come in various lengths."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/janis/.conda/envs/py37/lib/python3.7/site-packages/transformers/modeling_roberta.py:172: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
-      "  if input_ids[:, 0].sum().item() != 0:\n"
-     ]
-    }
-   ],
-   "source": [
-    "torch.onnx.export(model.model,\n",
-    "                  (input_ids),\n",
-    "                  \"roberta.onnx\",\n",
-    "                  input_names=['input'],\n",
-    "                  output_names=['output'],\n",
-    "                  dynamic_axes={'input' :{0 : 'batch_size',\n",
-    "                                          1: 'sentence_length'},\n",
-    "                                'output': {0: 'batch_size'}})"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Step 3: Test predictions are the same using ONNX runtime"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "onnx_model = onnx.load(\"roberta.onnx\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# checks the exported model, may crash ipython kernel if run together with the PyTorch model in memory\n",
-    "# onnx.checker.check_model(onnx_model)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import onnxruntime\n",
-    "\n",
-    "ort_session = onnxruntime.InferenceSession(\"roberta.onnx\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def to_numpy(tensor):\n",
-    "    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "input_ids = torch.tensor(tokenizer.encode(\"This film is so bad\", add_special_tokens=True)).unsqueeze(0)  # Batch size 1"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# compute ONNX Runtime output prediction\n",
-    "ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(input_ids)}\n",
-    "ort_out = ort_session.run(None, ort_inputs)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "out = model.model(input_ids)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "((tensor([[ 2.3067, -2.6440]], grad_fn=<AddmmBackward>),),\n",
-       " [array([[ 2.3066945, -2.6439788]], dtype=float32)])"
-      ]
-     },
-     "execution_count": 14,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "out, ort_out"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.testing.assert_allclose(to_numpy(out[0]), ort_out[0], rtol=1e-03, atol=1e-05)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

onxxchatbot.py DELETED
@@ -1,40 +0,0 @@
-import gradio as gr
-from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
-
-# Load pre-trained model and tokenizer
-model_name = "roberta-base"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSequenceClassification.from_pretrained(model_name)
-
-# Create a text classification pipeline
-classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
-
-# Define response generation function
-def generate_response(input_text):
-    # Classify the input text
-    result = classifier(input_text)[0]
-    label = result['label']
-    score = result['score']
-
-    # Map the classification result to a response
-    responses = {
-        "LABEL_0": "I understand you might be going through a difficult time. Remember, it's okay to seek help when you need it.",
-        "LABEL_1": "Your feelings are valid. Have you considered talking to a mental health professional about this?",
-        "LABEL_2": "Taking care of your mental health is crucial. Small steps like regular exercise and good sleep can make a big difference.",
-        "LABEL_3": "It sounds like you're dealing with a lot. Remember, you're not alone in this journey.",
-        "LABEL_4": "I hear you. Coping with mental health challenges can be tough. Have you tried any relaxation techniques like deep breathing or meditation?"
-    }
-
-    return responses.get(label, "I'm here to listen and support you. Could you tell me more about how you're feeling?")
-
-# Define chatbot function for Gradio
-def chatbot(message, history):
-    response = generate_response(message)
-    return response
-
-# Create Gradio interface
-iface = gr.ChatInterface(
-    fn=chatbot,
-    title="Mental Health Support Chatbot (RoBERTa)",
-    description="This chatbot uses a pre-trained RoBERTa model for mental health conversations. Remember, this is not a substitute for professional help. If you're in crisis, please seek immediate professional assistance."
-)
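One note on the deleted script above: the `responses` dict keys must match the `id2label` names of the loaded checkpoint, and the stock `roberta-base` configuration only defines two generic labels, so most of those keys would never fire. A minimal sketch for checking this (the two-label default is an assumption about the stock config, not something stated in the commit):

```python
# Minimal sketch: inspect the checkpoint's label names before mapping them to responses.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("roberta-base")
print(config.id2label)  # e.g. {0: 'LABEL_0', 1: 'LABEL_1'} for the un-fine-tuned checkpoint

# iface is the gr.ChatInterface defined in the deleted script above;
# a Space would normally finish with: iface.launch()
```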
tabs/FACS_analysis.py CHANGED
@@ -4,18 +4,19 @@ import numpy as np
 import matplotlib.pyplot as plt
 from app.app_utils import preprocess_frame_and_predict_aus
 
-# Define the AUs associated with stress, anxiety, and
+# Define the AUs associated with stress, anxiety, and happiness
 STRESS_AUS = [4, 7, 17, 23, 24]
 ANXIETY_AUS = [1, 2, 4, 5, 20]
-
+HAPPINESS_AUS = [6, 12]
 
 AU_DESCRIPTIONS = {
     1: "Inner Brow Raiser",
     2: "Outer Brow Raiser",
     4: "Brow Lowerer",
     5: "Upper Lid Raiser",
+    6: "Cheek Raiser",
     7: "Lid Tightener",
-
+    12: "Lip Corner Puller",
     17: "Chin Raiser",
     20: "Lip Stretcher",
     23: "Lip Tightener",
@@ -52,13 +53,13 @@ def process_video_for_facs(video_path):
     # Calculate and normalize emotional state scores
     stress_score = normalize_score(np.mean([avg_au_intensities[au-1] for au in STRESS_AUS if au <= len(avg_au_intensities)]))
     anxiety_score = normalize_score(np.mean([avg_au_intensities[au-1] for au in ANXIETY_AUS if au <= len(avg_au_intensities)]))
-
+    happiness_score = normalize_score(np.mean([avg_au_intensities[au-1] for au in HAPPINESS_AUS if au <= len(avg_au_intensities)]))
 
     fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
 
     # Emotional state scores
-    states = ['Stress', 'Anxiety', '
-    scores = [stress_score, anxiety_score,
+    states = ['Stress', 'Anxiety', 'Happiness']
+    scores = [stress_score, anxiety_score, happiness_score]
     bars = ax1.bar(states, scores)
     ax1.set_ylim(0, 1)
     ax1.set_title('Emotional State Scores')
@@ -68,7 +69,7 @@ def process_video_for_facs(video_path):
                  f'{height:.2f}', ha='center', va='bottom')
 
     # AU intensities
-    all_aus = sorted(set(STRESS_AUS + ANXIETY_AUS +
+    all_aus = sorted(set(STRESS_AUS + ANXIETY_AUS + HAPPINESS_AUS))
     all_aus = [au for au in all_aus if au <= len(avg_au_intensities)]
     au_labels = [f"AU{au}\n{AU_DESCRIPTIONS.get(au, '')}" for au in all_aus]
     au_values = [avg_au_intensities[au-1] for au in all_aus]
@@ -89,7 +90,7 @@ def create_facs_analysis_tab():
         gr.Examples(["./assets/videos/fitness.mp4"], inputs=[input_video])
         with gr.Column(scale=2):
             output_image = gr.Image(label="Processed Frame")
-            facs_chart = gr.Plot(label="FACS Analysis for
+            facs_chart = gr.Plot(label="FACS Analysis for Stress, Anxiety, and Happiness")
 
     # Automatically trigger the analysis when a video is uploaded
     input_video.change(
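To make the new happiness score concrete, here is a small worked example of the computation added in this diff. It assumes `normalize_score` simply clips the mean AU intensity into [0, 1]; the real definition lives outside the hunks shown above, so treat that helper as hypothetical.

```python
# Hypothetical worked example of the AU-based happiness score added in this diff.
# normalize_score is assumed to clip to [0, 1]; its real definition is outside this hunk.
import numpy as np

HAPPINESS_AUS = [6, 12]              # Cheek Raiser, Lip Corner Puller
avg_au_intensities = np.zeros(24)
avg_au_intensities[5] = 0.8          # AU6 intensity (stored at index au - 1)
avg_au_intensities[11] = 0.6         # AU12 intensity

def normalize_score(x):
    return float(np.clip(x, 0, 1))

happiness_score = normalize_score(
    np.mean([avg_au_intensities[au - 1] for au in HAPPINESS_AUS
             if au <= len(avg_au_intensities)])
)
print(happiness_score)  # 0.7
```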
tabs/__emotion_analysis.py DELETED
@@ -1,36 +0,0 @@
-import os
-import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-import gradio as gr
-
-os.environ["TOKENIZERS_PARALLELISM"] = "true"
-
-emotion_tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
-emotion_model = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
-emotion_labels = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"]
-
-def analyze_emotion(text):
-    try:
-        inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
-        outputs = emotion_model(**inputs)
-        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
-        max_prob, max_index = torch.max(probs, dim=1)
-        return emotion_labels[max_index.item()], f"{max_prob.item():.4f}"
-    except Exception as e:
-        print(f"Error in emotion analysis: {e}")
-        return "Error", "N/A"
-
-def create_emotion_tab():
-    with gr.Row():
-        with gr.Column(scale=2):
-            input_text = gr.Textbox(value='I actually speak to the expets myself to give you the best value you can get', lines=5, placeholder="Enter text here...", label="Input Text")
-            with gr.Row():
-                clear_btn = gr.Button("Clear", scale=1)
-                submit_btn = gr.Button("Analyze", scale=1, elem_classes="submit")
-        with gr.Column(scale=1):
-            output_emotion = gr.Textbox(label="Detected Emotion")
-            output_confidence = gr.Textbox(label="Emotion Confidence Score")
-
-    submit_btn.click(analyze_emotion, inputs=[input_text], outputs=[output_emotion, output_confidence])
-    clear_btn.click(lambda: ("", "", ""), outputs=[input_text, output_emotion, output_confidence])
-    gr.Examples(["I am so happy today!", "I feel terrible and sad.", "This is a neutral statement."], inputs=[input_text])
tabs/__pycache__/FACS_analysis.cpython-310.pyc CHANGED
Binary files a/tabs/__pycache__/FACS_analysis.cpython-310.pyc and b/tabs/__pycache__/FACS_analysis.cpython-310.pyc differ

tabs/__pycache__/deception_detection.cpython-310.pyc ADDED
Binary file (17.9 kB)

tabs/__pycache__/heart_rate_variability.cpython-310.pyc CHANGED
Binary files a/tabs/__pycache__/heart_rate_variability.cpython-310.pyc and b/tabs/__pycache__/heart_rate_variability.cpython-310.pyc differ

tabs/__pycache__/speech_stress_analysis.cpython-310.pyc CHANGED
Binary files a/tabs/__pycache__/speech_stress_analysis.cpython-310.pyc and b/tabs/__pycache__/speech_stress_analysis.cpython-310.pyc differ

tabs/__pycache__/speech_stress_analysis.cpython-312.pyc ADDED
Binary file (10.5 kB)
tabs/__sentiment_analysis.py DELETED
@@ -1,36 +0,0 @@
-import os
-import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-import gradio as gr
-
-os.environ["TOKENIZERS_PARALLELISM"] = "true"
-
-sentiment_tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
-sentiment_model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
-sentiment_labels = ["very negative", "negative", "neutral", "positive", "very positive"]
-
-def analyze_sentiment(text):
-    try:
-        inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
-        outputs = sentiment_model(**inputs)
-        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
-        max_prob, max_index = torch.max(probs, dim=1)
-        return sentiment_labels[max_index.item()], f"{max_prob.item():.4f}"
-    except Exception as e:
-        print(f"Error in sentiment analysis: {e}")
-        return "Error", "N/A"
-
-def create_sentiment_tab():
-    with gr.Row():
-        with gr.Column(scale=2):
-            input_text = gr.Textbox(value="I actually speak to the expets myself to give you the best value you can get", lines=5, placeholder="Enter text here...", label="Input Text")
-            with gr.Row():
-                clear_btn = gr.Button("Clear", scale=1)
-                submit_btn = gr.Button("Analyze", scale=1, elem_classes="submit")
-        with gr.Column(scale=1):
-            output_sentiment = gr.Textbox(label="Detected Sentiment")
-            output_confidence = gr.Textbox(label="Sentiment Confidence Score")
-
-    submit_btn.click(analyze_sentiment, inputs=[input_text], outputs=[output_sentiment, output_confidence], queue=True)
-    clear_btn.click(lambda: ("", "", ""), outputs=[input_text, output_sentiment, output_confidence], queue=True)
-    gr.Examples(["I am so happy today!", "I feel terrible and sad.", "This is a neutral statement."], inputs=[input_text])
tabs/deception_detection.py ADDED
@@ -0,0 +1,601 @@
+# tabs/deception_detection.py
+
+import gradio as gr
+import cv2
+import numpy as np
+import matplotlib.pyplot as plt
+from scipy.signal import butter, filtfilt, find_peaks
+from typing import Tuple, Optional, Dict
+import logging
+from dataclasses import dataclass
+from enum import Enum
+import librosa
+import moviepy.editor as mp
+import os
+import tempfile
+import torch
+import torch.nn as nn
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
+import mediapipe as mp_mediapipe
+import re
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Define Enums and DataClasses
+class DeceptionLevel(Enum):
+    LOW = 'Low'
+    MODERATE = 'Moderate'
+    HIGH = 'High'
+
+@dataclass
+class Metric:
+    name: str
+    threshold: float
+    value: float = 0.0
+    detected: bool = False
+
+    def analyze(self, new_value: float):
+        self.value = new_value
+        self.detected = self.value > self.threshold
+
+class SignalProcessor:
+    def __init__(self, fs: float):
+        self.fs = fs  # Sampling frequency
+
+    def bandpass_filter(self, data: np.ndarray, lowcut: float = 0.75, highcut: float = 3.0) -> np.ndarray:
+        """Apply bandpass filter to signal."""
+        nyq = 0.5 * self.fs
+        low = lowcut / nyq
+        high = highcut / nyq
+        b, a = butter(2, [low, high], btype='band')
+        filtered = filtfilt(b, a, data)
+        logger.debug("Applied bandpass filter.")
+        return filtered
+
+    def find_peaks_in_signal(self, signal: np.ndarray) -> np.ndarray:
+        """Find peaks in the signal."""
+        min_distance = int(60 / 180 * self.fs)  # At least 60 BPM (180 BPM max)
+        peaks, _ = find_peaks(signal, distance=min_distance)
+        logger.debug(f"Detected {len(peaks)} peaks in the signal.")
+        return peaks
+
+class DeceptionAnalyzer:
+    def __init__(self):
+        self.metrics = {
+            "HRV Suppression": Metric("HRV Suppression", threshold=30.0),
+            "Heart Rate Elevation": Metric("Heart Rate Elevation", threshold=100.0),
+            "Rhythm Irregularity": Metric("Rhythm Irregularity", threshold=0.1),
+            "Blink Rate": Metric("Blink Rate", threshold=25.0),
+            "Head Movements": Metric("Head Movements", threshold=10.0),
+            "Speech Stress": Metric("Speech Stress", threshold=0.5),
+            "Speech Pitch Variation": Metric("Speech Pitch Variation", threshold=50.0),
+            "Pauses and Hesitations": Metric("Pauses and Hesitations", threshold=2.0),
+            "Filler Words": Metric("Filler Words", threshold=5.0),
+        }
+
+    def analyze_signals(self, heart_rate: np.ndarray, rr_intervals: np.ndarray, hrv_rmssd: float,
+                        speech_features: Dict[str, float], facial_features: Dict[str, float]) -> Tuple[Dict[str, Dict], float, DeceptionLevel]:
+        """
+        Analyze the extracted signals and compute deception probability.
+        """
+        # Analyze HRV Suppression
+        self.metrics["HRV Suppression"].analyze(hrv_rmssd)
+
+        # Analyze Heart Rate Elevation
+        avg_heart_rate = np.mean(heart_rate)
+        self.metrics["Heart Rate Elevation"].analyze(avg_heart_rate)
+
+        # Analyze Rhythm Irregularity
+        rhythm_irregularity = np.std(rr_intervals) / np.mean(rr_intervals)
+        self.metrics["Rhythm Irregularity"].analyze(rhythm_irregularity)
+
+        # Analyze Speech Features
+        for key in ["Speech Stress", "Speech Pitch Variation", "Pauses and Hesitations", "Filler Words"]:
+            if key in speech_features:
+                self.metrics[key].analyze(speech_features[key])
+
+        # Analyze Facial Features
+        # Placeholder values; in actual implementation, replace with real values
+        self.metrics["Blink Rate"].analyze(facial_features.get("Blink Rate", 0))
+        self.metrics["Head Movements"].analyze(facial_features.get("Head Movements", 0))
+
+        # Calculate deception probability
+        detected_indicators = sum(1 for m in self.metrics.values() if m.detected)
+        total_indicators = len(self.metrics)
+        probability = (detected_indicators / total_indicators) * 100
+
+        # Determine deception level
+        if probability < 30:
+            level = DeceptionLevel.LOW
+        elif probability < 70:
+            level = DeceptionLevel.MODERATE
+        else:
+            level = DeceptionLevel.HIGH
+
+        # Prepare metrics for visualization
+        metrics_data = {name: {
+            "value": m.value,
+            "threshold": m.threshold,
+            "detected": m.detected
+        } for name, m in self.metrics.items()}
+
+        return metrics_data, probability, level
+
+def load_transcription_model(model_name: str) -> Optional[torch.nn.Module]:
+    """
+    Load the speech-to-text transcription model.
+    """
+    try:
+        model = Wav2Vec2ForCTC.from_pretrained(
+            model_name,
+            ignore_mismatched_sizes=True
+        )
+        model.eval()
+        logger.info("Transcription model loaded successfully.")
+        return model
+    except Exception as e:
+        logger.error(f"Error loading transcription model: {e}")
+        return None
+
+def load_models() -> Dict[str, torch.nn.Module]:
+    """
+    Load all necessary models for the deception detection system.
+    """
+    models_dict = {}
+    try:
+        # Load Transcription Model
+        transcription_model_name = 'facebook/wav2vec2-base-960h'
+        transcription_model = load_transcription_model(transcription_model_name)
+        if transcription_model:
+            models_dict['transcription_model'] = transcription_model
+
+    except Exception as e:
+        logger.error(f"Error loading models: {e}")
+
+    return models_dict
+
+def transcribe_audio(audio_path: str, transcription_model: nn.Module) -> str:
+    """
+    Transcribe audio to text using Wav2Vec2 model.
+    """
+    try:
+        tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
+        y, sr = librosa.load(audio_path, sr=16000)
+        input_values = tokenizer(y, return_tensors="pt", padding="longest").input_values
+
+        with torch.no_grad():
+            logits = transcription_model(input_values).logits
+
+        predicted_ids = torch.argmax(logits, dim=-1)
+        transcription = tokenizer.decode(predicted_ids[0])
+
+        # Clean transcription
+        transcription = transcription.lower()
+        transcription = re.sub(r'[^a-z\s]', '', transcription)
+
+        return transcription
+    except Exception as e:
+        logger.error(f"Error transcribing audio: {str(e)}")
+        return ""
+
+def detect_silence(y: np.ndarray, sr: int, top_db: int = 30) -> float:
+    """
+    Detect total duration of silence in the audio.
+    """
+    try:
+        intervals = librosa.effects.split(y, top_db=top_db)
+        silence_duration = 0.0
+        prev_end = 0
+        for start, end in intervals:
+            silence = (start - prev_end) / sr
+            silence_duration += silence
+            prev_end = end
+        # Add silence after the last interval
+        silence_duration += (len(y) - prev_end) / sr
+        return silence_duration
+    except Exception as e:
+        logger.error(f"Error detecting silence: {str(e)}")
+        return 0.0
+
+def count_filler_words(transcription: str) -> int:
+    """
+    Count the number of filler words in the transcription.
+    """
+    filler_words_list = ['um', 'uh', 'er', 'ah', 'like', 'you know', 'so']
+    return sum(transcription.split().count(word) for word in filler_words_list)
+
+def analyze_speech(audio_path: str, transcription_model: nn.Module) -> Dict[str, float]:
+    """
+    Analyze speech from the audio file and extract features.
+    """
+    if not audio_path:
+        logger.warning("No audio path provided.")
+        return {}
+
+    try:
+        # Load audio file
+        y, sr = librosa.load(audio_path, sr=16000)  # Ensure consistent sampling rate
+        logger.info(f"Loaded audio file with sampling rate: {sr} Hz")
+
+        # Extract prosodic features
+        pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
+        pitch_values = pitches[magnitudes > np.median(magnitudes)]
+        avg_pitch = np.mean(pitch_values) if len(pitch_values) > 0 else 0.0
+        pitch_variation = np.std(pitch_values) if len(pitch_values) > 0 else 0.0
+
+        # Calculate speech stress based on pitch variation
+        speech_stress = pitch_variation / (avg_pitch if avg_pitch != 0 else 1)
+
+        # Calculate speech rate (words per minute)
+        transcription = transcribe_audio(audio_path, transcription_model)
+        words = transcription.split()
+        duration_minutes = librosa.get_duration(y=y, sr=sr) / 60
+        speech_rate = len(words) / duration_minutes if duration_minutes > 0 else 0.0
+
+        # Detect pauses and hesitations
+        silence_duration = detect_silence(y, sr)
+        filler_words = count_filler_words(transcription)
+
+        logger.info(f"Speech Analysis - Avg Pitch: {avg_pitch:.2f} Hz, Pitch Variation: {pitch_variation:.2f} Hz")
+        logger.info(f"Speech Stress Level: {speech_stress:.2f}")
+        logger.info(f"Speech Rate: {speech_rate:.2f} WPM")
+        logger.info(f"Silence Duration: {silence_duration:.2f} seconds")
+        logger.info(f"Filler Words Count: {filler_words}")
+
+        # Return extracted features
+        return {
+            "Speech Stress": speech_stress,
+            "Speech Pitch Variation": pitch_variation,
+            "Pauses and Hesitations": silence_duration,
+            "Filler Words": filler_words
+        }
+
+    except Exception as e:
+        logger.error(f"Error analyzing speech: {str(e)}")
+        return {}
+
+def extract_audio_from_video(video_path: str) -> Optional[str]:
+    """
+    Extract audio from the video file and save it as a temporary WAV file.
+    """
+    if not video_path:
+        logger.warning("No video path provided for audio extraction.")
+        return None
+
+    try:
+        video_clip = mp.VideoFileClip(video_path)
+        if video_clip.audio is None:
+            logger.warning("No audio track found in the video.")
+            video_clip.close()
+            return None
+
+        temp_audio_fd, temp_audio_path = tempfile.mkstemp(suffix=".wav")
+        os.close(temp_audio_fd)  # Close the file descriptor
+
+        video_clip.audio.write_audiofile(temp_audio_path, logger=None)
+        video_clip.close()
+
+        logger.info(f"Extracted audio to temporary file: {temp_audio_path}")
+        return temp_audio_path
+
+    except Exception as e:
+        logger.error(f"Error extracting audio from video: {str(e)}")
+        return None
+
+def detect_blink(face_landmarks, frame: np.ndarray) -> float:
+    """
+    Detect blink rate from facial landmarks.
+    Placeholder implementation.
+    """
+    # Implement Eye Aspect Ratio (EAR) or other blink detection methods
+    return np.random.uniform(10, 20)  # Example blink rate
+
+def estimate_head_movement(face_landmarks) -> float:
+    """
+    Estimate head movements based on facial landmarks.
+    Placeholder implementation.
+    """
+    # Implement head pose estimation to detect nods/shakes
+    return np.random.uniform(5, 15)  # Example head movements
+
+def create_visualization(metrics: Dict, probability: float, heart_rate: np.ndarray,
+                         duration: float, level: DeceptionLevel, speech_features: Dict[str, float]) -> plt.Figure:
+    """
+    Create visualization of analysis results.
+    """
+    # Set figure style parameters
+    plt.style.use('default')
+    plt.rcParams.update({
+        'figure.facecolor': 'white',
+        'axes.facecolor': 'white',
+        'grid.color': '#E0E0E0',
+        'grid.linestyle': '-',
+        'grid.alpha': 0.3,
+        'font.size': 10,
+        'axes.labelsize': 10,
+        'axes.titlesize': 12,
+        'figure.titlesize': 14,
+        'font.family': ['DejaVu Sans', 'Arial', 'sans-serif']
+    })
+
+    # Create figure and axes
+    fig = plt.figure(figsize=(12, 20))
+
+    # Create polar plot for deception probability gauge
+    ax1 = fig.add_subplot(4, 1, 1, projection='polar')
+
+    # Create other subplots
+    ax2 = fig.add_subplot(4, 1, 2)
+    ax3 = fig.add_subplot(4, 1, 3)
+    ax4 = fig.add_subplot(4, 1, 4)
+
+    # Plot 1: Deception Probability Gauge
+    # Create gauge plot
+    theta = np.linspace(0, np.pi, 100)
+    radius = np.ones(100)
+    ax1.plot(theta, radius, color='#E0E0E0', linewidth=30, alpha=0.3)
+    current_angle = (probability / 100) * np.pi
+    ax1.plot([0, current_angle], [0, 0.7], color='red', linewidth=5)
+    ax1.set_xticks([])
+    ax1.set_yticks([])
+    ax1.set_title(f'Deception Probability: {probability:.1f}% ({level.value})', pad=20, color='#333333')
+    ax1.set_theta_zero_location('N')
+    ax1.set_facecolor('white')
+    ax1.grid(False)
+    ax1.spines['polar'].set_visible(False)
+
+    # Plot 2: Metrics Bar Chart
+    names = list(metrics.keys())
+    values = [m["value"] for m in metrics.values()]
+    thresholds = [m["threshold"] for m in metrics.values()]
+    detected = [m["detected"] for m in metrics.values()]
+    x = np.arange(len(names))
+    width = 0.35
+    bar_colors = ['#FF6B6B' if d else '#4BB543' for d in detected]
+    ax2.bar(x - width/2, values, width, label='Current', color=bar_colors)
+    ax2.bar(x + width/2, thresholds, width, label='Threshold', color='#E0E0E0', alpha=0.7)
+    ax2.set_ylabel('Value')
+    ax2.set_title('Physiological, Facial, and Speech Indicators', pad=20)
+    ax2.set_xticks(x)
+    ax2.set_xticklabels(names, rotation=45, ha='right')
+    ax2.grid(True, axis='y', alpha=0.3)
+    ax2.legend(loc='upper right', framealpha=0.9)
+
+    # Plot 3: Heart Rate Over Time
+    time_axis = np.linspace(0, duration, len(heart_rate))
+    ax3.plot(time_axis, heart_rate, color='#3498db')
+    ax3.set_xlabel('Time (s)')
+    ax3.set_ylabel('Heart Rate (BPM)')
+    ax3.set_title('Heart Rate Over Time', pad=20)
+    ax3.grid(True, alpha=0.3)
+
+    # Plot 4: Speech Features
+    pauses = speech_features.get("Pauses and Hesitations", 0)
+    filler_words = speech_features.get("Filler Words", 0)
+    labels = ['Pauses (s)', 'Filler Words (count)']
+    values = [pauses, filler_words]
+    colors = ['#FFC300', '#FF5733']
+    ax4.bar(labels, values, color=colors)
+    ax4.set_ylabel('Count / Duration')
+    ax4.set_title('Pauses and Hesitations in Speech', pad=20)
+    ax4.grid(True, axis='y', alpha=0.3)
+
+    plt.tight_layout()
+    return fig
+
+def process_video_and_audio(video_path: str, models: Dict[str, torch.nn.Module]) -> Tuple[Optional[np.ndarray], Optional[plt.Figure]]:
+    """
+    Process video and audio, perform deception analysis.
+    """
+    logger.info("Starting video and audio processing.")
+    if not video_path:
+        logger.warning("No video path provided.")
+        return None, None
+
+    try:
+        # Extract audio from video
+        audio_path = extract_audio_from_video(video_path)
+        if not audio_path:
+            logger.warning("No audio available for speech analysis.")
+
+        # Initialize video capture
+        cap = cv2.VideoCapture(video_path)
+        if not cap.isOpened():
+            logger.error("Failed to open video file.")
+            return None, None
+
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        if fps <= 0 or fps != fps:
+            logger.error("Invalid frame rate detected.")
+            cap.release()
+            return None, None
+        logger.info(f"Video FPS: {fps}")
+
+        # Initialize processors
+        signal_processor = SignalProcessor(fps)
+        analyzer = DeceptionAnalyzer()
+        ppg_signal = []
+        last_frame = None
+
+        # Initialize Mediapipe for real-time facial feature extraction
+        mp_face_mesh = mp_mediapipe.solutions.face_mesh
+        face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1)
+        frame_counter = 0
+
+        # Process video frames
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                break
+
+            frame_counter += 1
+
+            # Extract PPG signal from green channel
+            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            green_channel = frame_rgb[:, :, 1]
+            ppg_signal.append(np.mean(green_channel))
+
+            # Extract facial features
+            results = face_mesh.process(frame_rgb)
+            if results.multi_face_landmarks:
+                face_landmarks = results.multi_face_landmarks[0]
+                # Blink Detection
+                blink = detect_blink(face_landmarks, frame)
+                analyzer.metrics["Blink Rate"].analyze(blink)
+
+                # Head Movement Detection
+                head_movement = estimate_head_movement(face_landmarks)
+                analyzer.metrics["Head Movements"].analyze(head_movement)
+            else:
+                analyzer.metrics["Blink Rate"].analyze(0.0)
+                analyzer.metrics["Head Movements"].analyze(0.0)
+
+            # Store last frame
+            last_frame = cv2.resize(frame_rgb, (320, 240))
+
+            # Optional: Log progress every 100 frames
+            if frame_counter % 100 == 0:
+                logger.info(f"Processed {frame_counter} frames.")
+
+        cap.release()
+        face_mesh.close()
+        logger.info(f"Total frames processed: {frame_counter}")
+
+        if not ppg_signal or last_frame is None:
+            logger.error("No PPG signal extracted or last frame missing.")
+            return last_frame, None
+
+        # Convert PPG signal to numpy array
+        ppg_signal = np.array(ppg_signal)
+        logger.debug("PPG signal extracted.")
+
+        # Apply bandpass filter
+        filtered_signal = signal_processor.bandpass_filter(ppg_signal)
+        logger.debug("Filtered PPG signal.")
+
+        # Find peaks in the filtered signal
+        peaks = signal_processor.find_peaks_in_signal(filtered_signal)
+
+        if len(peaks) < 2:
+            logger.warning("Insufficient peaks detected. Signal quality may be poor.")
+            return last_frame, None  # Return last_frame but no analysis
+
+        # Calculate RR intervals in milliseconds
+        rr_intervals = np.diff(peaks) / fps * 1000  # ms
+        heart_rate = 60 * fps / np.diff(peaks)  # BPM
+
+        if len(rr_intervals) == 0 or len(heart_rate) == 0:
+            logger.error("Failed to calculate RR intervals or heart rate.")
+            return last_frame, None
+
+        # Calculate RMSSD (Root Mean Square of Successive Differences)
+        hrv_rmssd = np.sqrt(np.mean(np.diff(rr_intervals) ** 2))
+        logger.debug(f"Calculated RMSSD: {hrv_rmssd:.2f} ms")
+
+        # Analyze speech
+        if audio_path and 'transcription_model' in models:
+            speech_features = analyze_speech(audio_path, models['transcription_model'])
+        else:
+            speech_features = {}
+
+        # Analyze signals
+        metrics, probability, level = analyzer.analyze_signals(
+            heart_rate, rr_intervals, hrv_rmssd, speech_features,
+            {}
+        )
+
+        # Create visualization
+        duration = len(ppg_signal) / fps  # seconds
+        fig = create_visualization(
+            metrics, probability, heart_rate,
+            duration, level, speech_features
+        )
+
+        # Clean up temporary audio file if it was extracted
+        if audio_path and os.path.exists(audio_path):
+            try:
+                os.remove(audio_path)
+                logger.info(f"Deleted temporary audio file: {audio_path}")
+            except Exception as e:
+                logger.error(f"Error deleting temporary audio file: {str(e)}")
+
+        logger.info("Video and audio processing completed successfully.")
+        return last_frame, fig
+
+    except Exception as e:
+        logger.error(f"Error processing video and audio: {str(e)}")
+        return None, None
+
+def create_deception_detection_tab(models: Dict[str, torch.nn.Module]) -> gr.Blocks:
+    """
+    Create the deception detection interface tab using Gradio.
+    """
+    def analyze(video):
+        try:
+            if video is None:
+                return None, None
+            video_path = video
+            logger.info(f"Received video for analysis: {video_path}")
+
+            if not os.path.exists(video_path):
+                logger.error("Video file does not exist.")
+                return None, None
+
+            last_frame, fig = process_video_and_audio(video_path, models)
+            if fig:
+                return last_frame, fig
+            else:
+                return last_frame, None
+        except Exception as e:
+            logger.error(f"Error in analyze function: {str(e)}")
+            return None, None
+
+    with gr.Blocks() as deception_interface:
+        with gr.Row():
+            with gr.Column(scale=1):
+                input_video = gr.Video(label="Upload Video for Deception Analysis")
+                gr.Markdown("""
+                ### Deception Level Analysis
+
+                This analysis evaluates physiological, facial, and speech indicators
+                that may suggest deceptive behavior.
+
+                **Physiological Indicators:**
+                - ◇ HRV Suppression
+                - ◇ Heart Rate Elevation
+                - ◇ Rhythm Irregularity
+
+                **Facial Indicators:**
+                - ◇ Blink Rate
+                - ◇ Head Movements
+
+                **Speech Indicators:**
+                - ◇ Speech Stress
+                - ◇ Speech Pitch Variation
+                - ◇ Pauses and Hesitations
+                - ◇ Filler Words
+
+                **Interpretation:**
+                - **Low (0-30%):** Minimal indicators
+                - **Moderate (30-70%):** Some indicators
+                - **High (>70%):** Strong indicators
+
+                **Important Note:**
+                This analysis is for research purposes only.
+                Results should not be used as definitive proof
+                of deception or truthfulness.
+                """)
+            with gr.Column(scale=2):
+                output_frame = gr.Image(label="Last Frame of Video", height=240)
+                analysis_plot = gr.Plot(label="Deception Analysis")
+
+        # Configure automatic analysis upon video upload
+        input_video.change(
+            fn=analyze,
+            inputs=[input_video],
+            outputs=[output_frame, analysis_plot]
+        )
+
+    return deception_interface
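A minimal sketch of how this new tab might be mounted in the Space's Gradio app. Only `load_models()` and `create_deception_detection_tab()` come from the file added above; the tab title and the `TabbedInterface` layout are assumptions for illustration.

```python
# Hypothetical wiring of the new deception tab into a Gradio app.
# Only load_models() and create_deception_detection_tab() are from the added file;
# the tab title and TabbedInterface layout are assumptions.
import gradio as gr
from tabs.deception_detection import load_models, create_deception_detection_tab

models = load_models()  # loads the Wav2Vec2 transcription model once at startup
deception_tab = create_deception_detection_tab(models)

demo = gr.TabbedInterface([deception_tab], ["Deception Detection"])

if __name__ == "__main__":
    demo.launch()
```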
tabs/heart_rate_variability.py ADDED
@@ -0,0 +1,220 @@
+import gradio as gr
+import cv2
+import numpy as np
+import matplotlib.pyplot as plt
+from scipy.signal import butter, filtfilt, find_peaks
+import logging
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+def get_stress_level(rmssd, hr_mean, hr_std):
+    """
+    Calculate stress level based on HRV parameters.
+    Returns both numerical value (0-100) and category.
+    """
+    # RMSSD factor (lower RMSSD = higher stress)
+    rmssd_normalized = max(0, min(100, (150 - rmssd) / 1.5))
+
+    # Heart rate factor (higher HR = higher stress)
+    hr_factor = max(0, min(100, (hr_mean - 60) * 2))
+
+    # Heart rate variability factor (lower variability = higher stress)
+    hr_variability_factor = max(0, min(100, hr_std * 5))
+
+    # Combine factors with weights
+    stress_value = (0.4 * rmssd_normalized +
+                    0.4 * hr_factor +
+                    0.2 * hr_variability_factor)
+
+    # Determine category
+    if stress_value < 30:
+        category = "Low"
+    elif stress_value < 60:
+        category = "Moderate"
+    else:
+        category = "High"
+
+    return stress_value, category
+
+def get_anxiety_level(value):
+    """Get anxiety level category based on value."""
+    if value < 30:
+        return "Low"
+    elif value < 70:
+        return "Moderate"
+    else:
+        return "High"
+
+def calculate_anxiety_index(heart_rate, hrv):
+    """Calculate anxiety index based on heart rate and HRV."""
+    if len(heart_rate) < 2:
+        return 0
+
+    hr_mean = np.mean(heart_rate)
+    hr_std = np.std(heart_rate)
+
+    # Combine factors indicating anxiety
+    hr_factor = min(100, max(0, (hr_mean - 60) / 0.4))
+    variability_factor = min(100, (hr_std / 20) * 100)
+    hrv_factor = min(100, max(0, (100 - hrv) / 1))
+
+    anxiety_index = (hr_factor + variability_factor + hrv_factor) / 3
+    return anxiety_index
+
+def process_video_for_hrv(video_path):
+    """Process video and extract HRV metrics focusing on stress and anxiety."""
+    if not video_path:
+        return None, None
+
+    try:
+        cap = cv2.VideoCapture(video_path)
+        ppg_signal = []
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        last_frame = None
+
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                break
+
+            # Extract green channel for PPG
+            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            green_channel = frame_rgb[:, :, 1]
+            ppg_value = np.mean(green_channel)
+            ppg_signal.append(ppg_value)
+
+            # Store last frame for display
+            last_frame = cv2.resize(frame_rgb, (320, 240))
+
+        cap.release()
+
+        if not ppg_signal or last_frame is None:
+            return None, None
+
+        # Process PPG signal
+        ppg_signal = np.array(ppg_signal)
+        filtered_signal = filtfilt(*butter(2, [0.5, 5], fs=fps, btype='band'), ppg_signal)
+
+        # Find peaks for heart rate calculation
+        peaks, _ = find_peaks(filtered_signal, distance=int(0.5 * fps))
+        if len(peaks) < 2:
+            return None, None
+
+        # Calculate basic metrics
+        rr_intervals = np.diff(peaks) / fps * 1000
+        heart_rate = 60 * fps / np.diff(peaks)
+        hrv_rmssd = np.sqrt(np.mean(np.diff(rr_intervals) ** 2))
+
+        # Calculate stress and anxiety indices
+        hr_mean = np.mean(heart_rate)
+        hr_std = np.std(heart_rate)
+        stress_value, stress_category = get_stress_level(hrv_rmssd, hr_mean, hr_std)
+        anxiety_idx = calculate_anxiety_index(heart_rate, hrv_rmssd)
+
+        # Create visualization
+        fig = plt.figure(figsize=(12, 10))
+
+        # Plot 1: Stress and Anxiety Levels (top)
+        ax1 = plt.subplot(211)
+        metrics = ['Stress Level', 'Anxiety Level']
+        values = [stress_value, anxiety_idx]
+        colors = ['#FF6B6B', '#4D96FF']  # Warm red for stress, cool blue for anxiety
+
+        bars = ax1.bar(metrics, values, color=colors)
+        ax1.set_ylim(0, 100)
+        ax1.set_title('Stress and Anxiety Analysis', pad=20)
+        ax1.set_ylabel('Level (%)')
+
+        # Add value labels and status
+        for bar, val, metric in zip(bars, values, metrics):
+            height = val
+            status = stress_category if metric == 'Stress Level' else get_anxiety_level(val)
+            ax1.text(bar.get_x() + bar.get_width()/2., height + 1,
+                     f'{val:.1f}%\n{status}',
+                     ha='center', va='bottom')
+
+        # Plot 2: Heart Rate and HRV Trends (bottom)
+        ax2 = plt.subplot(212)
+        time = np.linspace(0, len(heart_rate), len(heart_rate))
+        ax2.plot(time, heart_rate, color='#2ECC71', label='Heart Rate', linewidth=2)
+        ax2.set_title('Heart Rate Variation')
+        ax2.set_xlabel('Beat Number')
+        ax2.set_ylabel('Heart Rate (BPM)')
+        ax2.grid(True, alpha=0.3)
+
+        # Add metrics information with color-coded status
+        def get_status_color(category):
+            return {
+                'Low': '#2ECC71',       # Green
+                'Moderate': '#F1C40F',  # Yellow
+                'High': '#E74C3C'       # Red
+            }.get(category, 'black')
+
+        info_text = (
+            f'HRV (RMSSD): {hrv_rmssd:.1f} ms\n'
+            f'Average HR: {hr_mean:.1f} BPM\n'
+            f'Recording: {len(ppg_signal)/fps:.1f} s\n\n'
+            f'Stress Status: {stress_category}\n'
+            f'Anxiety Status: {get_anxiety_level(anxiety_idx)}'
+        )
+
+        # Add metrics box with gradient background
+        bbox_props = dict(
+            boxstyle='round,pad=0.5',
+            facecolor='white',
+            alpha=0.8,
+            edgecolor='gray'
+        )
+
+        ax2.text(0.02, 0.98, info_text,
+                 transform=ax2.transAxes,
+                 verticalalignment='top',
+                 bbox=bbox_props,
+                 fontsize=10)
+
+        plt.tight_layout()
+
+        return last_frame, fig
+
+    except Exception as e:
+        logger.error(f"Error processing video: {str(e)}")
+        return None, None
+
+def create_heart_rate_variability_tab():
+    with gr.Row():
+        with gr.Column(scale=1):
+            input_video = gr.Video()
+            gr.Markdown("""
+            ### Stress and Anxiety Analysis
+
+            **Measurements:**
+            - Stress Level (0-100%)
+            - Anxiety Level (0-100%)
+            - Heart Rate Variability (HRV)
+
+            **Status Levels:**
+            🟢 Low: Normal state
+            🟡 Moderate: Elevated levels
+            🔴 High: Significant elevation
+
+            **For best results:**
+            1. Ensure good lighting
+            2. Minimize movement
+            3. Face the camera directly
+            """)
+            gr.Examples(["./assets/videos/fitness.mp4"], inputs=[input_video])
+
+        with gr.Column(scale=2):
+            output_frame = gr.Image(label="Face Detection", height=240)
+            hrv_plot = gr.Plot(label="Stress and Anxiety Analysis")
+
+    # Automatically trigger analysis on video upload
+    input_video.change(
+        fn=process_video_for_hrv,
+        inputs=[input_video],
+        outputs=[output_frame, hrv_plot]
+    )
+
+    return input_video, output_frame, hrv_plot
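For context on the RMSSD value computed above, a small worked example of the same formula on a handful of RR intervals (the interval values are made up for illustration, not taken from any recording):

```python
# Illustrative RMSSD computation matching the formula used in process_video_for_hrv.
# The RR intervals below are hypothetical example values in milliseconds.
import numpy as np

rr_intervals = np.array([820.0, 850.0, 810.0, 840.0])  # ms
successive_diffs = np.diff(rr_intervals)                # [30, -40, 30]
hrv_rmssd = np.sqrt(np.mean(successive_diffs ** 2))

print(round(hrv_rmssd, 1))  # ~33.7 ms
```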
tabs/speech_stress_analysis.py
CHANGED
|
@@ -2,93 +2,149 @@
|
|
| 2 |
|
| 3 |
import gradio as gr
|
| 4 |
import librosa
|
| 5 |
-
import librosa.display
|
| 6 |
import numpy as np
|
| 7 |
import matplotlib.pyplot as plt
|
| 8 |
import tempfile
|
| 9 |
import warnings
|
| 10 |
|
| 11 |
-
|
| 12 |
-
warnings.filterwarnings("ignore", category=UserWarning, module='transformers')
|
| 13 |
|
| 14 |
def extract_audio_features(audio_file):
|
| 15 |
y, sr = librosa.load(audio_file, sr=None)
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
| 19 |
energy = librosa.feature.rms(y=y)[0]
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
def analyze_voice_stress(audio_file):
|
| 23 |
if not audio_file:
|
| 24 |
-
return "No audio file provided.", None
|
| 25 |
|
| 26 |
try:
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
# Calculate
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
stress_level
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
# Plotting
|
| 74 |
-
fig, axs = plt.subplots(
|
| 75 |
-
|
| 76 |
-
#
|
| 77 |
-
|
| 78 |
-
axs[0].set_title('
|
| 79 |
-
axs[0].set_ylabel('
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
axs[1].
|
| 84 |
-
axs[1].
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
axs[2].
|
| 89 |
-
axs[2]
|
| 90 |
-
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
plt.tight_layout()
|
| 94 |
with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_file:
|
|
@@ -96,31 +152,97 @@ def analyze_voice_stress(audio_file):
|
|
| 96 |
plot_path = temp_file.name
|
| 97 |
plt.close()
|
| 98 |
|
| 99 |
-
#
|
| 100 |
-
|
| 101 |
-
stress_interpretation = "Low"
|
| 102 |
-
elif normalized_stress < 66:
|
| 103 |
-
stress_interpretation = "Medium"
|
| 104 |
-
else:
|
| 105 |
-
stress_interpretation = "High"
|
| 106 |
|
| 107 |
-
return f"{normalized_stress:.2f}% - {stress_interpretation} Stress", plot_path
|
| 108 |
except Exception as e:
|
| 109 |
-
return f"Error: {str(e)}", None
|
| 110 |
-
|
| 111 |
-
def create_voice_stress_tab():
|
| 112 |
-
with gr.Row():
|
| 113 |
-
with gr.Column(scale=2):
|
| 114 |
-
input_audio = gr.Audio(label="Input Audio", type="filepath")
|
| 115 |
-
clear_btn = gr.Button("Clear", scale=1)
|
| 116 |
-
with gr.Column(scale=1):
|
| 117 |
-
output_stress = gr.Label(label="Stress Level")
|
| 118 |
-
output_plot = gr.Image(label="Stress Analysis Plot")
|
| 119 |
-
|
| 120 |
-
# Automatically trigger analysis when an audio file is uploaded
|
| 121 |
-
input_audio.change(analyze_voice_stress, inputs=[input_audio], outputs=[output_stress, output_plot])
|
| 122 |
|
| 123 |
-
clear_btn.click(lambda: (None, None), outputs=[input_audio, output_stress, output_plot])
|
| 124 |
-
|
| 125 |
-
gr.Examples(["./assets/audio/fitness.wav"], inputs=[input_audio])
|
| 126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
import gradio as gr
|
| 4 |
import librosa
|
|
|
|
| 5 |
import numpy as np
|
| 6 |
import matplotlib.pyplot as plt
|
| 7 |
import tempfile
|
| 8 |
import warnings
|
| 9 |
|
| 10 |
+
warnings.filterwarnings("ignore", category=UserWarning, module='librosa')
|
|
|
|
| 11 |
|
| 12 |
def extract_audio_features(audio_file):
|
| 13 |
y, sr = librosa.load(audio_file, sr=None)
|
| 14 |
+
|
| 15 |
+
# Fundamental frequency estimation using librosa.pyin
|
| 16 |
+
f0, voiced_flag, voiced_probs = librosa.pyin(y, fmin=75, fmax=600)
|
| 17 |
+
f0 = f0[~np.isnan(f0)] # Remove unvoiced frames
|
| 18 |
+
|
| 19 |
+
# Energy (intensity)
|
| 20 |
energy = librosa.feature.rms(y=y)[0]
|
| 21 |
+
|
| 22 |
+
# MFCCs (Mel-frequency cepstral coefficients)
|
| 23 |
+
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
|
| 24 |
+
|
| 25 |
+
# Onset envelope for speech rate estimation
|
| 26 |
+
onset_env = librosa.onset.onset_strength(y=y, sr=sr)
|
| 27 |
+
tempo, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
|
| 28 |
+
speech_rate = tempo / 60  # rough speech-rate proxy: onset-beat rate in events per second
|
| 29 |
+
|
| 30 |
+
return f0, energy, speech_rate, mfccs, y, sr
|
| 31 |
|
| 32 |
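For a quick local check, the feature extractor added above can be exercised outside Gradio. A minimal sketch, assuming the repository root is on sys.path (the import path mirrors the file location tabs/speech_stress_analysis.py) and that the bundled example clip ./assets/audio/fitness.wav is present:

import numpy as np
from tabs.speech_stress_analysis import extract_audio_features

# Pull the raw features for the bundled example clip.
f0, energy, speech_rate, mfccs, y, sr = extract_audio_features("./assets/audio/fitness.wav")
print(f"voiced frames: {len(f0)}, mean f0: {np.mean(f0):.1f} Hz")
print(f"mean RMS energy: {np.mean(energy):.4f}")
print(f"speech-rate proxy: {float(speech_rate):.2f} events/s, MFCC shape: {mfccs.shape}, sr: {sr} Hz")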
def analyze_voice_stress(audio_file):
|
| 33 |
if not audio_file:
|
| 34 |
+
return "No audio file provided.", None, None
|
| 35 |
|
| 36 |
try:
|
| 37 |
+
f0, energy, speech_rate, mfccs, y, sr = extract_audio_features(audio_file)
|
| 38 |
+
|
| 39 |
+
# Calculate statistical measures
|
| 40 |
+
mean_f0 = np.mean(f0)
|
| 41 |
+
std_f0 = np.std(f0)
|
| 42 |
+
mean_energy = np.mean(energy)
|
| 43 |
+
std_energy = np.std(energy)
|
| 44 |
+
|
| 45 |
+
# Normative data (example values from medical literature)
|
| 46 |
+
norm_mean_f0_male = 110
|
| 47 |
+
norm_mean_f0_female = 220
|
| 48 |
+
norm_std_f0 = 20
|
| 49 |
+
norm_mean_energy = 0.02
|
| 50 |
+
norm_std_energy = 0.005
|
| 51 |
+
norm_speech_rate = 4.4
|
| 52 |
+
norm_std_speech_rate = 0.5
|
| 53 |
+
|
| 54 |
+
# Heuristic gender estimate from mean pitch (threshold ~165 Hz)
|
| 55 |
+
gender = 'male' if mean_f0 < 165 else 'female'
|
| 56 |
+
norm_mean_f0 = norm_mean_f0_male if gender == 'male' else norm_mean_f0_female
|
| 57 |
+
|
| 58 |
+
# Compute Z-scores
|
| 59 |
+
z_f0 = (mean_f0 - norm_mean_f0) / norm_std_f0
|
| 60 |
+
z_energy = (mean_energy - norm_mean_energy) / norm_std_energy
|
| 61 |
+
z_speech_rate = (speech_rate - norm_speech_rate) / norm_std_speech_rate
|
| 62 |
+
|
| 63 |
+
# Combine Z-scores for stress level
|
| 64 |
+
stress_score = (0.4 * z_f0) + (0.4 * z_speech_rate) + (0.2 * z_energy)
|
| 65 |
+
stress_level = float(1 / (1 + np.exp(-stress_score)) * 100) # Sigmoid function
|
| 66 |
+
|
| 67 |
+
if stress_level < 20:
|
| 68 |
+
stress_category = "Very Low Stress"
|
| 69 |
+
elif stress_level < 40:
|
| 70 |
+
stress_category = "Low Stress"
|
| 71 |
+
elif stress_level < 60:
|
| 72 |
+
stress_category = "Moderate Stress"
|
| 73 |
+
elif stress_level < 80:
|
| 74 |
+
stress_category = "High Stress"
|
| 75 |
+
else:
|
| 76 |
+
stress_category = "Very High Stress"
|
| 77 |
+
|
| 78 |
+
# More verbose interpretations for each stress category
|
| 79 |
+
interpretations = {
|
| 80 |
+
"Very Low Stress": (
|
| 81 |
+
"Your vocal analysis indicates a very relaxed state. "
|
| 82 |
+
"This suggests that you're currently experiencing minimal stress. "
|
| 83 |
+
"Maintaining such low stress levels is beneficial for your health. "
|
| 84 |
+
"Continue engaging in activities that promote relaxation and well-being. "
|
| 85 |
+
"Regular self-care practices can help sustain this positive state."
|
| 86 |
+
),
|
| 87 |
+
"Low Stress": (
|
| 88 |
+
"Minor signs of stress are detected in your voice. "
|
| 89 |
+
"This is common due to everyday challenges and is usually not concerning. "
|
| 90 |
+
"Incorporating relaxation techniques, like deep breathing or meditation, may help. "
|
| 91 |
+
"Regular breaks and leisure activities can also reduce stress. "
|
| 92 |
+
"Staying mindful of stress levels supports overall health."
|
| 93 |
+
),
|
| 94 |
+
"Moderate Stress": (
|
| 95 |
+
"Your voice reflects moderate stress levels. "
|
| 96 |
+
"This could be due to ongoing pressures or challenges you're facing. "
|
| 97 |
+
"Consider practicing stress management strategies such as mindfulness exercises or physical activity. "
|
| 98 |
+
"Identifying stressors and addressing them can be beneficial. "
|
| 99 |
+
"Balancing work and rest is important for your well-being."
|
| 100 |
+
),
|
| 101 |
+
"High Stress": (
|
| 102 |
+
"Elevated stress levels are apparent in your vocal patterns. "
|
| 103 |
+
"It's important to recognize and address these feelings. "
|
| 104 |
+
"Identifying stressors and seeking support from friends, family, or professionals could be helpful. "
|
| 105 |
+
"Engaging in stress reduction techniques is recommended. "
|
| 106 |
+
"Taking proactive steps can improve your mental and physical health."
|
| 107 |
+
),
|
| 108 |
+
"Very High Stress": (
|
| 109 |
+
"Your voice suggests very high stress levels. "
|
| 110 |
+
"This may indicate significant strain or anxiety. "
|
| 111 |
+
"It may be helpful to consult a healthcare professional for support. "
|
| 112 |
+
"Promptly addressing stress is important for your well-being. "
|
| 113 |
+
"Consider reaching out to trusted individuals or resources."
|
| 114 |
+
)
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
final_interpretation = interpretations[stress_category]
|
| 118 |
|
| 119 |
# Plotting
|
| 120 |
+
fig, axs = plt.subplots(5, 1, figsize=(10, 15))
|
| 121 |
+
|
| 122 |
+
# Plot Fundamental Frequency (Pitch)
|
| 123 |
+
axs[0].plot(f0)
|
| 124 |
+
axs[0].set_title('Fundamental Frequency (Pitch)')
|
| 125 |
+
axs[0].set_ylabel('Frequency (Hz)')
|
| 126 |
+
|
| 127 |
+
# Plot Energy (Loudness)
|
| 128 |
+
axs[1].plot(energy)
|
| 129 |
+
axs[1].set_title('Energy (Loudness)')
|
| 130 |
+
axs[1].set_ylabel('Energy')
|
| 131 |
+
|
| 132 |
+
# Plot MFCCs
|
| 133 |
+
img = librosa.display.specshow(mfccs, sr=sr, x_axis='time', ax=axs[2])
|
| 134 |
+
axs[2].set_title('MFCCs (Mel-frequency cepstral coefficients)')
|
| 135 |
+
fig.colorbar(img, ax=axs[2])
|
| 136 |
+
|
| 137 |
+
# Plot Waveform
|
| 138 |
+
librosa.display.waveshow(y, sr=sr, ax=axs[3])
|
| 139 |
+
axs[3].set_title('Waveform')
|
| 140 |
+
axs[3].set_xlabel('Time (s)')
|
| 141 |
+
axs[3].set_ylabel('Amplitude')
|
| 142 |
+
|
| 143 |
+
# Plot Pitch Contour (Histogram of f0)
|
| 144 |
+
axs[4].hist(f0, bins=50, color='blue', alpha=0.7)
|
| 145 |
+
axs[4].set_title('Pitch Contour (Histogram of f0)')
|
| 146 |
+
axs[4].set_xlabel('Frequency (Hz)')
|
| 147 |
+
axs[4].set_ylabel('Count')
|
| 148 |
|
| 149 |
plt.tight_layout()
|
| 150 |
with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_file:
|
|
|
|
| 152 |
plot_path = temp_file.name
|
| 153 |
plt.close()
|
| 154 |
|
| 155 |
+
# Return separate values for Gradio output components
|
| 156 |
+
return f"{stress_level:.2f}% - {stress_category}", final_interpretation, plot_path
|
| 157 |
|
| 158 |
except Exception as e:
|
| 159 |
+
return f"Error: {str(e)}", None, None
|
| 160 |
|
| 161 |
|
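analyze_voice_stress above combines the three z-scores with a 0.4/0.4/0.2 weighting and maps the result onto a 0-100 scale with a sigmoid. A worked illustration with invented feature values, each sitting one standard deviation above the male reference:

import numpy as np

# Hypothetical measurements, one standard deviation above each reference value.
mean_f0, norm_mean_f0, norm_std_f0 = 130.0, 110.0, 20.0
mean_energy, norm_mean_energy, norm_std_energy = 0.025, 0.02, 0.005
speech_rate, norm_speech_rate, norm_std_speech_rate = 4.9, 4.4, 0.5

z_f0 = (mean_f0 - norm_mean_f0) / norm_std_f0                            # 1.0
z_energy = (mean_energy - norm_mean_energy) / norm_std_energy            # 1.0
z_speech_rate = (speech_rate - norm_speech_rate) / norm_std_speech_rate  # 1.0

stress_score = 0.4 * z_f0 + 0.4 * z_speech_rate + 0.2 * z_energy         # 1.0
stress_level = 1 / (1 + np.exp(-stress_score)) * 100                     # ~73.1
print(f"{stress_level:.1f}%  ->  'High Stress' band (60-80)")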
| 162 |
+
def create_voice_stress_tab():
|
| 163 |
+
custom_css = """
|
| 164 |
+
/* General container styling for mobile */
|
| 165 |
+
.gradio-container {
|
| 166 |
+
padding: 10px !important;
|
| 167 |
+
font-size: 16px !important;
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
/* Headings */
|
| 171 |
+
h3 {
|
| 172 |
+
text-align: center;
|
| 173 |
+
font-size: 1.5em !important;
|
| 174 |
+
margin-bottom: 20px !important;
|
| 175 |
+
}
|
| 176 |
+
|
| 177 |
+
/* Full width for audio input and other components */
|
| 178 |
+
.gradio-container .gradio-row, .gradio-container .gradio-column {
|
| 179 |
+
flex-direction: column !important;
|
| 180 |
+
align-items: center !important;
|
| 181 |
+
}
|
| 182 |
+
|
| 183 |
+
/* Make the components scale better on smaller screens */
|
| 184 |
+
#input_audio, #stress_output, #interpretation_output, #plot_output {
|
| 185 |
+
width: 100% !important;
|
| 186 |
+
max-width: 100% !important;
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
#input_audio label, #stress_output label, #interpretation_output label, #plot_output label {
|
| 190 |
+
font-size: 1.2em !important;
|
| 191 |
+
}
|
| 192 |
+
|
| 193 |
+
/* Textbox area adjustment */
|
| 194 |
+
#interpretation_output textarea {
|
| 195 |
+
font-size: 1em !important;
|
| 196 |
+
line-height: 1.4 !important;
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
/* Responsive styling for images */
|
| 200 |
+
#plot_output img {
|
| 201 |
+
width: 100% !important;
|
| 202 |
+
height: auto !important;
|
| 203 |
+
}
|
| 204 |
+
|
| 205 |
+
/* Adjust clear button */
|
| 206 |
+
#clear_btn button {
|
| 207 |
+
font-size: 1em !important;
|
| 208 |
+
padding: 10px 20px !important;
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
/* Responsive adjustments */
|
| 212 |
+
@media only screen and (max-width: 600px) {
|
| 213 |
+
.gradio-container {
|
| 214 |
+
padding: 5px !important;
|
| 215 |
+
font-size: 14px !important;
|
| 216 |
+
}
|
| 217 |
+
h3 {
|
| 218 |
+
font-size: 1.2em !important;
|
| 219 |
+
}
|
| 220 |
+
#clear_btn button {
|
| 221 |
+
font-size: 0.9em !important;
|
| 222 |
+
}
|
| 223 |
+
#interpretation_output textarea {
|
| 224 |
+
font-size: 0.9em !important;
|
| 225 |
+
}
|
| 226 |
+
}
|
| 227 |
+
"""
|
| 228 |
+
|
| 229 |
+
with gr.Blocks(css=custom_css) as voice_stress_tab:
|
| 230 |
+
gr.Markdown("<h3>Speech Stress Analysis</h3>")
|
| 231 |
+
with gr.Column():
|
| 232 |
+
input_audio = gr.Audio(label="Upload your voice recording", type="filepath", elem_id="input_audio")
|
| 233 |
+
stress_output = gr.Label(label="Stress Interpretation", elem_id="stress_output")
|
| 234 |
+
interpretation_output = gr.Textbox(label="Detailed Interpretation", lines=6, elem_id="interpretation_output")
|
| 235 |
+
plot_output = gr.Image(label="Stress Analysis Plot", elem_id="plot_output")
|
| 236 |
+
|
| 237 |
+
input_audio.change(
|
| 238 |
+
analyze_voice_stress,
|
| 239 |
+
inputs=[input_audio],
|
| 240 |
+
outputs=[stress_output, interpretation_output, plot_output]
|
| 241 |
+
)
|
| 242 |
+
|
| 243 |
+
gr.Button("Clear", elem_id="clear_btn").click(
|
| 244 |
+
lambda: (None, None, None, None),
|
| 245 |
+
outputs=[input_audio, stress_output, interpretation_output, plot_output]
|
| 246 |
+
)
|
| 247 |
+
|
| 248 |
+
return voice_stress_tab
|
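How the returned Blocks gets mounted is not part of this diff, so the wiring below is only an assumption: create_voice_stress_tab() returns a gr.Blocks with its own CSS, which can be wrapped in a TabbedInterface for standalone testing. Note also that the plotting code above calls librosa.display.specshow and librosa.display.waveshow; on librosa releases older than 0.10 the submodule has to be imported explicitly (import librosa.display), while newer releases lazy-load it.

import gradio as gr
from tabs.speech_stress_analysis import create_voice_stress_tab

# Wrap the tab in a minimal host app; the real app.py layout may differ.
demo = gr.TabbedInterface([create_voice_stress_tab()], ["Speech Stress Analysis"])

if __name__ == "__main__":
    demo.launch()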
verify.py
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
import torch
|
| 2 |
-
print(torch.backends.mps.is_available()) # Should return True
|
| 3 |
-
print(torch.backends.mps.is_built()) # Should return True
|
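verify.py only printed the MPS availability flags. If an equivalent ad-hoc check is ever needed again, a small sketch with a device fallback (an assumption, not part of this commit) could look like this:

import torch

# Pick the best available backend; mirrors the checks verify.py used to print.
device = (
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)
print(f"Selected device: {device}")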