vitorcalvi committed
Commit 5a7c06e · 1 Parent(s): be18558
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
.gradio/cached_examples/22/log.csv ADDED
@@ -0,0 +1,2 @@
+ HRV Results,PPG Signal Plot,timestamp
+ Video too short. Please provide at least 10 seconds of footage.,,2024-11-11 07:13:23.866354
.gradio/cached_examples/28/log.csv ADDED
@@ -0,0 +1,2 @@
+ Summary,Analysis Plots,Detailed Metrics,Recording Information,timestamp
+ Video too short. Please provide at least 10 seconds of footage.,,,,2024-11-11 07:17:26.515598
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
+ -----BEGIN CERTIFICATE-----
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+ -----END CERTIFICATE-----
LICENSE DELETED
@@ -1,21 +0,0 @@
- MIT License
-
- Copyright (c) 2024 Elena Ryumina
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in all
- copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
 
app.py CHANGED
@@ -4,36 +4,183 @@ import gradio as gr
  from tabs.speech_stress_analysis import create_voice_stress_tab
  from tabs.speech_emotion_recognition import create_emotion_recognition_tab
  from tabs.FACS_analysis import create_facs_analysis_tab

- # Import the UI components
- from ui_components import CUSTOM_CSS, HEADER_HTML, DISCLAIMER_HTML

- # Define the tab structure
  TAB_STRUCTURE = [
  ("Visual Analysis", [
- ("FACS for Stress, Anxiety, Depression", create_facs_analysis_tab),
  ]),
  ("Speech Analysis", [
  ("Speech Stress", create_voice_stress_tab),
- ("Speech Emotion", create_emotion_recognition_tab),
  ])
  ]

- def create_demo():
- with gr.Blocks(css=CUSTOM_CSS) as demo:
- gr.Markdown(HEADER_HTML)
- with gr.Tabs(elem_classes=["main-tab"]):
  for main_tab, sub_tabs in TAB_STRUCTURE:
  with gr.Tab(main_tab):
- with gr.Tabs():
- for sub_tab, create_fn in sub_tabs:
- with gr.Tab(sub_tab):
- create_fn()
  gr.HTML(DISCLAIMER_HTML)
  return demo

- # Create the demo instance
- demo = create_demo()

  if __name__ == "__main__":
- demo.queue(api_open=True).launch(share=False)
 
  from tabs.speech_stress_analysis import create_voice_stress_tab
  from tabs.speech_emotion_recognition import create_emotion_recognition_tab
  from tabs.FACS_analysis import create_facs_analysis_tab
+ from tabs.heart_rate_variability import create_heart_rate_variability_tab
+ from tabs.deception_detection import create_deception_detection_tab, load_models
+ import logging
+ import torch
+ from typing import Dict

+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)

+ # Custom CSS for better styling
+ CUSTOM_CSS = """
+ /* Global styles */
+ .gradio-container {
+ font-family: 'Arial', sans-serif;
+ max-width: 1200px;
+ margin: auto;
+ padding: 20px;
+ background-color: #f8f9fa;
+ }
+
+ /* Header styling */
+ h1 {
+ color: #2c3e50;
+ text-align: center;
+ padding: 20px 0;
+ margin-bottom: 30px;
+ border-bottom: 2px solid #3498db;
+ }
+
+ /* Tab navigation styling */
+ .gradio-tabs-nav {
+ background-color: #ffffff;
+ border-radius: 8px;
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+ margin-bottom: 20px;
+ }
+
+ /* Content areas */
+ .content-area {
+ background: white;
+ padding: 20px;
+ border-radius: 8px;
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+ margin-top: 20px;
+ }
+
+ /* Results area */
+ .results-area {
+ background-color: #ffffff;
+ padding: 20px;
+ border-radius: 8px;
+ margin-top: 20px;
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+ }
+
+ /* Disclaimer styling */
+ .disclaimer {
+ background-color: #f8f9fa;
+ border-left: 4px solid #3498db;
+ padding: 15px;
+ margin-top: 30px;
+ font-size: 0.9em;
+ color: #666;
+ }
+ """
+
+ # HTML content
+ HEADER_HTML = """
+ <div style="text-align: center; padding: 20px;">
+ <h1>AI-Driven Multimodal Emotional State Analysis</h1>
+ <p style="font-size: 1.2em; color: #666;">
+ Comprehensive analysis of stress, emotion, and truthfulness through facial expressions,
+ heart rate variability, and speech patterns.
+ </p>
+ </div>
+ """
+
+ DISCLAIMER_HTML = """
+ <div class="disclaimer">
+ <h3>Important Notice</h3>
+ <p>This application provides AI-driven analysis for:</p>
+ <ul>
+ <li>Stress and emotion detection</li>
+ <li>Heart rate variability analysis</li>
+ <li>Speech pattern analysis</li>
+ <li>Truth/deception indication</li>
+ </ul>
+ <p><strong>Disclaimer:</strong> This tool is for research and informational purposes only.
+ It should not be used as a substitute for professional medical advice, diagnosis, or treatment.
+ The deception detection feature is experimental and should not be used as definitive proof
+ of truthfulness or deception.</p>
+ </div>
+ """
+
+ # Tab structure
  TAB_STRUCTURE = [
  ("Visual Analysis", [
+ ("FACS Analysis", create_facs_analysis_tab),
+ ("Heart Rate Variability", create_heart_rate_variability_tab),
+ ("Truth/Deception Detection", create_deception_detection_tab) # Pass models here
  ]),
  ("Speech Analysis", [
  ("Speech Stress", create_voice_stress_tab),
+ ("Speech Emotion", create_emotion_recognition_tab)
  ])
  ]

+ def create_demo(models: Dict[str, torch.nn.Module]):
+ """Create and configure the Gradio interface."""
+ with gr.Blocks(css=CUSTOM_CSS, title="Multimodal Emotional State Analysis") as demo:
+ # Header
+ gr.HTML(HEADER_HTML)
+
+ # Main content area with Tabs
+ with gr.Tabs():
  for main_tab, sub_tabs in TAB_STRUCTURE:
  with gr.Tab(main_tab):
+ with gr.Column():
+ with gr.Tabs():
+ for sub_tab, create_fn in sub_tabs:
+ with gr.Tab(sub_tab):
+ if main_tab == "Visual Analysis" and sub_tab == "Truth/Deception Detection":
+ # Pass loaded models to the deception detection tab
+ create_fn(models)
+ else:
+ create_fn()
+ # Add help information below sub-tabs
+ if main_tab == "Visual Analysis":
+ gr.Markdown("""
+ ### Visual Analysis Features
+ - **FACS Analysis**: Facial Action Coding System for emotion detection
+ - **Heart Rate Variability**: Stress and wellness indicators
+ - **Truth/Deception Detection**: Physiological response analysis
+
+ **For best results:**
+ 1. Use good lighting
+ 2. Face the camera directly
+ 3. Minimize movement during recording
+ """)
+ elif main_tab == "Speech Analysis":
+ gr.Markdown("""
+ ### Speech Analysis Features
+ - **Speech Stress**: Voice stress analysis
+ - **Speech Emotion**: Emotional content detection
+
+ **For best results:**
+ 1. Use a quiet environment
+ 2. Speak clearly
+ 3. Avoid background noise
+ """)
+
+ # Disclaimer
  gr.HTML(DISCLAIMER_HTML)
+
  return demo

+ def main():
+ """Main function to run the application."""
+ # Load models once and pass them to the deception detection tab
+ models_loaded = load_models()
+ if not models_loaded:
+ logger.error("No models loaded. Exiting application.")
+ return
+
+ # Initialize Gradio interface
+ demo = create_demo(models_loaded)
+
+ # Configure and launch the interface
+ demo.queue() # Enable queuing without specific concurrency count
+ demo.launch(
+ server_name="0.0.0.0",
+ server_port=7860,
+ share=False,
+ debug=True,
+ show_error=True
+ )

  if __name__ == "__main__":
+ main()
app/__pycache__/config.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/config.cpython-310.pyc and b/app/__pycache__/config.cpython-310.pyc differ
 
app/__pycache__/model.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/model.cpython-310.pyc and b/app/__pycache__/model.cpython-310.pyc differ
 
app/__pycache__/model_architectures.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/model_architectures.cpython-310.pyc and b/app/__pycache__/model_architectures.cpython-310.pyc differ
 
app/config.py CHANGED
@@ -1,7 +1,8 @@
  """
  File: config.py
- Author: Elena Ryumina and Dmitry Ryumin
- Description: Configuration file.
  License: MIT License
  """

@@ -9,25 +10,32 @@ import toml
  from typing import Dict
  from types import SimpleNamespace

-
  def flatten_dict(prefix: str, d: Dict) -> Dict:
  result = {}
-
  for k, v in d.items():
  if isinstance(v, dict):
  result.update(flatten_dict(f"{prefix}{k}_", v))
  else:
  result[f"{prefix}{k}"] = v
-
  return result

- config = toml.load("config.toml")
-
- config_data = flatten_dict("", config)

- config_data = SimpleNamespace(**config_data)

  DICT_EMO = {
  0: "Neutral",
  1: "Happiness",
@@ -38,6 +46,7 @@ DICT_EMO = {
  6: "Anger",
  }

  COLORS = {
  0: 'blue',
  1: 'orange',
1
+ # config.py
2
+
3
  """
4
  File: config.py
5
+ Description: Configuration file for the AI-Driven Multimodal Emotional State Analysis application.
 
6
  License: MIT License
7
  """
8
 
 
10
  from typing import Dict
11
  from types import SimpleNamespace
12
 
 
13
  def flatten_dict(prefix: str, d: Dict) -> Dict:
14
+ """
15
+ Recursively flattens a nested dictionary, concatenating keys with underscores.
16
+ """
17
  result = {}
 
18
  for k, v in d.items():
19
  if isinstance(v, dict):
20
  result.update(flatten_dict(f"{prefix}{k}_", v))
21
  else:
22
  result[f"{prefix}{k}"] = v
 
23
  return result
24
 
25
+ # Load configuration from 'config.toml' if it exists
26
+ try:
27
+ config = toml.load("config.toml")
28
+ except FileNotFoundError:
29
+ config = {}
30
+ print("Warning: 'config.toml' not found. Using default configuration.")
31
 
32
+ # Flatten the configuration dictionary
33
+ config_data_dict = flatten_dict("", config)
 
34
 
35
+ # Convert the dictionary to a SimpleNamespace for easy attribute access
36
+ config_data = SimpleNamespace(**config_data_dict)
37
 
38
+ # Define emotion labels
39
  DICT_EMO = {
40
  0: "Neutral",
41
  1: "Happiness",
 
46
  6: "Anger",
47
  }
48
 
49
+ # Define colors for plotting or UI elements
50
  COLORS = {
51
  0: 'blue',
52
  1: 'orange',
app/model.py CHANGED
@@ -1,3 +1,5 @@
  import os
  import torch
  import torch.nn as nn
@@ -23,7 +25,12 @@ def load_model(model_class, model_path, *args, **kwargs):
  model = model_class(*args, **kwargs).to(device)
  if os.path.exists(model_path):
  try:
- model.load_state_dict(torch.load(model_path, map_location=device))
  model.eval()
  logger.info(f"Model loaded successfully from {model_path}")
  except Exception as e:
@@ -40,7 +47,7 @@ pth_model_static = load_model(ResNet50, STATIC_MODEL_PATH, num_classes=7, channe
  pth_model_dynamic = load_model(LSTMPyTorch, DYNAMIC_MODEL_PATH, input_size=2048, hidden_size=256, num_layers=2, num_classes=7)

  # Set up GradCAM
- target_layers = [pth_model_static.resnet.layer4[-1]]
  cam = GradCAM(model=pth_model_static, target_layers=target_layers)

  # Define image preprocessing
@@ -54,25 +61,7 @@ def pth_processing(img):
  img = pth_transform(img).unsqueeze(0).to(device)
  return img

- def predict_emotion(img):
- with torch.no_grad():
- output = pth_model_static(pth_processing(img))
- _, predicted = torch.max(output, 1)
- return predicted.item()
-
- def get_emotion_probabilities(img):
- with torch.no_grad():
- output = nn.functional.softmax(pth_model_static(pth_processing(img)), dim=1)
- return output.squeeze().cpu().numpy()
-
- def generate_cam(img):
- input_tensor = pth_processing(img)
- targets = [ClassifierOutputTarget(predict_emotion(img))]
- grayscale_cam = cam(input_tensor=input_tensor, targets=targets)
- return grayscale_cam[0, :]
-
- # Add any other necessary functions or variables here

  if __name__ == "__main__":
  logger.info("Model initialization complete.")
- # You can add some test code here to verify everything is working correctly
 
+ # model.py
+
  import os
  import torch
  import torch.nn as nn
  model = model_class(*args, **kwargs).to(device)
  if os.path.exists(model_path):
  try:
+ state_dict = torch.load(model_path, map_location=device)
+ missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
+ if missing_keys:
+ logger.warning(f"Missing keys when loading model from {model_path}: {missing_keys}")
+ if unexpected_keys:
+ logger.warning(f"Unexpected keys when loading model from {model_path}: {unexpected_keys}")
  model.eval()
  logger.info(f"Model loaded successfully from {model_path}")
  except Exception as e:
  pth_model_dynamic = load_model(LSTMPyTorch, DYNAMIC_MODEL_PATH, input_size=2048, hidden_size=256, num_layers=2, num_classes=7)

  # Set up GradCAM
+ target_layers = [pth_model_static.layer4[-1]] # Adjusted to match the updated model
  cam = GradCAM(model=pth_model_static, target_layers=target_layers)

  # Define image preprocessing
  img = pth_transform(img).unsqueeze(0).to(device)
  return img

+ # Additional utility functions...

  if __name__ == "__main__":
  logger.info("Model initialization complete.")
 
app/model_architectures.py CHANGED
@@ -1,32 +1,67 @@
 
 
1
  import torch
2
  import torch.nn as nn
3
  import torchvision.models as models
 
 
 
4
 
5
  class ResNet50(nn.Module):
6
  def __init__(self, num_classes=7, channels=3):
7
  super(ResNet50, self).__init__()
8
- self.resnet = models.resnet50(pretrained=True)
9
- # Modify the first convolutional layer if channels != 3
10
- if channels != 3:
11
- self.resnet.conv1 = nn.Conv2d(channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
12
- num_features = self.resnet.fc.in_features
13
- self.resnet.fc = nn.Linear(num_features, num_classes)
14
 
15
  def forward(self, x):
16
- return self.resnet(x)
17
 
18
  def extract_features(self, x):
19
- x = self.resnet.conv1(x)
20
- x = self.resnet.bn1(x)
21
- x = self.resnet.relu(x)
22
- x = self.resnet.maxpool(x)
23
 
24
- x = self.resnet.layer1(x)
25
- x = self.resnet.layer2(x)
26
- x = self.resnet.layer3(x)
27
- x = self.resnet.layer4(x)
28
 
29
- x = self.resnet.avgpool(x)
30
  x = torch.flatten(x, 1)
31
  return x
32
 
@@ -34,13 +69,20 @@ class LSTMPyTorch(nn.Module):
34
  def __init__(self, input_size, hidden_size, num_layers, num_classes):
35
  super(LSTMPyTorch, self).__init__()
36
  self.hidden_size = hidden_size
37
- self.num_layers = num_layers
38
- self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
 
 
39
  self.fc = nn.Linear(hidden_size, num_classes)
40
 
41
  def forward(self, x):
42
- h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
43
- c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
44
- out, _ = self.lstm(x, (h0, c0))
45
- out = self.fc(out[:, -1, :])
46
- return out
1
+ # model_architectures.py
2
+
3
  import torch
4
  import torch.nn as nn
5
  import torchvision.models as models
6
+ import logging
7
+
8
+ logger = logging.getLogger(__name__)
9
 
10
  class ResNet50(nn.Module):
11
  def __init__(self, num_classes=7, channels=3):
12
  super(ResNet50, self).__init__()
13
+ # Define layers directly without wrapping in 'resnet'
14
+ self.conv_layer_s2_same = nn.Conv2d(channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
15
+ self.batch_norm1 = nn.BatchNorm2d(64)
16
+ self.relu = nn.ReLU(inplace=True)
17
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
18
+
19
+ # Load pre-trained ResNet50 model
20
+ resnet = models.resnet50(pretrained=True)
21
+
22
+ # Extract layers
23
+ self.layer1 = resnet.layer1
24
+ self.layer2 = resnet.layer2
25
+ self.layer3 = resnet.layer3
26
+ self.layer4 = resnet.layer4
27
+ self.avgpool = resnet.avgpool
28
+
29
+ # Fully connected layers
30
+ self.fc1 = nn.Linear(resnet.fc.in_features, num_classes)
31
+ # If your model has additional fully connected layers, define them here
32
+ # Example:
33
+ # self.fc2 = nn.Linear(num_classes, num_classes)
34
 
35
  def forward(self, x):
36
+ x = self.conv_layer_s2_same(x)
37
+ x = self.batch_norm1(x)
38
+ x = self.relu(x)
39
+ x = self.maxpool(x)
40
+
41
+ x = self.layer1(x)
42
+ x = self.layer2(x)
43
+ x = self.layer3(x)
44
+ x = self.layer4(x)
45
+
46
+ x = self.avgpool(x)
47
+ x = torch.flatten(x, 1)
48
+ x = self.fc1(x)
49
+ # If additional fully connected layers are defined, pass x through them
50
+ # x = self.fc2(x)
51
+ return x
52
 
53
  def extract_features(self, x):
54
+ x = self.conv_layer_s2_same(x)
55
+ x = self.batch_norm1(x)
56
+ x = self.relu(x)
57
+ x = self.maxpool(x)
58
 
59
+ x = self.layer1(x)
60
+ x = self.layer2(x)
61
+ x = self.layer3(x)
62
+ x = self.layer4(x)
63
 
64
+ x = self.avgpool(x)
65
  x = torch.flatten(x, 1)
66
  return x
67
 
 
69
  def __init__(self, input_size, hidden_size, num_layers, num_classes):
70
  super(LSTMPyTorch, self).__init__()
71
  self.hidden_size = hidden_size
72
+
73
+ # Define separate LSTM layers
74
+ self.lstm1 = nn.LSTM(input_size, hidden_size, num_layers=1, batch_first=True)
75
+ self.lstm2 = nn.LSTM(hidden_size, hidden_size, num_layers=1, batch_first=True)
76
  self.fc = nn.Linear(hidden_size, num_classes)
77
 
78
  def forward(self, x):
79
+ h0_1 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
80
+ c0_1 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
81
+ out1, _ = self.lstm1(x, (h0_1, c0_1))
82
+
83
+ h0_2 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
84
+ c0_2 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
85
+ out2, _ = self.lstm2(out1, (h0_2, c0_2))
86
+
87
+ out = self.fc(out2[:, -1, :])
88
+ return out
app/sleep_quality_processing.py DELETED
@@ -1,94 +0,0 @@
1
- import cv2
2
- import numpy as np
3
- import matplotlib.pyplot as plt
4
- import mediapipe as mp
5
- from app.face_utils import get_box
6
-
7
- mp_face_mesh = mp.solutions.face_mesh
8
-
9
- def preprocess_video_and_predict_sleep_quality(video):
10
- cap = cv2.VideoCapture(video)
11
- w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
12
- h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
13
- fps = np.round(cap.get(cv2.CAP_PROP_FPS))
14
-
15
- path_save_video_original = 'result_original.mp4'
16
- path_save_video_face = 'result_face.mp4'
17
- path_save_video_sleep = 'result_sleep.mp4'
18
-
19
- vid_writer_original = cv2.VideoWriter(path_save_video_original, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
20
- vid_writer_face = cv2.VideoWriter(path_save_video_face, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))
21
- vid_writer_sleep = cv2.VideoWriter(path_save_video_sleep, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))
22
-
23
- frames = []
24
- sleep_quality_scores = []
25
- eye_bags_images = []
26
-
27
- with mp_face_mesh.FaceMesh(
28
- max_num_faces=1,
29
- refine_landmarks=False,
30
- min_detection_confidence=0.5,
31
- min_tracking_confidence=0.5) as face_mesh:
32
-
33
- while cap.isOpened():
34
- ret, frame = cap.read()
35
- if not ret:
36
- break
37
-
38
- frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
39
- results = face_mesh.process(frame_rgb)
40
-
41
- if results.multi_face_landmarks:
42
- for fl in results.multi_face_landmarks:
43
- startX, startY, endX, endY = get_box(fl, w, h)
44
- cur_face = frame_rgb[startY:endY, startX:endX]
45
-
46
- sleep_quality_score, eye_bags_image = analyze_sleep_quality(cur_face)
47
- sleep_quality_scores.append(sleep_quality_score)
48
- eye_bags_images.append(cv2.resize(eye_bags_image, (224, 224)))
49
-
50
- sleep_quality_viz = create_sleep_quality_visualization(cur_face, sleep_quality_score)
51
-
52
- cur_face = cv2.resize(cur_face, (224, 224))
53
-
54
- vid_writer_face.write(cv2.cvtColor(cur_face, cv2.COLOR_RGB2BGR))
55
- vid_writer_sleep.write(sleep_quality_viz)
56
-
57
- vid_writer_original.write(frame)
58
- frames.append(len(frames) + 1)
59
-
60
- cap.release()
61
- vid_writer_original.release()
62
- vid_writer_face.release()
63
- vid_writer_sleep.release()
64
-
65
- sleep_stat = sleep_quality_statistics_plot(frames, sleep_quality_scores)
66
-
67
- if eye_bags_images:
68
- average_eye_bags_image = np.mean(np.array(eye_bags_images), axis=0).astype(np.uint8)
69
- else:
70
- average_eye_bags_image = np.zeros((224, 224, 3), dtype=np.uint8)
71
-
72
- return (path_save_video_original, path_save_video_face, path_save_video_sleep,
73
- average_eye_bags_image, sleep_stat)
74
-
75
- def analyze_sleep_quality(face_image):
76
- # Placeholder function - implement your sleep quality analysis here
77
- sleep_quality_score = np.random.random()
78
- eye_bags_image = cv2.resize(face_image, (224, 224))
79
- return sleep_quality_score, eye_bags_image
80
-
81
- def create_sleep_quality_visualization(face_image, sleep_quality_score):
82
- viz = face_image.copy()
83
- cv2.putText(viz, f"Sleep Quality: {sleep_quality_score:.2f}", (10, 30),
84
- cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
85
- return cv2.cvtColor(viz, cv2.COLOR_RGB2BGR)
86
-
87
- def sleep_quality_statistics_plot(frames, sleep_quality_scores):
88
- fig, ax = plt.subplots()
89
- ax.plot(frames, sleep_quality_scores)
90
- ax.set_xlabel('Frame')
91
- ax.set_ylabel('Sleep Quality Score')
92
- ax.set_title('Sleep Quality Over Time')
93
- plt.tight_layout()
94
- return fig
 
app/video_processing.py CHANGED
@@ -8,19 +8,28 @@ from app.face_utils import get_box, display_info
8
  from app.config import config_data
9
  from app.plot import statistics_plot
10
  from .au_processing import features_to_au_intensities, au_statistics_plot
 
11
 
12
  mp_face_mesh = mp.solutions.face_mesh
13
 
14
- def preprocess_video_and_predict(video):
15
- cap = cv2.VideoCapture(video)
16
- w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
17
- h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
18
- fps = np.round(cap.get(cv2.CAP_PROP_FPS))
19
 
20
- path_save_video_face = 'result_face.mp4'
21
- vid_writer_face = cv2.VideoWriter(path_save_video_face, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))
 
 
 
22
 
 
 
23
  path_save_video_hm = 'result_hm.mp4'
 
 
 
24
  vid_writer_hm = cv2.VideoWriter(path_save_video_hm, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))
25
 
26
  lstm_features = []
@@ -30,54 +39,58 @@ def preprocess_video_and_predict(video):
30
  frames = []
31
  au_intensities_list = []
32
  last_output = None
33
- last_heatmap = None
34
  last_au_intensities = None
35
  cur_face = None
36
 
37
  with mp_face_mesh.FaceMesh(
38
- max_num_faces=1,
39
- refine_landmarks=False,
40
- min_detection_confidence=0.5,
41
- min_tracking_confidence=0.5) as face_mesh:
42
 
43
  while cap.isOpened():
44
- _, frame = cap.read()
45
- if frame is None: break
 
46
 
47
- frame_copy = frame.copy()
48
- frame_copy.flags.writeable = False
49
- frame_copy = cv2.cvtColor(frame_copy, cv2.COLOR_BGR2RGB)
50
- results = face_mesh.process(frame_copy)
51
- frame_copy.flags.writeable = True
52
 
53
  if results.multi_face_landmarks:
54
- for fl in results.multi_face_landmarks:
55
- startX, startY, endX, endY = get_box(fl, w, h)
56
- cur_face = frame_copy[startY:endY, startX: endX]
 
 
 
 
57
 
58
- if count_face%config_data.FRAME_DOWNSAMPLING == 0:
59
- cur_face_copy = pth_processing(Image.fromarray(cur_face))
60
  with torch.no_grad():
61
- features = torch.nn.functional.relu(pth_model_static.extract_features(cur_face_copy)).detach().numpy()
62
- au_intensities = features_to_au_intensities(pth_model_static(cur_face_copy))
63
-
64
- grayscale_cam = cam(input_tensor=cur_face_copy)
65
- grayscale_cam = grayscale_cam[0, :]
66
- cur_face_hm = cv2.resize(cur_face,(224,224), interpolation = cv2.INTER_AREA)
67
- cur_face_hm = np.float32(cur_face_hm) / 255
68
- heatmap = show_cam_on_image(cur_face_hm, grayscale_cam, use_rgb=False)
 
 
 
 
 
69
  last_heatmap = heatmap
70
  last_au_intensities = au_intensities
71
-
72
- if len(lstm_features) == 0:
73
- lstm_features = [features]*10
74
  else:
75
  lstm_features = lstm_features[1:] + [features]
76
 
77
- lstm_f = torch.from_numpy(np.vstack(lstm_features))
78
- lstm_f = torch.unsqueeze(lstm_f, 0)
79
  with torch.no_grad():
80
- output = pth_model_dynamic(lstm_f).detach().numpy()
81
  last_output = output
82
 
83
  if count_face == 0:
@@ -88,38 +101,33 @@ def preprocess_video_and_predict(video):
88
  output = last_output
89
  heatmap = last_heatmap
90
  au_intensities = last_au_intensities
 
 
 
91
 
92
- elif last_output is None:
93
- output = np.empty((1, 7))
94
- output[:] = np.nan
95
- au_intensities = np.empty(24)
96
- au_intensities[:] = np.nan
97
-
98
  probs.append(output[0])
99
  frames.append(count_frame)
100
  au_intensities_list.append(au_intensities)
101
  else:
102
  if last_output is not None:
103
  lstm_features = []
104
- empty = np.empty((7))
105
- empty[:] = np.nan
106
- probs.append(empty)
107
  frames.append(count_frame)
108
  au_intensities_list.append(np.full(24, np.nan))
109
 
110
  if cur_face is not None:
111
- heatmap_f = display_info(heatmap, 'Frame: {}'.format(count_frame), box_scale=.3)
112
-
113
- cur_face = cv2.cvtColor(cur_face, cv2.COLOR_RGB2BGR)
114
- cur_face = cv2.resize(cur_face, (224,224), interpolation = cv2.INTER_AREA)
115
- cur_face = display_info(cur_face, 'Frame: {}'.format(count_frame), box_scale=.3)
116
- vid_writer_face.write(cur_face)
117
- vid_writer_hm.write(heatmap_f)
118
 
119
  count_frame += 1
120
  if count_face != 0:
121
  count_face += 1
122
 
 
123
  vid_writer_face.release()
124
  vid_writer_hm.release()
125
 
@@ -128,5 +136,5 @@ def preprocess_video_and_predict(video):
128
 
129
  if not stat or not au_stat:
130
  return None, None, None, None, None
131
-
132
- return video, path_save_video_face, path_save_video_hm, stat, au_stat
 
8
  from app.config import config_data
9
  from app.plot import statistics_plot
10
  from .au_processing import features_to_au_intensities, au_statistics_plot
11
+ from pytorch_grad_cam.utils.image import show_cam_on_image
12
 
13
  mp_face_mesh = mp.solutions.face_mesh
14
 
15
+ def preprocess_video_and_predict(video_path):
16
+ cap = cv2.VideoCapture(video_path)
17
+ if not cap.isOpened():
18
+ print(f"Error opening video file: {video_path}")
19
+ return None, None, None, None, None
20
 
21
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
22
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
23
+ fps = cap.get(cv2.CAP_PROP_FPS)
24
+ if fps <= 0 or fps != fps: # Handle NaN fps
25
+ fps = 30 # Default FPS
26
 
27
+ # Paths to save processed videos
28
+ path_save_video_face = 'result_face.mp4'
29
  path_save_video_hm = 'result_hm.mp4'
30
+
31
+ # Video writers
32
+ vid_writer_face = cv2.VideoWriter(path_save_video_face, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))
33
  vid_writer_hm = cv2.VideoWriter(path_save_video_hm, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224))
34
 
35
  lstm_features = []
 
39
  frames = []
40
  au_intensities_list = []
41
  last_output = None
42
+ last_heatmap = None
43
  last_au_intensities = None
44
  cur_face = None
45
 
46
  with mp_face_mesh.FaceMesh(
47
+ max_num_faces=1,
48
+ refine_landmarks=False,
49
+ min_detection_confidence=0.5,
50
+ min_tracking_confidence=0.5) as face_mesh:
51
 
52
  while cap.isOpened():
53
+ ret, frame = cap.read()
54
+ if not ret:
55
+ break
56
 
57
+ frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
58
+ results = face_mesh.process(frame_rgb)
 
 
 
59
 
60
  if results.multi_face_landmarks:
61
+ for face_landmarks in results.multi_face_landmarks:
62
+ startX, startY, endX, endY = get_box(face_landmarks, width, height)
63
+ cur_face = frame_rgb[startY:endY, startX:endX]
64
+
65
+ if count_face % config_data.FRAME_DOWNSAMPLING == 0:
66
+ cur_face_pil = Image.fromarray(cur_face)
67
+ cur_face_processed = pth_processing(cur_face_pil)
68
 
 
 
69
  with torch.no_grad():
70
+ features = torch.nn.functional.relu(
71
+ pth_model_static.extract_features(cur_face_processed)
72
+ ).cpu().numpy()
73
+ au_intensities = features_to_au_intensities(
74
+ pth_model_static(cur_face_processed)
75
+ )
76
+
77
+ # Generate heatmap
78
+ grayscale_cam = cam(input_tensor=cur_face_processed)[0, :]
79
+ cur_face_resized = cv2.resize(cur_face, (224, 224), interpolation=cv2.INTER_AREA)
80
+ cur_face_normalized = np.float32(cur_face_resized) / 255
81
+ heatmap = show_cam_on_image(cur_face_normalized, grayscale_cam, use_rgb=False)
82
+
83
  last_heatmap = heatmap
84
  last_au_intensities = au_intensities
85
+
86
+ if not lstm_features:
87
+ lstm_features = [features] * 10
88
  else:
89
  lstm_features = lstm_features[1:] + [features]
90
 
91
+ lstm_input = torch.from_numpy(np.vstack(lstm_features)).unsqueeze(0)
 
92
  with torch.no_grad():
93
+ output = pth_model_dynamic(lstm_input).cpu().numpy()
94
  last_output = output
95
 
96
  if count_face == 0:
 
101
  output = last_output
102
  heatmap = last_heatmap
103
  au_intensities = last_au_intensities
104
+ else:
105
+ output = np.full((1, 7), np.nan)
106
+ au_intensities = np.full(24, np.nan)
107
 
 
 
 
 
 
 
108
  probs.append(output[0])
109
  frames.append(count_frame)
110
  au_intensities_list.append(au_intensities)
111
  else:
112
  if last_output is not None:
113
  lstm_features = []
114
+ probs.append(np.full(7, np.nan))
 
 
115
  frames.append(count_frame)
116
  au_intensities_list.append(np.full(24, np.nan))
117
 
118
  if cur_face is not None:
119
+ heatmap_frame = display_info(heatmap, f'Frame: {count_frame}', box_scale=0.3)
120
+ cur_face_bgr = cv2.cvtColor(cur_face, cv2.COLOR_RGB2BGR)
121
+ cur_face_resized = cv2.resize(cur_face_bgr, (224, 224), interpolation=cv2.INTER_AREA)
122
+ cur_face_annotated = display_info(cur_face_resized, f'Frame: {count_frame}', box_scale=0.3)
123
+ vid_writer_face.write(cur_face_annotated)
124
+ vid_writer_hm.write(heatmap_frame)
 
125
 
126
  count_frame += 1
127
  if count_face != 0:
128
  count_face += 1
129
 
130
+ cap.release()
131
  vid_writer_face.release()
132
  vid_writer_hm.release()
133
 
 
136
 
137
  if not stat or not au_stat:
138
  return None, None, None, None, None
139
+
140
+ return video_path, path_save_video_face, path_save_video_hm, stat, au_stat
app_gpuzero.py DELETED
@@ -1,64 +0,0 @@
1
- import gradio as gr
2
- from tabs.heart_rate_variability import create_hrv_tab
3
- from tabs.blink_detection import create_blink_tab
4
- from tabs.gaze_estimation import create_gaze_estimation_tab
5
- from tabs.speech_stress_analysis import create_voice_stress_tab
6
- from tabs.head_posture_detection import create_head_posture_tab
7
- from tabs.face_expressions import create_face_expressions_tab
8
- from tabs.speech_emotion_recognition import create_emotion_recognition_tab
9
- from tabs.sleep_quality import create_sleep_quality_tab
10
- from tabs.sentiment_analysis import create_sentiment_tab
11
- from tabs.emotion_analysis import create_emotion_tab
12
- from tabs.body_movement_analysis import create_body_movement_tab
13
- from tabs.posture_analysis import create_posture_analysis_tab
14
- from tabs.skin_analysis import create_skin_conductance_tab
15
- from tabs.FACS_analysis import create_facs_analysis_tab
16
- from tabs.roberta_chatbot import create_roberta_chatbot_tab
17
-
18
- # Import the UI components
19
- from ui_components import CUSTOM_CSS, HEADER_HTML, DISCLAIMER_HTML
20
-
21
- TAB_STRUCTURE = [
22
- ("Visual Analysis", [
23
- ("Emotional Face Expressions", create_face_expressions_tab),
24
- ("FACS for Stress, Anxiety, Depression", create_facs_analysis_tab),
25
- ("Gaze Estimation", create_gaze_estimation_tab),
26
- ("Head Posture", create_head_posture_tab),
27
- ("Blink Rate", create_blink_tab),
28
- ("Sleep Quality", create_sleep_quality_tab),
29
- ("Heart Rate Variability", create_hrv_tab),
30
- ("Body Movement", create_body_movement_tab),
31
- ("Posture", create_posture_analysis_tab),
32
- ("Skin", create_skin_conductance_tab)
33
- ]),
34
- ("Speech Analysis", [
35
- ("Speech Stress", create_voice_stress_tab),
36
- ("Speech Emotion", create_emotion_recognition_tab)
37
- ]),
38
- ("Text Analysis", [
39
- ("Sentiment", create_sentiment_tab),
40
- ("Emotion", create_emotion_tab),
41
- ("Roberta Mental Health Chatbot", create_roberta_chatbot_tab)
42
- ]),
43
- ("Brain Analysis (coming soon)", [
44
- ])
45
- ]
46
-
47
- def create_demo():
48
- with gr.Blocks(css=CUSTOM_CSS) as demo:
49
- gr.Markdown(HEADER_HTML)
50
- with gr.Tabs(elem_classes=["main-tab"]):
51
- for main_tab, sub_tabs in TAB_STRUCTURE:
52
- with gr.Tab(main_tab):
53
- with gr.Tabs():
54
- for sub_tab, create_fn in sub_tabs:
55
- with gr.Tab(sub_tab):
56
- create_fn()
57
- gr.HTML(DISCLAIMER_HTML)
58
- return demo
59
-
60
- # Create the demo instance
61
- demo = create_demo()
62
-
63
- if __name__ == "__main__":
64
- demo.queue(api_open=True).launch(share=False)
 
assets/.DS_Store CHANGED
Binary files a/assets/.DS_Store and b/assets/.DS_Store differ
 
assets/models/FER_dynamic_LSTM.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0cd1561a72f9de26c315bb857f03e8946635db047e0dbea52bb0276610f19751
+ size 11569208
assets/models/FER_static_ResNet50_AffectNet.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8274190b5be4355bd2f07b59f593fcdb294f9d7c563bfa9ac9e5ea06c10692d2
+ size 98562934
llm/mentalBERT.py DELETED
@@ -1,73 +0,0 @@
1
- import torch
2
- from transformers import RobertaTokenizer, RobertaForSequenceClassification
3
- import gradio as gr
4
-
5
- # Load the tokenizer and models
6
- tokenizer = RobertaTokenizer.from_pretrained("mental/mental-roberta-base")
7
- sentiment_model = RobertaForSequenceClassification.from_pretrained("mental/mental-roberta-base")
8
- emotion_model = RobertaForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
9
-
10
- # Define the labels
11
- sentiment_labels = ["negative", "positive"]
12
- emotion_labels = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"]
13
-
14
- def analyze_text(text):
15
- try:
16
- # Tokenize the input text
17
- inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
18
-
19
- # Get sentiment model outputs
20
- sentiment_outputs = sentiment_model(**inputs)
21
- sentiment_logits = sentiment_outputs.logits
22
- sentiment_probs = torch.nn.functional.softmax(sentiment_logits, dim=-1)
23
-
24
- # Debugging: Print logits and probs shapes
25
- print("Sentiment logits shape:", sentiment_logits.shape)
26
- print("Sentiment logits:", sentiment_logits)
27
- print("Sentiment probs shape:", sentiment_probs.shape)
28
- print("Sentiment probs:", sentiment_probs)
29
-
30
- # Get the highest probability and corresponding label for sentiment
31
- max_sentiment_prob, max_sentiment_index = torch.max(sentiment_probs, dim=1)
32
- sentiment = sentiment_labels[max_sentiment_index.item()]
33
-
34
- # Get emotion model outputs
35
- emotion_outputs = emotion_model(**inputs)
36
- emotion_logits = emotion_outputs.logits
37
- emotion_probs = torch.nn.functional.softmax(emotion_logits, dim=-1)
38
-
39
- # Debugging: Print logits and probs shapes
40
- print("Emotion logits shape:", emotion_logits.shape)
41
- print("Emotion logits:", emotion_logits)
42
- print("Emotion probs shape:", emotion_probs.shape)
43
- print("Emotion probs:", emotion_probs)
44
-
45
- # Get the highest probability and corresponding label for emotion
46
- max_emotion_prob, max_emotion_index = torch.max(emotion_probs, dim=1)
47
- emotion = emotion_labels[max_emotion_index.item()]
48
-
49
- return sentiment, f"{max_sentiment_prob.item():.4f}", emotion, f"{max_emotion_prob.item():.4f}"
50
- except Exception as e:
51
- print("Error:", str(e))
52
- return "Error", "N/A", "Error", "N/A"
53
-
54
- # Define the Gradio interface
55
- interface = gr.Interface(
56
- fn=analyze_text,
57
- inputs=gr.Textbox(
58
- lines=5,
59
- placeholder="Enter text here...",
60
- value="I don’t know a lot but what I do know is, we don’t start off very big and we all try to make each other smaller."
61
- ),
62
- outputs=[
63
- gr.Textbox(label="Detected Sentiment"),
64
- gr.Textbox(label="Sentiment Confidence Score"),
65
- gr.Textbox(label="Detected Emotion"),
66
- gr.Textbox(label="Emotion Confidence Score")
67
- ],
68
- title="Sentiment and Emotion Analysis: Detecting Positive/Negative Sentiment and Specific Emotions",
69
- description="Enter a piece of text to detect overall sentiment (positive or negative) and specific emotions (anger, disgust, fear, joy, neutral, sadness, surprise)."
70
- )
71
-
72
- # Launch the interface
73
- interface.launch()
 
notebooks/pytorch-roberta-onnx.ipynb DELETED
@@ -1,280 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "## Pytorch RoBERTa to ONNX"
8
- ]
9
- },
10
- {
11
- "cell_type": "markdown",
12
- "metadata": {},
13
- "source": [
14
- "This notebook documents how to export the PyTorch NLP model into ONNX format and then use it to make predictions using the ONNX runtime.\n",
15
- "\n",
16
- "The model uses the `simpletransformers` library which is a Python wrappers around the `transformers` library which contains PyTorch NLP transformer architectures and weights."
17
- ]
18
- },
19
- {
20
- "cell_type": "code",
21
- "execution_count": 1,
22
- "metadata": {},
23
- "outputs": [],
24
- "source": [
25
- "import torch\n",
26
- "import numpy as np\n",
27
- "from simpletransformers.model import TransformerModel\n",
28
- "from transformers import RobertaForSequenceClassification, RobertaTokenizer\n",
29
- "import onnx\n",
30
- "import onnxruntime"
31
- ]
32
- },
33
- {
34
- "cell_type": "markdown",
35
- "metadata": {},
36
- "source": [
37
- "## Step 1: Load pretrained PyTorch model"
38
- ]
39
- },
40
- {
41
- "cell_type": "markdown",
42
- "metadata": {},
43
- "source": [
44
- "Download the model weights from https://storage.googleapis.com/seldon-models/pytorch/moviesentiment_roberta/pytorch_model.bin"
45
- ]
46
- },
47
- {
48
- "cell_type": "code",
49
- "execution_count": 2,
50
- "metadata": {},
51
- "outputs": [],
52
- "source": [
53
- "model = TransformerModel('roberta', 'roberta-base', args=({'fp16': False}))"
54
- ]
55
- },
56
- {
57
- "cell_type": "code",
58
- "execution_count": 3,
59
- "metadata": {},
60
- "outputs": [
61
- {
62
- "data": {
63
- "text/plain": [
64
- "<All keys matched successfully>"
65
- ]
66
- },
67
- "execution_count": 3,
68
- "metadata": {},
69
- "output_type": "execute_result"
70
- }
71
- ],
72
- "source": [
73
- "model.model.load_state_dict(torch.load('pytorch_model.bin'))"
74
- ]
75
- },
76
- {
77
- "cell_type": "markdown",
78
- "metadata": {},
79
- "source": [
80
- "## Step 2: Export as ONNX"
81
- ]
82
- },
83
- {
84
- "cell_type": "markdown",
85
- "metadata": {},
86
- "source": [
87
- "PyTorch supports exporting to ONNX, you just need to specify a valid input tensor for the model."
88
- ]
89
- },
90
- {
91
- "cell_type": "code",
92
- "execution_count": 4,
93
- "metadata": {},
94
- "outputs": [],
95
- "source": [
96
- "tokenizer = RobertaTokenizer.from_pretrained('roberta-base')\n",
97
- "input_ids = torch.tensor(tokenizer.encode(\"This film is so bad\", add_special_tokens=True)).unsqueeze(0) # Batch size 1"
98
- ]
99
- },
100
- {
101
- "cell_type": "code",
102
- "execution_count": 5,
103
- "metadata": {},
104
- "outputs": [
105
- {
106
- "data": {
107
- "text/plain": [
108
- "tensor([[ 0, 713, 822, 16, 98, 1099, 2]])"
109
- ]
110
- },
111
- "execution_count": 5,
112
- "metadata": {},
113
- "output_type": "execute_result"
114
- }
115
- ],
116
- "source": [
117
- "input_ids"
118
- ]
119
- },
120
- {
121
- "cell_type": "markdown",
122
- "metadata": {},
123
- "source": [
124
- "Export as ONNX, we specify dynamic axes for batch dimension and sequence length as sentences come in various lengths."
125
- ]
126
- },
127
- {
128
- "cell_type": "code",
129
- "execution_count": 6,
130
- "metadata": {},
131
- "outputs": [
132
- {
133
- "name": "stderr",
134
- "output_type": "stream",
135
- "text": [
136
- "/home/janis/.conda/envs/py37/lib/python3.7/site-packages/transformers/modeling_roberta.py:172: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
137
- " if input_ids[:, 0].sum().item() != 0:\n"
138
- ]
139
- }
140
- ],
141
- "source": [
142
- "torch.onnx.export(model.model,\n",
143
- " (input_ids),\n",
144
- " \"roberta.onnx\",\n",
145
- " input_names=['input'],\n",
146
- " output_names=['output'],\n",
147
- " dynamic_axes={'input' :{0 : 'batch_size',\n",
148
- " 1: 'sentence_length'},\n",
149
- " 'output': {0: 'batch_size'}})"
150
- ]
151
- },
152
- {
153
- "cell_type": "markdown",
154
- "metadata": {},
155
- "source": [
156
- "## Step 3: Test predictions are the same using ONNX runtime"
157
- ]
158
- },
159
- {
160
- "cell_type": "code",
161
- "execution_count": 7,
162
- "metadata": {},
163
- "outputs": [],
164
- "source": [
165
- "onnx_model = onnx.load(\"roberta.onnx\")"
166
- ]
167
- },
168
- {
169
- "cell_type": "code",
170
- "execution_count": 8,
171
- "metadata": {},
172
- "outputs": [],
173
- "source": [
174
- "# checks the exported model, may crash ipython kernel if run together with the PyTorch model in memory\n",
175
- "# onnx.checker.check_model(onnx_model)"
176
- ]
177
- },
178
- {
179
- "cell_type": "code",
180
- "execution_count": 9,
181
- "metadata": {},
182
- "outputs": [],
183
- "source": [
184
- "import onnxruntime\n",
185
- "\n",
186
- "ort_session = onnxruntime.InferenceSession(\"roberta.onnx\")"
187
- ]
188
- },
189
- {
190
- "cell_type": "code",
191
- "execution_count": 10,
192
- "metadata": {},
193
- "outputs": [],
194
- "source": [
195
- "def to_numpy(tensor):\n",
196
- " return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()"
197
- ]
198
- },
199
- {
200
- "cell_type": "code",
201
- "execution_count": 11,
202
- "metadata": {},
203
- "outputs": [],
204
- "source": [
205
- "input_ids = torch.tensor(tokenizer.encode(\"This film is so bad\", add_special_tokens=True)).unsqueeze(0) # Batch size 1"
206
- ]
207
- },
208
- {
209
- "cell_type": "code",
210
- "execution_count": 12,
211
- "metadata": {},
212
- "outputs": [],
213
- "source": [
214
- "# compute ONNX Runtime output prediction\n",
215
- "ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(input_ids)}\n",
216
- "ort_out = ort_session.run(None, ort_inputs)"
217
- ]
218
- },
219
- {
220
- "cell_type": "code",
221
- "execution_count": 13,
222
- "metadata": {},
223
- "outputs": [],
224
- "source": [
225
- "out = model.model(input_ids)"
226
- ]
227
- },
228
- {
229
- "cell_type": "code",
230
- "execution_count": 14,
231
- "metadata": {},
232
- "outputs": [
233
- {
234
- "data": {
235
- "text/plain": [
236
- "((tensor([[ 2.3067, -2.6440]], grad_fn=<AddmmBackward>),),\n",
237
- " [array([[ 2.3066945, -2.6439788]], dtype=float32)])"
238
- ]
239
- },
240
- "execution_count": 14,
241
- "metadata": {},
242
- "output_type": "execute_result"
243
- }
244
- ],
245
- "source": [
246
- "out, ort_out"
247
- ]
248
- },
249
- {
250
- "cell_type": "code",
251
- "execution_count": 15,
252
- "metadata": {},
253
- "outputs": [],
254
- "source": [
255
- "np.testing.assert_allclose(to_numpy(out[0]), ort_out[0], rtol=1e-03, atol=1e-05)"
256
- ]
257
- }
258
- ],
259
- "metadata": {
260
- "kernelspec": {
261
- "display_name": "Python 3",
262
- "language": "python",
263
- "name": "python3"
264
- },
265
- "language_info": {
266
- "codemirror_mode": {
267
- "name": "ipython",
268
- "version": 3
269
- },
270
- "file_extension": ".py",
271
- "mimetype": "text/x-python",
272
- "name": "python",
273
- "nbconvert_exporter": "python",
274
- "pygments_lexer": "ipython3",
275
- "version": "3.7.3"
276
- }
277
- },
278
- "nbformat": 4,
279
- "nbformat_minor": 2
280
- }
 
 
onxxchatbot.py DELETED
@@ -1,40 +0,0 @@
1
- import gradio as gr
2
- from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
3
-
4
- # Load pre-trained model and tokenizer
5
- model_name = "roberta-base"
6
- tokenizer = AutoTokenizer.from_pretrained(model_name)
7
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
8
-
9
- # Create a text classification pipeline
10
- classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
11
-
12
- # Define response generation function
13
- def generate_response(input_text):
14
- # Classify the input text
15
- result = classifier(input_text)[0]
16
- label = result['label']
17
- score = result['score']
18
-
19
- # Map the classification result to a response
20
- responses = {
21
- "LABEL_0": "I understand you might be going through a difficult time. Remember, it's okay to seek help when you need it.",
22
- "LABEL_1": "Your feelings are valid. Have you considered talking to a mental health professional about this?",
23
- "LABEL_2": "Taking care of your mental health is crucial. Small steps like regular exercise and good sleep can make a big difference.",
24
- "LABEL_3": "It sounds like you're dealing with a lot. Remember, you're not alone in this journey.",
25
- "LABEL_4": "I hear you. Coping with mental health challenges can be tough. Have you tried any relaxation techniques like deep breathing or meditation?"
26
- }
27
-
28
- return responses.get(label, "I'm here to listen and support you. Could you tell me more about how you're feeling?")
29
-
30
- # Define chatbot function for Gradio
31
- def chatbot(message, history):
32
- response = generate_response(message)
33
- return response
34
-
35
- # Create Gradio interface
36
- iface = gr.ChatInterface(
37
- fn=chatbot,
38
- title="Mental Health Support Chatbot (RoBERTa)",
39
- description="This chatbot uses a pre-trained RoBERTa model for mental health conversations. Remember, this is not a substitute for professional help. If you're in crisis, please seek immediate professional assistance."
40
- )
 
tabs/FACS_analysis.py CHANGED
@@ -4,18 +4,19 @@ import numpy as np
4
  import matplotlib.pyplot as plt
5
  from app.app_utils import preprocess_frame_and_predict_aus
6
 
7
- # Define the AUs associated with stress, anxiety, and depression
8
  STRESS_AUS = [4, 7, 17, 23, 24]
9
  ANXIETY_AUS = [1, 2, 4, 5, 20]
10
- DEPRESSION_AUS = [1, 4, 15, 17]
11
 
12
  AU_DESCRIPTIONS = {
13
  1: "Inner Brow Raiser",
14
  2: "Outer Brow Raiser",
15
  4: "Brow Lowerer",
16
  5: "Upper Lid Raiser",
 
17
  7: "Lid Tightener",
18
- 15: "Lip Corner Depressor",
19
  17: "Chin Raiser",
20
  20: "Lip Stretcher",
21
  23: "Lip Tightener",
@@ -52,13 +53,13 @@ def process_video_for_facs(video_path):
52
  # Calculate and normalize emotional state scores
53
  stress_score = normalize_score(np.mean([avg_au_intensities[au-1] for au in STRESS_AUS if au <= len(avg_au_intensities)]))
54
  anxiety_score = normalize_score(np.mean([avg_au_intensities[au-1] for au in ANXIETY_AUS if au <= len(avg_au_intensities)]))
55
- depression_score = normalize_score(np.mean([avg_au_intensities[au-1] for au in DEPRESSION_AUS if au <= len(avg_au_intensities)]))
56
 
57
  fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
58
 
59
  # Emotional state scores
60
- states = ['Stress', 'Anxiety', 'Depression']
61
- scores = [stress_score, anxiety_score, depression_score]
62
  bars = ax1.bar(states, scores)
63
  ax1.set_ylim(0, 1)
64
  ax1.set_title('Emotional State Scores')
@@ -68,7 +69,7 @@ def process_video_for_facs(video_path):
68
  f'{height:.2f}', ha='center', va='bottom')
69
 
70
  # AU intensities
71
- all_aus = sorted(set(STRESS_AUS + ANXIETY_AUS + DEPRESSION_AUS))
72
  all_aus = [au for au in all_aus if au <= len(avg_au_intensities)]
73
  au_labels = [f"AU{au}\n{AU_DESCRIPTIONS.get(au, '')}" for au in all_aus]
74
  au_values = [avg_au_intensities[au-1] for au in all_aus]
@@ -89,7 +90,7 @@ def create_facs_analysis_tab():
89
  gr.Examples(["./assets/videos/fitness.mp4"], inputs=[input_video])
90
  with gr.Column(scale=2):
91
  output_image = gr.Image(label="Processed Frame")
92
- facs_chart = gr.Plot(label="FACS Analysis for SAD")
93
 
94
  # Automatically trigger the analysis when a video is uploaded
95
  input_video.change(
 
4
  import matplotlib.pyplot as plt
5
  from app.app_utils import preprocess_frame_and_predict_aus
6
 
7
+ # Define the AUs associated with stress, anxiety, and happiness
8
  STRESS_AUS = [4, 7, 17, 23, 24]
9
  ANXIETY_AUS = [1, 2, 4, 5, 20]
10
+ HAPPINESS_AUS = [6, 12]
11
 
12
  AU_DESCRIPTIONS = {
13
  1: "Inner Brow Raiser",
14
  2: "Outer Brow Raiser",
15
  4: "Brow Lowerer",
16
  5: "Upper Lid Raiser",
17
+ 6: "Cheek Raiser",
18
  7: "Lid Tightener",
19
+ 12: "Lip Corner Puller",
20
  17: "Chin Raiser",
21
  20: "Lip Stretcher",
22
  23: "Lip Tightener",
 
53
  # Calculate and normalize emotional state scores
54
  stress_score = normalize_score(np.mean([avg_au_intensities[au-1] for au in STRESS_AUS if au <= len(avg_au_intensities)]))
55
  anxiety_score = normalize_score(np.mean([avg_au_intensities[au-1] for au in ANXIETY_AUS if au <= len(avg_au_intensities)]))
56
+ happiness_score = normalize_score(np.mean([avg_au_intensities[au-1] for au in HAPPINESS_AUS if au <= len(avg_au_intensities)]))
57
 
58
  fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
59
 
60
  # Emotional state scores
61
+ states = ['Stress', 'Anxiety', 'Happiness']
62
+ scores = [stress_score, anxiety_score, happiness_score]
63
  bars = ax1.bar(states, scores)
64
  ax1.set_ylim(0, 1)
65
  ax1.set_title('Emotional State Scores')
 
69
  f'{height:.2f}', ha='center', va='bottom')
70
 
71
  # AU intensities
72
+ all_aus = sorted(set(STRESS_AUS + ANXIETY_AUS + HAPPINESS_AUS))
73
  all_aus = [au for au in all_aus if au <= len(avg_au_intensities)]
74
  au_labels = [f"AU{au}\n{AU_DESCRIPTIONS.get(au, '')}" for au in all_aus]
75
  au_values = [avg_au_intensities[au-1] for au in all_aus]
 
90
  gr.Examples(["./assets/videos/fitness.mp4"], inputs=[input_video])
91
  with gr.Column(scale=2):
92
  output_image = gr.Image(label="Processed Frame")
93
+ facs_chart = gr.Plot(label="FACS Analysis for Stress, Anxiety, and Happiness")
94
 
95
  # Automatically trigger the analysis when a video is uploaded
96
  input_video.change(
tabs/__emotion_analysis.py DELETED
@@ -1,36 +0,0 @@
1
- import os
2
- import torch
3
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
- import gradio as gr
5
-
6
- os.environ["TOKENIZERS_PARALLELISM"] = "true"
7
-
8
- emotion_tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
9
- emotion_model = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
10
- emotion_labels = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"]
11
-
12
- def analyze_emotion(text):
13
- try:
14
- inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
15
- outputs = emotion_model(**inputs)
16
- probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
17
- max_prob, max_index = torch.max(probs, dim=1)
18
- return emotion_labels[max_index.item()], f"{max_prob.item():.4f}"
19
- except Exception as e:
20
- print(f"Error in emotion analysis: {e}")
21
- return "Error", "N/A"
22
-
23
- def create_emotion_tab():
24
- with gr.Row():
25
- with gr.Column(scale=2):
26
- input_text = gr.Textbox(value='I actually speak to the expets myself to give you the best value you can get', lines=5, placeholder="Enter text here...", label="Input Text")
27
- with gr.Row():
28
- clear_btn = gr.Button("Clear", scale=1)
29
- submit_btn = gr.Button("Analyze", scale=1, elem_classes="submit")
30
- with gr.Column(scale=1):
31
- output_emotion = gr.Textbox(label="Detected Emotion")
32
- output_confidence = gr.Textbox(label="Emotion Confidence Score")
33
-
34
- submit_btn.click(analyze_emotion, inputs=[input_text], outputs=[output_emotion, output_confidence])
35
- clear_btn.click(lambda: ("", "", ""), outputs=[input_text, output_emotion, output_confidence])
36
- gr.Examples(["I am so happy today!", "I feel terrible and sad.", "This is a neutral statement."], inputs=[input_text])
tabs/__pycache__/FACS_analysis.cpython-310.pyc CHANGED
Binary files a/tabs/__pycache__/FACS_analysis.cpython-310.pyc and b/tabs/__pycache__/FACS_analysis.cpython-310.pyc differ
 
tabs/__pycache__/deception_detection.cpython-310.pyc ADDED
Binary file (17.9 kB). View file
 
tabs/__pycache__/heart_rate_variability.cpython-310.pyc CHANGED
Binary files a/tabs/__pycache__/heart_rate_variability.cpython-310.pyc and b/tabs/__pycache__/heart_rate_variability.cpython-310.pyc differ
 
tabs/__pycache__/speech_stress_analysis.cpython-310.pyc CHANGED
Binary files a/tabs/__pycache__/speech_stress_analysis.cpython-310.pyc and b/tabs/__pycache__/speech_stress_analysis.cpython-310.pyc differ
 
tabs/__pycache__/speech_stress_analysis.cpython-312.pyc ADDED
Binary file (10.5 kB). View file
 
tabs/__sentiment_analysis.py DELETED
@@ -1,36 +0,0 @@
1
- import os
2
- import torch
3
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
- import gradio as gr
5
-
6
- os.environ["TOKENIZERS_PARALLELISM"] = "true"
7
-
8
- sentiment_tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
9
- sentiment_model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
10
- sentiment_labels = ["very negative", "negative", "neutral", "positive", "very positive"]
11
-
12
- def analyze_sentiment(text):
13
- try:
14
- inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
15
- outputs = sentiment_model(**inputs)
16
- probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
17
- max_prob, max_index = torch.max(probs, dim=1)
18
- return sentiment_labels[max_index.item()], f"{max_prob.item():.4f}"
19
- except Exception as e:
20
- print(f"Error in sentiment analysis: {e}")
21
- return "Error", "N/A"
22
-
23
- def create_sentiment_tab():
24
- with gr.Row():
25
- with gr.Column(scale=2):
26
- input_text = gr.Textbox(value="I actually speak to the experts myself to give you the best value you can get", lines=5, placeholder="Enter text here...", label="Input Text")
27
- with gr.Row():
28
- clear_btn = gr.Button("Clear", scale=1)
29
- submit_btn = gr.Button("Analyze", scale=1, elem_classes="submit")
30
- with gr.Column(scale=1):
31
- output_sentiment = gr.Textbox(label="Detected Sentiment")
32
- output_confidence = gr.Textbox(label="Sentiment Confidence Score")
33
-
34
- submit_btn.click(analyze_sentiment, inputs=[input_text], outputs=[output_sentiment, output_confidence], queue=True)
35
- clear_btn.click(lambda: ("", "", ""), outputs=[input_text, output_sentiment, output_confidence], queue=True)
36
- gr.Examples(["I am so happy today!", "I feel terrible and sad.", "This is a neutral statement."], inputs=[input_text])
tabs/deception_detection.py ADDED
@@ -0,0 +1,601 @@
1
+ # tabs/deception_detection.py
2
+
3
+ import gradio as gr
4
+ import cv2
5
+ import numpy as np
6
+ import matplotlib.pyplot as plt
7
+ from scipy.signal import butter, filtfilt, find_peaks
8
+ from typing import Tuple, Optional, Dict
9
+ import logging
10
+ from dataclasses import dataclass
11
+ from enum import Enum
12
+ import librosa
13
+ import moviepy.editor as mp
14
+ import os
15
+ import tempfile
16
+ import torch
17
+ import torch.nn as nn
18
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
19
+ import mediapipe as mp_mediapipe
20
+ import re
21
+
22
+ # Configure logging
23
+ logging.basicConfig(level=logging.INFO)
24
+ logger = logging.getLogger(__name__)
25
+
26
+ # Define Enums and DataClasses
27
+ class DeceptionLevel(Enum):
28
+ LOW = 'Low'
29
+ MODERATE = 'Moderate'
30
+ HIGH = 'High'
31
+
32
+ @dataclass
33
+ class Metric:
34
+ name: str
35
+ threshold: float
36
+ value: float = 0.0
37
+ detected: bool = False
38
+
39
+ def analyze(self, new_value: float):
40
+ self.value = new_value
41
+ self.detected = self.value > self.threshold
42
+
43
+ class SignalProcessor:
44
+ def __init__(self, fs: float):
45
+ self.fs = fs # Sampling frequency
46
+
47
+ def bandpass_filter(self, data: np.ndarray, lowcut: float = 0.75, highcut: float = 3.0) -> np.ndarray:
48
+ """Apply bandpass filter to signal."""
49
+ nyq = 0.5 * self.fs
50
+ low = lowcut / nyq
51
+ high = highcut / nyq
52
+ b, a = butter(2, [low, high], btype='band')
53
+ filtered = filtfilt(b, a, data)
54
+ logger.debug("Applied bandpass filter.")
55
+ return filtered
56
+
57
+ def find_peaks_in_signal(self, signal: np.ndarray) -> np.ndarray:
58
+ """Find peaks in the signal."""
59
+ min_distance = int(60 / 180 * self.fs)  # Minimum spacing between peaks, which caps detected heart rate at 180 BPM
60
+ peaks, _ = find_peaks(signal, distance=min_distance)
61
+ logger.debug(f"Detected {len(peaks)} peaks in the signal.")
62
+ return peaks
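A minimal usage sketch for SignalProcessor, assuming a 30 fps capture and a synthetic PPG trace with a ~72 BPM component; this is not part of the commit, only an illustration of the filter-then-peak-pick pipeline used further down.

    # Exercise SignalProcessor on synthetic data (assumed 30 fps, 20 s of frames).
    import numpy as np

    fs = 30.0
    t = np.arange(0, 20, 1 / fs)
    ppg = 0.5 * np.sin(2 * np.pi * 1.2 * t) + 0.05 * np.random.randn(t.size)  # ~72 BPM plus noise

    proc = SignalProcessor(fs)
    filtered = proc.bandpass_filter(ppg)          # keep 0.75-3.0 Hz (45-180 BPM)
    peaks = proc.find_peaks_in_signal(filtered)

    rr_ms = np.diff(peaks) / fs * 1000            # RR intervals in milliseconds
    bpm = 60 * fs / np.diff(peaks)                # instantaneous heart rate per beat
    print(f"{len(peaks)} peaks, mean HR ~ {bpm.mean():.0f} BPM")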
63
+
64
+ class DeceptionAnalyzer:
65
+ def __init__(self):
66
+ self.metrics = {
67
+ "HRV Suppression": Metric("HRV Suppression", threshold=30.0),
68
+ "Heart Rate Elevation": Metric("Heart Rate Elevation", threshold=100.0),
69
+ "Rhythm Irregularity": Metric("Rhythm Irregularity", threshold=0.1),
70
+ "Blink Rate": Metric("Blink Rate", threshold=25.0),
71
+ "Head Movements": Metric("Head Movements", threshold=10.0),
72
+ "Speech Stress": Metric("Speech Stress", threshold=0.5),
73
+ "Speech Pitch Variation": Metric("Speech Pitch Variation", threshold=50.0),
74
+ "Pauses and Hesitations": Metric("Pauses and Hesitations", threshold=2.0),
75
+ "Filler Words": Metric("Filler Words", threshold=5.0),
76
+ }
77
+
78
+ def analyze_signals(self, heart_rate: np.ndarray, rr_intervals: np.ndarray, hrv_rmssd: float,
79
+ speech_features: Dict[str, float], facial_features: Dict[str, float]) -> Tuple[Dict[str, Dict], float, DeceptionLevel]:
80
+ """
81
+ Analyze the extracted signals and compute deception probability.
82
+ """
83
+ # Analyze HRV Suppression
84
+ self.metrics["HRV Suppression"].analyze(hrv_rmssd)
85
+
86
+ # Analyze Heart Rate Elevation
87
+ avg_heart_rate = np.mean(heart_rate)
88
+ self.metrics["Heart Rate Elevation"].analyze(avg_heart_rate)
89
+
90
+ # Analyze Rhythm Irregularity
91
+ rhythm_irregularity = np.std(rr_intervals) / np.mean(rr_intervals)
92
+ self.metrics["Rhythm Irregularity"].analyze(rhythm_irregularity)
93
+
94
+ # Analyze Speech Features
95
+ for key in ["Speech Stress", "Speech Pitch Variation", "Pauses and Hesitations", "Filler Words"]:
96
+ if key in speech_features:
97
+ self.metrics[key].analyze(speech_features[key])
98
+
99
+ # Analyze Facial Features
100
+ # Placeholder values; in actual implementation, replace with real values
101
+ self.metrics["Blink Rate"].analyze(facial_features.get("Blink Rate", 0))
102
+ self.metrics["Head Movements"].analyze(facial_features.get("Head Movements", 0))
103
+
104
+ # Calculate deception probability
105
+ detected_indicators = sum(1 for m in self.metrics.values() if m.detected)
106
+ total_indicators = len(self.metrics)
107
+ probability = (detected_indicators / total_indicators) * 100
108
+
109
+ # Determine deception level
110
+ if probability < 30:
111
+ level = DeceptionLevel.LOW
112
+ elif probability < 70:
113
+ level = DeceptionLevel.MODERATE
114
+ else:
115
+ level = DeceptionLevel.HIGH
116
+
117
+ # Prepare metrics for visualization
118
+ metrics_data = {name: {
119
+ "value": m.value,
120
+ "threshold": m.threshold,
121
+ "detected": m.detected
122
+ } for name, m in self.metrics.items()}
123
+
124
+ return metrics_data, probability, level
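As a quick check of the scoring above: with the nine indicators registered in __init__, each detection contributes one ninth of the probability.

    # Worked example (illustrative): 4 of 9 indicators over threshold
    #   probability = 4 / 9 * 100 ≈ 44.4%
    #   44.4% falls in the 30-70% band -> DeceptionLevel.MODERATE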
125
+
126
+ def load_transcription_model(model_name: str) -> Optional[torch.nn.Module]:
127
+ """
128
+ Load the speech-to-text transcription model.
129
+ """
130
+ try:
131
+ model = Wav2Vec2ForCTC.from_pretrained(
132
+ model_name,
133
+ ignore_mismatched_sizes=True
134
+ )
135
+ model.eval()
136
+ logger.info("Transcription model loaded successfully.")
137
+ return model
138
+ except Exception as e:
139
+ logger.error(f"Error loading transcription model: {e}")
140
+ return None
141
+
142
+ def load_models() -> Dict[str, torch.nn.Module]:
143
+ """
144
+ Load all necessary models for the deception detection system.
145
+ """
146
+ models_dict = {}
147
+ try:
148
+ # Load Transcription Model
149
+ transcription_model_name = 'facebook/wav2vec2-base-960h'
150
+ transcription_model = load_transcription_model(transcription_model_name)
151
+ if transcription_model:
152
+ models_dict['transcription_model'] = transcription_model
153
+
154
+ except Exception as e:
155
+ logger.error(f"Error loading models: {e}")
156
+
157
+ return models_dict
158
+
159
+ def transcribe_audio(audio_path: str, transcription_model: nn.Module) -> str:
160
+ """
161
+ Transcribe audio to text using Wav2Vec2 model.
162
+ """
163
+ try:
164
+ tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
165
+ y, sr = librosa.load(audio_path, sr=16000)
166
+ input_values = tokenizer(y, return_tensors="pt", padding="longest").input_values
167
+
168
+ with torch.no_grad():
169
+ logits = transcription_model(input_values).logits
170
+
171
+ predicted_ids = torch.argmax(logits, dim=-1)
172
+ transcription = tokenizer.decode(predicted_ids[0])
173
+
174
+ # Clean transcription
175
+ transcription = transcription.lower()
176
+ transcription = re.sub(r'[^a-z\s]', '', transcription)
177
+
178
+ return transcription
179
+ except Exception as e:
180
+ logger.error(f"Error transcribing audio: {str(e)}")
181
+ return ""
182
+
183
+ def detect_silence(y: np.ndarray, sr: int, top_db: int = 30) -> float:
184
+ """
185
+ Detect total duration of silence in the audio.
186
+ """
187
+ try:
188
+ intervals = librosa.effects.split(y, top_db=top_db)
189
+ silence_duration = 0.0
190
+ prev_end = 0
191
+ for start, end in intervals:
192
+ silence = (start - prev_end) / sr
193
+ silence_duration += silence
194
+ prev_end = end
195
+ # Add silence after the last interval
196
+ silence_duration += (len(y) - prev_end) / sr
197
+ return silence_duration
198
+ except Exception as e:
199
+ logger.error(f"Error detecting silence: {str(e)}")
200
+ return 0.0
201
+
202
+ def count_filler_words(transcription: str) -> int:
203
+ """
204
+ Count the number of filler words in the transcription.
205
+ """
206
+ filler_words_list = ['um', 'uh', 'er', 'ah', 'like', 'you know', 'so']
207
+ return sum(transcription.split().count(word) for word in filler_words_list)
208
+
209
+ def analyze_speech(audio_path: str, transcription_model: nn.Module) -> Dict[str, float]:
210
+ """
211
+ Analyze speech from the audio file and extract features.
212
+ """
213
+ if not audio_path:
214
+ logger.warning("No audio path provided.")
215
+ return {}
216
+
217
+ try:
218
+ # Load audio file
219
+ y, sr = librosa.load(audio_path, sr=16000) # Ensure consistent sampling rate
220
+ logger.info(f"Loaded audio file with sampling rate: {sr} Hz")
221
+
222
+ # Extract prosodic features
223
+ pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
224
+ pitch_values = pitches[magnitudes > np.median(magnitudes)]
225
+ avg_pitch = np.mean(pitch_values) if len(pitch_values) > 0 else 0.0
226
+ pitch_variation = np.std(pitch_values) if len(pitch_values) > 0 else 0.0
227
+
228
+ # Calculate speech stress based on pitch variation
229
+ speech_stress = pitch_variation / (avg_pitch if avg_pitch != 0 else 1)
230
+
231
+ # Calculate speech rate (words per minute)
232
+ transcription = transcribe_audio(audio_path, transcription_model)
233
+ words = transcription.split()
234
+ duration_minutes = librosa.get_duration(y=y, sr=sr) / 60
235
+ speech_rate = len(words) / duration_minutes if duration_minutes > 0 else 0.0
236
+
237
+ # Detect pauses and hesitations
238
+ silence_duration = detect_silence(y, sr)
239
+ filler_words = count_filler_words(transcription)
240
+
241
+ logger.info(f"Speech Analysis - Avg Pitch: {avg_pitch:.2f} Hz, Pitch Variation: {pitch_variation:.2f} Hz")
242
+ logger.info(f"Speech Stress Level: {speech_stress:.2f}")
243
+ logger.info(f"Speech Rate: {speech_rate:.2f} WPM")
244
+ logger.info(f"Silence Duration: {silence_duration:.2f} seconds")
245
+ logger.info(f"Filler Words Count: {filler_words}")
246
+
247
+ # Return extracted features
248
+ return {
249
+ "Speech Stress": speech_stress,
250
+ "Speech Pitch Variation": pitch_variation,
251
+ "Pauses and Hesitations": silence_duration,
252
+ "Filler Words": filler_words
253
+ }
254
+
255
+ except Exception as e:
256
+ logger.error(f"Error analyzing speech: {str(e)}")
257
+ return {}
258
+
259
+ def extract_audio_from_video(video_path: str) -> Optional[str]:
260
+ """
261
+ Extract audio from the video file and save it as a temporary WAV file.
262
+ """
263
+ if not video_path:
264
+ logger.warning("No video path provided for audio extraction.")
265
+ return None
266
+
267
+ try:
268
+ video_clip = mp.VideoFileClip(video_path)
269
+ if video_clip.audio is None:
270
+ logger.warning("No audio track found in the video.")
271
+ video_clip.close()
272
+ return None
273
+
274
+ temp_audio_fd, temp_audio_path = tempfile.mkstemp(suffix=".wav")
275
+ os.close(temp_audio_fd) # Close the file descriptor
276
+
277
+ video_clip.audio.write_audiofile(temp_audio_path, logger=None)
278
+ video_clip.close()
279
+
280
+ logger.info(f"Extracted audio to temporary file: {temp_audio_path}")
281
+ return temp_audio_path
282
+
283
+ except Exception as e:
284
+ logger.error(f"Error extracting audio from video: {str(e)}")
285
+ return None
286
+
287
+ def detect_blink(face_landmarks, frame: np.ndarray) -> float:
288
+ """
289
+ Detect blink rate from facial landmarks.
290
+ Placeholder implementation.
291
+ """
292
+ # Implement Eye Aspect Ratio (EAR) or other blink detection methods
293
+ return np.random.uniform(10, 20) # Example blink rate
294
+
295
+ def estimate_head_movement(face_landmarks) -> float:
296
+ """
297
+ Estimate head movements based on facial landmarks.
298
+ Placeholder implementation.
299
+ """
300
+ # Implement head pose estimation to detect nods/shakes
301
+ return np.random.uniform(5, 15) # Example head movements
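detect_blink and estimate_head_movement above are placeholders that return random values. A hedged sketch of the Eye Aspect Ratio (EAR) approach mentioned in the comment follows; the FaceMesh landmark indices and the 0.21 threshold are commonly used values, not constants taken from this repository.

    # EAR sketch on MediaPipe FaceMesh landmarks (indices and threshold are assumptions to tune).
    import numpy as np

    LEFT_EYE = [33, 160, 158, 133, 153, 144]  # p1..p6 around the left eye

    def eye_aspect_ratio(face_landmarks, frame_shape, indices=LEFT_EYE):
        h, w = frame_shape[:2]
        pts = np.array([(face_landmarks.landmark[i].x * w,
                         face_landmarks.landmark[i].y * h) for i in indices])
        vertical = np.linalg.norm(pts[1] - pts[5]) + np.linalg.norm(pts[2] - pts[4])
        horizontal = np.linalg.norm(pts[0] - pts[3])
        return vertical / (2.0 * horizontal + 1e-6)

    def is_blinking(face_landmarks, frame, ear_threshold=0.21):
        return eye_aspect_ratio(face_landmarks, frame.shape) < ear_threshold

A per-minute blink rate would then be the count of EAR dips below the threshold divided by the clip duration, rather than the random value returned by the placeholder.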
302
+
303
+ def create_visualization(metrics: Dict, probability: float, heart_rate: np.ndarray,
304
+ duration: float, level: DeceptionLevel, speech_features: Dict[str, float]) -> plt.Figure:
305
+ """
306
+ Create visualization of analysis results.
307
+ """
308
+ # Set figure style parameters
309
+ plt.style.use('default')
310
+ plt.rcParams.update({
311
+ 'figure.facecolor': 'white',
312
+ 'axes.facecolor': 'white',
313
+ 'grid.color': '#E0E0E0',
314
+ 'grid.linestyle': '-',
315
+ 'grid.alpha': 0.3,
316
+ 'font.size': 10,
317
+ 'axes.labelsize': 10,
318
+ 'axes.titlesize': 12,
319
+ 'figure.titlesize': 14,
320
+ 'font.family': ['DejaVu Sans', 'Arial', 'sans-serif']
321
+ })
322
+
323
+ # Create figure and axes
324
+ fig = plt.figure(figsize=(12, 20))
325
+
326
+ # Create polar plot for deception probability gauge
327
+ ax1 = fig.add_subplot(4, 1, 1, projection='polar')
328
+
329
+ # Create other subplots
330
+ ax2 = fig.add_subplot(4, 1, 2)
331
+ ax3 = fig.add_subplot(4, 1, 3)
332
+ ax4 = fig.add_subplot(4, 1, 4)
333
+
334
+ # Plot 1: Deception Probability Gauge
335
+ # Create gauge plot
336
+ theta = np.linspace(0, np.pi, 100)
337
+ radius = np.ones(100)
338
+ ax1.plot(theta, radius, color='#E0E0E0', linewidth=30, alpha=0.3)
339
+ current_angle = (probability / 100) * np.pi
340
+ ax1.plot([0, current_angle], [0, 0.7], color='red', linewidth=5)
341
+ ax1.set_xticks([])
342
+ ax1.set_yticks([])
343
+ ax1.set_title(f'Deception Probability: {probability:.1f}% ({level.value})', pad=20, color='#333333')
344
+ ax1.set_theta_zero_location('N')
345
+ ax1.set_facecolor('white')
346
+ ax1.grid(False)
347
+ ax1.spines['polar'].set_visible(False)
348
+
349
+ # Plot 2: Metrics Bar Chart
350
+ names = list(metrics.keys())
351
+ values = [m["value"] for m in metrics.values()]
352
+ thresholds = [m["threshold"] for m in metrics.values()]
353
+ detected = [m["detected"] for m in metrics.values()]
354
+ x = np.arange(len(names))
355
+ width = 0.35
356
+ bar_colors = ['#FF6B6B' if d else '#4BB543' for d in detected]
357
+ ax2.bar(x - width/2, values, width, label='Current', color=bar_colors)
358
+ ax2.bar(x + width/2, thresholds, width, label='Threshold', color='#E0E0E0', alpha=0.7)
359
+ ax2.set_ylabel('Value')
360
+ ax2.set_title('Physiological, Facial, and Speech Indicators', pad=20)
361
+ ax2.set_xticks(x)
362
+ ax2.set_xticklabels(names, rotation=45, ha='right')
363
+ ax2.grid(True, axis='y', alpha=0.3)
364
+ ax2.legend(loc='upper right', framealpha=0.9)
365
+
366
+ # Plot 3: Heart Rate Over Time
367
+ time_axis = np.linspace(0, duration, len(heart_rate))
368
+ ax3.plot(time_axis, heart_rate, color='#3498db')
369
+ ax3.set_xlabel('Time (s)')
370
+ ax3.set_ylabel('Heart Rate (BPM)')
371
+ ax3.set_title('Heart Rate Over Time', pad=20)
372
+ ax3.grid(True, alpha=0.3)
373
+
374
+ # Plot 4: Speech Features
375
+ pauses = speech_features.get("Pauses and Hesitations", 0)
376
+ filler_words = speech_features.get("Filler Words", 0)
377
+ labels = ['Pauses (s)', 'Filler Words (count)']
378
+ values = [pauses, filler_words]
379
+ colors = ['#FFC300', '#FF5733']
380
+ ax4.bar(labels, values, color=colors)
381
+ ax4.set_ylabel('Count / Duration')
382
+ ax4.set_title('Pauses and Hesitations in Speech', pad=20)
383
+ ax4.grid(True, axis='y', alpha=0.3)
384
+
385
+ plt.tight_layout()
386
+ return fig
387
+
388
+ def process_video_and_audio(video_path: str, models: Dict[str, torch.nn.Module]) -> Tuple[Optional[np.ndarray], Optional[plt.Figure]]:
389
+ """
390
+ Process video and audio, perform deception analysis.
391
+ """
392
+ logger.info("Starting video and audio processing.")
393
+ if not video_path:
394
+ logger.warning("No video path provided.")
395
+ return None, None
396
+
397
+ try:
398
+ # Extract audio from video
399
+ audio_path = extract_audio_from_video(video_path)
400
+ if not audio_path:
401
+ logger.warning("No audio available for speech analysis.")
402
+
403
+ # Initialize video capture
404
+ cap = cv2.VideoCapture(video_path)
405
+ if not cap.isOpened():
406
+ logger.error("Failed to open video file.")
407
+ return None, None
408
+
409
+ fps = cap.get(cv2.CAP_PROP_FPS)
410
+ if fps <= 0 or fps != fps:  # fps != fps is True only when fps is NaN
411
+ logger.error("Invalid frame rate detected.")
412
+ cap.release()
413
+ return None, None
414
+ logger.info(f"Video FPS: {fps}")
415
+
416
+ # Initialize processors
417
+ signal_processor = SignalProcessor(fps)
418
+ analyzer = DeceptionAnalyzer()
419
+ ppg_signal = []
420
+ last_frame = None
421
+
422
+ # Initialize Mediapipe for real-time facial feature extraction
423
+ mp_face_mesh = mp_mediapipe.solutions.face_mesh
424
+ face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1)
425
+ frame_counter = 0
426
+
427
+ # Process video frames
428
+ while True:
429
+ ret, frame = cap.read()
430
+ if not ret:
431
+ break
432
+
433
+ frame_counter += 1
434
+
435
+ # Extract PPG signal from green channel
436
+ frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
437
+ green_channel = frame_rgb[:, :, 1]
438
+ ppg_signal.append(np.mean(green_channel))
439
+
440
+ # Extract facial features
441
+ results = face_mesh.process(frame_rgb)
442
+ if results.multi_face_landmarks:
443
+ face_landmarks = results.multi_face_landmarks[0]
444
+ # Blink Detection
445
+ blink = detect_blink(face_landmarks, frame)
446
+ analyzer.metrics["Blink Rate"].analyze(blink)
447
+
448
+ # Head Movement Detection
449
+ head_movement = estimate_head_movement(face_landmarks)
450
+ analyzer.metrics["Head Movements"].analyze(head_movement)
451
+ else:
452
+ analyzer.metrics["Blink Rate"].analyze(0.0)
453
+ analyzer.metrics["Head Movements"].analyze(0.0)
454
+
455
+ # Store last frame
456
+ last_frame = cv2.resize(frame_rgb, (320, 240))
457
+
458
+ # Optional: Log progress every 100 frames
459
+ if frame_counter % 100 == 0:
460
+ logger.info(f"Processed {frame_counter} frames.")
461
+
462
+ cap.release()
463
+ face_mesh.close()
464
+ logger.info(f"Total frames processed: {frame_counter}")
465
+
466
+ if not ppg_signal or last_frame is None:
467
+ logger.error("No PPG signal extracted or last frame missing.")
468
+ return last_frame, None
469
+
470
+ # Convert PPG signal to numpy array
471
+ ppg_signal = np.array(ppg_signal)
472
+ logger.debug("PPG signal extracted.")
473
+
474
+ # Apply bandpass filter
475
+ filtered_signal = signal_processor.bandpass_filter(ppg_signal)
476
+ logger.debug("Filtered PPG signal.")
477
+
478
+ # Find peaks in the filtered signal
479
+ peaks = signal_processor.find_peaks_in_signal(filtered_signal)
480
+
481
+ if len(peaks) < 2:
482
+ logger.warning("Insufficient peaks detected. Signal quality may be poor.")
483
+ return last_frame, None # Return last_frame but no analysis
484
+
485
+ # Calculate RR intervals in milliseconds
486
+ rr_intervals = np.diff(peaks) / fps * 1000 # ms
487
+ heart_rate = 60 * fps / np.diff(peaks) # BPM
488
+
489
+ if len(rr_intervals) == 0 or len(heart_rate) == 0:
490
+ logger.error("Failed to calculate RR intervals or heart rate.")
491
+ return last_frame, None
492
+
493
+ # Calculate RMSSD (Root Mean Square of Successive Differences)
494
+ hrv_rmssd = np.sqrt(np.mean(np.diff(rr_intervals) ** 2))
495
+ logger.debug(f"Calculated RMSSD: {hrv_rmssd:.2f} ms")
496
+
497
+ # Analyze speech
498
+ if audio_path and 'transcription_model' in models:
499
+ speech_features = analyze_speech(audio_path, models['transcription_model'])
500
+ else:
501
+ speech_features = {}
502
+
503
+ # Analyze signals
504
+ metrics, probability, level = analyzer.analyze_signals(
505
+ heart_rate, rr_intervals, hrv_rmssd, speech_features,
506
+ {}
507
+ )
508
+
509
+ # Create visualization
510
+ duration = len(ppg_signal) / fps # seconds
511
+ fig = create_visualization(
512
+ metrics, probability, heart_rate,
513
+ duration, level, speech_features
514
+ )
515
+
516
+ # Clean up temporary audio file if it was extracted
517
+ if audio_path and os.path.exists(audio_path):
518
+ try:
519
+ os.remove(audio_path)
520
+ logger.info(f"Deleted temporary audio file: {audio_path}")
521
+ except Exception as e:
522
+ logger.error(f"Error deleting temporary audio file: {str(e)}")
523
+
524
+ logger.info("Video and audio processing completed successfully.")
525
+ return last_frame, fig
526
+
527
+ except Exception as e:
528
+ logger.error(f"Error processing video and audio: {str(e)}")
529
+ return None, None
530
+
531
+ def create_deception_detection_tab(models: Dict[str, torch.nn.Module]) -> gr.Blocks:
532
+ """
533
+ Create the deception detection interface tab using Gradio.
534
+ """
535
+ def analyze(video):
536
+ try:
537
+ if video is None:
538
+ return None, None
539
+ video_path = video
540
+ logger.info(f"Received video for analysis: {video_path}")
541
+
542
+ if not os.path.exists(video_path):
543
+ logger.error("Video file does not exist.")
544
+ return None, None
545
+
546
+ last_frame, fig = process_video_and_audio(video_path, models)
547
+ if fig:
548
+ return last_frame, fig
549
+ else:
550
+ return last_frame, None
551
+ except Exception as e:
552
+ logger.error(f"Error in analyze function: {str(e)}")
553
+ return None, None
554
+
555
+ with gr.Blocks() as deception_interface:
556
+ with gr.Row():
557
+ with gr.Column(scale=1):
558
+ input_video = gr.Video(label="Upload Video for Deception Analysis")
559
+ gr.Markdown("""
560
+ ### Deception Level Analysis
561
+
562
+ This analysis evaluates physiological, facial, and speech indicators
563
+ that may suggest deceptive behavior.
564
+
565
+ **Physiological Indicators:**
566
+ - ◇ HRV Suppression
567
+ - ◇ Heart Rate Elevation
568
+ - ◇ Rhythm Irregularity
569
+
570
+ **Facial Indicators:**
571
+ - ◇ Blink Rate
572
+ - ◇ Head Movements
573
+
574
+ **Speech Indicators:**
575
+ - ◇ Speech Stress
576
+ - ◇ Speech Pitch Variation
577
+ - ◇ Pauses and Hesitations
578
+ - ◇ Filler Words
579
+
580
+ **Interpretation:**
581
+ - **Low (0-30%):** Minimal indicators
582
+ - **Moderate (30-70%):** Some indicators
583
+ - **High (>70%):** Strong indicators
584
+
585
+ **Important Note:**
586
+ This analysis is for research purposes only.
587
+ Results should not be used as definitive proof
588
+ of deception or truthfulness.
589
+ """)
590
+ with gr.Column(scale=2):
591
+ output_frame = gr.Image(label="Last Frame of Video", height=240)
592
+ analysis_plot = gr.Plot(label="Deception Analysis")
593
+
594
+ # Configure automatic analysis upon video upload
595
+ input_video.change(
596
+ fn=analyze,
597
+ inputs=[input_video],
598
+ outputs=[output_frame, analysis_plot]
599
+ )
600
+
601
+ return deception_interface
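The application entry point that mounts this tab is not part of the diff. A hypothetical wiring sketch follows, assuming the factory functions are combined in a top-level script (the file name app.py and the tab names are assumptions):

    # Hypothetical top-level wiring; both factories return gr.Blocks, so they can be
    # handed to gr.TabbedInterface directly.
    import gradio as gr

    from tabs.deception_detection import create_deception_detection_tab, load_models
    from tabs.speech_stress_analysis import create_voice_stress_tab

    models = load_models()  # loads the Wav2Vec2 transcription model once at startup

    demo = gr.TabbedInterface(
        [create_deception_detection_tab(models), create_voice_stress_tab()],
        tab_names=["Deception Detection", "Speech Stress"],
    )

    if __name__ == "__main__":
        demo.launch()

create_heart_rate_variability_tab builds its rows in the caller's context rather than returning a gr.Blocks, so it would need a small Blocks wrapper before being added the same way.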
tabs/heart_rate_variability.py ADDED
@@ -0,0 +1,220 @@
1
+ import gradio as gr
2
+ import cv2
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ from scipy.signal import butter, filtfilt, find_peaks
6
+ import logging
7
+
8
+ # Configure logging
9
+ logging.basicConfig(level=logging.INFO)
10
+ logger = logging.getLogger(__name__)
11
+
12
+ def get_stress_level(rmssd, hr_mean, hr_std):
13
+ """
14
+ Calculate stress level based on HRV parameters.
15
+ Returns both numerical value (0-100) and category.
16
+ """
17
+ # RMSSD factor (lower RMSSD = higher stress)
18
+ rmssd_normalized = max(0, min(100, (150 - rmssd) / 1.5))
19
+
20
+ # Heart rate factor (higher HR = higher stress)
21
+ hr_factor = max(0, min(100, (hr_mean - 60) * 2))
22
+
23
+ # Heart rate variability factor (lower variability = higher stress)
24
+ hr_variability_factor = max(0, min(100, hr_std * 5))
25
+
26
+ # Combine factors with weights
27
+ stress_value = (0.4 * rmssd_normalized +
28
+ 0.4 * hr_factor +
29
+ 0.2 * hr_variability_factor)
30
+
31
+ # Determine category
32
+ if stress_value < 30:
33
+ category = "Low"
34
+ elif stress_value < 60:
35
+ category = "Moderate"
36
+ else:
37
+ category = "High"
38
+
39
+ return stress_value, category
40
+
41
+ def get_anxiety_level(value):
42
+ """Get anxiety level category based on value."""
43
+ if value < 30:
44
+ return "Low"
45
+ elif value < 70:
46
+ return "Moderate"
47
+ else:
48
+ return "High"
49
+
50
+ def calculate_anxiety_index(heart_rate, hrv):
51
+ """Calculate anxiety index based on heart rate and HRV."""
52
+ if len(heart_rate) < 2:
53
+ return 0
54
+
55
+ hr_mean = np.mean(heart_rate)
56
+ hr_std = np.std(heart_rate)
57
+
58
+ # Combine factors indicating anxiety
59
+ hr_factor = min(100, max(0, (hr_mean - 60) / 0.4))
60
+ variability_factor = min(100, (hr_std / 20) * 100)
61
+ hrv_factor = min(100, max(0, (100 - hrv) / 1))
62
+
63
+ anxiety_index = (hr_factor + variability_factor + hrv_factor) / 3
64
+ return anxiety_index
65
+
66
+ def process_video_for_hrv(video_path):
67
+ """Process video and extract HRV metrics focusing on stress and anxiety."""
68
+ if not video_path:
69
+ return None, None
70
+
71
+ try:
72
+ cap = cv2.VideoCapture(video_path)
73
+ ppg_signal = []
74
+ fps = cap.get(cv2.CAP_PROP_FPS)
75
+ last_frame = None
76
+
77
+ while True:
78
+ ret, frame = cap.read()
79
+ if not ret:
80
+ break
81
+
82
+ # Extract green channel for PPG
83
+ frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
84
+ green_channel = frame_rgb[:, :, 1]
85
+ ppg_value = np.mean(green_channel)
86
+ ppg_signal.append(ppg_value)
87
+
88
+ # Store last frame for display
89
+ last_frame = cv2.resize(frame_rgb, (320, 240))
90
+
91
+ cap.release()
92
+
93
+ if not ppg_signal or last_frame is None:
94
+ return None, None
95
+
96
+ # Process PPG signal
97
+ ppg_signal = np.array(ppg_signal)
98
+ filtered_signal = filtfilt(*butter(2, [0.5, 5], fs=fps, btype='band'), ppg_signal)
99
+
100
+ # Find peaks for heart rate calculation
101
+ peaks, _ = find_peaks(filtered_signal, distance=int(0.5 * fps))
102
+ if len(peaks) < 2:
103
+ return None, None
104
+
105
+ # Calculate basic metrics
106
+ rr_intervals = np.diff(peaks) / fps * 1000
107
+ heart_rate = 60 * fps / np.diff(peaks)
108
+ hrv_rmssd = np.sqrt(np.mean(np.diff(rr_intervals) ** 2))
109
+
110
+ # Calculate stress and anxiety indices
111
+ hr_mean = np.mean(heart_rate)
112
+ hr_std = np.std(heart_rate)
113
+ stress_value, stress_category = get_stress_level(hrv_rmssd, hr_mean, hr_std)
114
+ anxiety_idx = calculate_anxiety_index(heart_rate, hrv_rmssd)
115
+
116
+ # Create visualization
117
+ fig = plt.figure(figsize=(12, 10))
118
+
119
+ # Plot 1: Stress and Anxiety Levels (top)
120
+ ax1 = plt.subplot(211)
121
+ metrics = ['Stress Level', 'Anxiety Level']
122
+ values = [stress_value, anxiety_idx]
123
+ colors = ['#FF6B6B', '#4D96FF'] # Warm red for stress, cool blue for anxiety
124
+
125
+ bars = ax1.bar(metrics, values, color=colors)
126
+ ax1.set_ylim(0, 100)
127
+ ax1.set_title('Stress and Anxiety Analysis', pad=20)
128
+ ax1.set_ylabel('Level (%)')
129
+
130
+ # Add value labels and status
131
+ for bar, val, metric in zip(bars, values, metrics):
132
+ height = val
133
+ status = stress_category if metric == 'Stress Level' else get_anxiety_level(val)
134
+ ax1.text(bar.get_x() + bar.get_width()/2., height + 1,
135
+ f'{val:.1f}%\n{status}',
136
+ ha='center', va='bottom')
137
+
138
+ # Plot 2: Heart Rate and HRV Trends (bottom)
139
+ ax2 = plt.subplot(212)
140
+ time = np.linspace(0, len(heart_rate), len(heart_rate))
141
+ ax2.plot(time, heart_rate, color='#2ECC71', label='Heart Rate', linewidth=2)
142
+ ax2.set_title('Heart Rate Variation')
143
+ ax2.set_xlabel('Beat Number')
144
+ ax2.set_ylabel('Heart Rate (BPM)')
145
+ ax2.grid(True, alpha=0.3)
146
+
147
+ # Add metrics information with color-coded status
148
+ def get_status_color(category):
149
+ return {
150
+ 'Low': '#2ECC71', # Green
151
+ 'Moderate': '#F1C40F', # Yellow
152
+ 'High': '#E74C3C' # Red
153
+ }.get(category, 'black')
154
+
155
+ info_text = (
156
+ f'HRV (RMSSD): {hrv_rmssd:.1f} ms\n'
157
+ f'Average HR: {hr_mean:.1f} BPM\n'
158
+ f'Recording: {len(ppg_signal)/fps:.1f} s\n\n'
159
+ f'Stress Status: {stress_category}\n'
160
+ f'Anxiety Status: {get_anxiety_level(anxiety_idx)}'
161
+ )
162
+
163
+ # Add metrics box with gradient background
164
+ bbox_props = dict(
165
+ boxstyle='round,pad=0.5',
166
+ facecolor='white',
167
+ alpha=0.8,
168
+ edgecolor='gray'
169
+ )
170
+
171
+ ax2.text(0.02, 0.98, info_text,
172
+ transform=ax2.transAxes,
173
+ verticalalignment='top',
174
+ bbox=bbox_props,
175
+ fontsize=10)
176
+
177
+ plt.tight_layout()
178
+
179
+ return last_frame, fig
180
+
181
+ except Exception as e:
182
+ logger.error(f"Error processing video: {str(e)}")
183
+ return None, None
184
+
185
+ def create_heart_rate_variability_tab():
186
+ with gr.Row():
187
+ with gr.Column(scale=1):
188
+ input_video = gr.Video()
189
+ gr.Markdown("""
190
+ ### Stress and Anxiety Analysis
191
+
192
+ **Measurements:**
193
+ - Stress Level (0-100%)
194
+ - Anxiety Level (0-100%)
195
+ - Heart Rate Variability (HRV)
196
+
197
+ **Status Levels:**
198
+ 🟢 Low: Normal state
199
+ 🟡 Moderate: Elevated levels
200
+ 🔴 High: Significant elevation
201
+
202
+ **For best results:**
203
+ 1. Ensure good lighting
204
+ 2. Minimize movement
205
+ 3. Face the camera directly
206
+ """)
207
+ gr.Examples(["./assets/videos/fitness.mp4"], inputs=[input_video])
208
+
209
+ with gr.Column(scale=2):
210
+ output_frame = gr.Image(label="Face Detection", height=240)
211
+ hrv_plot = gr.Plot(label="Stress and Anxiety Analysis")
212
+
213
+ # Automatically trigger analysis on video upload
214
+ input_video.change(
215
+ fn=process_video_for_hrv,
216
+ inputs=[input_video],
217
+ outputs=[output_frame, hrv_plot]
218
+ )
219
+
220
+ return input_video, output_frame, hrv_plot
tabs/speech_stress_analysis.py CHANGED
@@ -2,93 +2,149 @@
2
 
3
  import gradio as gr
4
  import librosa
5
- import librosa.display
6
  import numpy as np
7
  import matplotlib.pyplot as plt
8
  import tempfile
9
  import warnings
10
 
11
- # Suppress specific warnings from transformers if needed
12
- warnings.filterwarnings("ignore", category=UserWarning, module='transformers')
13
 
14
  def extract_audio_features(audio_file):
15
  y, sr = librosa.load(audio_file, sr=None)
16
- mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
17
- pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
18
- pitches = pitches[(magnitudes > np.median(magnitudes)) & (pitches > 0)]
 
 
 
19
  energy = librosa.feature.rms(y=y)[0]
20
- return mfccs, pitches, energy, y, sr
 
 
 
 
 
 
 
 
 
21
 
22
  def analyze_voice_stress(audio_file):
23
  if not audio_file:
24
- return "No audio file provided.", None
25
 
26
  try:
27
- mfccs, pitches, energy, y, sr = extract_audio_features(audio_file)
28
-
29
- # Calculate variances
30
- var_mfccs = np.var(mfccs, axis=1).mean() # Mean variance across MFCC coefficients
31
- var_energy = np.var(energy) # Variance of RMS energy
32
- var_pitches = np.var(pitches) if len(pitches) > 0 else 0 # Variance of pitches if present
33
-
34
- # Debugging: Print individual variances
35
- print(f"Variance MFCCs (mean across coefficients): {var_mfccs}")
36
- print(f"Variance Energy: {var_energy}")
37
- print(f"Variance Pitches: {var_pitches}")
38
-
39
- # Normalize each variance using Z-Score Standardization
40
- mfccs_mean = 1000
41
- mfccs_std = 500
42
- energy_mean = 0.005
43
- energy_std = 0.005
44
- pitches_mean = 500000
45
- pitches_std = 200000
46
-
47
- norm_var_mfccs = (var_mfccs - mfccs_mean) / mfccs_std
48
- norm_var_energy = (var_energy - energy_mean) / energy_std
49
- norm_var_pitches = (var_pitches - pitches_mean) / pitches_std if var_pitches > 0 else 0
50
-
51
- # Debugging: Print normalized variances
52
- print(f"Normalized Variance MFCCs: {norm_var_mfccs}")
53
- print(f"Normalized Variance Energy: {norm_var_energy}")
54
- print(f"Normalized Variance Pitches: {norm_var_pitches}")
55
-
56
- # Combine normalized variances
57
- stress_level = np.mean([
58
- norm_var_mfccs,
59
- norm_var_energy,
60
- norm_var_pitches
61
- ]) if var_pitches > 0 else np.mean([norm_var_mfccs, norm_var_energy])
62
-
63
- # Debugging: Print stress_level before normalization
64
- print(f"Calculated Stress Level (before scaling): {stress_level}")
65
-
66
- # Scale to 0-100%
67
- normalized_stress = (stress_level + 3) / 6 * 100 # Maps -3 to 0%, +3 to 100%
68
- normalized_stress = np.clip(normalized_stress, 0, 100) # Ensure within 0-100%
69
-
70
- # Debugging: Print normalized_stress
71
- print(f"Normalized Stress Level: {normalized_stress}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  # Plotting
74
- fig, axs = plt.subplots(3, 1, figsize=(10, 12))
75
-
76
- # MFCCs
77
- img_mfcc = librosa.display.specshow(mfccs, sr=sr, x_axis='time', ax=axs[0])
78
- axs[0].set_title('MFCCs')
79
- axs[0].set_ylabel('MFCC Coefficient')
80
- fig.colorbar(img_mfcc, ax=axs[0])
81
-
82
- # Pitch
83
- axs[1].plot(pitches)
84
- axs[1].set_title('Pitch')
85
- axs[1].set_ylabel('Frequency (Hz)')
86
-
87
- # Energy
88
- axs[2].plot(energy)
89
- axs[2].set_title('Energy (RMS)')
90
- axs[2].set_ylabel('RMS Energy')
91
- axs[2].set_xlabel('Frames')
 
 
 
 
 
 
 
 
 
 
92
 
93
  plt.tight_layout()
94
  with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_file:
@@ -96,31 +152,97 @@ def analyze_voice_stress(audio_file):
96
  plot_path = temp_file.name
97
  plt.close()
98
 
99
- # Interpretation
100
- if normalized_stress < 33:
101
- stress_interpretation = "Low"
102
- elif normalized_stress < 66:
103
- stress_interpretation = "Medium"
104
- else:
105
- stress_interpretation = "High"
106
 
107
- return f"{normalized_stress:.2f}% - {stress_interpretation} Stress", plot_path
108
  except Exception as e:
109
- return f"Error: {str(e)}", None
110
-
111
- def create_voice_stress_tab():
112
- with gr.Row():
113
- with gr.Column(scale=2):
114
- input_audio = gr.Audio(label="Input Audio", type="filepath")
115
- clear_btn = gr.Button("Clear", scale=1)
116
- with gr.Column(scale=1):
117
- output_stress = gr.Label(label="Stress Level")
118
- output_plot = gr.Image(label="Stress Analysis Plot")
119
-
120
- # Automatically trigger analysis when an audio file is uploaded
121
- input_audio.change(analyze_voice_stress, inputs=[input_audio], outputs=[output_stress, output_plot])
122
 
123
- clear_btn.click(lambda: (None, None), outputs=[input_audio, output_stress, output_plot])
124
-
125
- gr.Examples(["./assets/audio/fitness.wav"], inputs=[input_audio])
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  import gradio as gr
4
  import librosa
+ import librosa.display  # explicit import needed on older librosa versions for the specshow/waveshow calls below
5
  import numpy as np
6
  import matplotlib.pyplot as plt
7
  import tempfile
8
  import warnings
9
 
10
+ warnings.filterwarnings("ignore", category=UserWarning, module='librosa')
 
11
 
12
  def extract_audio_features(audio_file):
13
  y, sr = librosa.load(audio_file, sr=None)
14
+
15
+ # Fundamental frequency estimation using librosa.pyin
16
+ f0, voiced_flag, voiced_probs = librosa.pyin(y, sr=sr, fmin=75, fmax=600)  # use the native sampling rate so pitch estimates are not skewed
17
+ f0 = f0[~np.isnan(f0)] # Remove unvoiced frames
18
+
19
+ # Energy (intensity)
20
  energy = librosa.feature.rms(y=y)[0]
21
+
22
+ # MFCCs (Mel-frequency cepstral coefficients)
23
+ mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
24
+
25
+ # Onset envelope for speech rate estimation
26
+ onset_env = librosa.onset.onset_strength(y=y, sr=sr)
27
+ tempo, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
28
+ speech_rate = tempo / 60  # Rough speech-rate proxy (events per second); beat_track estimates a musical tempo, not a true syllable rate
29
+
30
+ return f0, energy, speech_rate, mfccs, y, sr
31
 
32
  def analyze_voice_stress(audio_file):
33
  if not audio_file:
34
+ return "No audio file provided.", None, None
35
 
36
  try:
37
+ f0, energy, speech_rate, mfccs, y, sr = extract_audio_features(audio_file)
38
+
39
+ # Calculate statistical measures
40
+ mean_f0 = np.mean(f0)
41
+ std_f0 = np.std(f0)
42
+ mean_energy = np.mean(energy)
43
+ std_energy = np.std(energy)
44
+
45
+ # Normative data (example values from medical literature)
46
+ norm_mean_f0_male = 110
47
+ norm_mean_f0_female = 220
48
+ norm_std_f0 = 20
49
+ norm_mean_energy = 0.02
50
+ norm_std_energy = 0.005
51
+ norm_speech_rate = 4.4
52
+ norm_std_speech_rate = 0.5
53
+
54
+ # Gender detection
55
+ gender = 'male' if mean_f0 < 165 else 'female'
56
+ norm_mean_f0 = norm_mean_f0_male if gender == 'male' else norm_mean_f0_female
57
+
58
+ # Compute Z-scores
59
+ z_f0 = (mean_f0 - norm_mean_f0) / norm_std_f0
60
+ z_energy = (mean_energy - norm_mean_energy) / norm_std_energy
61
+ z_speech_rate = (speech_rate - norm_speech_rate) / norm_std_speech_rate
62
+
63
+ # Combine Z-scores for stress level
64
+ stress_score = (0.4 * z_f0) + (0.4 * z_speech_rate) + (0.2 * z_energy)
65
+ stress_level = float(1 / (1 + np.exp(-stress_score)) * 100) # Sigmoid function
66
+
67
+ if stress_level < 20:
68
+ stress_category = "Very Low Stress"
69
+ elif stress_level < 40:
70
+ stress_category = "Low Stress"
71
+ elif stress_level < 60:
72
+ stress_category = "Moderate Stress"
73
+ elif stress_level < 80:
74
+ stress_category = "High Stress"
75
+ else:
76
+ stress_category = "Very High Stress"
77
+
78
+ # More verbose interpretations for each stress category
79
+ interpretations = {
80
+ "Very Low Stress": (
81
+ "Your vocal analysis indicates a very relaxed state. "
82
+ "This suggests that you're currently experiencing minimal stress. "
83
+ "Maintaining such low stress levels is beneficial for your health. "
84
+ "Continue engaging in activities that promote relaxation and well-being. "
85
+ "Regular self-care practices can help sustain this positive state."
86
+ ),
87
+ "Low Stress": (
88
+ "Minor signs of stress are detected in your voice. "
89
+ "This is common due to everyday challenges and is usually not concerning. "
90
+ "Incorporating relaxation techniques, like deep breathing or meditation, may help. "
91
+ "Regular breaks and leisure activities can also reduce stress. "
92
+ "Staying mindful of stress levels supports overall health."
93
+ ),
94
+ "Moderate Stress": (
95
+ "Your voice reflects moderate stress levels. "
96
+ "This could be due to ongoing pressures or challenges you're facing. "
97
+ "Consider practicing stress management strategies such as mindfulness exercises or physical activity. "
98
+ "Identifying stressors and addressing them can be beneficial. "
99
+ "Balancing work and rest is important for your well-being."
100
+ ),
101
+ "High Stress": (
102
+ "Elevated stress levels are apparent in your vocal patterns. "
103
+ "It's important to recognize and address these feelings. "
104
+ "Identifying stressors and seeking support from friends, family, or professionals could be helpful. "
105
+ "Engaging in stress reduction techniques is recommended. "
106
+ "Taking proactive steps can improve your mental and physical health."
107
+ ),
108
+ "Very High Stress": (
109
+ "Your voice suggests very high stress levels. "
110
+ "This may indicate significant strain or anxiety. "
111
+ "It may be helpful to consult a healthcare professional for support. "
112
+ "Promptly addressing stress is important for your well-being. "
113
+ "Consider reaching out to trusted individuals or resources."
114
+ )
115
+ }
116
+
117
+ final_interpretation = interpretations[stress_category]
118
 
119
  # Plotting
120
+ fig, axs = plt.subplots(5, 1, figsize=(10, 15))
121
+
122
+ # Plot Fundamental Frequency (Pitch)
123
+ axs[0].plot(f0)
124
+ axs[0].set_title('Fundamental Frequency (Pitch)')
125
+ axs[0].set_ylabel('Frequency (Hz)')
126
+
127
+ # Plot Energy (Loudness)
128
+ axs[1].plot(energy)
129
+ axs[1].set_title('Energy (Loudness)')
130
+ axs[1].set_ylabel('Energy')
131
+
132
+ # Plot MFCCs
133
+ img = librosa.display.specshow(mfccs, sr=sr, x_axis='time', ax=axs[2])
134
+ axs[2].set_title('MFCCs (Mel-frequency cepstral coefficients)')
135
+ fig.colorbar(img, ax=axs[2])
136
+
137
+ # Plot Waveform
138
+ librosa.display.waveshow(y, sr=sr, ax=axs[3])
139
+ axs[3].set_title('Waveform')
140
+ axs[3].set_xlabel('Time (s)')
141
+ axs[3].set_ylabel('Amplitude')
142
+
143
+ # Plot Pitch Contour (Histogram of f0)
144
+ axs[4].hist(f0, bins=50, color='blue', alpha=0.7)
145
+ axs[4].set_title('Pitch Contour (Histogram of f0)')
146
+ axs[4].set_xlabel('Frequency (Hz)')
147
+ axs[4].set_ylabel('Count')
148
 
149
  plt.tight_layout()
150
  with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_file:
 
152
  plot_path = temp_file.name
153
  plt.close()
154
 
155
+ # Return separate values for Gradio output components
156
+ return f"{stress_level:.2f}% - {stress_category}", final_interpretation, plot_path
 
 
 
 
 
157
 
 
158
  except Exception as e:
159
+ return f"Error: {str(e)}", None, None
 
 
 
 
 
 
 
 
 
 
 
 
160
 
 
 
 
161
 
162
+ def create_voice_stress_tab():
163
+ custom_css = """
164
+ /* General container styling for mobile */
165
+ .gradio-container {
166
+ padding: 10px !important;
167
+ font-size: 16px !important;
168
+ }
169
+
170
+ /* Headings */
171
+ h3 {
172
+ text-align: center;
173
+ font-size: 1.5em !important;
174
+ margin-bottom: 20px !important;
175
+ }
176
+
177
+ /* Full width for audio input and other components */
178
+ .gradio-container .gradio-row, .gradio-container .gradio-column {
179
+ flex-direction: column !important;
180
+ align-items: center !important;
181
+ }
182
+
183
+ /* Make the components scale better on smaller screens */
184
+ #input_audio, #stress_output, #interpretation_output, #plot_output {
185
+ width: 100% !important;
186
+ max-width: 100% !important;
187
+ }
188
+
189
+ #input_audio label, #stress_output label, #interpretation_output label, #plot_output label {
190
+ font-size: 1.2em !important;
191
+ }
192
+
193
+ /* Textbox area adjustment */
194
+ #interpretation_output textarea {
195
+ font-size: 1em !important;
196
+ line-height: 1.4 !important;
197
+ }
198
+
199
+ /* Responsive styling for images */
200
+ #plot_output img {
201
+ width: 100% !important;
202
+ height: auto !important;
203
+ }
204
+
205
+ /* Adjust clear button */
206
+ #clear_btn button {
207
+ font-size: 1em !important;
208
+ padding: 10px 20px !important;
209
+ }
210
+
211
+ /* Responsive adjustments */
212
+ @media only screen and (max-width: 600px) {
213
+ .gradio-container {
214
+ padding: 5px !important;
215
+ font-size: 14px !important;
216
+ }
217
+ h3 {
218
+ font-size: 1.2em !important;
219
+ }
220
+ #clear_btn button {
221
+ font-size: 0.9em !important;
222
+ }
223
+ #interpretation_output textarea {
224
+ font-size: 0.9em !important;
225
+ }
226
+ }
227
+ """
228
+
229
+ with gr.Blocks(css=custom_css) as voice_stress_tab:
230
+ gr.Markdown("<h3>Speech Stress Analysis</h3>")
231
+ with gr.Column():
232
+ input_audio = gr.Audio(label="Upload your voice recording", type="filepath", elem_id="input_audio")
233
+ stress_output = gr.Label(label="Stress Interpretation", elem_id="stress_output")
234
+ interpretation_output = gr.Textbox(label="Detailed Interpretation", lines=6, elem_id="interpretation_output")
235
+ plot_output = gr.Image(label="Stress Analysis Plot", elem_id="plot_output")
236
+
237
+ input_audio.change(
238
+ analyze_voice_stress,
239
+ inputs=[input_audio],
240
+ outputs=[stress_output, interpretation_output, plot_output]
241
+ )
242
+
243
+ gr.Button("Clear", elem_id="clear_btn").click(
244
+ lambda: (None, None, None, None),  # one value for each of the four outputs below
245
+ outputs=[input_audio, stress_output, interpretation_output, plot_output]
246
+ )
247
+
248
+ return voice_stress_tab
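As a quick check of the Z-score weighting and sigmoid mapping used in analyze_voice_stress above, with illustrative inputs:

    # male speaker: mean_f0 = 130 Hz  ->  z_f0          = (130 - 110) / 20      = 1.0
    # speech_rate  = 5.4 events/s     ->  z_speech_rate = (5.4 - 4.4) / 0.5     = 2.0
    # mean_energy  = 0.03 RMS         ->  z_energy      = (0.03 - 0.02) / 0.005 = 2.0
    # stress_score = 0.4*1.0 + 0.4*2.0 + 0.2*2.0 = 1.6
    # stress_level = 100 / (1 + exp(-1.6)) ≈ 83.2%  ->  "Very High Stress"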
verify.py DELETED
@@ -1,3 +0,0 @@
1
- import torch
2
- print(torch.backends.mps.is_available()) # Should return True
3
- print(torch.backends.mps.is_built()) # Should return True