import streamlit as st
import cv2
import torch
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import timm
import torch.nn as nn
import mediapipe as mp
import time
import tempfile
import pandas as pd

# Initialize device (CPU-only inference)
device = "cpu"

# Transformation applied to each face crop before inference
# (ImageNet normalization, as expected by EfficientNet)
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Emotion labels in the order of the model's output classes
label_dict = {
    0: "angry",
    1: "disgust",
    2: "fear",
    3: "happy",
    4: "neutral",
    5: "sad",
    6: "surprised",
}

change_list = []

# Load the model. The checkpoint stores the fully pickled model, so the timm
# instance built here (with its 7-class head) is replaced by the torch.load
# call below.
model = timm.create_model("tf_efficientnet_b0_ns", pretrained=False)
model.classifier = nn.Sequential(nn.Linear(in_features=1280, out_features=7))
model = torch.load("22.6_AffectNet_10K_part2.pt", map_location=device)
model.to(device)
model.eval()

# Initialize MediaPipe Face Detection
mp_face_detection = mp.solutions.face_detection

# Streamlit interface
st.title("Emotion Detection from Video")
st.write("Upload a video file to detect emotions.")

uploaded_file = st.file_uploader("Choose a video file", type=["mp4", "avi", "mov"])

if uploaded_file is not None:
    # Persist the upload to disk so OpenCV can open it by path
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(uploaded_file.read())
        video_path = temp_file.name

    cap = cv2.VideoCapture(video_path)

    histogram = {i: 0 for i in range(7)}
    # mat[current][prev] counts transitions from emotion `prev` to `current`
    mat = [[0 for _ in range(7)] for _ in range(7)]
    prev_emotion = None
    current_emotion = None

    # Reuse a single placeholder so frames play back in place instead of
    # stacking one st.image element per frame
    frame_placeholder = st.empty()

    begin = time.time()
    with mp_face_detection.FaceDetection(
        model_selection=0, min_detection_confidence=0.5
    ) as face_detection:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Convert frame to RGB for MediaPipe
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Process the frame and detect faces
            results = face_detection.process(rgb_frame)

            if results.detections:
                for detection in results.detections:
                    # Get the bounding box (relative coordinates -> pixels),
                    # clamping to the frame so the crop is never negative
                    bboxC = detection.location_data.relative_bounding_box
                    ih, iw, _ = frame.shape
                    x = max(0, int(bboxC.xmin * iw))
                    y = max(0, int(bboxC.ymin * ih))
                    w = int(bboxC.width * iw)
                    h = int(bboxC.height * ih)

                    # Extract the region of interest (the face); skip boxes
                    # that fall entirely outside the frame
                    face = frame[y : y + h, x : x + w]
                    if face.size == 0:
                        continue

                    # Convert the face to a PIL image and apply the transforms
                    face_pil = Image.fromarray(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
                    face_tensor = transform(face_pil).unsqueeze(0).to(device)

                    # Pass the face through the neural network
                    with torch.no_grad():
                        output = model(face_tensor)
                        _, predicted = torch.max(output, 1)

                    # Draw a rectangle around the face and label it with the
                    # prediction
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
                    label = label_dict[predicted.item()]

                    current_emotion = predicted.item()
                    if current_emotion != prev_emotion:
                        # Elapsed processing time, not the video timestamp;
                        # cap.get(cv2.CAP_PROP_POS_MSEC) would give the latter
                        current_time = time.time() - begin
                        if prev_emotion is not None:
                            change = (
                                f"Change detected: {label_dict[prev_emotion]} -> "
                                f"{label_dict[current_emotion]} at {current_time:.2f}s"
                            )
                            st.write(change)
                            change_list.append(change)
                            mat[current_emotion][prev_emotion] += 1
                        prev_emotion = current_emotion

                    histogram[predicted.item()] += 1

                    cv2.putText(
                        frame,
                        label,
                        (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.9,
                        (255, 0, 0),
                        2,
                    )

            # Display the annotated frame
            frame_placeholder.image(frame, channels="BGR")

    # Release the capture
    cap.release()

    end = time.time()
    st.write(f"Total runtime of the program is {end - begin:.2f}s")
    # Plot the emotion histogram as percentages of all detections
    st.write("Emotion Distribution")
    x = ["angry", "disgust", "fear", "happy", "neutral", "sad", "surprised"]
    y = list(histogram.values())
    total = sum(y)
    if total > 0:
        y_new = [(i / total) * 100 for i in y]
        st.bar_chart(pd.DataFrame({"Percentage": y_new}, index=x))

    # Transition matrix: rows are the initial emotion, columns the next one
    data = {
        "angry": mat[0],
        "disgust": mat[1],
        "fear": mat[2],
        "happy": mat[3],
        "neutral": mat[4],
        "sad": mat[5],
        "surprised": mat[6],
    }
    st.write("Change Matrix")
    st.write("Y-axis -> initial emotion")
    st.write("X-axis -> next emotion")
    df = pd.DataFrame(
        data,
        index=["angry", "disgust", "fear", "happy", "neutral", "sad", "surprised"],
    )
    st.table(df)

    st.write("Change List")
    st.write(change_list)
else:
    st.write("Please upload a video file to start emotion detection.")
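# A minimal way to run this app locally (assuming the script is saved as
# app.py -- a hypothetical name -- and the checkpoint
# 22.6_AffectNet_10K_part2.pt sits in the same directory):
#
#   streamlit run app.py
#
# If the checkpoint held only a state_dict rather than a fully pickled model,
# the loading step above would instead keep the timm-built architecture and do:
#
#   model.load_state_dict(
#       torch.load("22.6_AffectNet_10K_part2.pt", map_location=device)
#   )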