Spaces:

Prathamesh1420
/

bottle_lable_gradio

Sleeping

App Files Files Community

Prathamesh1420 committed on Feb 13

Commit

4f60fe7

verified ·

1 Parent(s): e5b288e

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -78

app.py CHANGED Viewed

@@ -1,123 +1,98 @@
-import gradio as gr
 import cv2
 import numpy as np
 import os
 import threading
-import base64
-from ultralytics import YOLO
 from langchain_core.messages import HumanMessage
 from langchain_google_genai import ChatGoogleGenerativeAI
-# Set up Google API Key
 os.environ["GOOGLE_API_KEY"] = "AIzaSyDOBd0_yNLckwsZJrpb9-CqTHFUx0Ah3R8"  # Replace with your API Key
 gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
-# Load YOLO model
 yolo_model = YOLO("best.pt")
 names = yolo_model.names
-# Constants for ROI detection
-current_date = time.strftime("%Y-%m-%d")
-crop_folder = f"crop_{current_date}"
-if not os.path.exists(crop_folder):
-    os.makedirs(crop_folder)
-processed_track_ids = set()
-lock = threading.Lock()
 def encode_image_to_base64(image):
     _, img_buffer = cv2.imencode('.jpg', image)
     return base64.b64encode(img_buffer).decode('utf-8')
-def analyze_image_with_gemini(current_image):
-    if current_image is None:
         return "No image available for analysis."
-    current_image_data = encode_image_to_base64(current_image)
-    message = HumanMessage(
-        content=[
-            {"type": "text", "text": "Analyze this image and check if the label is present on the bottle."},
-            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{current_image_data}"}, "description": "Detected product"}
-        ]
-    )
     try:
         response = gemini_model.invoke([message])
         return response.content
     except Exception as e:
         return f"Error processing image: {e}"
-def save_crop_image(crop, track_id):
-    filename = f"{crop_folder}/{track_id}.jpg"
-    cv2.imwrite(filename, crop)
-    return filename
-def process_crop_image(crop, track_id, responses):
-    response = analyze_image_with_gemini(crop)
-    responses.append((track_id, response))
 def process_video(video_path):
     cap = cv2.VideoCapture(video_path)
-    output_path = "output_video.mp4"
-    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
-    out = cv2.VideoWriter(output_path, fourcc, 20.0, (1020, 500))
-    responses = []
-    while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break
-        frame = cv2.resize(frame, (1020, 500))
         results = yolo_model.track(frame, persist=True)
         if results[0].boxes is not None:
             boxes = results[0].boxes.xyxy.int().cpu().tolist()
             track_ids = results[0].boxes.id.int().cpu().tolist() if results[0].boxes.id is not None else [-1] * len(boxes)
-            for box, track_id in zip(boxes, track_ids):
-                with lock:
-                    if track_id not in processed_track_ids:
-                        x1, y1, x2, y2 = box
-                        crop = frame[y1:y2, x1:x2]
-                        save_crop_image(crop, track_id)
-                        threading.Thread(target=process_crop_image, args=(crop, track_id, responses)).start()
-                        processed_track_ids.add(track_id)
-        out.write(frame)
     cap.release()
-    out.release()
-    return output_path, responses
-def gradio_interface(video_file):
-    if video_file is None:
-        return "No video uploaded.", None, None
-    video_path = "uploaded_video.mp4"
-    with open(video_path, "wb") as f:
-        f.write(video_file.read())
-    processed_video, analysis_results = process_video(video_path)
-    return (
-        processed_video,
-        f"Download Processed Video: [Click Here]({processed_video})",
-        analysis_results
-    )
-# Create Gradio Interface
-app = gr.Interface(
     fn=gradio_interface,
-    inputs=[gr.File(label="Upload Video (MP4, AVI, MOV)")],
-    outputs=[
-        gr.Video(label="Processed Video"),
-        gr.HTML(label="Download Link"),
-        gr.JSON(label="AI Analysis Results")
-    ],
-    title="Bottle Label Checking using YOLO & Gemini AI",
-    description="Upload a video to detect bottles, crop images, and analyze labels using Google Gemini AI."
 )
-# Launch the Gradio App
-app.launch()

 import cv2
 import numpy as np
+from ultralytics import YOLO
+import cvzone
+import base64
 import os
+import time
 import threading
+import gradio as gr
 from langchain_core.messages import HumanMessage
 from langchain_google_genai import ChatGoogleGenerativeAI
# Google API key for Gemini.
# SECURITY: the key used to be hard-coded on this line, which publishes it to
# anyone who can read the repository. It must now be supplied through the
# environment (for example as a Space secret); the previously committed key
# should be revoked.
if not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; configure it as an environment variable "
        "or Space secret before starting the app."
    )

# Initialize the Gemini model.
gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

# Load the YOLO model; `names` is indexed by detected class id to get a label.
yolo_model = YOLO("best.pt")
names = yolo_model.names
 def encode_image_to_base64(image):
     """Encode ``image`` as JPEG and return the bytes as a base64 text string."""
     # cv2.imencode returns a (success_flag, buffer) pair; only the buffer is used.
     encoded = cv2.imencode('.jpg', image)
     jpeg_buffer = encoded[1]
     b64_bytes = base64.b64encode(jpeg_buffer)
     return b64_bytes.decode('utf-8')
def analyze_image_with_gemini(image):
    """Ask Gemini whether the bottle in ``image`` has a label and is damaged.

    Args:
        image: Image to analyze (anything ``encode_image_to_base64`` accepts),
            or ``None``.

    Returns:
        The content of the model's response, or a human-readable message when
        there is no image to analyze or when encoding/the request fails.
    """
    # A zero-area crop has nothing to encode; treat it like a missing image
    # instead of letting the JPEG encoder raise.
    if image is None or getattr(image, "size", 1) == 0:
        return "No image available for analysis."
    try:
        # Encoding is inside the try so an encoder failure is reported the
        # same way as a failed model call rather than aborting the caller.
        image_data = encode_image_to_base64(image)
        message = HumanMessage(content=[
            {"type": "text", "text": """
        Analyze this image and determine if the label is present on the bottle.
        Return the result strictly in a structured table format:
        | Label Present | Damage |
        |--------------|--------|
        | Yes/No       | Yes/No |
        """},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}, "description": "Detected product"}
        ])
        response = gemini_model.invoke([message])
        return response.content
    except Exception as e:
        return f"Error processing image: {e}"
 def process_video(video_path):
     """Run YOLO tracking over a video and return its first annotated frame.

     Each frame is resized to 1020x500 and passed to the YOLO tracker. Every
     detection is drawn on the frame (box, track ID, class name), and a crop
     of each detection is sent to Gemini; the response is printed.

     Args:
         video_path: Path of the video file to read.

     Returns:
         The first annotated frame, converted from OpenCV's BGR channel order
         to RGB for display.

     Raises:
         gr.Error: If the video cannot be opened or contains no readable frame.
     """
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
         cap.release()
         # Raise instead of returning text: the result feeds an image output.
         raise gr.Error("Error: Could not open video file.")

     first_frame = None
     analyzed_track_ids = set()
     try:
         while True:
             ret, frame = cap.read()
             if not ret:
                 break
             frame = cv2.resize(frame, (1020, 500))
             results = yolo_model.track(frame, persist=True)
             if results[0].boxes is not None:
                 boxes = results[0].boxes.xyxy.int().cpu().tolist()
                 class_ids = results[0].boxes.cls.int().cpu().tolist()
                 track_ids = results[0].boxes.id.int().cpu().tolist() if results[0].boxes.id is not None else [-1] * len(boxes)
                 # Crop from an unannotated copy so the rectangles and text
                 # drawn below never end up in the image sent to Gemini.
                 clean_frame = frame.copy()
                 for box, track_id, class_id in zip(boxes, track_ids, class_ids):
                     x1, y1, x2, y2 = box
                     crop = clean_frame[y1:y2, x1:x2]
                     cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                     cvzone.putTextRect(frame, f'ID: {track_id}', (x2, y2), 1, 1)
                     cvzone.putTextRect(frame, f'{names[class_id]}', (x1, y1), 1, 1)
                     if crop.size == 0:
                         continue  # degenerate box: nothing to analyze
                     if track_id != -1:
                         # Analyze each tracked object once, not once per frame;
                         # -1 marks an untracked box and cannot be de-duplicated.
                         if track_id in analyzed_track_ids:
                             continue
                         analyzed_track_ids.add(track_id)
                     response = analyze_image_with_gemini(crop)
                     print(response)
             # Only the first frame is returned, so keep just that one rather
             # than accumulating every frame of the video in memory.
             if first_frame is None:
                 first_frame = frame
     finally:
         # Release the capture even if detection or analysis raises.
         cap.release()

     if first_frame is None:
         raise gr.Error("Error: No frames could be read from the video.")
     return cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB)
def gradio_interface(video):
    """Gradio callback: run ``process_video`` on the uploaded video.

    Args:
        video: The uploaded video. The Gradio video input supplies a file
            path; raw bytes are also accepted and are first written to
            ``temp_video.mp4``. ``None`` means nothing was uploaded.

    Returns:
        Whatever ``process_video`` returns for the video.

    Raises:
        gr.Error: If no video was uploaded.
    """
    if video is None:
        raise gr.Error("No video uploaded.")
    # A path can be opened directly; writing a str to a binary file would
    # raise TypeError, so only raw bytes go through the temporary file.
    if isinstance(video, (str, os.PathLike)):
        return process_video(os.fspath(video))
    temp_video_path = "temp_video.mp4"
    with open(temp_video_path, "wb") as f:
        f.write(video)
    return process_video(temp_video_path)
# Gradio UI: a single video upload in, a single processed frame out.
iface = gr.Interface(
    gradio_interface,
    gr.Video(label="Upload Video"),
    gr.Image(label="Processed Frame"),
    title="YOLO + Gemini AI Video Analysis",
    description="Upload a video to detect objects and analyze them using Gemini AI.",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()