Prathamesh1420 committed on
Commit
4f60fe7
·
verified ·
1 Parent(s): e5b288e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -78
app.py CHANGED
@@ -1,123 +1,98 @@
1
- import gradio as gr
2
  import cv2
3
  import numpy as np
 
 
 
4
  import os
 
5
  import threading
6
- import base64
7
- from ultralytics import YOLO
8
  from langchain_core.messages import HumanMessage
9
  from langchain_google_genai import ChatGoogleGenerativeAI
10
 
11
- # Set up Google API Key
12
  os.environ["GOOGLE_API_KEY"] = "AIzaSyDOBd0_yNLckwsZJrpb9-CqTHFUx0Ah3R8" # Replace with your API Key
 
 
13
  gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
14
 
15
- # Load YOLO model
16
  yolo_model = YOLO("best.pt")
17
  names = yolo_model.names
18
 
19
- # Constants for ROI detection
20
- current_date = time.strftime("%Y-%m-%d")
21
- crop_folder = f"crop_{current_date}"
22
- if not os.path.exists(crop_folder):
23
- os.makedirs(crop_folder)
24
-
25
- processed_track_ids = set()
26
- lock = threading.Lock()
27
-
28
  def encode_image_to_base64(image):
29
  _, img_buffer = cv2.imencode('.jpg', image)
30
  return base64.b64encode(img_buffer).decode('utf-8')
31
 
32
- def analyze_image_with_gemini(current_image):
33
- if current_image is None:
34
  return "No image available for analysis."
35
 
36
- current_image_data = encode_image_to_base64(current_image)
37
- message = HumanMessage(
38
- content=[
39
- {"type": "text", "text": "Analyze this image and check if the label is present on the bottle."},
40
- {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{current_image_data}"}, "description": "Detected product"}
41
- ]
42
- )
 
 
 
 
 
43
  try:
44
  response = gemini_model.invoke([message])
45
  return response.content
46
  except Exception as e:
47
  return f"Error processing image: {e}"
48
 
49
- def save_crop_image(crop, track_id):
50
- filename = f"{crop_folder}/{track_id}.jpg"
51
- cv2.imwrite(filename, crop)
52
- return filename
53
-
54
- def process_crop_image(crop, track_id, responses):
55
- response = analyze_image_with_gemini(crop)
56
- responses.append((track_id, response))
57
-
58
  def process_video(video_path):
59
  cap = cv2.VideoCapture(video_path)
60
- output_path = "output_video.mp4"
61
- fourcc = cv2.VideoWriter_fourcc(*"mp4v")
62
- out = cv2.VideoWriter(output_path, fourcc, 20.0, (1020, 500))
63
-
64
- responses = []
65
 
66
- while cap.isOpened():
 
67
  ret, frame = cap.read()
68
  if not ret:
69
  break
70
- frame = cv2.resize(frame, (1020, 500))
71
 
 
72
  results = yolo_model.track(frame, persist=True)
 
73
  if results[0].boxes is not None:
74
  boxes = results[0].boxes.xyxy.int().cpu().tolist()
 
75
  track_ids = results[0].boxes.id.int().cpu().tolist() if results[0].boxes.id is not None else [-1] * len(boxes)
76
 
77
- for box, track_id in zip(boxes, track_ids):
78
- with lock:
79
- if track_id not in processed_track_ids:
80
- x1, y1, x2, y2 = box
81
- crop = frame[y1:y2, x1:x2]
82
- save_crop_image(crop, track_id)
83
- threading.Thread(target=process_crop_image, args=(crop, track_id, responses)).start()
84
- processed_track_ids.add(track_id)
85
 
86
- out.write(frame)
 
 
 
 
87
 
88
  cap.release()
89
- out.release()
90
-
91
- return output_path, responses
92
-
93
- def gradio_interface(video_file):
94
- if video_file is None:
95
- return "No video uploaded.", None, None
96
-
97
- video_path = "uploaded_video.mp4"
98
- with open(video_path, "wb") as f:
99
- f.write(video_file.read())
100
-
101
- processed_video, analysis_results = process_video(video_path)
102
 
103
- return (
104
- processed_video,
105
- f"Download Processed Video: [Click Here]({processed_video})",
106
- analysis_results
107
- )
108
 
109
- # Create Gradio Interface
110
- app = gr.Interface(
111
  fn=gradio_interface,
112
- inputs=[gr.File(label="Upload Video (MP4, AVI, MOV)")],
113
- outputs=[
114
- gr.Video(label="Processed Video"),
115
- gr.HTML(label="Download Link"),
116
- gr.JSON(label="AI Analysis Results")
117
- ],
118
- title="Bottle Label Checking using YOLO & Gemini AI",
119
- description="Upload a video to detect bottles, crop images, and analyze labels using Google Gemini AI."
120
  )
121
 
122
- # Launch the Gradio App
123
- app.launch()
 
 
1
  import cv2
2
  import numpy as np
3
+ from ultralytics import YOLO
4
+ import cvzone
5
+ import base64
6
  import os
7
+ import time
8
  import threading
9
+ import gradio as gr
 
10
  from langchain_core.messages import HumanMessage
11
  from langchain_google_genai import ChatGoogleGenerativeAI
12
 
13
# Google API key for the Gemini client.
# SECURITY: the key must never be committed to source control. It is read
# from the GOOGLE_API_KEY environment variable (for example a Space secret).
# The key that used to be hard-coded on this line has been published in the
# repository history and should be revoked.
if not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it (or add it as a Space secret) "
        "before starting the app."
    )

# Initialize the Gemini model; the original code also supplied the key only
# through the GOOGLE_API_KEY environment variable.
gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

# Load the YOLO model and keep its `names` lookup, which process_video
# indexes by class id to label each box.
yolo_model = YOLO("best.pt")
names = yolo_model.names
22
 
 
 
 
 
 
 
 
 
 
23
def encode_image_to_base64(image):
    """Encode an image as a base64 JPEG string.

    Args:
        image: Image array accepted by ``cv2.imencode``
            (presumably a BGR frame or crop from OpenCV; verify against callers).

    Returns:
        The JPEG-encoded bytes of *image*, base64-encoded and decoded to a
        UTF-8 ``str``.

    Raises:
        ValueError: If OpenCV reports that JPEG encoding failed.
    """
    ok, img_buffer = cv2.imencode('.jpg', image)
    # imencode signals failure through its first return value; without this
    # check a failed encode would surface later as an unrelated TypeError.
    if not ok:
        raise ValueError("cv2.imencode failed to encode the image as JPEG.")
    return base64.b64encode(img_buffer).decode('utf-8')
26
 
27
def analyze_image_with_gemini(image):
    """Ask Gemini whether the bottle in *image* has a label and is damaged.

    Args:
        image: Image to analyze, or ``None``. It is handed to
            ``encode_image_to_base64`` and sent as a JPEG data URL.

    Returns:
        The model reply (``response.content``) on success. When *image* is
        ``None`` or when encoding or the request fails, a human-readable
        message string is returned instead of raising.
    """
    if image is None:
        return "No image available for analysis."

    prompt = (
        "\n"
        "Analyze this image and determine if the label is present on the bottle.\n"
        "Return the result strictly in a structured table format:\n"
        "\n"
        "| Label Present | Damage |\n"
        "|--------------|--------|\n"
        "| Yes/No | Yes/No |\n"
    )

    try:
        # Encoding is inside the try block on purpose: an empty or invalid
        # crop makes OpenCV raise, and one bad crop must not abort the whole
        # video loop when every other failure is reported as a string.
        image_data = encode_image_to_base64(image)
        message = HumanMessage(content=[
            {"type": "text", "text": prompt},
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
                "description": "Detected product",
            },
        ])
        response = gemini_model.invoke([message])
        return response.content
    except Exception as e:
        # Deliberate best-effort boundary: callers only print the result.
        return f"Error processing image: {e}"
48
 
 
 
 
 
 
 
 
 
 
49
def process_video(video_path):
    """Track objects in a video, analyze each detection, return the first frame.

    Every frame is resized to 1020x500 and passed to ``yolo_model.track``.
    Each detected box is drawn on the frame with its track id and class name,
    and the cropped region is sent to ``analyze_image_with_gemini``; the reply
    is printed.

    Args:
        video_path: Path of the video, as accepted by ``cv2.VideoCapture``.

    Returns:
        The first processed (resized, annotated) frame as produced by OpenCV.
        If the video cannot be opened, or no frame can be read from it, an
        ``"Error: ..."`` message string is returned instead.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        return "Error: Could not open video file."

    # Only the first processed frame is ever returned, so keep just that one
    # instead of accumulating every frame of the video in memory.
    first_frame = None
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.resize(frame, (1020, 500))
            results = yolo_model.track(frame, persist=True)

            detections = results[0].boxes
            if detections is not None:
                boxes = detections.xyxy.int().cpu().tolist()
                class_ids = detections.cls.int().cpu().tolist()
                if detections.id is not None:
                    track_ids = detections.id.int().cpu().tolist()
                else:
                    # The tracker assigned no ids; use -1 as a placeholder.
                    track_ids = [-1] * len(boxes)

                # Copy every crop BEFORE drawing anything: slices are views
                # of the frame, so rectangles and labels would otherwise end
                # up inside the images sent for analysis.
                crops = [frame[y1:y2, x1:x2].copy() for x1, y1, x2, y2 in boxes]

                for box, track_id, class_id, crop in zip(
                    boxes, track_ids, class_ids, crops
                ):
                    x1, y1, x2, y2 = box
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    cvzone.putTextRect(frame, f'ID: {track_id}', (x2, y2), 1, 1)
                    cvzone.putTextRect(frame, f'{names[class_id]}', (x1, y1), 1, 1)

                    # A degenerate box yields an empty crop that cannot be
                    # JPEG-encoded; skip it.
                    if crop.size == 0:
                        continue

                    # NOTE(review): this calls Gemini for every detection in
                    # every frame; consider analyzing each track id once.
                    response = analyze_image_with_gemini(crop)
                    print(response)

            if first_frame is None:
                first_frame = frame
    finally:
        # Release the capture even if tracking or drawing raises.
        cap.release()

    if first_frame is None:
        return "Error: No frames could be read from the video."
    return first_frame
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
def gradio_interface(video):
    """Gradio callback: process the uploaded video and return a frame to show.

    Args:
        video: The value supplied by the ``gr.Video`` input. Gradio passes the
            uploaded video as a file path; raw ``bytes`` are still accepted
            and written to ``temp_video.mp4`` as before.

    Returns:
        The frame returned by ``process_video``, converted from OpenCV's BGR
        channel order to the RGB order the image output displays.

    Raises:
        gr.Error: If no video was uploaded or ``process_video`` reports an
            error message instead of a frame.
    """
    if video is None:
        raise gr.Error("No video uploaded.")

    if isinstance(video, (bytes, bytearray)):
        # Backward-compatible path for callers that pass the file contents.
        video_path = "temp_video.mp4"
        with open(video_path, "wb") as f:
            f.write(video)
    else:
        # gr.Video hands the callback a path on disk, not the file contents,
        # so it can be opened directly.
        video_path = video

    result = process_video(video_path)
    if isinstance(result, str):
        # process_video reports failures as "Error: ..." strings; an image
        # output cannot display those, so show them as a UI error instead.
        raise gr.Error(result)

    return cv2.cvtColor(result, cv2.COLOR_BGR2RGB)
88
 
89
# Gradio UI: a single video upload in, a single processed frame out.
video_input = gr.Video(label="Upload Video")
frame_output = gr.Image(label="Processed Frame")

iface = gr.Interface(
    gradio_interface,
    inputs=video_input,
    outputs=frame_output,
    title="YOLO + Gemini AI Video Analysis",
    description="Upload a video to detect objects and analyze them using Gemini AI.",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()