Spaces:
Running
Running
feat: handle images instead of webcam
Browse files
- .gitignore +1 -0
- package-lock.json +0 -0
- src/App.tsx +24 -85
- src/components/ImageAnalysisView.tsx +207 -0
- src/components/ImageUpload.tsx +129 -0
- src/components/PromptInput.tsx +10 -6
- src/context/VLMContext.tsx +27 -13
- src/types/index.ts +7 -1
- src/types/vlm.ts +1 -1
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
node_modules/
|
package-lock.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/App.tsx
CHANGED
|
@@ -1,109 +1,48 @@
|
|
| 1 |
-
import { useState,
|
| 2 |
import LoadingScreen from "./components/LoadingScreen";
|
| 3 |
-
import
|
| 4 |
-
import
|
| 5 |
-
import WebcamPermissionDialog from "./components/WebcamPermissionDialog";
|
| 6 |
import type { AppState } from "./types";
|
| 7 |
|
| 8 |
export default function App() {
|
| 9 |
-
const [appState, setAppState] = useState<AppState>("
|
| 10 |
-
const [
|
| 11 |
-
const [isVideoReady, setIsVideoReady] = useState(false);
|
| 12 |
-
const videoRef = useRef<HTMLVideoElement | null>(null);
|
| 13 |
|
| 14 |
-
const
|
| 15 |
-
|
| 16 |
-
setAppState("welcome");
|
| 17 |
-
}, []);
|
| 18 |
-
|
| 19 |
-
const handleStart = useCallback(() => {
|
| 20 |
setAppState("loading");
|
| 21 |
}, []);
|
| 22 |
|
| 23 |
const handleLoadingComplete = useCallback(() => {
|
| 24 |
-
setAppState("
|
| 25 |
}, []);
|
| 26 |
|
| 27 |
-
const
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
} catch (error) {
|
| 31 |
-
console.error("Failed to play video:", error);
|
| 32 |
-
}
|
| 33 |
}, []);
|
| 34 |
|
| 35 |
-
const setupVideo = useCallback(
|
| 36 |
-
(video: HTMLVideoElement, stream: MediaStream) => {
|
| 37 |
-
video.srcObject = stream;
|
| 38 |
-
|
| 39 |
-
const handleCanPlay = () => {
|
| 40 |
-
setIsVideoReady(true);
|
| 41 |
-
playVideo(video);
|
| 42 |
-
};
|
| 43 |
-
|
| 44 |
-
video.addEventListener("canplay", handleCanPlay, { once: true });
|
| 45 |
-
|
| 46 |
-
return () => {
|
| 47 |
-
video.removeEventListener("canplay", handleCanPlay);
|
| 48 |
-
};
|
| 49 |
-
},
|
| 50 |
-
[playVideo],
|
| 51 |
-
);
|
| 52 |
-
|
| 53 |
-
useEffect(() => {
|
| 54 |
-
if (webcamStream && videoRef.current) {
|
| 55 |
-
const video = videoRef.current;
|
| 56 |
-
|
| 57 |
-
video.srcObject = null;
|
| 58 |
-
video.load();
|
| 59 |
-
|
| 60 |
-
const cleanup = setupVideo(video, webcamStream);
|
| 61 |
-
return cleanup;
|
| 62 |
-
}
|
| 63 |
-
}, [webcamStream, setupVideo]);
|
| 64 |
-
|
| 65 |
-
const videoBlurState = useMemo(() => {
|
| 66 |
-
switch (appState) {
|
| 67 |
-
case "requesting-permission":
|
| 68 |
-
return "blur(20px) brightness(0.2) saturate(0.5)";
|
| 69 |
-
case "welcome":
|
| 70 |
-
return "blur(12px) brightness(0.3) saturate(0.7)";
|
| 71 |
-
case "loading":
|
| 72 |
-
return "blur(8px) brightness(0.4) saturate(0.8)";
|
| 73 |
-
case "captioning":
|
| 74 |
-
return "none";
|
| 75 |
-
default:
|
| 76 |
-
return "blur(20px) brightness(0.2) saturate(0.5)";
|
| 77 |
-
}
|
| 78 |
-
}, [appState]);
|
| 79 |
-
|
| 80 |
return (
|
| 81 |
<div className="App relative h-screen overflow-hidden">
|
| 82 |
-
<div className="absolute inset-0 bg-gray-900" />
|
| 83 |
|
| 84 |
-
{
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
className="absolute inset-0 w-full h-full object-cover transition-all duration-1000 ease-out"
|
| 91 |
-
style={{
|
| 92 |
-
filter: videoBlurState,
|
| 93 |
-
opacity: isVideoReady ? 1 : 0,
|
| 94 |
-
}}
|
| 95 |
/>
|
| 96 |
)}
|
| 97 |
|
| 98 |
-
{appState !== "captioning" && <div className="absolute inset-0 bg-gray-900/80 backdrop-blur-sm" />}
|
| 99 |
-
|
| 100 |
-
{appState === "requesting-permission" && <WebcamPermissionDialog onPermissionGranted={handlePermissionGranted} />}
|
| 101 |
-
|
| 102 |
-
{appState === "welcome" && <WelcomeScreen onStart={handleStart} />}
|
| 103 |
-
|
| 104 |
{appState === "loading" && <LoadingScreen onComplete={handleLoadingComplete} />}
|
| 105 |
|
| 106 |
-
{appState === "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
</div>
|
| 108 |
);
|
| 109 |
}
|
|
|
|
| 1 |
+
import { useState, useCallback } from "react";
|
| 2 |
import LoadingScreen from "./components/LoadingScreen";
|
| 3 |
+
import ImageUpload from "./components/ImageUpload";
|
| 4 |
+
import ImageAnalysisView from "./components/ImageAnalysisView";
|
|
|
|
| 5 |
import type { AppState } from "./types";
|
| 6 |
|
| 7 |
export default function App() {
|
| 8 |
+
const [appState, setAppState] = useState<AppState>("upload");
|
| 9 |
+
const [uploadedImages, setUploadedImages] = useState<File[]>([]);
|
|
|
|
|
|
|
| 10 |
|
| 11 |
+
const handleImagesUploaded = useCallback((files: File[]) => {
|
| 12 |
+
setUploadedImages(files);
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
setAppState("loading");
|
| 14 |
}, []);
|
| 15 |
|
| 16 |
const handleLoadingComplete = useCallback(() => {
|
| 17 |
+
setAppState("analyzing");
|
| 18 |
}, []);
|
| 19 |
|
| 20 |
+
const handleBackToUpload = useCallback(() => {
|
| 21 |
+
setUploadedImages([]);
|
| 22 |
+
setAppState("upload");
|
|
|
|
|
|
|
|
|
|
| 23 |
}, []);
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
return (
|
| 26 |
<div className="App relative h-screen overflow-hidden">
|
| 27 |
+
<div className="absolute inset-0 bg-gradient-to-br from-gray-900 via-blue-900/20 to-purple-900/20" />
|
| 28 |
|
| 29 |
+
{appState !== "analyzing" && <div className="absolute inset-0 bg-gray-900/80 backdrop-blur-sm" />}
|
| 30 |
+
|
| 31 |
+
{appState === "upload" && (
|
| 32 |
+
<ImageUpload
|
| 33 |
+
onImagesUploaded={handleImagesUploaded}
|
| 34 |
+
isAnalyzing={false}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
/>
|
| 36 |
)}
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
{appState === "loading" && <LoadingScreen onComplete={handleLoadingComplete} />}
|
| 39 |
|
| 40 |
+
{appState === "analyzing" && (
|
| 41 |
+
<ImageAnalysisView
|
| 42 |
+
images={uploadedImages}
|
| 43 |
+
onBackToUpload={handleBackToUpload}
|
| 44 |
+
/>
|
| 45 |
+
)}
|
| 46 |
</div>
|
| 47 |
);
|
| 48 |
}
|
src/components/ImageAnalysisView.tsx
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { useState, useRef, useEffect, useCallback } from "react";
|
| 2 |
+
import DraggableContainer from "./DraggableContainer";
|
| 3 |
+
import PromptInput from "./PromptInput";
|
| 4 |
+
import GlassButton from "./GlassButton";
|
| 5 |
+
import GlassContainer from "./GlassContainer";
|
| 6 |
+
import { useVLMContext } from "../context/useVLMContext";
|
| 7 |
+
import { PROMPTS, GLASS_EFFECTS } from "../constants";
|
| 8 |
+
import type { ImageAnalysisResult } from "../types";
|
| 9 |
+
|
| 10 |
+
interface ImageAnalysisViewProps {
|
| 11 |
+
images: File[];
|
| 12 |
+
onBackToUpload: () => void;
|
| 13 |
+
}
|
| 14 |
+
|
| 15 |
+
export default function ImageAnalysisView({ images, onBackToUpload }: ImageAnalysisViewProps) {
|
| 16 |
+
const [results, setResults] = useState<ImageAnalysisResult[]>([]);
|
| 17 |
+
const [currentPrompt, setCurrentPrompt] = useState<string>(PROMPTS.default);
|
| 18 |
+
const [isAnalyzing, setIsAnalyzing] = useState<boolean>(false);
|
| 19 |
+
const [currentImageIndex, setCurrentImageIndex] = useState<number>(0);
|
| 20 |
+
const [selectedImageUrl, setSelectedImageUrl] = useState<string>("");
|
| 21 |
+
|
| 22 |
+
const { isLoaded, runInference } = useVLMContext();
|
| 23 |
+
const abortControllerRef = useRef<AbortController | null>(null);
|
| 24 |
+
|
| 25 |
+
// Create preview URL for selected image
|
| 26 |
+
useEffect(() => {
|
| 27 |
+
if (images[currentImageIndex]) {
|
| 28 |
+
const url = URL.createObjectURL(images[currentImageIndex]);
|
| 29 |
+
setSelectedImageUrl(url);
|
| 30 |
+
return () => URL.revokeObjectURL(url);
|
| 31 |
+
}
|
| 32 |
+
}, [images, currentImageIndex]);
|
| 33 |
+
|
| 34 |
+
const analyzeAllImages = useCallback(async () => {
|
| 35 |
+
if (!isLoaded || isAnalyzing) return;
|
| 36 |
+
|
| 37 |
+
setIsAnalyzing(true);
|
| 38 |
+
setResults([]);
|
| 39 |
+
|
| 40 |
+
abortControllerRef.current?.abort();
|
| 41 |
+
abortControllerRef.current = new AbortController();
|
| 42 |
+
|
| 43 |
+
const analysisResults: ImageAnalysisResult[] = [];
|
| 44 |
+
|
| 45 |
+
try {
|
| 46 |
+
for (let i = 0; i < images.length; i++) {
|
| 47 |
+
if (abortControllerRef.current.signal.aborted) break;
|
| 48 |
+
|
| 49 |
+
setCurrentImageIndex(i);
|
| 50 |
+
const file = images[i];
|
| 51 |
+
|
| 52 |
+
try {
|
| 53 |
+
const caption = await runInference(file, currentPrompt);
|
| 54 |
+
analysisResults.push({ file, caption });
|
| 55 |
+
} catch (error) {
|
| 56 |
+
const errorMsg = error instanceof Error ? error.message : String(error);
|
| 57 |
+
analysisResults.push({ file, caption: "", error: errorMsg });
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
setResults([...analysisResults]);
|
| 61 |
+
}
|
| 62 |
+
} catch (error) {
|
| 63 |
+
console.error("Analysis interrupted:", error);
|
| 64 |
+
} finally {
|
| 65 |
+
setIsAnalyzing(false);
|
| 66 |
+
}
|
| 67 |
+
}, [images, currentPrompt, isLoaded, runInference, isAnalyzing]);
|
| 68 |
+
|
| 69 |
+
const handlePromptChange = useCallback((prompt: string) => {
|
| 70 |
+
setCurrentPrompt(prompt);
|
| 71 |
+
}, []);
|
| 72 |
+
|
| 73 |
+
const handleImageSelect = useCallback((index: number) => {
|
| 74 |
+
setCurrentImageIndex(index);
|
| 75 |
+
}, []);
|
| 76 |
+
|
| 77 |
+
const stopAnalysis = useCallback(() => {
|
| 78 |
+
abortControllerRef.current?.abort();
|
| 79 |
+
setIsAnalyzing(false);
|
| 80 |
+
}, []);
|
| 81 |
+
|
| 82 |
+
useEffect(() => {
|
| 83 |
+
return () => {
|
| 84 |
+
abortControllerRef.current?.abort();
|
| 85 |
+
};
|
| 86 |
+
}, []);
|
| 87 |
+
|
| 88 |
+
return (
|
| 89 |
+
<div className="absolute inset-0 text-white">
|
| 90 |
+
{/* Main image display */}
|
| 91 |
+
<div className="relative w-full h-full flex">
|
| 92 |
+
{/* Image preview */}
|
| 93 |
+
<div className="flex-1 flex items-center justify-center p-8">
|
| 94 |
+
{selectedImageUrl && (
|
| 95 |
+
<img
|
| 96 |
+
src={selectedImageUrl}
|
| 97 |
+
alt={`Preview of ${images[currentImageIndex]?.name}`}
|
| 98 |
+
className="max-w-full max-h-full object-contain rounded-lg shadow-2xl"
|
| 99 |
+
/>
|
| 100 |
+
)}
|
| 101 |
+
</div>
|
| 102 |
+
|
| 103 |
+
{/* Sidebar with image thumbnails and results */}
|
| 104 |
+
<div className="w-80 bg-black/20 backdrop-blur-sm border-l border-white/20 overflow-y-auto">
|
| 105 |
+
{/* Controls */}
|
| 106 |
+
<div className="p-4 border-b border-white/20">
|
| 107 |
+
<div className="flex gap-2 mb-4">
|
| 108 |
+
<GlassButton onClick={onBackToUpload} className="flex-1">
|
| 109 |
+
Back to Upload
|
| 110 |
+
</GlassButton>
|
| 111 |
+
{!isAnalyzing ? (
|
| 112 |
+
<GlassButton
|
| 113 |
+
onClick={analyzeAllImages}
|
| 114 |
+
disabled={!isLoaded}
|
| 115 |
+
className="flex-1"
|
| 116 |
+
>
|
| 117 |
+
Analyze All
|
| 118 |
+
</GlassButton>
|
| 119 |
+
) : (
|
| 120 |
+
<GlassButton onClick={stopAnalysis} className="flex-1 bg-red-500/20">
|
| 121 |
+
Stop
|
| 122 |
+
</GlassButton>
|
| 123 |
+
)}
|
| 124 |
+
</div>
|
| 125 |
+
|
| 126 |
+
{isAnalyzing && (
|
| 127 |
+
<div className="text-sm text-white/70 text-center">
|
| 128 |
+
Analyzing image {currentImageIndex + 1} of {images.length}...
|
| 129 |
+
</div>
|
| 130 |
+
)}
|
| 131 |
+
</div>
|
| 132 |
+
|
| 133 |
+
{/* Image list with results */}
|
| 134 |
+
<div className="p-4 space-y-4">
|
| 135 |
+
{images.map((file, index) => {
|
| 136 |
+
const result = results.find(r => r.file === file);
|
| 137 |
+
const isSelected = index === currentImageIndex;
|
| 138 |
+
const isProcessing = isAnalyzing && index === currentImageIndex;
|
| 139 |
+
|
| 140 |
+
return (
|
| 141 |
+
<div
|
| 142 |
+
key={`${file.name}-${index}`}
|
| 143 |
+
className={`cursor-pointer transition-all duration-200 ${
|
| 144 |
+
isSelected ? 'ring-2 ring-blue-400' : ''
|
| 145 |
+
}`}
|
| 146 |
+
onClick={() => handleImageSelect(index)}
|
| 147 |
+
>
|
| 148 |
+
<GlassContainer
|
| 149 |
+
bgColor={isSelected ? GLASS_EFFECTS.COLORS.BUTTON_BG : GLASS_EFFECTS.COLORS.DEFAULT_BG}
|
| 150 |
+
className="p-3 rounded-lg"
|
| 151 |
+
>
|
| 152 |
+
<div className="flex items-start gap-3">
|
| 153 |
+
{/* Thumbnail */}
|
| 154 |
+
<div className="w-16 h-16 bg-gray-700 rounded flex items-center justify-center text-xs flex-shrink-0">
|
| 155 |
+
<img
|
| 156 |
+
src={URL.createObjectURL(file)}
|
| 157 |
+
alt={file.name}
|
| 158 |
+
className="w-full h-full object-cover rounded"
|
| 159 |
+
onLoad={(e) => URL.revokeObjectURL((e.target as HTMLImageElement).src)}
|
| 160 |
+
/>
|
| 161 |
+
</div>
|
| 162 |
+
|
| 163 |
+
{/* Content */}
|
| 164 |
+
<div className="flex-1 min-w-0">
|
| 165 |
+
<div className="text-sm font-medium truncate mb-1">
|
| 166 |
+
{file.name}
|
| 167 |
+
</div>
|
| 168 |
+
|
| 169 |
+
{isProcessing && (
|
| 170 |
+
<div className="text-xs text-blue-400">
|
| 171 |
+
Processing...
|
| 172 |
+
</div>
|
| 173 |
+
)}
|
| 174 |
+
|
| 175 |
+
{result && (
|
| 176 |
+
<div className="text-xs">
|
| 177 |
+
{result.error ? (
|
| 178 |
+
<div className="text-red-400">
|
| 179 |
+
Error: {result.error}
|
| 180 |
+
</div>
|
| 181 |
+
) : (
|
| 182 |
+
<div className="text-white/80">
|
| 183 |
+
{result.caption}
|
| 184 |
+
</div>
|
| 185 |
+
)}
|
| 186 |
+
</div>
|
| 187 |
+
)}
|
| 188 |
+
</div>
|
| 189 |
+
</div>
|
| 190 |
+
</GlassContainer>
|
| 191 |
+
</div>
|
| 192 |
+
);
|
| 193 |
+
})}
|
| 194 |
+
</div>
|
| 195 |
+
</div>
|
| 196 |
+
</div>
|
| 197 |
+
|
| 198 |
+
{/* Draggable Prompt Input - Bottom Left */}
|
| 199 |
+
<DraggableContainer initialPosition="bottom-left">
|
| 200 |
+
<PromptInput
|
| 201 |
+
onPromptChange={handlePromptChange}
|
| 202 |
+
disabled={isAnalyzing}
|
| 203 |
+
/>
|
| 204 |
+
</DraggableContainer>
|
| 205 |
+
</div>
|
| 206 |
+
);
|
| 207 |
+
}
|
src/components/ImageUpload.tsx
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { useState, useCallback, useRef } from "react";
|
| 2 |
+
import GlassButton from "./GlassButton";
|
| 3 |
+
import GlassContainer from "./GlassContainer";
|
| 4 |
+
import { GLASS_EFFECTS } from "../constants";
|
| 5 |
+
|
| 6 |
+
interface ImageUploadProps {
|
| 7 |
+
onImagesUploaded: (files: File[]) => void;
|
| 8 |
+
isAnalyzing: boolean;
|
| 9 |
+
}
|
| 10 |
+
|
| 11 |
+
export default function ImageUpload({ onImagesUploaded, isAnalyzing }: ImageUploadProps) {
|
| 12 |
+
const [dragActive, setDragActive] = useState(false);
|
| 13 |
+
const fileInputRef = useRef<HTMLInputElement>(null);
|
| 14 |
+
|
| 15 |
+
const handleFiles = useCallback(
|
| 16 |
+
(files: FileList | null) => {
|
| 17 |
+
if (!files) return;
|
| 18 |
+
|
| 19 |
+
const imageFiles = Array.from(files).filter(file =>
|
| 20 |
+
file.type.startsWith("image/")
|
| 21 |
+
);
|
| 22 |
+
|
| 23 |
+
if (imageFiles.length > 0) {
|
| 24 |
+
onImagesUploaded(imageFiles);
|
| 25 |
+
}
|
| 26 |
+
},
|
| 27 |
+
[onImagesUploaded]
|
| 28 |
+
);
|
| 29 |
+
|
| 30 |
+
const handleDrag = useCallback((e: React.DragEvent) => {
|
| 31 |
+
e.preventDefault();
|
| 32 |
+
e.stopPropagation();
|
| 33 |
+
}, []);
|
| 34 |
+
|
| 35 |
+
const handleDragIn = useCallback((e: React.DragEvent) => {
|
| 36 |
+
e.preventDefault();
|
| 37 |
+
e.stopPropagation();
|
| 38 |
+
if (e.dataTransfer?.items && e.dataTransfer.items.length > 0) {
|
| 39 |
+
setDragActive(true);
|
| 40 |
+
}
|
| 41 |
+
}, []);
|
| 42 |
+
|
| 43 |
+
const handleDragOut = useCallback((e: React.DragEvent) => {
|
| 44 |
+
e.preventDefault();
|
| 45 |
+
e.stopPropagation();
|
| 46 |
+
setDragActive(false);
|
| 47 |
+
}, []);
|
| 48 |
+
|
| 49 |
+
const handleDrop = useCallback(
|
| 50 |
+
(e: React.DragEvent) => {
|
| 51 |
+
e.preventDefault();
|
| 52 |
+
e.stopPropagation();
|
| 53 |
+
setDragActive(false);
|
| 54 |
+
|
| 55 |
+
if (e.dataTransfer?.files && e.dataTransfer.files.length > 0) {
|
| 56 |
+
handleFiles(e.dataTransfer.files);
|
| 57 |
+
}
|
| 58 |
+
},
|
| 59 |
+
[handleFiles]
|
| 60 |
+
);
|
| 61 |
+
|
| 62 |
+
const handleFileInputChange = useCallback(
|
| 63 |
+
(e: React.ChangeEvent<HTMLInputElement>) => {
|
| 64 |
+
handleFiles(e.target.files);
|
| 65 |
+
},
|
| 66 |
+
[handleFiles]
|
| 67 |
+
);
|
| 68 |
+
|
| 69 |
+
const handleClick = useCallback(() => {
|
| 70 |
+
if (!isAnalyzing) {
|
| 71 |
+
fileInputRef.current?.click();
|
| 72 |
+
}
|
| 73 |
+
}, [isAnalyzing]);
|
| 74 |
+
|
| 75 |
+
return (
|
| 76 |
+
<div className="absolute inset-0 flex items-center justify-center">
|
| 77 |
+
<GlassContainer
|
| 78 |
+
bgColor={dragActive ? GLASS_EFFECTS.COLORS.BUTTON_BG : GLASS_EFFECTS.COLORS.DEFAULT_BG}
|
| 79 |
+
className={`p-8 rounded-2xl border-2 border-dashed transition-all duration-300 cursor-pointer max-w-md mx-4 ${
|
| 80 |
+
dragActive ? "border-blue-400 scale-105" : "border-white/30"
|
| 81 |
+
} ${isAnalyzing ? "opacity-50 pointer-events-none" : "hover:border-white/50"}`}
|
| 82 |
+
onDragEnter={handleDragIn}
|
| 83 |
+
onDragLeave={handleDragOut}
|
| 84 |
+
onDragOver={handleDrag}
|
| 85 |
+
onDrop={handleDrop}
|
| 86 |
+
onClick={handleClick}
|
| 87 |
+
>
|
| 88 |
+
<div className="text-center text-white">
|
| 89 |
+
<div className="mb-4">
|
| 90 |
+
<svg
|
| 91 |
+
className="mx-auto w-16 h-16 text-white/60"
|
| 92 |
+
fill="none"
|
| 93 |
+
stroke="currentColor"
|
| 94 |
+
viewBox="0 0 24 24"
|
| 95 |
+
>
|
| 96 |
+
<path
|
| 97 |
+
strokeLinecap="round"
|
| 98 |
+
strokeLinejoin="round"
|
| 99 |
+
strokeWidth={1.5}
|
| 100 |
+
d="M4 16l4.586-4.586a2 2 0 012.828 0L16 16m-2-2l1.586-1.586a2 2 0 012.828 0L20 14m-6-6h.01M6 20h12a2 2 0 002-2V6a2 2 0 00-2-2H6a2 2 0 00-2 2v12a2 2 0 002 2z"
|
| 101 |
+
/>
|
| 102 |
+
</svg>
|
| 103 |
+
</div>
|
| 104 |
+
|
| 105 |
+
<h3 className="text-xl font-semibold mb-2">Upload Images</h3>
|
| 106 |
+
<p className="text-white/80 mb-4">
|
| 107 |
+
Drag and drop images here, or click to select files
|
| 108 |
+
</p>
|
| 109 |
+
<p className="text-sm text-white/60 mb-6">
|
| 110 |
+
Supports JPG, PNG, GIF, WebP formats. Multiple files allowed.
|
| 111 |
+
</p>
|
| 112 |
+
|
| 113 |
+
<GlassButton disabled={isAnalyzing}>
|
| 114 |
+
{isAnalyzing ? "Analyzing..." : "Choose Files"}
|
| 115 |
+
</GlassButton>
|
| 116 |
+
</div>
|
| 117 |
+
|
| 118 |
+
<input
|
| 119 |
+
ref={fileInputRef}
|
| 120 |
+
type="file"
|
| 121 |
+
multiple
|
| 122 |
+
accept="image/*"
|
| 123 |
+
onChange={handleFileInputChange}
|
| 124 |
+
className="hidden"
|
| 125 |
+
/>
|
| 126 |
+
</GlassContainer>
|
| 127 |
+
</div>
|
| 128 |
+
);
|
| 129 |
+
}
|
src/components/PromptInput.tsx
CHANGED
|
@@ -5,9 +5,10 @@ import GlassContainer from "./GlassContainer";
|
|
| 5 |
interface PromptInputProps {
|
| 6 |
onPromptChange: (prompt: string) => void;
|
| 7 |
defaultPrompt?: string;
|
|
|
|
| 8 |
}
|
| 9 |
|
| 10 |
-
export default function PromptInput({ onPromptChange, defaultPrompt = PROMPTS.default }: PromptInputProps) {
|
| 11 |
const [prompt, setPrompt] = useState(defaultPrompt);
|
| 12 |
const [showSuggestions, setShowSuggestions] = useState(false);
|
| 13 |
const inputRef = useRef<HTMLTextAreaElement>(null);
|
|
@@ -116,10 +117,13 @@ export default function PromptInput({ onPromptChange, defaultPrompt = PROMPTS.de
|
|
| 116 |
ref={inputRef}
|
| 117 |
value={prompt}
|
| 118 |
onChange={handleInputChange}
|
| 119 |
-
onFocus={handleInputFocus}
|
| 120 |
-
onBlur={handleInputBlur}
|
| 121 |
-
onClick={handleInputClick}
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
| 123 |
style={{
|
| 124 |
background: "var(--input-bg)",
|
| 125 |
borderColor: "var(--input-border)",
|
|
@@ -132,7 +136,7 @@ export default function PromptInput({ onPromptChange, defaultPrompt = PROMPTS.de
|
|
| 132 |
placeholder={PROMPTS.placeholder}
|
| 133 |
rows={1}
|
| 134 |
/>
|
| 135 |
-
{prompt && (
|
| 136 |
<button
|
| 137 |
type="button"
|
| 138 |
onClick={clearInput}
|
|
|
|
| 5 |
interface PromptInputProps {
|
| 6 |
onPromptChange: (prompt: string) => void;
|
| 7 |
defaultPrompt?: string;
|
| 8 |
+
disabled?: boolean;
|
| 9 |
}
|
| 10 |
|
| 11 |
+
export default function PromptInput({ onPromptChange, defaultPrompt = PROMPTS.default, disabled = false }: PromptInputProps) {
|
| 12 |
const [prompt, setPrompt] = useState(defaultPrompt);
|
| 13 |
const [showSuggestions, setShowSuggestions] = useState(false);
|
| 14 |
const inputRef = useRef<HTMLTextAreaElement>(null);
|
|
|
|
| 117 |
ref={inputRef}
|
| 118 |
value={prompt}
|
| 119 |
onChange={handleInputChange}
|
| 120 |
+
onFocus={disabled ? undefined : handleInputFocus}
|
| 121 |
+
onBlur={disabled ? undefined : handleInputBlur}
|
| 122 |
+
onClick={disabled ? undefined : handleInputClick}
|
| 123 |
+
disabled={disabled}
|
| 124 |
+
className={`search-input w-full py-3 pl-4 pr-8 rounded-xl text-white text-base transition-all duration-400 border resize-none focus:outline-none focus:-translate-y-0.5 focus:shadow-lg ${
|
| 125 |
+
disabled ? 'opacity-50 cursor-not-allowed' : ''
|
| 126 |
+
}`}
|
| 127 |
style={{
|
| 128 |
background: "var(--input-bg)",
|
| 129 |
borderColor: "var(--input-border)",
|
|
|
|
| 136 |
placeholder={PROMPTS.placeholder}
|
| 137 |
rows={1}
|
| 138 |
/>
|
| 139 |
+
{prompt && !disabled && (
|
| 140 |
<button
|
| 141 |
type="button"
|
| 142 |
onClick={clearInput}
|
src/context/VLMContext.tsx
CHANGED
|
@@ -67,9 +67,9 @@ export const VLMProvider: React.FC<React.PropsWithChildren> = ({ children }) =>
|
|
| 67 |
);
|
| 68 |
|
| 69 |
const runInference = useCallback(
|
| 70 |
-
async (
|
| 71 |
if (inferenceLock.current) {
|
| 72 |
-
console.log("Inference already running, skipping
|
| 73 |
return ""; // Return empty string to signal a skip
|
| 74 |
}
|
| 75 |
inferenceLock.current = true;
|
|
@@ -78,21 +78,35 @@ export const VLMProvider: React.FC<React.PropsWithChildren> = ({ children }) =>
|
|
| 78 |
throw new Error("Model/processor not loaded");
|
| 79 |
}
|
| 80 |
|
| 81 |
-
|
| 82 |
-
canvasRef.current = document.createElement("canvas");
|
| 83 |
-
}
|
| 84 |
-
const canvas = canvasRef.current;
|
| 85 |
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
-
|
| 90 |
-
|
| 91 |
|
| 92 |
-
|
| 93 |
|
| 94 |
-
|
| 95 |
-
|
|
|
|
| 96 |
const messages = [
|
| 97 |
{
|
| 98 |
role: "system",
|
|
|
|
| 67 |
);
|
| 68 |
|
| 69 |
const runInference = useCallback(
|
| 70 |
+
async (imageSource: HTMLVideoElement | File, instruction: string, onTextUpdate?: (text: string) => void): Promise<string> => {
|
| 71 |
if (inferenceLock.current) {
|
| 72 |
+
console.log("Inference already running, skipping");
|
| 73 |
return ""; // Return empty string to signal a skip
|
| 74 |
}
|
| 75 |
inferenceLock.current = true;
|
|
|
|
| 78 |
throw new Error("Model/processor not loaded");
|
| 79 |
}
|
| 80 |
|
| 81 |
+
let rawImg: RawImage;
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
+
if (imageSource instanceof File) {
|
| 84 |
+
// Handle uploaded image file
|
| 85 |
+
const url = URL.createObjectURL(imageSource);
|
| 86 |
+
try {
|
| 87 |
+
rawImg = await RawImage.fromURL(url);
|
| 88 |
+
} finally {
|
| 89 |
+
URL.revokeObjectURL(url);
|
| 90 |
+
}
|
| 91 |
+
} else {
|
| 92 |
+
// Handle video frame (original logic)
|
| 93 |
+
if (!canvasRef.current) {
|
| 94 |
+
canvasRef.current = document.createElement("canvas");
|
| 95 |
+
}
|
| 96 |
+
const canvas = canvasRef.current;
|
| 97 |
+
const video = imageSource;
|
| 98 |
+
|
| 99 |
+
canvas.width = video.videoWidth;
|
| 100 |
+
canvas.height = video.videoHeight;
|
| 101 |
|
| 102 |
+
const ctx = canvas.getContext("2d", { willReadFrequently: true });
|
| 103 |
+
if (!ctx) throw new Error("Could not get canvas context");
|
| 104 |
|
| 105 |
+
ctx.drawImage(video, 0, 0);
|
| 106 |
|
| 107 |
+
const frame = ctx.getImageData(0, 0, canvas.width, canvas.height);
|
| 108 |
+
rawImg = new RawImage(frame.data, frame.width, frame.height, 4);
|
| 109 |
+
}
|
| 110 |
const messages = [
|
| 111 |
{
|
| 112 |
role: "system",
|
src/types/index.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
export type AppState = "
|
| 2 |
|
| 3 |
export interface GlassEffectProps {
|
| 4 |
baseFrequency?: number;
|
|
@@ -25,3 +25,9 @@ export interface Dimensions {
|
|
| 25 |
}
|
| 26 |
|
| 27 |
export type InitialPosition = "bottom-left" | "bottom-right" | Position;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
export type AppState = "upload" | "loading" | "analyzing";
|
| 2 |
|
| 3 |
export interface GlassEffectProps {
|
| 4 |
baseFrequency?: number;
|
|
|
|
| 25 |
}
|
| 26 |
|
| 27 |
export type InitialPosition = "bottom-left" | "bottom-right" | Position;
|
| 28 |
+
|
| 29 |
+
export interface ImageAnalysisResult {
|
| 30 |
+
file: File;
|
| 31 |
+
caption: string;
|
| 32 |
+
error?: string;
|
| 33 |
+
}
|
src/types/vlm.ts
CHANGED
|
@@ -4,7 +4,7 @@ export type VLMContextValue = {
|
|
| 4 |
error: string | null;
|
| 5 |
loadModel: (onProgress?: (msg: string) => void) => Promise<void>;
|
| 6 |
runInference: (
|
| 7 |
-
|
| 8 |
instruction: string,
|
| 9 |
onTextUpdate?: (text: string) => void,
|
| 10 |
) => Promise<string>;
|
|
|
|
| 4 |
error: string | null;
|
| 5 |
loadModel: (onProgress?: (msg: string) => void) => Promise<void>;
|
| 6 |
runInference: (
|
| 7 |
+
imageSource: HTMLVideoElement | File,
|
| 8 |
instruction: string,
|
| 9 |
onTextUpdate?: (text: string) => void,
|
| 10 |
) => Promise<string>;
|