Julian Bilcke
committed on
Commit
·
95a4e14
1
Parent(s):
b194c9d
working on an experimental speech bubble display
Browse files
- package-lock.json +6 -0
- package.json +1 -0
- src/app/interface/panel/bubble/index.tsx +1 -0
- src/app/interface/panel/index.tsx +33 -8
- src/app/interface/top-menu/index.tsx +13 -0
- src/app/main.tsx +8 -1
- src/app/queries/getStoryContinuation.ts +2 -0
- src/app/queries/getSystemPrompt.ts +3 -3
- src/app/queries/mockLLMResponse.ts +11 -3
- src/app/queries/predictNextPanels.ts +5 -4
- src/app/store/index.ts +47 -4
- src/lib/bubble/injectSpeechBubbleInTheBackground.ts +419 -0
- src/lib/createLlamaPrompt.ts +1 -1
- src/lib/dirtyGeneratedPanelCleaner.ts +3 -0
- src/lib/dirtyGeneratedPanelsParser.ts +5 -2
- src/lib/parseBadJSON.ts +3 -2
- src/types.ts +1 -0
package-lock.json
CHANGED
|
@@ -12,6 +12,7 @@
|
|
| 12 |
"@anthropic-ai/sdk": "^0.19.1",
|
| 13 |
"@huggingface/hub": "^0.15.1",
|
| 14 |
"@huggingface/inference": "^2.6.1",
|
|
|
|
| 15 |
"@radix-ui/react-accordion": "^1.1.2",
|
| 16 |
"@radix-ui/react-avatar": "^1.0.3",
|
| 17 |
"@radix-ui/react-checkbox": "^1.0.4",
|
|
@@ -828,6 +829,11 @@
|
|
| 828 |
"@jridgewell/sourcemap-codec": "^1.4.14"
|
| 829 |
}
|
| 830 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 831 |
"node_modules/@next/env": {
|
| 832 |
"version": "14.2.3",
|
| 833 |
"resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.3.tgz",
|
|
|
|
| 12 |
"@anthropic-ai/sdk": "^0.19.1",
|
| 13 |
"@huggingface/hub": "^0.15.1",
|
| 14 |
"@huggingface/inference": "^2.6.1",
|
| 15 |
+
"@mediapipe/tasks-vision": "^0.10.14",
|
| 16 |
"@radix-ui/react-accordion": "^1.1.2",
|
| 17 |
"@radix-ui/react-avatar": "^1.0.3",
|
| 18 |
"@radix-ui/react-checkbox": "^1.0.4",
|
|
|
|
| 829 |
"@jridgewell/sourcemap-codec": "^1.4.14"
|
| 830 |
}
|
| 831 |
},
|
| 832 |
+
"node_modules/@mediapipe/tasks-vision": {
|
| 833 |
+
"version": "0.10.14",
|
| 834 |
+
"resolved": "https://registry.npmjs.org/@mediapipe/tasks-vision/-/tasks-vision-0.10.14.tgz",
|
| 835 |
+
"integrity": "sha512-vOifgZhkndgybdvoRITzRkIueWWSiCKuEUXXK6Q4FaJsFvRJuwgg++vqFUMlL0Uox62U5aEXFhHxlhV7Ja5e3Q=="
|
| 836 |
+
},
|
| 837 |
"node_modules/@next/env": {
|
| 838 |
"version": "14.2.3",
|
| 839 |
"resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.3.tgz",
|
package.json
CHANGED
|
@@ -13,6 +13,7 @@
|
|
| 13 |
"@anthropic-ai/sdk": "^0.19.1",
|
| 14 |
"@huggingface/hub": "^0.15.1",
|
| 15 |
"@huggingface/inference": "^2.6.1",
|
|
|
|
| 16 |
"@radix-ui/react-accordion": "^1.1.2",
|
| 17 |
"@radix-ui/react-avatar": "^1.0.3",
|
| 18 |
"@radix-ui/react-checkbox": "^1.0.4",
|
|
|
|
| 13 |
"@anthropic-ai/sdk": "^0.19.1",
|
| 14 |
"@huggingface/hub": "^0.15.1",
|
| 15 |
"@huggingface/inference": "^2.6.1",
|
| 16 |
+
"@mediapipe/tasks-vision": "^0.10.14",
|
| 17 |
"@radix-ui/react-accordion": "^1.1.2",
|
| 18 |
"@radix-ui/react-avatar": "^1.0.3",
|
| 19 |
"@radix-ui/react-checkbox": "^1.0.4",
|
src/app/interface/panel/bubble/index.tsx
CHANGED
|
@@ -15,6 +15,7 @@ export function Bubble({ children, onChange }: {
|
|
| 15 |
|
| 16 |
const ref = useRef<HTMLDivElement>(null)
|
| 17 |
const zoomLevel = useStore(s => s.zoomLevel)
|
|
|
|
| 18 |
const showCaptions = useStore(s => s.showCaptions)
|
| 19 |
|
| 20 |
const text = useRef(`${children || ''}`)
|
|
|
|
| 15 |
|
| 16 |
const ref = useRef<HTMLDivElement>(null)
|
| 17 |
const zoomLevel = useStore(s => s.zoomLevel)
|
| 18 |
+
const showSpeeches = useStore(s => s.showSpeeches)
|
| 19 |
const showCaptions = useStore(s => s.showCaptions)
|
| 20 |
|
| 21 |
const text = useRef(`${children || ''}`)
|
src/app/interface/panel/index.tsx
CHANGED
|
@@ -2,22 +2,23 @@
|
|
| 2 |
|
| 3 |
import { useEffect, useRef, useState, useTransition } from "react"
|
| 4 |
import { RxReload, RxPencil2 } from "react-icons/rx"
|
|
|
|
| 5 |
|
| 6 |
import { RenderedScene, RenderingModelVendor } from "@/types"
|
| 7 |
-
|
| 8 |
import { getRender, newRender } from "@/app/engine/render"
|
| 9 |
import { useStore } from "@/app/store"
|
| 10 |
-
|
| 11 |
import { cn } from "@/lib/utils"
|
| 12 |
import { getInitialRenderedScene } from "@/lib/getInitialRenderedScene"
|
| 13 |
import { Progress } from "@/app/interface/progress"
|
|
|
|
| 14 |
import { EditModal } from "../edit-modal"
|
| 15 |
-
import { Bubble } from "./bubble"
|
| 16 |
import { getSettings } from "../settings-dialog/getSettings"
|
| 17 |
-
import { useLocalStorage } from "usehooks-ts"
|
| 18 |
import { localStorageKeys } from "../settings-dialog/localStorageKeys"
|
| 19 |
import { defaultSettings } from "../settings-dialog/defaultSettings"
|
| 20 |
|
|
|
|
|
|
|
| 21 |
export function Panel({
|
| 22 |
page,
|
| 23 |
nbPanels,
|
|
@@ -35,22 +36,18 @@ export function Panel({
|
|
| 35 |
// panel id, between 0 and (nbPanels - 1)
|
| 36 |
panel: number
|
| 37 |
|
| 38 |
-
|
| 39 |
className?: string
|
| 40 |
width?: number
|
| 41 |
height?: number
|
| 42 |
}) {
|
| 43 |
-
|
| 44 |
// index of the panel in the whole app
|
| 45 |
const panelIndex = page * nbPanels + panel
|
| 46 |
|
| 47 |
-
|
| 48 |
// the panel Id must be unique across all pages
|
| 49 |
const panelId = `${panelIndex}`
|
| 50 |
|
| 51 |
// console.log(`panel/index.tsx: <Panel panelId=${panelId}> rendered again!`)
|
| 52 |
|
| 53 |
-
|
| 54 |
const [mouseOver, setMouseOver] = useState(false)
|
| 55 |
const ref = useRef<HTMLImageElement>(null)
|
| 56 |
const font = useStore(s => s.font)
|
|
@@ -63,6 +60,10 @@ export function Panel({
|
|
| 63 |
|
| 64 |
const setPanelPrompt = useStore(s => s.setPanelPrompt)
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
const captions = useStore(s => s.captions)
|
| 67 |
const caption = captions[panelIndex] || ""
|
| 68 |
const setPanelCaption = useStore(s => s.setPanelCaption)
|
|
@@ -95,6 +96,28 @@ export function Panel({
|
|
| 95 |
|
| 96 |
let delay = enableRateLimiter ? (1000 + (500 * panelIndex)) : 1000
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
/*
|
| 99 |
console.log("panel/index.tsx: DEBUG: " + JSON.stringify({
|
| 100 |
page,
|
|
@@ -204,6 +227,7 @@ export function Panel({
|
|
| 204 |
if (newRendered.status === "completed") {
|
| 205 |
setGeneratingImages(panelId, false)
|
| 206 |
addToUpscaleQueue(panelId, newRendered)
|
|
|
|
| 207 |
} else if (!newRendered.status || newRendered.status === "error") {
|
| 208 |
setGeneratingImages(panelId, false)
|
| 209 |
} else {
|
|
@@ -274,6 +298,7 @@ export function Panel({
|
|
| 274 |
console.log("panel finished!")
|
| 275 |
setGeneratingImages(panelId, false)
|
| 276 |
addToUpscaleQueue(panelId, newRendered)
|
|
|
|
| 277 |
|
| 278 |
}
|
| 279 |
} catch (err) {
|
|
|
|
| 2 |
|
| 3 |
import { useEffect, useRef, useState, useTransition } from "react"
|
| 4 |
import { RxReload, RxPencil2 } from "react-icons/rx"
|
| 5 |
+
import { useLocalStorage } from "usehooks-ts"
|
| 6 |
|
| 7 |
import { RenderedScene, RenderingModelVendor } from "@/types"
|
|
|
|
| 8 |
import { getRender, newRender } from "@/app/engine/render"
|
| 9 |
import { useStore } from "@/app/store"
|
| 10 |
+
import { injectSpeechBubbleInTheBackground } from "@/lib/bubble/injectSpeechBubbleInTheBackground"
|
| 11 |
import { cn } from "@/lib/utils"
|
| 12 |
import { getInitialRenderedScene } from "@/lib/getInitialRenderedScene"
|
| 13 |
import { Progress } from "@/app/interface/progress"
|
| 14 |
+
|
| 15 |
import { EditModal } from "../edit-modal"
|
|
|
|
| 16 |
import { getSettings } from "../settings-dialog/getSettings"
|
|
|
|
| 17 |
import { localStorageKeys } from "../settings-dialog/localStorageKeys"
|
| 18 |
import { defaultSettings } from "../settings-dialog/defaultSettings"
|
| 19 |
|
| 20 |
+
import { Bubble } from "./bubble"
|
| 21 |
+
|
| 22 |
export function Panel({
|
| 23 |
page,
|
| 24 |
nbPanels,
|
|
|
|
| 36 |
// panel id, between 0 and (nbPanels - 1)
|
| 37 |
panel: number
|
| 38 |
|
|
|
|
| 39 |
className?: string
|
| 40 |
width?: number
|
| 41 |
height?: number
|
| 42 |
}) {
|
|
|
|
| 43 |
// index of the panel in the whole app
|
| 44 |
const panelIndex = page * nbPanels + panel
|
| 45 |
|
|
|
|
| 46 |
// the panel Id must be unique across all pages
|
| 47 |
const panelId = `${panelIndex}`
|
| 48 |
|
| 49 |
// console.log(`panel/index.tsx: <Panel panelId=${panelId}> rendered again!`)
|
| 50 |
|
|
|
|
| 51 |
const [mouseOver, setMouseOver] = useState(false)
|
| 52 |
const ref = useRef<HTMLImageElement>(null)
|
| 53 |
const font = useStore(s => s.font)
|
|
|
|
| 60 |
|
| 61 |
const setPanelPrompt = useStore(s => s.setPanelPrompt)
|
| 62 |
|
| 63 |
+
const speeches = useStore(s => s.speeches)
|
| 64 |
+
const speech = speeches[panelIndex] || ""
|
| 65 |
+
const setPanelSpeech = useStore(s => s.setPanelSpeech)
|
| 66 |
+
|
| 67 |
const captions = useStore(s => s.captions)
|
| 68 |
const caption = captions[panelIndex] || ""
|
| 69 |
const setPanelCaption = useStore(s => s.setPanelCaption)
|
|
|
|
| 96 |
|
| 97 |
let delay = enableRateLimiter ? (1000 + (500 * panelIndex)) : 1000
|
| 98 |
|
| 99 |
+
const addSpeechBubble = async () => {
|
| 100 |
+
if (!renderedRef.current) { return }
|
| 101 |
+
|
| 102 |
+
// story generation failed
|
| 103 |
+
if (speech.trim() === "...") { return }
|
| 104 |
+
|
| 105 |
+
console.log('Generating speech bubble...')
|
| 106 |
+
try {
|
| 107 |
+
const result = await injectSpeechBubbleInTheBackground({
|
| 108 |
+
inputImageInBase64: renderedRef.current.assetUrl,
|
| 109 |
+
text: speech,
|
| 110 |
+
shape: "oval",
|
| 111 |
+
line: "straight", // "straight", "bubble", "chaotic"
|
| 112 |
+
// font?: string;
|
| 113 |
+
// debug: true,
|
| 114 |
+
})
|
| 115 |
+
renderedRef.current.assetUrl = result
|
| 116 |
+
setRendered(panelId, renderedRef.current)
|
| 117 |
+
} catch (err) {
|
| 118 |
+
console.log(`error: failed to inject the speech bubble: ${err}`)
|
| 119 |
+
}
|
| 120 |
+
}
|
| 121 |
/*
|
| 122 |
console.log("panel/index.tsx: DEBUG: " + JSON.stringify({
|
| 123 |
page,
|
|
|
|
| 227 |
if (newRendered.status === "completed") {
|
| 228 |
setGeneratingImages(panelId, false)
|
| 229 |
addToUpscaleQueue(panelId, newRendered)
|
| 230 |
+
addSpeechBubble()
|
| 231 |
} else if (!newRendered.status || newRendered.status === "error") {
|
| 232 |
setGeneratingImages(panelId, false)
|
| 233 |
} else {
|
|
|
|
| 298 |
console.log("panel finished!")
|
| 299 |
setGeneratingImages(panelId, false)
|
| 300 |
addToUpscaleQueue(panelId, newRendered)
|
| 301 |
+
addSpeechBubble()
|
| 302 |
|
| 303 |
}
|
| 304 |
} catch (err) {
|
src/app/interface/top-menu/index.tsx
CHANGED
|
@@ -45,6 +45,9 @@ export function TopMenu() {
|
|
| 45 |
const layout = useStore(s => s.layout)
|
| 46 |
const setLayout = useStore(s => s.setLayout)
|
| 47 |
|
|
|
|
|
|
|
|
|
|
| 48 |
const setShowCaptions = useStore(s => s.setShowCaptions)
|
| 49 |
const showCaptions = useStore(s => s.showCaptions)
|
| 50 |
|
|
@@ -170,6 +173,16 @@ export function TopMenu() {
|
|
| 170 |
<span className="inline md:hidden">Cap.</span>
|
| 171 |
</Label>
|
| 172 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
{/*
|
| 174 |
<div className={cn(
|
| 175 |
`transition-all duration-200 ease-in-out`,
|
|
|
|
| 45 |
const layout = useStore(s => s.layout)
|
| 46 |
const setLayout = useStore(s => s.setLayout)
|
| 47 |
|
| 48 |
+
const setShowSpeeches = useStore(s => s.setShowSpeeches)
|
| 49 |
+
const showSpeeches = useStore(s => s.showSpeeches)
|
| 50 |
+
|
| 51 |
const setShowCaptions = useStore(s => s.setShowCaptions)
|
| 52 |
const showCaptions = useStore(s => s.showCaptions)
|
| 53 |
|
|
|
|
| 173 |
<span className="inline md:hidden">Cap.</span>
|
| 174 |
</Label>
|
| 175 |
</div>
|
| 176 |
+
<div className="flex flex-row items-center space-x-3">
|
| 177 |
+
<Switch
|
| 178 |
+
checked={showSpeeches}
|
| 179 |
+
onCheckedChange={setShowSpeeches}
|
| 180 |
+
/>
|
| 181 |
+
<Label className="text-gray-200 dark:text-gray-200">
|
| 182 |
+
<span className="hidden md:inline">Bubbles</span>
|
| 183 |
+
<span className="inline md:hidden">Bub.</span>
|
| 184 |
+
</Label>
|
| 185 |
+
</div>
|
| 186 |
{/*
|
| 187 |
<div className={cn(
|
| 188 |
`transition-all duration-200 ease-in-out`,
|
src/app/main.tsx
CHANGED
|
@@ -49,8 +49,11 @@ export default function Main() {
|
|
| 49 |
|
| 50 |
// do we need those?
|
| 51 |
const renderedScenes = useStore(s => s.renderedScenes)
|
| 52 |
-
const captions = useStore(s => s.captions)
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
const setCaptions = useStore(s => s.setCaptions)
|
| 55 |
|
| 56 |
const zoomLevel = useStore(s => s.zoomLevel)
|
|
@@ -101,6 +104,7 @@ export default function Main() {
|
|
| 101 |
const ref = useRef({
|
| 102 |
existingPanels: [] as GeneratedPanel[],
|
| 103 |
newPanelsPrompts: [] as string[],
|
|
|
|
| 104 |
newCaptions: [] as string[],
|
| 105 |
prompt: "",
|
| 106 |
preset: "",
|
|
@@ -142,6 +146,7 @@ export default function Main() {
|
|
| 142 |
ref.current = {
|
| 143 |
existingPanels: [],
|
| 144 |
newPanelsPrompts: [],
|
|
|
|
| 145 |
newCaptions: [],
|
| 146 |
prompt,
|
| 147 |
preset: preset?.label || "",
|
|
@@ -214,6 +219,7 @@ export default function Main() {
|
|
| 214 |
const endAt = currentPanel + nbPanelsToGenerate
|
| 215 |
for (let p = startAt; p < endAt; p++) {
|
| 216 |
ref.current.newCaptions.push(ref.current.existingPanels[p]?.caption.trim() || "...")
|
|
|
|
| 217 |
const newPanel = joinWords([
|
| 218 |
|
| 219 |
// what we do here is that ideally we give full control to the LLM for prompting,
|
|
@@ -231,6 +237,7 @@ export default function Main() {
|
|
| 231 |
|
| 232 |
// update the frontend
|
| 233 |
// console.log("updating the frontend..")
|
|
|
|
| 234 |
setCaptions(ref.current.newCaptions)
|
| 235 |
setPanels(ref.current.newPanelsPrompts)
|
| 236 |
setGeneratingStory(false)
|
|
|
|
| 49 |
|
| 50 |
// do we need those?
|
| 51 |
const renderedScenes = useStore(s => s.renderedScenes)
|
|
|
|
| 52 |
|
| 53 |
+
const speeches = useStore(s => s.speeches)
|
| 54 |
+
const setSpeeches = useStore(s => s.setSpeeches)
|
| 55 |
+
|
| 56 |
+
const captions = useStore(s => s.captions)
|
| 57 |
const setCaptions = useStore(s => s.setCaptions)
|
| 58 |
|
| 59 |
const zoomLevel = useStore(s => s.zoomLevel)
|
|
|
|
| 104 |
const ref = useRef({
|
| 105 |
existingPanels: [] as GeneratedPanel[],
|
| 106 |
newPanelsPrompts: [] as string[],
|
| 107 |
+
newSpeeches: [] as string[],
|
| 108 |
newCaptions: [] as string[],
|
| 109 |
prompt: "",
|
| 110 |
preset: "",
|
|
|
|
| 146 |
ref.current = {
|
| 147 |
existingPanels: [],
|
| 148 |
newPanelsPrompts: [],
|
| 149 |
+
newSpeeches: [],
|
| 150 |
newCaptions: [],
|
| 151 |
prompt,
|
| 152 |
preset: preset?.label || "",
|
|
|
|
| 219 |
const endAt = currentPanel + nbPanelsToGenerate
|
| 220 |
for (let p = startAt; p < endAt; p++) {
|
| 221 |
ref.current.newCaptions.push(ref.current.existingPanels[p]?.caption.trim() || "...")
|
| 222 |
+
ref.current.newSpeeches.push(ref.current.existingPanels[p]?.speech.trim() || "...")
|
| 223 |
const newPanel = joinWords([
|
| 224 |
|
| 225 |
// what we do here is that ideally we give full control to the LLM for prompting,
|
|
|
|
| 237 |
|
| 238 |
// update the frontend
|
| 239 |
// console.log("updating the frontend..")
|
| 240 |
+
setSpeeches(ref.current.newSpeeches)
|
| 241 |
setCaptions(ref.current.newCaptions)
|
| 242 |
setPanels(ref.current.newPanelsPrompts)
|
| 243 |
setGeneratingStory(false)
|
src/app/queries/getStoryContinuation.ts
CHANGED
|
@@ -48,6 +48,7 @@ export const getStoryContinuation = async ({
|
|
| 48 |
panels.push({
|
| 49 |
panel: startAt + i,
|
| 50 |
instructions: `${panelCandidates[i]?.instructions || ""}`,
|
|
|
|
| 51 |
caption: `${panelCandidates[i]?.caption || ""}`,
|
| 52 |
})
|
| 53 |
}
|
|
@@ -64,6 +65,7 @@ export const getStoryContinuation = async ({
|
|
| 64 |
userStoryPrompt,
|
| 65 |
`${".".repeat(p)}`,
|
| 66 |
]),
|
|
|
|
| 67 |
caption: "(Sorry, LLM generation failed: using degraded mode)"
|
| 68 |
})
|
| 69 |
}
|
|
|
|
| 48 |
panels.push({
|
| 49 |
panel: startAt + i,
|
| 50 |
instructions: `${panelCandidates[i]?.instructions || ""}`,
|
| 51 |
+
speech: `${panelCandidates[i]?.speech || ""}`,
|
| 52 |
caption: `${panelCandidates[i]?.caption || ""}`,
|
| 53 |
})
|
| 54 |
}
|
|
|
|
| 65 |
userStoryPrompt,
|
| 66 |
`${".".repeat(p)}`,
|
| 67 |
]),
|
| 68 |
+
speech: "...",
|
| 69 |
caption: "(Sorry, LLM generation failed: using degraded mode)"
|
| 70 |
})
|
| 71 |
}
|
src/app/queries/getSystemPrompt.ts
CHANGED
|
@@ -19,9 +19,9 @@ export function getSystemPrompt({
|
|
| 19 |
}) {
|
| 20 |
return [
|
| 21 |
`You are a writer specialized in ${preset.llmPrompt}`,
|
| 22 |
-
`Please write detailed drawing instructions and short (2-3 sentences long)
|
| 23 |
-
`Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; caption: string; }>\`.`,
|
| 24 |
// `Give your response as Markdown bullet points.`,
|
| 25 |
-
`Be brief in the instructions and narrative captions of those ${nbPanelsToGenerate} panels, don't add your own comments. The
|
| 26 |
].filter(item => item).join("\n")
|
| 27 |
}
|
|
|
|
| 19 |
}) {
|
| 20 |
return [
|
| 21 |
`You are a writer specialized in ${preset.llmPrompt}`,
|
| 22 |
+
`Please write detailed drawing instructions and short (2-3 sentences long) speeches and narrator captions for the ${firstNextOrLast} ${nbPanelsToGenerate} panels (out of ${maxNbPanels} in total) of a new story, but keep it open-ended (it will be continued and expanded later). Please make sure each of those ${nbPanelsToGenerate} panels include info about character gender, age, origin, clothes, colors, location, lights, etc. Only generate those ${nbPanelsToGenerate} panels, but take into account the fact the panels are part of a longer story (${maxNbPanels} panels long).`,
|
| 23 |
+
`Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; speech: string; caption: string; }>\`.`,
|
| 24 |
// `Give your response as Markdown bullet points.`,
|
| 25 |
+
`Be brief in the instructions, the speeches and the narrative captions of those ${nbPanelsToGenerate} panels, don't add your own comments. The speech must be captivating, smart, entertaining, usually a sentence or two. Be straight to the point, return JSON and never reply things like "Sure, I can.." etc. Reply using valid JSON!! Important: Write valid JSON!`
|
| 26 |
].filter(item => item).join("\n")
|
| 27 |
}
|
src/app/queries/mockLLMResponse.ts
CHANGED
|
@@ -3,41 +3,49 @@ import { GeneratedPanels } from "@/types"
|
|
| 3 |
export const mockGeneratedPanels: GeneratedPanels = [{
|
| 4 |
"panel": 1,
|
| 5 |
"instructions": "wide shot of detective walking towards a UFO crash site",
|
|
|
|
| 6 |
"caption": "Detective Jameson investigates a UFO crash in the desert"
|
| 7 |
},
|
| 8 |
{
|
| 9 |
"panel": 2,
|
| 10 |
"instructions": "close-up of detective's face, determined expression",
|
|
|
|
| 11 |
"caption": "He's been tracking this case for weeks"
|
| 12 |
},
|
| 13 |
{
|
| 14 |
"panel": 3,
|
| 15 |
"instructions": "medium shot of detective examining UFO debris",
|
|
|
|
| 16 |
"caption": "The evidence is scattered all over the desert"
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"panel": 4,
|
| 20 |
"instructions": "close-up of strange symbol on UFO debris",
|
| 21 |
-
"
|
|
|
|
| 22 |
},
|
| 23 |
{
|
| 24 |
"panel": 5,
|
| 25 |
"instructions": "wide shot of detective walking towards a strange rock formation",
|
|
|
|
| 26 |
"caption": "Jameson follows a trail that leads him deeper into the desert"
|
| 27 |
},
|
| 28 |
{
|
| 29 |
"panel": 6,
|
| 30 |
"instructions": "medium shot of detective discovering an alien body",
|
| 31 |
-
"
|
|
|
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"panel": 7,
|
| 35 |
"instructions": "close-up of alien's face, eyes closed, peaceful expression",
|
|
|
|
| 36 |
"caption": "An alien life form, deceased"
|
| 37 |
},
|
| 38 |
{
|
| 39 |
"panel": 8,
|
| 40 |
"instructions": "wide shot of detective standing over the alien body, looking up at the sky",
|
| 41 |
-
"
|
|
|
|
| 42 |
}
|
| 43 |
]
|
|
|
|
| 3 |
export const mockGeneratedPanels: GeneratedPanels = [{
|
| 4 |
"panel": 1,
|
| 5 |
"instructions": "wide shot of detective walking towards a UFO crash site",
|
| 6 |
+
"speech": "Hmm.. interesting.",
|
| 7 |
"caption": "Detective Jameson investigates a UFO crash in the desert"
|
| 8 |
},
|
| 9 |
{
|
| 10 |
"panel": 2,
|
| 11 |
"instructions": "close-up of detective's face, determined expression",
|
| 12 |
+
"speech": "I've been tracking this case for weeks",
|
| 13 |
"caption": "He's been tracking this case for weeks"
|
| 14 |
},
|
| 15 |
{
|
| 16 |
"panel": 3,
|
| 17 |
"instructions": "medium shot of detective examining UFO debris",
|
| 18 |
+
"speech": "...",
|
| 19 |
"caption": "The evidence is scattered all over the desert"
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"panel": 4,
|
| 23 |
"instructions": "close-up of strange symbol on UFO debris",
|
| 24 |
+
"speech": " what does this symbol mean?",
|
| 25 |
+
"caption": "strange symbols"
|
| 26 |
},
|
| 27 |
{
|
| 28 |
"panel": 5,
|
| 29 |
"instructions": "wide shot of detective walking towards a strange rock formation",
|
| 30 |
+
"speech": "I've been tracking this case for weeks",
|
| 31 |
"caption": "Jameson follows a trail that leads him deeper into the desert"
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"panel": 6,
|
| 35 |
"instructions": "medium shot of detective discovering an alien body",
|
| 36 |
+
"speech": "I'm not alone in the desert",
|
| 37 |
+
"caption": "He's not alone"
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"panel": 7,
|
| 41 |
"instructions": "close-up of alien's face, eyes closed, peaceful expression",
|
| 42 |
+
"speech": "...?",
|
| 43 |
"caption": "An alien life form, deceased"
|
| 44 |
},
|
| 45 |
{
|
| 46 |
"panel": 8,
|
| 47 |
"instructions": "wide shot of detective standing over the alien body, looking up at the sky",
|
| 48 |
+
"speech": "what other secrets lie beyond the stars?",
|
| 49 |
+
"caption": "Jameson wonders"
|
| 50 |
}
|
| 51 |
]
|
src/app/queries/predictNextPanels.ts
CHANGED
|
@@ -31,7 +31,7 @@ export const predictNextPanels = async ({
|
|
| 31 |
// return mockGeneratedPanels
|
| 32 |
|
| 33 |
const existingPanelsTemplate = existingPanels.length
|
| 34 |
-
? ` To help you, here are the previous panels and
|
| 35 |
: ''
|
| 36 |
|
| 37 |
const firstNextOrLast =
|
|
@@ -55,9 +55,9 @@ export const predictNextPanels = async ({
|
|
| 55 |
|
| 56 |
let result = ""
|
| 57 |
|
| 58 |
-
// we don't require a lot of token for our task
|
| 59 |
-
// but to be safe, let's count ~
|
| 60 |
-
const nbTokensPerPanel =
|
| 61 |
|
| 62 |
const nbMaxNewTokens = nbPanelsToGenerate * nbTokensPerPanel
|
| 63 |
|
|
@@ -115,6 +115,7 @@ export const predictNextPanels = async ({
|
|
| 115 |
.map((cap, i) => ({
|
| 116 |
panel: i,
|
| 117 |
caption: cap,
|
|
|
|
| 118 |
instructions: cap,
|
| 119 |
}))
|
| 120 |
)
|
|
|
|
| 31 |
// return mockGeneratedPanels
|
| 32 |
|
| 33 |
const existingPanelsTemplate = existingPanels.length
|
| 34 |
+
? ` To help you, here are the previous panels, their speeches and captions (note: if you see an anomaly here eg. no speech, no caption or the same description repeated multiple times, do not hesitate to fix the story): ${JSON.stringify(existingPanels, null, 2)}`
|
| 35 |
: ''
|
| 36 |
|
| 37 |
const firstNextOrLast =
|
|
|
|
| 55 |
|
| 56 |
let result = ""
|
| 57 |
|
| 58 |
+
// we don't require a lot of token for our task,
|
| 59 |
+
// but to be safe, let's count ~200 tokens per panel
|
| 60 |
+
const nbTokensPerPanel = 200
|
| 61 |
|
| 62 |
const nbMaxNewTokens = nbPanelsToGenerate * nbTokensPerPanel
|
| 63 |
|
|
|
|
| 115 |
.map((cap, i) => ({
|
| 116 |
panel: i,
|
| 117 |
caption: cap,
|
| 118 |
+
speech: cap,
|
| 119 |
instructions: cap,
|
| 120 |
}))
|
| 121 |
)
|
src/app/store/index.ts
CHANGED
|
@@ -26,8 +26,10 @@ export const useStore = create<{
|
|
| 26 |
currentNbPanels: number
|
| 27 |
maxNbPanels: number
|
| 28 |
panels: string[]
|
|
|
|
| 29 |
captions: string[]
|
| 30 |
upscaleQueue: Record<string, RenderedScene>
|
|
|
|
| 31 |
showCaptions: boolean
|
| 32 |
renderedScenes: Record<string, RenderedScene>
|
| 33 |
layout: LayoutName
|
|
@@ -55,9 +57,12 @@ export const useStore = create<{
|
|
| 55 |
setPreset: (preset: Preset) => void
|
| 56 |
setPanels: (panels: string[]) => void
|
| 57 |
setPanelPrompt: (newPrompt: string, index: number) => void
|
| 58 |
-
setShowCaptions: (showCaptions: boolean) => void
|
| 59 |
setLayout: (layout: LayoutName, index?: number) => void
|
| 60 |
setLayouts: (layouts: LayoutName[]) => void
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
setCaptions: (captions: string[]) => void
|
| 62 |
setPanelCaption: (newCaption: string, index: number) => void
|
| 63 |
setZoomLevel: (zoomLevel: number) => void
|
|
@@ -85,6 +90,7 @@ export const useStore = create<{
|
|
| 85 |
stylePrompt: string
|
| 86 |
panels: string[]
|
| 87 |
renderedScenes: Record<string, RenderedScene>
|
|
|
|
| 88 |
captions: string[]
|
| 89 |
}>
|
| 90 |
loadClap: (blob: Blob) => Promise<void>
|
|
@@ -107,9 +113,11 @@ export const useStore = create<{
|
|
| 107 |
maxNbPanels: 4,
|
| 108 |
|
| 109 |
panels: [],
|
|
|
|
| 110 |
captions: [],
|
| 111 |
upscaleQueue: {} as Record<string, RenderedScene>,
|
| 112 |
renderedScenes: {} as Record<string, RenderedScene>,
|
|
|
|
| 113 |
showCaptions: getParam("showCaptions", false),
|
| 114 |
|
| 115 |
// deprecated?
|
|
@@ -284,6 +292,24 @@ export const useStore = create<{
|
|
| 284 |
))
|
| 285 |
})
|
| 286 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
setCaptions: (captions: string[]) => {
|
| 288 |
set({
|
| 289 |
captions,
|
|
@@ -324,6 +350,7 @@ export const useStore = create<{
|
|
| 324 |
currentNbPages: 1,
|
| 325 |
currentNbPanels: currentNbPanelsPerPage,
|
| 326 |
panels: [],
|
|
|
|
| 327 |
captions: [],
|
| 328 |
upscaleQueue: {},
|
| 329 |
renderedScenes: {},
|
|
@@ -408,6 +435,7 @@ export const useStore = create<{
|
|
| 408 |
currentNbPages: 1,
|
| 409 |
currentNbPanels: currentNbPanelsPerPage,
|
| 410 |
panels: [],
|
|
|
|
| 411 |
captions: [],
|
| 412 |
upscaleQueue: {},
|
| 413 |
renderedScenes: {},
|
|
@@ -431,6 +459,7 @@ export const useStore = create<{
|
|
| 431 |
prompt,
|
| 432 |
panels,
|
| 433 |
renderedScenes,
|
|
|
|
| 434 |
captions
|
| 435 |
} = get()
|
| 436 |
|
|
@@ -459,7 +488,7 @@ export const useStore = create<{
|
|
| 459 |
for (let i = 0; i < panels.length; i++) {
|
| 460 |
|
| 461 |
const panel = panels[i]
|
| 462 |
-
|
| 463 |
const caption = captions[i]
|
| 464 |
|
| 465 |
const renderedScene = renderedScenes[`${i}`]
|
|
@@ -492,7 +521,7 @@ export const useStore = create<{
|
|
| 492 |
startTimeInMs: currentElapsedTimeInMs,
|
| 493 |
assetDurationInMs: defaultSegmentDurationInMs,
|
| 494 |
category: ClapSegmentCategory.DIALOGUE,
|
| 495 |
-
prompt:
|
| 496 |
outputType: ClapOutputType.AUDIO,
|
| 497 |
status: ClapSegmentStatus.TO_GENERATE,
|
| 498 |
}))
|
|
@@ -525,6 +554,7 @@ export const useStore = create<{
|
|
| 525 |
stylePrompt: string
|
| 526 |
panels: string[]
|
| 527 |
renderedScenes: Record<string, RenderedScene>
|
|
|
|
| 528 |
captions: string[]
|
| 529 |
}> => {
|
| 530 |
|
|
@@ -534,6 +564,7 @@ export const useStore = create<{
|
|
| 534 |
const panels: string[] = []
|
| 535 |
const renderedScenes: Record<string, RenderedScene> = {}
|
| 536 |
const captions: string[] = []
|
|
|
|
| 537 |
|
| 538 |
const panelGenerationStatus: Record<number, boolean> = {}
|
| 539 |
|
|
@@ -552,14 +583,21 @@ export const useStore = create<{
|
|
| 552 |
cameraShot,
|
| 553 |
clap.segments,
|
| 554 |
ClapSegmentCategory.INTERFACE,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 555 |
).at(0) as (ClapSegment | undefined)
|
| 556 |
})).filter(item => item.storyboard && item.ui) as {
|
| 557 |
camera: ClapSegment
|
| 558 |
storyboard: ClapSegment
|
| 559 |
ui: ClapSegment
|
|
|
|
| 560 |
}[]
|
| 561 |
|
| 562 |
-
shots.forEach(({ camera, storyboard, ui }, id) => {
|
| 563 |
|
| 564 |
panels.push(storyboard.prompt)
|
| 565 |
|
|
@@ -582,6 +620,8 @@ export const useStore = create<{
|
|
| 582 |
|
| 583 |
panelGenerationStatus[id] = false
|
| 584 |
|
|
|
|
|
|
|
| 585 |
captions.push(ui?.prompt || "")
|
| 586 |
})
|
| 587 |
|
|
@@ -595,6 +635,7 @@ export const useStore = create<{
|
|
| 595 |
stylePrompt,
|
| 596 |
panels,
|
| 597 |
renderedScenes,
|
|
|
|
| 598 |
captions,
|
| 599 |
|
| 600 |
}
|
|
@@ -614,6 +655,7 @@ export const useStore = create<{
|
|
| 614 |
stylePrompt,
|
| 615 |
panels,
|
| 616 |
renderedScenes,
|
|
|
|
| 617 |
captions,
|
| 618 |
} = await convertClapToComic(currentClap)
|
| 619 |
|
|
@@ -629,6 +671,7 @@ export const useStore = create<{
|
|
| 629 |
// layout,
|
| 630 |
panels,
|
| 631 |
renderedScenes,
|
|
|
|
| 632 |
captions,
|
| 633 |
currentNbPages: Math.round(currentNbPanels / currentNbPanelsPerPage),
|
| 634 |
upscaleQueue: {},
|
|
|
|
| 26 |
currentNbPanels: number
|
| 27 |
maxNbPanels: number
|
| 28 |
panels: string[]
|
| 29 |
+
speeches: string[]
|
| 30 |
captions: string[]
|
| 31 |
upscaleQueue: Record<string, RenderedScene>
|
| 32 |
+
showSpeeches: boolean
|
| 33 |
showCaptions: boolean
|
| 34 |
renderedScenes: Record<string, RenderedScene>
|
| 35 |
layout: LayoutName
|
|
|
|
| 57 |
setPreset: (preset: Preset) => void
|
| 58 |
setPanels: (panels: string[]) => void
|
| 59 |
setPanelPrompt: (newPrompt: string, index: number) => void
|
|
|
|
| 60 |
setLayout: (layout: LayoutName, index?: number) => void
|
| 61 |
setLayouts: (layouts: LayoutName[]) => void
|
| 62 |
+
setShowSpeeches: (showSpeeches: boolean) => void
|
| 63 |
+
setSpeeches: (speeches: string[]) => void
|
| 64 |
+
setPanelSpeech: (newSpeech: string, index: number) => void
|
| 65 |
+
setShowCaptions: (showCaptions: boolean) => void
|
| 66 |
setCaptions: (captions: string[]) => void
|
| 67 |
setPanelCaption: (newCaption: string, index: number) => void
|
| 68 |
setZoomLevel: (zoomLevel: number) => void
|
|
|
|
| 90 |
stylePrompt: string
|
| 91 |
panels: string[]
|
| 92 |
renderedScenes: Record<string, RenderedScene>
|
| 93 |
+
speeches: string[]
|
| 94 |
captions: string[]
|
| 95 |
}>
|
| 96 |
loadClap: (blob: Blob) => Promise<void>
|
|
|
|
| 113 |
maxNbPanels: 4,
|
| 114 |
|
| 115 |
panels: [],
|
| 116 |
+
speeches: [],
|
| 117 |
captions: [],
|
| 118 |
upscaleQueue: {} as Record<string, RenderedScene>,
|
| 119 |
renderedScenes: {} as Record<string, RenderedScene>,
|
| 120 |
+
showSpeeches: getParam("showSpeeches", false),
|
| 121 |
showCaptions: getParam("showCaptions", false),
|
| 122 |
|
| 123 |
// deprecated?
|
|
|
|
| 292 |
))
|
| 293 |
})
|
| 294 |
},
|
| 295 |
+
setSpeeches: (speeches: string[]) => {
|
| 296 |
+
set({
|
| 297 |
+
speeches,
|
| 298 |
+
})
|
| 299 |
+
},
|
| 300 |
+
setShowSpeeches: (showSpeeches: boolean) => {
|
| 301 |
+
set({
|
| 302 |
+
showSpeeches,
|
| 303 |
+
})
|
| 304 |
+
},
|
| 305 |
+
setPanelSpeech: (newSpeech, index) => {
|
| 306 |
+
const { speeches } = get()
|
| 307 |
+
set({
|
| 308 |
+
speeches: speeches.map((c, i) => (
|
| 309 |
+
index === i ? newSpeech : c
|
| 310 |
+
))
|
| 311 |
+
})
|
| 312 |
+
},
|
| 313 |
setCaptions: (captions: string[]) => {
|
| 314 |
set({
|
| 315 |
captions,
|
|
|
|
| 350 |
currentNbPages: 1,
|
| 351 |
currentNbPanels: currentNbPanelsPerPage,
|
| 352 |
panels: [],
|
| 353 |
+
speeches: [],
|
| 354 |
captions: [],
|
| 355 |
upscaleQueue: {},
|
| 356 |
renderedScenes: {},
|
|
|
|
| 435 |
currentNbPages: 1,
|
| 436 |
currentNbPanels: currentNbPanelsPerPage,
|
| 437 |
panels: [],
|
| 438 |
+
speeches: [],
|
| 439 |
captions: [],
|
| 440 |
upscaleQueue: {},
|
| 441 |
renderedScenes: {},
|
|
|
|
| 459 |
prompt,
|
| 460 |
panels,
|
| 461 |
renderedScenes,
|
| 462 |
+
speeches,
|
| 463 |
captions
|
| 464 |
} = get()
|
| 465 |
|
|
|
|
| 488 |
for (let i = 0; i < panels.length; i++) {
|
| 489 |
|
| 490 |
const panel = panels[i]
|
| 491 |
+
const speech = speeches[i]
|
| 492 |
const caption = captions[i]
|
| 493 |
|
| 494 |
const renderedScene = renderedScenes[`${i}`]
|
|
|
|
| 521 |
startTimeInMs: currentElapsedTimeInMs,
|
| 522 |
assetDurationInMs: defaultSegmentDurationInMs,
|
| 523 |
category: ClapSegmentCategory.DIALOGUE,
|
| 524 |
+
prompt: speech,
|
| 525 |
outputType: ClapOutputType.AUDIO,
|
| 526 |
status: ClapSegmentStatus.TO_GENERATE,
|
| 527 |
}))
|
|
|
|
| 554 |
stylePrompt: string
|
| 555 |
panels: string[]
|
| 556 |
renderedScenes: Record<string, RenderedScene>
|
| 557 |
+
speeches: string[]
|
| 558 |
captions: string[]
|
| 559 |
}> => {
|
| 560 |
|
|
|
|
| 564 |
const panels: string[] = []
|
| 565 |
const renderedScenes: Record<string, RenderedScene> = {}
|
| 566 |
const captions: string[] = []
|
| 567 |
+
const speeches: string[] = []
|
| 568 |
|
| 569 |
const panelGenerationStatus: Record<number, boolean> = {}
|
| 570 |
|
|
|
|
| 583 |
cameraShot,
|
| 584 |
clap.segments,
|
| 585 |
ClapSegmentCategory.INTERFACE,
|
| 586 |
+
).at(0) as (ClapSegment | undefined),
|
| 587 |
+
dialogue: filterSegments(
|
| 588 |
+
ClapSegmentFilteringMode.START,
|
| 589 |
+
cameraShot,
|
| 590 |
+
clap.segments,
|
| 591 |
+
ClapSegmentCategory.DIALOGUE,
|
| 592 |
).at(0) as (ClapSegment | undefined)
|
| 593 |
})).filter(item => item.storyboard && item.ui) as {
|
| 594 |
camera: ClapSegment
|
| 595 |
storyboard: ClapSegment
|
| 596 |
ui: ClapSegment
|
| 597 |
+
dialogue: ClapSegment
|
| 598 |
}[]
|
| 599 |
|
| 600 |
+
shots.forEach(({ camera, storyboard, ui, dialogue }, id) => {
|
| 601 |
|
| 602 |
panels.push(storyboard.prompt)
|
| 603 |
|
|
|
|
| 620 |
|
| 621 |
panelGenerationStatus[id] = false
|
| 622 |
|
| 623 |
+
speeches.push(dialogue?.prompt || "")
|
| 624 |
+
|
| 625 |
captions.push(ui?.prompt || "")
|
| 626 |
})
|
| 627 |
|
|
|
|
| 635 |
stylePrompt,
|
| 636 |
panels,
|
| 637 |
renderedScenes,
|
| 638 |
+
speeches,
|
| 639 |
captions,
|
| 640 |
|
| 641 |
}
|
|
|
|
| 655 |
stylePrompt,
|
| 656 |
panels,
|
| 657 |
renderedScenes,
|
| 658 |
+
speeches,
|
| 659 |
captions,
|
| 660 |
} = await convertClapToComic(currentClap)
|
| 661 |
|
|
|
|
| 671 |
// layout,
|
| 672 |
panels,
|
| 673 |
renderedScenes,
|
| 674 |
+
speeches,
|
| 675 |
captions,
|
| 676 |
currentNbPages: Math.round(currentNbPanels / currentNbPanelsPerPage),
|
| 677 |
upscaleQueue: {},
|
src/lib/bubble/injectSpeechBubbleInTheBackground.ts
ADDED
|
@@ -0,0 +1,419 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { ImageSegmenter, FilesetResolver } from "@mediapipe/tasks-vision"
|
| 2 |
+
|
| 3 |
+
/**
 * Draws a speech bubble containing `text` on top of an image and returns the
 * result as a PNG data URL.
 *
 * The image is run through the MediaPipe image segmenter; when the category
 * mask contains at least one non-zero category, the bubble is anchored above
 * the bounding box of the non-zero pixels. Otherwise the bubble is anchored at
 * the center of the image.
 *
 * @param params.inputImageInBase64 image source handed to `loadImage`
 * @param params.text text to write in the bubble; when empty or missing the
 *   input image string is returned unchanged
 * @param params.shape outline of the bubble (default "oval")
 * @param params.line stroke style of the bubble (default "handdrawn")
 * @param params.font font family used for the text (default "Arial")
 * @param params.debug when true, paints the segmentation mask over the image
 *   and logs the detected categories
 * @returns a `data:image/png` URL of the composited canvas
 * @throws Error when no 2D canvas context can be obtained
 */
export async function injectSpeechBubbleInTheBackground(params: {
  inputImageInBase64: string;
  text?: string;
  shape?: "oval" | "rectangular" | "cloud" | "thought";
  line?: "handdrawn" | "straight" | "bubble" | "chaotic";
  font?: string;
  debug?: boolean;
}): Promise<string> {
  const {
    inputImageInBase64,
    text,
    shape = "oval",
    line = "handdrawn",
    font = "Arial",
    debug = false,
  } = params;

  // If no text is provided, return the original image
  if (!text) {
    return inputImageInBase64;
  }

  // Load the image
  const image = await loadImage(inputImageInBase64);

  // Set up canvas
  const canvas = document.createElement('canvas');
  canvas.width = image.width;
  canvas.height = image.height;
  const ctx = canvas.getContext('2d');
  if (!ctx) {
    throw new Error("injectSpeechBubbleInTheBackground: unable to obtain a 2D canvas context");
  }
  ctx.drawImage(image, 0, 0);

  // Set up MediaPipe Image Segmenter.
  // The WASM runtime is pinned to the version of the installed npm package:
  // loading "@latest" can pair a newer runtime with an older JS API.
  // Keep this in sync with the @mediapipe/tasks-vision entry of package.json.
  const vision = await FilesetResolver.forVisionTasks(
    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.14/wasm"
  );
  const imageSegmenter = await ImageSegmenter.createFromOptions(vision, {
    baseOptions: {
      modelAssetPath: "https://storage.googleapis.com/mediapipe-models/image_segmenter/deeplab_v3/float32/1/deeplab_v3.tflite",
      delegate: "GPU"
    },
    outputCategoryMask: true,
    outputConfidenceMasks: false
  });

  let characterBoundingBox: { top: number, left: number, width: number, height: number } | null = null;

  try {
    const segmentationResult = imageSegmenter.segment(image);
    const categoryMask = segmentationResult.categoryMask;

    if (categoryMask) {
      try {
        const mask = categoryMask.getAsUint8Array();
        const detectedItems = analyzeSegmentationMask(mask, image.width, image.height);
        if (debug) {
          console.log("Detected items:", detectedItems);
        }

        if (detectedItems.length > 0) {
          characterBoundingBox = findCharacterBoundingBox(mask, image.width, image.height);
        }

        if (debug) {
          drawSegmentationMask(ctx, mask, image.width, image.height);
        }
      } finally {
        // release the mask once we are done reading from it
        categoryMask.close();
      }
    }
  } finally {
    // a segmenter is created on every call, so it has to be released on every call
    imageSegmenter.close();
  }

  const bubbleLocation = characterBoundingBox
    ? { x: characterBoundingBox.left + characterBoundingBox.width / 2, y: characterBoundingBox.top }
    : { x: image.width / 2, y: image.height / 2 };

  drawSpeechBubble(ctx, bubbleLocation, text, shape, line, font, !!characterBoundingBox, image.width, image.height, characterBoundingBox);

  return canvas.toDataURL('image/png');
}
|
| 73 |
+
/**
 * Loads an image into an `HTMLImageElement`.
 *
 * @param base64 value assigned to the `src` of the image element
 * @returns a promise resolved with the element once its `onload` fires
 * @throws (rejects) an `Error` when the element reports a loading error, so
 *   that callers get a message and a stack instead of a bare DOM event
 */
function loadImage(base64: string): Promise<HTMLImageElement> {
  return new Promise((resolve, reject) => {
    const img = new Image();
    img.onload = () => resolve(img);
    img.onerror = () => reject(new Error("loadImage: the image could not be loaded"));
    img.src = base64;
  });
}
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
/**
 * Lists the distinct non-zero categories found in a segmentation mask.
 *
 * @param mask one category value per entry; 0 entries are ignored
 * @param width unused
 * @param height unused
 * @returns one `unknown-<category>` label per distinct non-zero value, in
 *   order of first appearance
 */
function analyzeSegmentationMask(mask: Uint8Array, width: number, height: number): string[] {
  const seen = new Set<number>();
  mask.forEach((value) => {
    if (value > 0) {
      seen.add(value);
    }
  });

  const labels: string[] = [];
  seen.forEach((value) => {
    labels.push(`unknown-${value}`);
  });
  return labels;
}
|
| 93 |
+
|
| 94 |
+
/**
 * Computes the centroid of every non-zero pixel of a row-major mask.
 *
 * @param mask one category value per pixel, indexed as `y * width + x`
 * @param width number of columns scanned
 * @param height number of rows scanned
 * @returns the mean x / y of the non-zero pixels, or the center of the
 *   `width` x `height` area when there is none
 */
function findMainCharacterLocation(mask: Uint8Array, width: number, height: number): { x: number, y: number } {
  let totalX = 0;
  let totalY = 0;
  let hits = 0;

  for (let row = 0; row < height; row++) {
    const rowStart = row * width;
    for (let col = 0; col < width; col++) {
      if (!(mask[rowStart + col] > 0)) {
        continue;
      }
      totalX += col;
      totalY += row;
      hits += 1;
    }
  }

  if (hits === 0) {
    return { x: width / 2, y: height / 2 };
  }
  return { x: totalX / hits, y: totalY / hits };
}
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
/**
 * Debug helper: tints every pixel that belongs to a non-zero category with
 * the color of that category.
 *
 * The tint is blended 50/50 with the existing pixel. `putImageData` writes
 * raw pixels without compositing, so merely lowering the alpha channel would
 * replace the picture with a semi-transparent flat color instead of
 * overlaying it.
 *
 * @param ctx context whose pixels are read and written back
 * @param mask one category value per pixel
 * @param width width of the region read from the canvas
 * @param height height of the region read from the canvas
 */
function drawSegmentationMask(ctx: CanvasRenderingContext2D, mask: Uint8Array, width: number, height: number) {
  const imageData = ctx.getImageData(0, 0, width, height);
  const data = imageData.data;
  const overlayOpacity = 0.5;

  // never index past the pixel buffer if the mask happens to be larger
  const pixelCount = Math.min(mask.length, Math.floor(data.length / 4));

  for (let i = 0; i < pixelCount; i++) {
    const category = mask[i];
    if (category > 0) {
      // Use a different color for each category
      const color = getCategoryColor(category);
      const offset = i * 4;
      for (let channel = 0; channel < 3; channel++) {
        data[offset + channel] =
          data[offset + channel] * (1 - overlayOpacity) + color[channel] * overlayOpacity;
      }
      // the alpha channel is left untouched so the exported image stays opaque
    }
  }
  ctx.putImageData(imageData, 0, 0);
}
|
| 126 |
+
|
| 127 |
+
/**
 * Maps a category number to a fully saturated, mid-lightness RGB color.
 *
 * The hue is `(category * 137) % 360` degrees, which gives a repeatable
 * color for a given category.
 *
 * @param category category value taken from the segmentation mask
 * @returns the `[r, g, b]` triple produced by `hslToRgb`
 */
function getCategoryColor(category: number): [number, number, number] {
  const hueInDegrees = (category * 137) % 360;
  const normalizedHue = hueInDegrees / 360;
  return hslToRgb(normalizedHue, 1, 0.5);
}
|
| 132 |
+
|
| 133 |
+
/**
 * Converts an HSL color to RGB.
 *
 * @param h hue; values are wrapped by at most one turn in either direction
 * @param s saturation; 0 produces a grey whose three channels equal `l`
 * @param l lightness
 * @returns `[r, g, b]`, each channel being the 0..1 result scaled by 255 and
 *   rounded
 */
function hslToRgb(h: number, s: number, l: number): [number, number, number] {
  const toByte = (channel: number): number => Math.round(channel * 255);

  // achromatic: all three channels share the lightness
  if (s === 0) {
    return [toByte(l), toByte(l), toByte(l)];
  }

  const q = l < 0.5 ? l * (1 + s) : l + s - l * s;
  const p = 2 * l - q;

  const channelAt = (t: number): number => {
    let wrapped = t;
    if (wrapped < 0) wrapped += 1;
    if (wrapped > 1) wrapped -= 1;

    if (wrapped < 1/6) return p + (q - p) * 6 * wrapped;
    if (wrapped < 1/2) return q;
    if (wrapped < 2/3) return p + (q - p) * (2/3 - wrapped) * 6;
    return p;
  };

  return [
    toByte(channelAt(h + 1/3)),
    toByte(channelAt(h)),
    toByte(channelAt(h - 1/3)),
  ];
}
|
| 154 |
+
|
| 155 |
+
/**
 * Draws a complete speech bubble: outline, tail and wrapped text.
 *
 * The bubble is sized from the image (at most 300 x 150, or 40% / 30% of the
 * image) and grown when the wrapped text needs more room. It is placed one
 * bubble-height above `location` and clamped so it stays inside the image.
 *
 * @param ctx context to draw on; its font, fill style and text alignment are
 *   left modified on return
 * @param location anchor point of the bubble
 * @param text text to write inside the bubble
 * @param shape outline of the bubble
 * @param line stroke style, applied to both the outline and the tail
 * @param font font family used for the text
 * @param characterDetected currently unused
 * @param imageWidth width of the image, used for sizing and clamping
 * @param imageHeight height of the image, used for sizing and clamping
 * @param characterBoundingBox when provided, the tail points at the
 *   horizontal middle of the box, 20% below its top; otherwise at `location`
 */
function drawSpeechBubble(
  ctx: CanvasRenderingContext2D,
  location: { x: number, y: number },
  text: string,
  shape: "oval" | "rectangular" | "cloud" | "thought",
  line: "handdrawn" | "straight" | "bubble" | "chaotic",
  font: string,
  characterDetected: boolean,
  imageWidth: number,
  imageHeight: number,
  characterBoundingBox: { top: number, left: number, width: number, height: number } | null
) {
  const bubbleWidth = Math.min(300, imageWidth * 0.4);
  const bubbleHeight = Math.min(150, imageHeight * 0.3);
  const padding = 20;

  // shorter texts get a bigger font, within the 15..30px range
  const fontSize = Math.max(15, Math.min(30, 500 / text.length));
  ctx.font = `${fontSize}px ${font}`;

  const wrappedText = wrapText(ctx, text, bubbleWidth - padding * 2);
  const textDimensions = measureTextDimensions(ctx, wrappedText);

  const finalWidth = Math.max(bubbleWidth, textDimensions.width + padding * 2);
  const finalHeight = Math.max(bubbleHeight, textDimensions.height + padding * 2);

  const bubbleLocation = {
    x: Math.max(finalWidth / 2, Math.min(imageWidth - finalWidth / 2, location.x)),
    y: Math.max(finalHeight / 2, Math.min(imageHeight - finalHeight / 2, location.y - finalHeight))
  };

  const tailTarget = characterBoundingBox
    ? { x: characterBoundingBox.left + characterBoundingBox.width / 2, y: characterBoundingBox.top + characterBoundingBox.height * 0.2 }
    : location;

  // The dash pattern / line cap must be set BEFORE stroking, otherwise the
  // outline ignores `line`. save/restore keeps that state from leaking into
  // anything drawn afterwards.
  ctx.save();
  ctx.fillStyle = 'white';
  ctx.strokeStyle = 'black';
  ctx.lineWidth = 2;
  applyLineStyle(ctx, line);

  // each shape helper starts its own path
  drawBubbleShape(ctx, shape, bubbleLocation, finalWidth, finalHeight, location);
  ctx.fill();
  ctx.stroke();

  drawTail(ctx, bubbleLocation, finalWidth, finalHeight, tailTarget, shape);
  ctx.restore();

  ctx.fillStyle = 'black';
  ctx.textAlign = 'center';
  ctx.textBaseline = 'middle';
  drawFormattedText(ctx, wrappedText, bubbleLocation.x, bubbleLocation.y, finalWidth - padding * 2, fontSize);
}
|
| 207 |
+
|
| 208 |
+
/**
 * Traces the outline of a bubble by delegating to the helper matching
 * `shape`. Filling and stroking are left to the caller.
 *
 * @param ctx context that receives the path
 * @param shape which outline to trace
 * @param bubbleLocation center of the bubble
 * @param width width of the bubble
 * @param height height of the bubble
 * @param tailTarget currently unused
 */
function drawBubbleShape(
  ctx: CanvasRenderingContext2D,
  shape: "oval" | "rectangular" | "cloud" | "thought",
  bubbleLocation: { x: number, y: number },
  width: number,
  height: number,
  tailTarget: { x: number, y: number }
) {
  if (shape === "oval") {
    drawOvalBubble(ctx, bubbleLocation, width, height);
    return;
  }
  if (shape === "rectangular") {
    drawRectangularBubble(ctx, bubbleLocation, width, height);
    return;
  }
  if (shape === "cloud") {
    drawCloudBubble(ctx, bubbleLocation, width, height);
    return;
  }
  if (shape === "thought") {
    drawThoughtBubble(ctx, bubbleLocation, width, height);
  }
}
|
| 231 |
+
|
| 232 |
+
/**
 * Traces an axis-aligned ellipse as a new closed path.
 *
 * @param ctx context that receives the path
 * @param location center of the ellipse
 * @param width full width of the ellipse
 * @param height full height of the ellipse
 */
function drawOvalBubble(ctx: CanvasRenderingContext2D, location: { x: number, y: number }, width: number, height: number) {
  const { x: centerX, y: centerY } = location;
  const radiusX = width / 2;
  const radiusY = height / 2;
  const fullTurn = 2 * Math.PI;

  ctx.beginPath();
  ctx.ellipse(centerX, centerY, radiusX, radiusY, 0, 0, fullTurn);
  ctx.closePath();
}
|
| 237 |
+
|
| 238 |
+
/**
 * Traces a rectangle with rounded corners as a new closed path.
 *
 * The corner radius is 20, reduced to half of the smaller side when the
 * bubble is too small to fit it, so the corners never overlap each other.
 *
 * @param ctx context that receives the path
 * @param location center of the rectangle
 * @param width full width of the rectangle
 * @param height full height of the rectangle
 */
function drawRectangularBubble(ctx: CanvasRenderingContext2D, location: { x: number, y: number }, width: number, height: number) {
  const radius = Math.max(0, Math.min(20, width / 2, height / 2));

  const left = location.x - width / 2;
  const right = location.x + width / 2;
  const top = location.y - height / 2;
  const bottom = location.y + height / 2;

  ctx.beginPath();
  ctx.moveTo(left + radius, top);
  // top edge, then top-right corner
  ctx.lineTo(right - radius, top);
  ctx.quadraticCurveTo(right, top, right, top + radius);
  // right edge, then bottom-right corner
  ctx.lineTo(right, bottom - radius);
  ctx.quadraticCurveTo(right, bottom, right - radius, bottom);
  // bottom edge, then bottom-left corner
  ctx.lineTo(left + radius, bottom);
  ctx.quadraticCurveTo(left, bottom, left, bottom - radius);
  // left edge, then top-left corner
  ctx.lineTo(left, top + radius);
  ctx.quadraticCurveTo(left, top, left + radius, top);
  ctx.closePath();
}
|
| 252 |
+
|
| 253 |
+
/**
 * Traces a cloud-like outline made of quadratic bumps as a new closed path.
 *
 * One bump is drawn per 40 units of width along the top and bottom sides,
 * and half as many along the left and right sides. At least one bump is
 * always used: without that guard a bubble narrower than 40 units would
 * divide by zero and produce non-finite coordinates.
 *
 * @param ctx context that receives the path
 * @param location center of the cloud
 * @param width full width of the cloud
 * @param height full height of the cloud
 */
function drawCloudBubble(ctx: CanvasRenderingContext2D, location: { x: number, y: number }, width: number, height: number) {
  const numBumps = Math.max(1, Math.floor(width / 40));
  const bumpRadius = width / (numBumps * 2);

  ctx.beginPath();
  ctx.moveTo(location.x - width / 2 + bumpRadius, location.y);

  // Top
  for (let i = 0; i < numBumps; i++) {
    const x = location.x - width / 2 + (i * 2 + 1) * bumpRadius;
    const y = location.y - height / 2;
    ctx.quadraticCurveTo(x, y - bumpRadius / 2, x + bumpRadius, y);
  }

  // Right
  for (let i = 0; i < numBumps / 2; i++) {
    const x = location.x + width / 2;
    const y = location.y - height / 2 + (i * 2 + 1) * bumpRadius * 2;
    ctx.quadraticCurveTo(x + bumpRadius / 2, y, x, y + bumpRadius * 2);
  }

  // Bottom
  for (let i = numBumps; i > 0; i--) {
    const x = location.x - width / 2 + (i * 2 - 1) * bumpRadius;
    const y = location.y + height / 2;
    ctx.quadraticCurveTo(x, y + bumpRadius / 2, x - bumpRadius, y);
  }

  // Left
  for (let i = numBumps / 2; i > 0; i--) {
    const x = location.x - width / 2;
    const y = location.y - height / 2 + (i * 2 - 1) * bumpRadius * 2;
    ctx.quadraticCurveTo(x - bumpRadius / 2, y, x, y - bumpRadius * 2);
  }
  ctx.closePath();
}
|
| 289 |
+
|
| 290 |
+
/**
 * Traces the outline of a thought bubble, which is the same outline as the
 * cloud bubble. The trail of small circles leading to the character is not
 * drawn here; `drawTail` takes care of it.
 *
 * @param ctx context that receives the path
 * @param location center of the bubble
 * @param width full width of the bubble
 * @param height full height of the bubble
 */
function drawThoughtBubble(ctx: CanvasRenderingContext2D, location: { x: number, y: number }, width: number, height: number) {
  const center = location;
  drawCloudBubble(ctx, center, width, height);
}
|
| 294 |
+
|
| 295 |
+
/**
 * Draws, fills and strokes the tail that links a bubble to its target.
 *
 * The tail starts on the bottom edge of the bubble, a quarter of the width
 * away from the center, on the side where the target is.
 * - "thought" bubbles get three shrinking circles spread between the start
 *   point and the target. Each circle is its own sub-path: joining them with
 *   line segments would fill the whole trail as a single polygon.
 * - every other shape gets a curved wedge ending at the target.
 *
 * @param ctx context to draw on
 * @param bubbleLocation center of the bubble
 * @param width width of the bubble
 * @param height height of the bubble
 * @param tailTarget point the tail is aimed at
 * @param shape shape of the bubble; only "thought" is treated specially
 */
function drawTail(
  ctx: CanvasRenderingContext2D,
  bubbleLocation: { x: number, y: number },
  width: number,
  height: number,
  tailTarget: { x: number, y: number },
  shape: string
) {
  const targetIsOnTheRight = tailTarget.x > bubbleLocation.x;
  const startX = bubbleLocation.x + (targetIsOnTheRight ? width / 4 : -width / 4);
  const startY = bubbleLocation.y + height / 2;

  ctx.beginPath();

  if (shape === "thought") {
    const bubbleCount = 3;
    for (let i = 0; i < bubbleCount; i++) {
      const t = (i + 1) / (bubbleCount + 1);
      const x = startX + (tailTarget.x - startX) * t;
      const y = startY + (tailTarget.y - startY) * t;
      const radius = 5 * (1 - t);
      // arc() starts at angle 0, i.e. at the rightmost point of the circle
      ctx.moveTo(x + radius, y);
      ctx.arc(x, y, radius, 0, Math.PI * 2);
      ctx.closePath();
    }
  } else {
    const controlX = (startX + tailTarget.x) / 2;
    const controlY = (startY + tailTarget.y + 20) / 2;
    ctx.moveTo(startX, startY);
    ctx.quadraticCurveTo(controlX, controlY, tailTarget.x, tailTarget.y);
    ctx.quadraticCurveTo(controlX, controlY, startX + (targetIsOnTheRight ? -10 : 10), startY);
    ctx.closePath();
  }

  ctx.fill();
  ctx.stroke();
}
|
| 330 |
+
|
| 331 |
+
/**
 * Computes the box enclosing every non-zero pixel of a row-major mask.
 *
 * @param mask one category value per pixel, indexed as `y * width + x`
 * @param width number of columns scanned
 * @param height number of rows scanned
 * @returns `top` / `left` are the smallest y / x holding a non-zero pixel;
 *   `width` / `height` are the distances to the largest x / y.
 *   NOTE(review): when no pixel is set the result is
 *   `{ top: height, left: width, width: -width, height: -height }`, so
 *   callers are expected to check that the mask is not empty first.
 */
function findCharacterBoundingBox(mask: Uint8Array, width: number, height: number): { top: number, left: number, width: number, height: number } {
  let left = width;
  let top = height;
  let right = 0;
  let bottom = 0;

  for (let row = 0; row < height; row++) {
    const rowStart = row * width;
    for (let col = 0; col < width; col++) {
      if (!(mask[rowStart + col] > 0)) {
        continue;
      }
      if (col < left) left = col;
      if (row < top) top = row;
      if (col > right) right = col;
      if (row > bottom) bottom = row;
    }
  }

  return {
    top,
    left,
    width: right - left,
    height: bottom - top
  };
}
|
| 351 |
+
|
| 352 |
+
/**
 * Configures the dash pattern of the context for a named line style.
 *
 * - "handdrawn": dashes of 5 separated by gaps of 5
 * - "straight": solid line
 * - "bubble": zero-length dashes every 10 units, with round line caps
 * - "chaotic": alternating long and short dashes
 *
 * Any other style leaves the context untouched.
 *
 * @param ctx context to configure
 * @param style name of the line style
 */
function applyLineStyle(ctx: CanvasRenderingContext2D, style: string) {
  const dashPatterns = new Map<string, number[]>([
    ["handdrawn", [5, 5]],
    ["straight", []],
    ["bubble", [0, 10]],
    ["chaotic", [10, 5, 2, 5]],
  ]);

  const pattern = dashPatterns.get(style);
  if (pattern === undefined) {
    return;
  }

  ctx.setLineDash(pattern);
  if (style === "bubble") {
    ctx.lineCap = "round";
  }
}
|
| 369 |
+
|
| 370 |
+
/**
 * Splits a text into lines that fit within `maxWidth`, using the font
 * currently set on the context for measurements.
 *
 * A line is broken either when adding the next word would exceed `maxWidth`,
 * or right AFTER a word ending with "." or ",", so that punctuation closes a
 * line instead of opening the next one. Empty lines are never produced. A
 * single word wider than `maxWidth` is kept on a line of its own.
 *
 * @param ctx context used to measure text
 * @param text text to wrap; words are separated by whitespace
 * @param maxWidth maximum width of a line
 * @returns the wrapped lines, in reading order
 */
function wrapText(ctx: CanvasRenderingContext2D, text: string, maxWidth: number): string[] {
  const words = text.split(/\s+/).filter((word) => word.length > 0);
  const lines: string[] = [];
  let currentLine = '';

  for (const word of words) {
    const candidate = currentLine ? `${currentLine} ${word}` : word;

    if (currentLine && ctx.measureText(candidate).width > maxWidth) {
      // the word does not fit: close the current line and start a new one
      lines.push(currentLine);
      currentLine = word;
    } else {
      currentLine = candidate;
    }

    // a sentence or clause just ended: break after the punctuation
    if (word.endsWith('.') || word.endsWith(',')) {
      lines.push(currentLine);
      currentLine = '';
    }
  }

  if (currentLine) {
    lines.push(currentLine);
  }

  return lines;
}
|
| 393 |
+
|
| 394 |
+
|
| 395 |
+
/**
 * Measures the area needed to render a list of lines with the font currently
 * set on the context.
 *
 * The line height is 1.2 times the pixel size found in `ctx.font`, which is
 * the same line height `drawFormattedText` uses when rendering. When no pixel
 * size can be read from `ctx.font`, the width of the letter "M" is used as an
 * approximation of the font size.
 *
 * @param ctx context used to measure text
 * @param lines lines to measure
 * @returns the width of the widest line, and the cumulated height of all lines
 */
function measureTextDimensions(ctx: CanvasRenderingContext2D, lines: string[]): { width: number, height: number } {
  const sizeMatch = /(\d+(?:\.\d+)?)px/.exec(`${ctx.font}`);
  const parsedFontSize = sizeMatch ? parseFloat(sizeMatch[1]) : NaN;
  const fontSize = Number.isFinite(parsedFontSize) && parsedFontSize > 0
    ? parsedFontSize
    : ctx.measureText('M').width;

  const lineHeight = fontSize * 1.2;
  const height = lineHeight * lines.length;

  let maxWidth = 0;
  for (const line of lines) {
    const metrics = ctx.measureText(line);
    maxWidth = Math.max(maxWidth, metrics.width);
  }

  return { width: maxWidth, height };
}
|
| 407 |
+
|
| 408 |
+
/**
 * Renders lines of text stacked vertically around the point (x, y).
 *
 * Lines are spaced by 1.2 times the font size. Each line is passed a maximum
 * width to `fillText` that shrinks the further the line is from the middle
 * of the block.
 *
 * @param ctx context to draw on
 * @param lines lines to render, from top to bottom
 * @param x horizontal position passed to `fillText` for every line
 * @param y vertical center of the block of text
 * @param maxWidth maximum width allowed for the middle line
 * @param fontSize font size used to compute the line height
 */
function drawFormattedText(ctx: CanvasRenderingContext2D, lines: string[], x: number, y: number, maxWidth: number, fontSize: number) {
  const lineHeight = fontSize * 1.2;
  const totalHeight = lineHeight * lines.length;
  const firstLineY = y - totalHeight / 2 + lineHeight / 2;
  const middleIndex = (lines.length - 1) / 2;

  lines.forEach((content, i) => {
    const distanceFromMiddle = Math.abs(i - middleIndex);
    const shrunkWidth = maxWidth * (1 - distanceFromMiddle / lines.length);
    const allowedWidth = Math.min(maxWidth, shrunkWidth);
    ctx.fillText(content, x, firstLineY + i * lineHeight, allowedWidth);
  });
}
|
src/lib/createLlamaPrompt.ts
CHANGED
|
@@ -3,7 +3,7 @@ export function createLlamaPrompt(messages: Array<{ role: string, content: strin
|
|
| 3 |
const B_INST = "[INST]", E_INST = "[/INST]";
|
| 4 |
const B_SYS = "<<SYS>>\n", E_SYS = "\n<</SYS>>\n\n";
|
| 5 |
const BOS = "<s>", EOS = "</s>";
|
| 6 |
-
const DEFAULT_SYSTEM_PROMPT = "You are a helpful, respectful and honest storywriting assistant. Always answer in a creative and entertaining way, while being safe. Please ensure that your stories and captions are socially unbiased and positive in nature. If a request does not make any sense, go on anyway, as we are writing a fantasy story.";
|
| 7 |
|
| 8 |
if (messages[0].role != "system"){
|
| 9 |
messages = [
|
|
|
|
| 3 |
const B_INST = "[INST]", E_INST = "[/INST]";
|
| 4 |
const B_SYS = "<<SYS>>\n", E_SYS = "\n<</SYS>>\n\n";
|
| 5 |
const BOS = "<s>", EOS = "</s>";
|
| 6 |
+
const DEFAULT_SYSTEM_PROMPT = "You are a helpful, respectful and honest storywriting assistant. Always answer in a creative and entertaining way, while being safe. Please ensure that your stories, speeches and captions are socially unbiased and positive in nature. If a request does not make any sense, go on anyway, as we are writing a fantasy story.";
|
| 7 |
|
| 8 |
if (messages[0].role != "system"){
|
| 9 |
messages = [
|
src/lib/dirtyGeneratedPanelCleaner.ts
CHANGED
|
@@ -3,8 +3,10 @@ import { GeneratedPanel } from "@/types"
|
|
| 3 |
export function dirtyGeneratedPanelCleaner({
|
| 4 |
panel,
|
| 5 |
instructions,
|
|
|
|
| 6 |
caption
|
| 7 |
}: GeneratedPanel): GeneratedPanel {
|
|
|
|
| 8 |
let newCaption = `${caption || ""}`.split(":").pop()?.trim() || ""
|
| 9 |
let newInstructions = (
|
| 10 |
// need to remove from LLM garbage here, too
|
|
@@ -34,6 +36,7 @@ export function dirtyGeneratedPanelCleaner({
|
|
| 34 |
return {
|
| 35 |
panel,
|
| 36 |
instructions: newInstructions,
|
|
|
|
| 37 |
caption: newCaption,
|
| 38 |
}
|
| 39 |
}
|
|
|
|
| 3 |
export function dirtyGeneratedPanelCleaner({
|
| 4 |
panel,
|
| 5 |
instructions,
|
| 6 |
+
speech,
|
| 7 |
caption
|
| 8 |
}: GeneratedPanel): GeneratedPanel {
|
| 9 |
+
let newSpeech = `${speech || ""}`.split(":").pop()?.trim() || ""
|
| 10 |
let newCaption = `${caption || ""}`.split(":").pop()?.trim() || ""
|
| 11 |
let newInstructions = (
|
| 12 |
// need to remove from LLM garbage here, too
|
|
|
|
| 36 |
return {
|
| 37 |
panel,
|
| 38 |
instructions: newInstructions,
|
| 39 |
+
speech: newSpeech,
|
| 40 |
caption: newCaption,
|
| 41 |
}
|
| 42 |
}
|
src/lib/dirtyGeneratedPanelsParser.ts
CHANGED
|
@@ -14,15 +14,18 @@ export function dirtyGeneratedPanelsParser(input: string): GeneratedPanel[] {
|
|
| 14 |
|
| 15 |
const results = jsonData.map((item, i) => {
|
| 16 |
let panel = i
|
|
|
|
| 17 |
let caption = item.caption ? item.caption.trim() : ''
|
| 18 |
let instructions = item.instructions ? item.instructions.trim() : ''
|
| 19 |
-
if (!instructions && caption) {
|
|
|
|
|
|
|
| 20 |
instructions = caption
|
| 21 |
}
|
| 22 |
if (!caption && instructions) {
|
| 23 |
caption = instructions
|
| 24 |
}
|
| 25 |
-
return { panel, caption, instructions }
|
| 26 |
})
|
| 27 |
|
| 28 |
return results
|
|
|
|
| 14 |
|
| 15 |
const results = jsonData.map((item, i) => {
|
| 16 |
let panel = i
|
| 17 |
+
let speech = item.speech ? item.speech.trim() : ''
|
| 18 |
let caption = item.caption ? item.caption.trim() : ''
|
| 19 |
let instructions = item.instructions ? item.instructions.trim() : ''
|
| 20 |
+
if (!instructions && !caption && speech) {
|
| 21 |
+
instructions = speech
|
| 22 |
+
} else if (!instructions && caption) {
|
| 23 |
instructions = caption
|
| 24 |
}
|
| 25 |
if (!caption && instructions) {
|
| 26 |
caption = instructions
|
| 27 |
}
|
| 28 |
+
return { panel, speech, caption, instructions }
|
| 29 |
})
|
| 30 |
|
| 31 |
return results
|
src/lib/parseBadJSON.ts
CHANGED
|
@@ -5,7 +5,7 @@ export function parseBadJSON(jsonLikeString: string): GeneratedPanels {
|
|
| 5 |
try {
|
| 6 |
return JSON.parse(jsonLikeString) as GeneratedPanels
|
| 7 |
} catch (err) {
|
| 8 |
-
var regex = /\{\s*"panel":\s*(\d+),\s*"instructions"\s*:\s*"([^"]+)",\s*"caption":\s*"([^"]*)"\s*\}/gs;
|
| 9 |
|
| 10 |
let results = [];
|
| 11 |
let match;
|
|
@@ -14,7 +14,8 @@ export function parseBadJSON(jsonLikeString: string): GeneratedPanels {
|
|
| 14 |
let json = {
|
| 15 |
panel: Number(match[1]),
|
| 16 |
instructions: match[2],
|
| 17 |
-
|
|
|
|
| 18 |
};
|
| 19 |
results.push(json);
|
| 20 |
}
|
|
|
|
| 5 |
try {
|
| 6 |
return JSON.parse(jsonLikeString) as GeneratedPanels
|
| 7 |
} catch (err) {
|
| 8 |
+
var regex = /\{\s*"panel":\s*(\d+),\s*"instructions"\s*:\s*"([^"]+)",\s*"speech"\s*:\s*"([^"]+)",\s*"caption":\s*"([^"]*)"\s*\}/gs;
|
| 9 |
|
| 10 |
let results = [];
|
| 11 |
let match;
|
|
|
|
| 14 |
let json = {
|
| 15 |
panel: Number(match[1]),
|
| 16 |
instructions: match[2],
|
| 17 |
+
speech: match[3],
|
| 18 |
+
caption: match[4]
|
| 19 |
};
|
| 20 |
results.push(json);
|
| 21 |
}
|
src/types.ts
CHANGED
|
@@ -89,6 +89,7 @@ export interface ImageAnalysisResponse {
|
|
| 89 |
export type GeneratedPanel = {
|
| 90 |
panel: number
|
| 91 |
instructions: string
|
|
|
|
| 92 |
caption: string
|
| 93 |
}
|
| 94 |
|
|
|
|
| 89 |
export type GeneratedPanel = {
|
| 90 |
panel: number
|
| 91 |
instructions: string
|
| 92 |
+
speech: string
|
| 93 |
caption: string
|
| 94 |
}
|
| 95 |
|