linoyts (HF Staff) committed
Commit 83e414e · verified · 1 Parent(s): b8c775a

Update app.py

Files changed (1)
  1. app.py +166 -24
app.py CHANGED
@@ -87,6 +87,131 @@ Please strictly follow the rewriting rules below:
     "Rewritten": "..."
 }
 '''
+
+# --- NEW: Next Scene Prompt System Prompt ---
+NEXT_SCENE_SYSTEM_PROMPT = '''
+# Next Scene Prompt Generator
+You are a cinematic AI director assistant. Your task is to analyze the provided image and generate a compelling "Next Scene" prompt that describes the natural cinematic progression from the current frame.
+
+## Core Principles:
+- Think like a film director: Consider camera dynamics, visual composition, and narrative continuity
+- Create prompts that flow seamlessly from the current frame
+- Focus on **visual progression** rather than static modifications
+- Maintain compositional coherence while introducing organic transitions
+
+## Prompt Structure:
+Always begin with "Next Scene: " followed by your cinematic description.
+
+## Key Elements to Include:
+1. **Camera Movement**: Specify one of these or combinations:
+   - Dolly shots (camera moves toward/away from subject)
+   - Push-ins or pull-backs
+   - Tracking moves (camera follows subject)
+   - Pan left/right
+   - Tilt up/down
+   - Zoom in/out
+
+2. **Framing Evolution**: Describe how the shot composition changes:
+   - Wide to close-up transitions
+   - Angle shifts (high angle to eye level, etc.)
+   - Reframing of subjects
+   - Revealing new elements in frame
+
+3. **Environmental Reveals** (if applicable):
+   - New characters entering frame
+   - Expanded scenery
+   - Spatial progression
+   - Background elements becoming visible
+
+4. **Atmospheric Shifts** (if enhancing the scene):
+   - Lighting changes (golden hour, shadows, lens flare)
+   - Weather evolution
+   - Time-of-day transitions
+   - Depth and mood indicators
+
+## Guidelines:
+- Keep descriptions concise but vivid (2-3 sentences max)
+- Always specify the camera action first
+- Focus on what changes between this frame and the next
+- Maintain the scene's existing style and mood unless intentionally transitioning
+- Prefer natural, organic progressions over abrupt changes
+
+## Example Outputs:
+- "Next Scene: The camera pulls back from a tight close-up on the airship to a sweeping aerial view, revealing an entire fleet of vessels soaring through a fantasy landscape."
+- "Next Scene: The camera tracks forward and tilts down, bringing the sun and helicopters closer into frame as a strong lens flare intensifies."
+- "Next Scene: The camera pans right, removing the dragon and rider from view while revealing more of the floating mountain range in the distance."
+- "Next Scene: The camera moves slightly forward as sunlight breaks through the clouds, casting a soft glow around the character's silhouette in the mist. Realistic cinematic style, atmospheric depth."
+
+## Output Format:
+Return ONLY the next scene prompt as plain text, starting with "Next Scene: "
+Do NOT include JSON formatting or additional explanations.
+'''
+
+# --- NEW: Function to generate Next Scene prompts using VLM ---
+def generate_next_scene_prompt(images):
+    """
+    Uses a VLM to analyze the uploaded image(s) and generate a cinematic "Next Scene" prompt
+    following the guidelines of the next-scene LoRA.
+    """
+    if images is None or len(images) == 0:
+        return "Please upload an image first to generate a next scene prompt."
+
+    # Ensure HF_TOKEN is set
+    api_key = os.environ.get("HF_TOKEN")
+    if not api_key:
+        return "Error: HF_TOKEN not set. Cannot generate next scene prompt."
+
+    try:
+        # Load input images into PIL Images using the shared helper function
+        pil_images = process_gallery_images(images)
+
+        if len(pil_images) == 0:
+            return "Error: Could not load images."
+
+        # Initialize the InferenceClient with vision-capable model
+        client = InferenceClient(
+            provider="cerebras",
+            api_key=api_key,
+        )
+
+        # Format the messages for the chat completions API
+        messages = [
+            {"role": "system", "content": NEXT_SCENE_SYSTEM_PROMPT},
+            {"role": "user", "content": []}
+        ]
+
+        # Add images to the message
+        for img in pil_images:
+            messages[1]["content"].append(
+                {"image": f"data:image/png;base64,{encode_image(img)}"}
+            )
+
+        # Add the text prompt
+        messages[1]["content"].append({
+            "text": "Analyze this image and generate a compelling 'Next Scene' prompt that describes the natural cinematic progression from this frame. Focus on camera movement, framing changes, and atmospheric evolution."
+        })
+
+        # Call the API
+        completion = client.chat.completions.create(
+            model="Qwen/Qwen3-235B-A22B-Instruct-2507",
+            messages=messages,
+        )
+
+        # Parse the response
+        result = completion.choices[0].message.content.strip()
+
+        # Ensure it starts with "Next Scene:"
+        if not result.startswith("Next Scene:"):
+            result = "Next Scene: " + result
+
+        print(f"Generated Next Scene Prompt: {result}")
+        return result
+
+    except Exception as e:
+        print(f"Error generating next scene prompt: {e}")
+        return f"Error: Could not generate next scene prompt. {str(e)}"
+
+
 # --- Prompt Enhancement using Hugging Face InferenceClient ---
 def polish_prompt_hf(prompt, img_list):
     """
@@ -153,6 +278,25 @@ def encode_image(pil_image):
     pil_image.save(buffered, format="PNG")
     return base64.b64encode(buffered.getvalue()).decode("utf-8")
 
+def process_gallery_images(images):
+    """
+    Helper function to convert Gradio gallery images to PIL Images.
+    Handles various input formats from the gallery component.
+    """
+    pil_images = []
+    if images is not None:
+        for item in images:
+            try:
+                if isinstance(item[0], Image.Image):
+                    pil_images.append(item[0].convert("RGB"))
+                elif isinstance(item[0], str):
+                    pil_images.append(Image.open(item[0]).convert("RGB"))
+                elif hasattr(item, "name"):
+                    pil_images.append(Image.open(item.name).convert("RGB"))
+            except Exception:
+                continue
+    return pil_images
+
 # --- Model Loading ---
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
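
The new helper normalizes the (image, caption) tuples that gr.Gallery emits into plain RGB PIL images, silently skipping anything it cannot read. A minimal sketch of feeding it directly, again assuming the edited app.py is importable:

from PIL import Image
from app import process_gallery_images  # assumption: this commit's app.py is importable

# gr.Gallery yields (image, caption) pairs; in-memory PIL images and file paths both work.
gallery_items = [
    (Image.new("RGB", (64, 64), "gray"), None),  # in-memory PIL image
    # ("/tmp/frame.png", None),                  # hypothetical on-disk frame
]
frames = process_gallery_images(gallery_items)
print(len(frames), [f.size for f in frames])  # -> 1 [(64, 64)]
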
@@ -182,24 +326,24 @@ optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB",
 # --- UI Constants and Helpers ---
 MAX_SEED = np.iinfo(np.int32).max
 
-def use_output_as_input(output_images):
-    """Convert output images to input format for the gallery"""
-    if output_images is None or len(output_images) == 0:
-        return []
-    return output_images
+def use_output_as_input(result_gallery):
+    """Takes the generated images from result and moves them to input_images."""
+    if result_gallery:
+        # result_gallery is already a list of PIL images
+        return result_gallery
+    return []
 
-# --- Main Inference Function (with hardcoded negative prompt) ---
-@spaces.GPU(duration=300)
+@spaces.GPU
 def infer(
     images,
     prompt,
     seed=42,
     randomize_seed=False,
     true_guidance_scale=1.0,
-    num_inference_steps=4,
-    height=None,
-    width=None,
-    rewrite_prompt=True,
+    num_inference_steps=8,
+    height=256,
+    width=256,
+    rewrite_prompt=False,
     num_images_per_prompt=1,
     progress=gr.Progress(track_tqdm=True),
 ):
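
The rewritten use_output_as_input is now a plain pass-through, which makes chaining edits a one-line wiring job. A sketch of how it could be hooked up inside the Blocks context; the reuse button is hypothetical (this diff does not add one), while the function body matches the commit:

import gradio as gr

def use_output_as_input(result_gallery):
    """Pass-through as in this commit: the result gallery is already a list of PIL images."""
    return result_gallery if result_gallery else []

with gr.Blocks() as demo:
    input_images = gr.Gallery(label="Input images")
    result = gr.Gallery(label="Result")
    reuse_btn = gr.Button("Use output as input")  # hypothetical: not part of this diff

    # One click copies the generated images back into the input slot for a follow-up edit.
    reuse_btn.click(fn=use_output_as_input, inputs=[result], outputs=[input_images])
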
@@ -215,19 +359,8 @@ def infer(
     # Set up the generator for reproducibility
     generator = torch.Generator(device=device).manual_seed(seed)
 
-    # Load input images into PIL Images
-    pil_images = []
-    if images is not None:
-        for item in images:
-            try:
-                if isinstance(item[0], Image.Image):
-                    pil_images.append(item[0].convert("RGB"))
-                elif isinstance(item[0], str):
-                    pil_images.append(Image.open(item[0]).convert("RGB"))
-                elif hasattr(item, "name"):
-                    pil_images.append(Image.open(item.name).convert("RGB"))
-            except Exception:
-                continue
+    # Load input images into PIL Images using the shared helper function
+    pil_images = process_gallery_images(images)
 
     if height==256 and width==256:
         height, width = None, None
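
The unchanged context at the end of this hunk also explains the otherwise surprising new defaults in the signature above: height=256 and width=256 act as a sentinel for "auto", because infer() immediately resets them to None (presumably letting the pipeline derive dimensions from the input image). A condensed sketch of that control flow:

def resolve_size(height, width):
    """Mirrors the sentinel check in infer(): 256x256 means 'let the pipeline decide'."""
    if height == 256 and width == 256:
        return None, None  # auto: dimensions come from the input image
    return height, width

print(resolve_size(256, 256))   # -> (None, None): the new defaults request auto-sizing
print(resolve_size(1024, 768))  # -> (1024, 768): explicit sizes pass through unchanged
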
@@ -304,6 +437,8 @@ with gr.Blocks(css=css) as demo:
                 placeholder="describe the edit instruction",
                 container=False,
             )
+            # NEW: Add button to generate next scene prompt
+            generate_next_scene_btn = gr.Button("🎬 Generate Next Scene Prompt", variant="secondary", size="sm")
             run_button = gr.Button("Edit!", variant="primary")
 
         with gr.Accordion("Advanced Settings", open=False):
@@ -358,6 +493,13 @@ with gr.Blocks(css=css) as demo:
 
     # gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=infer, cache_examples=False)
 
+    # NEW: Wire up the next scene prompt generator button
+    generate_next_scene_btn.click(
+        fn=generate_next_scene_prompt,
+        inputs=[input_images],
+        outputs=[prompt]
+    )
+
     gr.on(
         triggers=[run_button.click, prompt.submit],
         fn=infer,
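
Taken together, the last two hunks add a one-click prompt author: the 🎬 button reads the current input gallery and writes the generated text into the prompt box. A stripped-down, runnable sketch of that wiring, with a stub standing in for the VLM-backed generator defined earlier in the diff:

import gradio as gr

def generate_next_scene_prompt(images):
    # Stub for the VLM call added in this commit; returns a canned progression.
    if not images:
        return "Please upload an image first to generate a next scene prompt."
    return "Next Scene: The camera pulls back slowly, revealing the wider scene."

with gr.Blocks() as demo:
    input_images = gr.Gallery(label="Input images")
    prompt = gr.Textbox(placeholder="describe the edit instruction", container=False)
    generate_next_scene_btn = gr.Button("🎬 Generate Next Scene Prompt", variant="secondary", size="sm")

    # The button reads the gallery and drops the generated text into the prompt box.
    generate_next_scene_btn.click(
        fn=generate_next_scene_prompt,
        inputs=[input_images],
        outputs=[prompt],
    )

demo.launch()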
 