import base64
from io import BytesIO

from PIL import Image

# Default cap, in pixels, on the longer side of an image before it is encoded.
# A conservative value chosen to keep request payloads small.
_DEFAULT_MAX_DIM = 1500

# Image modes that Pillow's PNG writer rejects, mapped to the closest mode it
# accepts. Modes not listed here are passed to ``save`` unchanged.
_PNG_MODE_FALLBACKS = {
    "CMYK": "RGB",
    "YCbCr": "RGB",
    "HSV": "RGB",
    "RGBa": "RGBA",
    "La": "LA",
}


def pil_to_base64(image: Image.Image) -> str:
    """Encode a PIL image as a base64 string of its PNG serialization.

    Images whose mode the PNG writer cannot store (for example CMYK JPEGs)
    are converted to an equivalent PNG-compatible mode first instead of
    failing inside ``save``. The caller's image object is never modified:
    ``convert`` returns a new image.

    Args:
        image: The image to encode.

    Returns:
        The PNG bytes of ``image``, base64-encoded and decoded as UTF-8 text.
    """
    fallback_mode = _PNG_MODE_FALLBACKS.get(image.mode)
    if fallback_mode is not None:
        image = image.convert(fallback_mode)

    with BytesIO() as buffered:
        image.save(buffered, format="PNG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")


def images_to_gemini_parts(
    images: list[Image.Image],
    *,
    max_dim: int = _DEFAULT_MAX_DIM,
) -> list[dict[str, str]]:
    """Convert PIL images into ``{"mime_type", "data"}`` part dictionaries.

    Each image wider or taller than ``max_dim`` is downscaled (aspect ratio
    preserved) so that neither side exceeds ``max_dim``, then PNG-encoded and
    base64-encoded. Falsy entries (such as ``None``) are skipped. The input
    images are never modified.

    Args:
        images: Images to convert; falsy entries are ignored.
        max_dim: Maximum width/height in pixels allowed before downscaling.

    Returns:
        One dictionary per kept image, in input order, with ``"mime_type"``
        set to ``"image/png"`` and ``"data"`` holding the base64 PNG payload.

    Raises:
        ValueError: If ``max_dim`` is not a positive number.
    """
    if max_dim <= 0:
        raise ValueError(f"max_dim must be positive, got {max_dim!r}")

    parts = []
    for img in images:
        if not img:
            continue

        if img.width > max_dim or img.height > max_dim:
            # thumbnail() resizes in place, so work on a copy to leave the
            # caller's image untouched. Images already within bounds are
            # encoded directly, which avoids a needless full-image copy.
            img = img.copy()
            img.thumbnail((max_dim, max_dim), Image.Resampling.LANCZOS)

        parts.append(
            {
                # Must match the format written by pil_to_base64.
                "mime_type": "image/png",
                "data": pil_to_base64(img),
            }
        )
    return parts