Spaces:

ivanoctaviogaitansantos
/

BATUTO_INFINITY

Sleeping

App Files Files Community

Bat infinito

by ivanoctaviogaitansantos - opened Oct 22

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+110

-249

This PR is in draft mode

Files changed (2) hide show

app.py +108 -246
requirements.txt +2 -3

app.py CHANGED Viewed

@@ -12,307 +12,169 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Usando dispositivo: {device}")
 # --- API Keys ---
-# Asegúrate de configurar la variable de entorno TAVILY_API_KEY en tu Space Settings
 TAVILY_API_KEY = os.environ.get("TAVILY_API_KEY")
 if not TAVILY_API_KEY:
-    print("Advertencia: Falta TAVILY_API_KEY. La búsqueda no funcionará.")
-try:
-    tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
-except:
-    tavily_client = None
-    print("Tavily no disponible.")
-# --- Modelos (con manejo de errores) ---
 print("Cargando modelos...")
-try:
-    # 1. LLM: Flan-T5-Large (más ligero que XL para CPU; cambia a XL si tienes RAM)
-    model_name = "google/flan-t5-large"  # Cambia a "google/flan-t5-xl" si tienes >16GB RAM
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    llm_model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
-    print(f"LLM cargado: {model_name}")
-except Exception as e:
-    print(f"Error cargando LLM: {e}. Usando fallback simple.")
-    llm_model = None
-try:
-    # 2. Stable Diffusion
-    pipe_sd = StableDiffusionPipeline.from_pretrained(
-        "stabilityai/stable-diffusion-2-1",
-        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-        # safety_checker=None  # Comentar si causa warnings
-    ).to(device)
-    print("Stable Diffusion cargado.")
-except Exception as e:
-    print(f"Error cargando SD: {e}")
-    pipe_sd = None
-try:
-    # 3. BLIP para captioning (con use_fast=True para evitar warning)
-    blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base", use_fast=True)
-    blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
-    print("BLIP cargado.")
-except Exception as e:
-    print(f"Error cargando BLIP: {e}")
-    blip_processor = blip_model = None
-print("Modelos listos.")
 # --- Historial ---
 def guardar_historial(historial, user_id="default"):
-    # Usar el directorio de trabajo actual si /tmp no es accesible en el entorno
-    try:
-        with open(f'history_{user_id}.json', 'w') as f:
-            json.dump(historial[-20:], f)
-    except:
-        pass
 def cargar_historial(user_id="default"):
     try:
-        with open(f'history_{user_id}.json', 'r') as f:
             return json.load(f)
     except:
         return []
-# --- LLM: Generar respuesta ---
 def generar_respuesta_llm(prompt):
-    if llm_model is None:
-        return f"Fallback: {prompt[:100]}... (LLM no disponible)"
-    try:
-        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(device)
-        outputs = llm_model.generate(
-            **inputs,
-            max_new_tokens=256,
-            temperature=0.7,
-            do_sample=True,
-            top_p=0.9
-        )
-        return tokenizer.decode(outputs[0], skip_special_tokens=True)
-    except Exception as e:
-        return f"Error en LLM: {str(e)}"
-# --- Búsqueda ---
 def busqueda_tecnica(query):
-    if tavily_client is None:
-        return "Búsqueda no disponible (falta API key)."
     try:
         result = tavily_client.search(query, max_results=3)
         return "\n\n".join([f"**{r['title']}**\n{r['content'][:300]}..." for r in result.get('results', [])])
-    except Exception as e:
-        return f"Error en búsqueda: {str(e)}"
 # --- Generar imagen ---
 def generacion_imagenes(prompt):
-    if pipe_sd is None:
-        return None, "Stable Diffusion no disponible."
     try:
-        # Aseguramos que la etiqueta 'BATUTO-ART' esté en el prompt para la firma
-        if 'BATUTO-ART' not in prompt:
-            prompt = f"{prompt}, simple liquid gold marker tag BATUTO-ART in top left corner"
         image = pipe_sd(prompt, num_inference_steps=20).images[0]
-        # Guardar en el directorio de trabajo para Hugging Face
-        path = "generated_img.png"
         image.save(path)
-        return Image.open(path), None
     except Exception as e:
-        return None, f"Error: {str(e)}"
 # --- Analizar imagen ---
 def analizar_imagen(image):
-    if blip_processor is None or blip_model is None:
-        return "Análisis no disponible."
-    try:
-        inputs = blip_processor(images=image, return_tensors="pt").to(device)
-        out = blip_model.generate(**inputs, max_new_tokens=50)
-        return blip_processor.decode(out[0], skip_special_tokens=True)
-    except Exception as e:
-        return f"Error: {str(e)}"
 # --- Pipeline principal ---
 def pipeline(texto, imagen, history):
     if history is None:
         history = cargar_historial()
-    # Preparar el historial para el chatbot (Gradio lo maneja como lista de listas)
-    chatbot_history = []
-    # El pipeline principal ahora regresa los mensajes para el chatbot
-    if not texto and not imagen:
-        return "", None, chatbot_history, None, "", history # No hacer nada si está vacío
-    # Si hay historial, lo cargamos al formato de Gradio para el Chatbot
-    if history:
-        for user, bot in history:
-            # Gradio Chatbot espera [user_msg, bot_msg]
-            chatbot_history.append([user, bot])
     response_text = ""
     response_img = None
     response_caption = ""
-    error_msg = ""
-    user_msg_for_history = texto or ""
-    try:
-        # --- 1. Procesar texto ---
-        if texto:
-            texto = texto.strip()
-            user_msg_for_history = texto # Guardamos el mensaje original del usuario
-            # Búsqueda
-            if texto.lower().startswith("buscar:"):
-                query = texto[7:].strip()
-                response_text = f"**Búsqueda:** {query}\n\n"
-                response_text += busqueda_tecnica(query)
-            # Generar imagen
-            elif texto.lower().startswith("imagen:") or texto.lower().startswith("dibuja:"):
-                prompt = texto.lower().replace("imagen:", "").replace("dibuja:", "").strip()
-                response_text = f"¡Órale, qué buena idea! Ya te estoy generando la imagen para: **{prompt}**"
-                response_img, err = generacion_imagenes(prompt)
-                if err:
-                    response_text += f"\n\n¡Aguas! Error al dibujar: {err}"
-            # Chat normal
-            else:
-                contexto = "Eres BATUTO_INFINITY, un asistente creativo y técnico. Responde útil y directamente.\n\n"
-                if history:
-                    contexto += "Historial reciente:\n"
-                    for user, bot in history[-3:]:
-                        contexto += f"Usuario: {user}\nAsistente: {bot}\n"
-                contexto += f"Usuario: {texto}\nAsistente:"
-                response_text = generar_respuesta_llm(contexto)
-        # --- 2. Procesar imagen (Análisis y Generación de Variante) ---
-        if imagen:
-            response_caption = analizar_imagen(imagen)
-            # Si solo se sube imagen, el texto de respuesta principal es el análisis
-            if not texto:
-                user_msg_for_history = "Análisis de imagen subida."
-                response_text = f"**Análisis de la Imagen:**\n\n{response_caption}"
-            # Siempre se intenta generar una variante si hay BLIP disponible
-            response_img_variant, err = generacion_imagenes(response_caption)
-            if response_img_variant:
-                response_img = response_img_variant # Sobreescribe la imagen generada por texto (si existe)
-            if err:
-                # Solo agregamos el error si no hay una imagen de texto previa
-                if not texto or not response_img:
-                    response_text += f"\n\n**Advertencia:** No se pudo generar la variante visual: {err}"
-        # --- 3. Guardar historial y actualizar chatbot ---
-        if user_msg_for_history or response_text:
-            # Solo guardamos si hubo interacción válida
-            history.append((user_msg_for_history, response_text))
-            guardar_historial(history)
-        # Actualizar el formato de Gradio Chatbot para la salida
-        final_chatbot_output = []
-        if history:
-             for user, bot in history:
-                final_chatbot_output.append([user, bot])
-    except Exception as e:
-        response_text = f"Error general en el pipeline: {str(e)}"
-        final_chatbot_output.append([user_msg_for_history, response_text]) # Agregar error al chat
-    # Devolvemos: mensaje vacío, imagen input vacía, el estado del chatbot, la imagen output, el caption, el estado del historial
-    return "", None, final_chatbot_output, response_img, response_caption, history
-# --- Interfaz Gradio (TEMA Y LAYOUT MODIFICADO) ---
-try:
-    # Tema dark compatible con Gradio 4.x+ para un look pro
-    theme = gr.themes.Base(
-        primary_hue="sky", # Cambié a sky para un look más techie
-        secondary_hue="gray",
-        neutral_hue="slate",
-        font=[gr.themes.GoogleFont("Space Mono")]
-    ).set(
-        body_background_fill="linear-gradient(135deg, #1e1e1e 0%, #2d2d2d 100%)",
-        block_background_fill="linear-gradient(135deg, #2d2d2d 0%, #1e1e1e 100%)",
-        button_secondary_background_fill="#3b3b3b",
-        button_secondary_background_fill_hover="#4a4a4a"
-    )
-except:
-    theme = None
-    print("Usando tema por defecto.")
-with gr.Blocks(title="BATUTO_INFINITY", theme=theme) as iface:
-    # Título más moderno
-    gr.Markdown("## 🤖 BATUTO_INFINITY: Asistente Pro\n**Chat, Web, Imágenes y Visión Multi-modal**")
     with gr.Row():
-        # --- COLUMNA 1: CHATBOT (Principal) ---
-        with gr.Column(scale=3):
-            chatbot = gr.Chatbot(
-                height=550, # Un poco más alto
-                label="Conversación con BATUTO",
-                layout="panel" # Estilo más limpio de Gradio 4
-            )
-        # --- COLUMNA 2: ENTRADAS Y SALIDAS (Controles) ---
         with gr.Column(scale=2):
-            gr.Markdown("### 💬 Instrucciones y Preguntas")
-            texto_input = gr.Textbox(
-                placeholder="Escribe: 'buscar: IA', 'imagen: un gato astronauta', o tu pregunta normal...",
-                label="Instrucción / Pregunta",
-                # Ocultamos la etiqueta grande, el placeholder es suficiente
-            )
-            gr.Markdown("---") # Separador para ordenar
-            gr.Markdown("### 🖼️ Entrada y Salida Visual")
-            with gr.Row():
-                imagen_input = gr.Image(
-                    label="Sube imagen para analizar",
-                    type="pil",
-                    height=200, # Más compacto
-                    image_mode="L",
-                    show_label=True
-                )
-                output_img = gr.Image(
-                    label="Imagen Generada",
-                    height=200
-                )
-            output_caption = gr.Textbox(
-                label="Caption / Descripción de Imagen",
-                interactive=False, # Que el usuario no lo edite
-                lines=2 # Más compacto
-            )
-    # Estado para manejar el historial (lista de tuplas [(user, bot), ...])
-    state = gr.State([])
-    # Submit
-    def submit_fn(texto, imagen, state):
-        # La función devuelve 6 valores, alineados con los outputs
-        out_texto_input, out_imagen_input, out_chatbot, out_img, out_cap, new_state = pipeline(texto, imagen, state)
-        # Gradio ahora necesita que el chatbot sea la salida directa
-        return out_texto_input, out_imagen_input, out_chatbot, out_img, out_cap, new_state
     texto_input.submit(
-        submit_fn,
         inputs=[texto_input, imagen_input, state],
-        outputs=[texto_input, imagen_input, chatbot, output_img, output_caption, state]
     )
-    # Ejemplos (lo dejamos igual)
     gr.Examples(
         examples=[
-            ["buscar: avances en IA 2025"],
-            ["imagen: un dragón cyberpunk"],
-            ["¿Cómo funciona un LLM?"],
         ],
         inputs=texto_input
     )
 if __name__ == "__main__":
-    # Usamos share=True para generar un enlace temporal, útil en entornos como Colab o si quieres compartirlo
-    iface.launch(share=True)

 # Report the compute device chosen earlier in the file ("cuda" or "cpu").
 print(f"Usando dispositivo: {device}")
 # --- API Keys ---
 TAVILY_API_KEY = os.environ.get("TAVILY_API_KEY")
 if not TAVILY_API_KEY:
     # Fail fast at import time: the search client below needs the key.
+    raise ValueError("Falta TAVILY_API_KEY en variables de entorno")
+tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
+# --- Models ---
 # NOTE(review): every model is loaded eagerly at import time with no error
 # handling, so any load failure aborts application startup.
 print("Cargando modelos...")
+# 1. LLM: Flan-T5-XL (free, capable, runs on Spaces)
+tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xl")
+llm_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-xl").to(device)
+# 2. Stable Diffusion
+pipe_sd = StableDiffusionPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-2-1",
     # Half precision on CUDA, full precision on CPU.
+    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
     # NOTE(review): presumably turns off the pipeline's safety checker -
     # confirm this is intended for a public Space.
+    safety_checker=None
+).to(device)
+# 3. BLIP for image captioning
+blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
+print("Modelos cargados.")
 # --- Historial ---
 def guardar_historial(historial, user_id="default"):
     """Persist the most recent chat history for ``user_id``.

     Writes the last 20 entries of ``historial`` as JSON to
     ``/tmp/history_{user_id}.json``. Saving is best-effort: a filesystem
     error is reported on stdout and otherwise ignored, so a failed write
     cannot discard a reply that has already been generated.

     Args:
         historial: Sequence of JSON-serialisable history entries.
         user_id: Identifier used to build the history file name.
     """
     try:
         with open(f'/tmp/history_{user_id}.json', 'w') as f:
             json.dump(historial[-20:], f)  # keep only the last 20 entries
     except OSError as e:
         # Only filesystem problems are tolerated; serialisation errors are
         # programming bugs and still propagate.
         print(f"No se pudo guardar el historial: {e}")
 def cargar_historial(user_id="default"):
     """Load the saved chat history for ``user_id``.

     Reads ``/tmp/history_{user_id}.json`` and returns the decoded JSON value.
     A missing, unreadable or corrupt file is treated as "no history yet" and
     yields an empty list.

     Args:
         user_id: Identifier used to build the history file name.
     """
     try:
         with open(f'/tmp/history_{user_id}.json', 'r') as f:
             return json.load(f)
     except (OSError, ValueError):
         # OSError: file missing or unreadable. ValueError: invalid JSON or
         # undecodable bytes (json.JSONDecodeError and UnicodeDecodeError are
         # ValueError subclasses). Anything else is a real bug and should
         # surface instead of being swallowed by a bare ``except``.
         return []
+# --- LLM: Generar respuesta inteligente ---
 def generar_respuesta_llm(prompt):
     """Generate a text reply for ``prompt`` with the module-level seq2seq LLM.

     The prompt is tokenized (truncated to 512 tokens) and moved to
     ``device``; up to 256 new tokens are then sampled (temperature 0.7,
     top-p 0.9). Returns the first generated sequence decoded to a string
     with special tokens removed.
     """
+    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(device)
+    outputs = llm_model.generate(
+        **inputs,
+        max_new_tokens=256,
+        temperature=0.7,
+        do_sample=True,
+        top_p=0.9
+    )
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+# --- Búsqueda Tavily ---
 def busqueda_tecnica(query):
     """Search the web for ``query`` via Tavily and format the top results.

     Args:
         query: Search text sent to the Tavily client.

     Returns:
         Up to three results as Markdown (bold title plus the first 300
         characters of the content), separated by blank lines, or an error
         message string if the search or the formatting fails.
     """
     try:
         result = tavily_client.search(query, max_results=3)
         return "\n\n".join([f"**{r['title']}**\n{r['content'][:300]}..." for r in result.get('results', [])])
     except Exception as e:
         # Report the real cause: failures here are not necessarily API-key
         # problems (network errors, rate limits, unexpected payloads), and a
         # bare ``except`` would also trap KeyboardInterrupt/SystemExit.
         return f"Error en búsqueda: {e}"
 # --- Generar imagen ---
 def generacion_imagenes(prompt):
     """Generate an image for ``prompt`` with the Stable Diffusion pipeline.

     The image is produced with 20 inference steps and saved to
     ``/tmp/generated_img.png`` before being returned.

     Args:
         prompt: Text description of the image to generate.

     Returns:
         ``(image, None)`` on success or ``(None, error_message)`` on failure,
         so callers can always unpack exactly two values.
     """
     try:
         image = pipe_sd(prompt, num_inference_steps=20).images[0]
         path = "/tmp/generated_img.png"
         image.save(path)
         # Always return a pair: callers unpack two values, and returning the
         # bare image made every *successful* generation fail at unpacking.
         return Image.open(path), None
     except Exception as e:
         return None, f"Error generando imagen: {str(e)}"
 # --- Analizar imagen ---
 def analizar_imagen(image):
     """Describe ``image`` using the module-level BLIP processor and model.

     The image is preprocessed into tensors on ``device``, at most 50 new
     tokens are generated, and the first output sequence is decoded to a
     string with special tokens removed.
     """
+    inputs = blip_processor(images=image, return_tensors="pt").to(device)
+    out = blip_model.generate(**inputs, max_new_tokens=50)
+    return blip_processor.decode(out[0], skip_special_tokens=True)
 # --- Pipeline principal ---
 def pipeline(texto, imagen, history):
     """Route one user turn to search, image generation, chat and/or image analysis.

     Args:
         texto: Raw text from the input box. A ``buscar:`` prefix triggers a
             web search, ``imagen:`` / ``dibuja:`` trigger image generation,
             and anything else is answered by the LLM. ``None``, empty and
             whitespace-only values are treated as "no text".
         imagen: Uploaded image to analyse, or ``None``.
         history: List of ``(user, assistant)`` pairs for this session;
             ``None`` loads the persisted history instead.

     Returns:
         ``(response_text, response_img, response_caption, history)``.
         ``history`` is the same list that was passed in, extended in place
         with this turn when there was any input.
     """
     if history is None:
         history = cargar_historial()
     response_text = ""
     response_img = None
     response_caption = ""
     # Normalise once so whitespace-only input is not sent to the LLM.
     texto = (texto or "").strip()
     # --- 1. Process text ---
     if texto:
         minusculas = texto.lower()
         # Search
         if minusculas.startswith("buscar:"):
             query = texto[len("buscar:"):].strip()
             response_text = f"**Búsqueda:** {query}\n\n"
             response_text += busqueda_tecnica(query)
         # Generate an image from text
         elif minusculas.startswith(("imagen:", "dibuja:")):
             # Both prefixes are 7 characters long, so one slice strips either.
             prompt = texto[len("imagen:"):].strip()
             response_text = f"Generando imagen para: **{prompt}**"
             response_img, error = generacion_imagenes(prompt)
             if error:
                 response_text += f"\n\n{error}"
         # Regular chat with the LLM
         else:
             # Build the context from the system line plus the last 3 turns.
             contexto = "Eres BATUTO_INFINITY, un asistente creativo y técnico. Responde útil y directamente.\n\n"
             if history:
                 contexto += "Historial reciente:\n"
                 for user, bot in history[-3:]:
                     contexto += f"Usuario: {user}\nAsistente: {bot}\n"
             contexto += f"Usuario: {texto}\nAsistente:"
             response_text = generar_respuesta_llm(contexto)
     # --- 2. Process image ---
     if imagen is not None:
         response_caption = analizar_imagen(imagen)
         response_text += f"\n\n**Análisis de imagen:** {response_caption}"
         # Optional: generate a new image from the caption. Keep any image
         # already produced from the text if this variant fails.
         variante, _ = generacion_imagenes(response_caption)
         if variante is not None:
             response_img = variante
     # --- 3. Record the turn ---
     # Done last so the stored reply includes the image analysis, and so
     # image-only turns are recorded too.
     if texto or imagen is not None:
         history.append((texto or "Análisis de imagen subida.", response_text))
         guardar_historial(history)
     return response_text, response_img, response_caption, history
+# --- Interfaz Gradio ---
+with gr.Blocks(title="BATUTO_INFINITY", theme=gr.themes.Dark()) as iface:
+    gr.Markdown("# BATUTO_INFINITY\n**Chat + Búsqueda + Imágenes + Análisis**")
     with gr.Row():
         with gr.Column(scale=2):
+            chatbot = gr.Chatbot(height=500)
+            texto_input = gr.Textbox(placeholder="Escribe: 'buscar: IA', 'imagen: un gato astronauta', o pregunta normal...", label="Entrada")
+            imagen_input = gr.Image(label="Sube imagen para analizar", type="pil")
+        with gr.Column(scale=1):
+            gr.Markdown("### Salidas")
+            output_img = gr.Image(label="Imagen generada")
+            output_caption = gr.Textbox(label="Caption")
+    # Estado oculto
+    state = gr.State([])
+    # Enviar con botón o enter
     texto_input.submit(
+        pipeline,
         inputs=[texto_input, imagen_input, state],
+        outputs=[chatbot, output_img, output_caption, state]
+    ).then(
+        lambda: ("", None), outputs=[texto_input, imagen_input]
     )
+    # Ejemplos
     gr.Examples(
         examples=[
+            ["buscar: avances en fusión nuclear 2025"],
+            ["imagen: un dragón cyberpunk volando sobre Tokio"],
+            ["¿Cómo funciona Stable Diffusion?"],
         ],
         inputs=texto_input
     )
 if __name__ == "__main__":
+    iface.launch()

requirements.txt CHANGED Viewed

@@ -1,7 +1,6 @@
-gradio>=4.0
 torch
 diffusers[torch]
 transformers
 tavily-python
-pillow
-accelerate

+gradio
 torch
 diffusers[torch]
 transformers
 tavily-python
+pillow