Spaces: Sleeping
chrisx599 committed
Commit 214a439
Parent(s): ae3cc24
first commit
Browse files
- .gitattributes +1 -0
- .gitignore +3 -0
- app.py +348 -0
- requirements.txt +83 -0
- serve/__init__.py +0 -0
- serve/__pycache__/__init__.cpython-310.pyc +0 -0
- serve/__pycache__/chat_utils.cpython-310.pyc +0 -0
- serve/__pycache__/examples.cpython-310.pyc +0 -0
- serve/__pycache__/frontend.cpython-310.pyc +0 -0
- serve/__pycache__/gradio_utils.cpython-310.pyc +0 -0
- serve/__pycache__/utils.cpython-310.pyc +0 -0
- serve/assets/Kelpy-Codos.js +100 -0
- serve/assets/avatar.png +0 -0
- serve/assets/custom.css +355 -0
- serve/assets/custom.js +22 -0
- serve/assets/favicon.ico +0 -0
- serve/chat_utils.py +497 -0
- serve/examples.py +73 -0
- serve/frontend.py +126 -0
- serve/gradio_utils.py +95 -0
- serve/inference.py +268 -0
- serve/utils.py +290 -0
- videos/demo1.mp4 +3 -0
- videos/demo2.mp4 +3 -0
.gitattributes
ADDED
@@ -0,0 +1 @@
videos/*.mp4 filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,3 @@
logs
effi.py
wo_effi.py
app.py
ADDED
@@ -0,0 +1,348 @@
import argparse
import gradio as gr
import os
from PIL import Image
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

from serve.frontend import reload_javascript
from serve.utils import (
    configure_logger,
)
from serve.gradio_utils import (
    cancel_outputing,
    delete_last_conversation,
    reset_state,
    reset_textbox,
    transfer_input,
    wrap_gen_fn,
)
from serve.chat_utils import compress_video_to_base64
from serve.examples import get_examples

import logging

TITLE = """<h1 align="left" style="min-width:200px; margin-top:0;">Chat with Video-XL-2 </h1>"""
DESCRIPTION_TOP = """<a href="https://unabletousegit.github.io/video-xl2.github.io" target="_blank">Video-XL-2</a>, a better, faster, and high-frame-count model for long video understanding."""
DESCRIPTION = """"""
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
DEPLOY_MODELS = dict()
logger = configure_logger()
DEFAULT_IMAGE_TOKEN = "<image>"


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, default="Video-XL-2")
    parser.add_argument(
        "--local-path",
        type=str,
        default="/share/project/minghao/Share_1/Models/Video-XL-2",
        help="huggingface ckpt, optional",
    )
    parser.add_argument("--ip", type=str, default="0.0.0.0")
    parser.add_argument("--port", type=int, default=7860)
    return parser.parse_args()


def fetch_model(model_name: str):
    global DEPLOY_MODELS

    local_model_path = '/share/project/minghao/Share_1/Models/Video-XL-2'

    if model_name in DEPLOY_MODELS:
        model_info = DEPLOY_MODELS[model_name]
        print(f"{model_name} has been loaded.")
    else:
        print(f"{model_name} is loading...")
        device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
        tokenizer = AutoTokenizer.from_pretrained(local_model_path, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            local_model_path,
            trust_remote_code=True,
            device_map=device,
            quantization_config=None,
            attn_implementation="sdpa",
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True
        )
        DEPLOY_MODELS[model_name] = (model, tokenizer)
        print(f"Load {model_name} successfully...")
        model_info = DEPLOY_MODELS[model_name]

    return model_info


def preview_images(files) -> list[str]:
    if files is None:
        return []

    image_paths = []
    for file in files:
        image_paths.append(file.name)
    return image_paths


@wrap_gen_fn
def predict(
    text,
    images,
    chatbot,
    history,
    top_p,
    temperature,
    max_generate_length,
    max_context_length_tokens,
    video_nframes,
    chunk_size: int = 512,
):
    """
    Predict the response for the input text and images.
    Args:
        text (str): The input text.
        images (list[PIL.Image.Image]): The input images.
        chatbot (list): The chatbot.
        history (dict): The history (video path and chat context).
        top_p (float): The top-p value.
        temperature (float): The temperature value.
        max_generate_length (int): The max number of generated tokens.
        max_context_length_tokens (int): The max context length in tokens.
        video_nframes (int): The max number of video frames to sample.
        chunk_size (int): The chunk size.
    """

    if images is None:
        pil_images = history["video_path"]
    else:
        pil_images = images[0].name

    print("running the prediction function")
    try:
        logger.info("fetching model")
        model, tokenizer = fetch_model(args.model)
        logger.info("model fetched")
        if text == "":
            yield chatbot, history, "Empty context."
            return
    except KeyError:
        logger.info("no model found")
        yield [[text, "No Model Found"]], [], "No Model Found"
        return

    gen_kwargs = {
        "do_sample": True if temperature > 1e-2 else False,
        "temperature": temperature,
        "top_p": top_p,
        "num_beams": 1,
        "use_cache": True,
        "max_new_tokens": max_generate_length,
    }

    # Check if this is the very first turn with an image
    is_first_image_turn = (len(history) == 0 and pil_images)
    if is_first_image_turn:
        history["video_path"] = pil_images
        history["context"] = None

    response, temp_history = model.chat(
        history["video_path"] if "video_path" in history else pil_images,
        tokenizer,
        text,
        chat_history=history["context"],
        return_history=True,
        max_num_frames=video_nframes,
        sample_fps=None,
        max_sample_fps=None,
        generation_config=gen_kwargs
    )

    text_for_history = text

    if is_first_image_turn:
        media_str = ""
        b64 = compress_video_to_base64(history["video_path"] if "video_path" in history else pil_images)
        media_str += (
            f'<video controls style="max-width:300px;height:auto;" '
            f'src="data:video/mp4;base64,{b64}"></video>'
        )
        text_for_history = media_str + text_for_history
        chatbot.append([text_for_history, response])
    else:
        chatbot.append([text_for_history, response])
    history["context"] = temp_history

    logger.info("flushed result to gradio")

    print(
        f"temperature: {temperature}, "
        f"top_p: {top_p}, "
        f"max_generate_length: {max_generate_length}"
    )

    yield chatbot, history, "Generate: Success"


def retry(
    text,  # this `text` is the current text box content, not the last user input
    images,
    chatbot,
    full_history,  # this is the full history
    top_p,
    temperature,
    max_generate_length,
    max_context_length_tokens,
    video_nframes,
    chunk_size: int = 512,
):
    """
    Retry the response for the input text and images.
    """
    history = full_history["context"]
    if len(history) == 0:
        yield (chatbot, history, "Empty context")
        return

    # Get the last user input before popping
    # print("history:", history)
    last_user_input = history[-2]["content"]

    # Remove the last turn from chatbot and history
    chatbot.pop()
    history.pop()

    full_history["context"] = history
    # Now call predict with the last user input and the modified history
    yield from predict(
        last_user_input,  # pass the last user input as the current text
        images,  # images should be the same as the last turn
        chatbot,  # updated chatbot
        full_history,  # updated history
        top_p,
        temperature,
        max_generate_length,
        max_context_length_tokens,
        video_nframes,
        chunk_size,
    )


def build_demo(args: argparse.Namespace) -> gr.Blocks:
    with gr.Blocks(theme=gr.themes.Soft(), delete_cache=(1800, 1800)) as demo:
        history = gr.State(dict())
        input_text = gr.State()
        input_images = gr.State()

        with gr.Row():
            gr.HTML(TITLE)
            status_display = gr.Markdown("Success", elem_id="status_display")
        gr.Markdown(DESCRIPTION_TOP)

        with gr.Row(equal_height=True):
            with gr.Column(scale=4):
                with gr.Row():
                    chatbot = gr.Chatbot(
                        elem_id="Video-XL-2_Demo-chatbot",
                        show_share_button=True,
                        bubble_full_width=False,
                        height=600,
                    )
                with gr.Row():
                    with gr.Column(scale=4):
                        text_box = gr.Textbox(show_label=False, placeholder="Enter text", container=False)
                    with gr.Column(min_width=70):
                        submit_btn = gr.Button("Send")
                    with gr.Column(min_width=70):
                        cancel_btn = gr.Button("Stop")
                with gr.Row():
                    empty_btn = gr.Button("🧹 New Conversation")
                    retry_btn = gr.Button("🔄 Regenerate")
                    del_last_btn = gr.Button("🗑️ Remove Last Turn")

            with gr.Column():
                # note: no more than 2 images at once
                gr.Markdown("Note: you can upload images or videos!")
                upload_images = gr.Files(file_types=["image", "video"], show_label=True)
                gallery = gr.Gallery(columns=[3], height="200px", show_label=True)
                upload_images.change(preview_images, inputs=upload_images, outputs=gallery)

                # Parameter Setting tab for controlling the generation parameters
                with gr.Tab(label="Parameter Setting"):
                    top_p = gr.Slider(minimum=0, maximum=1.0, value=0.001, step=0.05, interactive=True, label="Top-p")
                    temperature = gr.Slider(
                        minimum=0, maximum=1.0, value=0.01, step=0.1, interactive=True, label="Temperature"
                    )
                    max_generate_length = gr.Slider(
                        minimum=512, maximum=8192, value=4096, step=64, interactive=True, label="Max Generate Length"
                    )
                    max_context_length_tokens = gr.Slider(
                        minimum=512, maximum=65536, value=16384, step=64, interactive=True, label="Max Context Length Tokens"
                    )
                    video_nframes = gr.Slider(
                        minimum=1, maximum=128, value=128, step=1, interactive=True, label="Video Nframes"
                    )
                show_images = gr.HTML(visible=False)
        gr.Markdown("This demo is based on `moonshotai/Kimi-VL-A3B-Thinking` & `deepseek-ai/deepseek-vl2-small` and extends it by adding support for video input.")

        gr.Examples(
            examples=get_examples(ROOT_DIR),
            inputs=[upload_images, show_images, text_box],
        )
        gr.Markdown()

        input_widgets = [
            input_text,
            input_images,
            chatbot,
            history,
            top_p,
            temperature,
            max_generate_length,
            max_context_length_tokens,
            video_nframes
        ]
        output_widgets = [chatbot, history, status_display]

        transfer_input_args = dict(
            fn=transfer_input,
            inputs=[text_box, upload_images],
            outputs=[input_text, input_images, text_box, upload_images, submit_btn],
            show_progress=True,
        )

        predict_args = dict(fn=predict, inputs=input_widgets, outputs=output_widgets, show_progress=True)
        retry_args = dict(fn=retry, inputs=input_widgets, outputs=output_widgets, show_progress=True)
        reset_args = dict(fn=reset_textbox, inputs=[], outputs=[text_box, status_display])

        predict_events = [
            text_box.submit(**transfer_input_args).then(**predict_args),
            submit_btn.click(**transfer_input_args).then(**predict_args),
        ]

        empty_btn.click(reset_state, outputs=output_widgets, show_progress=True)
        empty_btn.click(**reset_args)
        retry_btn.click(**retry_args)
        del_last_btn.click(delete_last_conversation, [chatbot, history], output_widgets, show_progress=True)
        cancel_btn.click(cancel_outputing, [], [status_display], cancels=predict_events)

    demo.title = "Video-XL-2_Demo Chatbot"
    return demo


def main(args: argparse.Namespace):
    demo = build_demo(args)
    reload_javascript()

    # concurrency_count=CONCURRENT_COUNT, max_size=MAX_EVENTS
    favicon_path = os.path.join("serve/assets/favicon.ico")
    demo.queue().launch(
        favicon_path=favicon_path if os.path.exists(favicon_path) else None,
        server_name=args.ip,
        server_port=args.port,
    )


if __name__ == "__main__":
    args = parse_args()
    main(args)
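For reference, below is a minimal standalone sketch of the `model.chat` call pattern that `predict` above relies on, outside of Gradio. The checkpoint path, video path, and prompt are placeholders; the keyword arguments simply mirror the ones app.py passes, and `chat` itself comes from the model's `trust_remote_code` implementation, so its exact signature is assumed from that usage rather than documented here.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_PATH = "/path/to/Video-XL-2"  # placeholder checkpoint path
device = "cuda:0" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    trust_remote_code=True,
    device_map=device,
    attn_implementation="sdpa",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)

# Same generation settings predict() builds from the UI slider defaults.
gen_kwargs = {
    "do_sample": False,
    "temperature": 0.01,
    "top_p": 0.001,
    "num_beams": 1,
    "use_cache": True,
    "max_new_tokens": 4096,
}

# First turn: pass a video path and no prior history, as predict() does.
response, history = model.chat(
    "videos/demo1.mp4",          # placeholder video; the repo ships two demo clips
    tokenizer,
    "Describe this video.",
    chat_history=None,
    return_history=True,
    max_num_frames=128,
    sample_fps=None,
    max_sample_fps=None,
    generation_config=gen_kwargs,
)
print(response)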
requirements.txt
ADDED
@@ -0,0 +1,83 @@
accelerate==0.30.0
aiofiles==24.1.0
annotated-types==0.7.0
anyio==4.9.0
certifi==2025.6.15
charset-normalizer==3.4.2
click==8.2.1
colorama==0.4.6
decorator==4.4.2
decord==0.6.0
einops==0.8.1
exceptiongroup==1.3.0
fastapi==0.115.13
ffmpy==0.6.0
filelock==3.18.0
fsspec==2025.5.1
gradio==5.25.2
gradio_client==1.8.0
groovy==0.1.2
h11==0.16.0
hf-xet==1.1.5
httpcore==1.0.9
httpx==0.28.1
huggingface-hub==0.33.0
idna==3.10
imageio==2.37.0
imageio-ffmpeg==0.6.0
Jinja2==3.1.4
latex2mathml==3.78.0
Markdown==3.8.2
markdown-it-py==3.0.0
MarkupSafe==2.1.5
mdtex2html==1.3.1
mdurl==0.1.2
moviepy==2.2.1
mpmath==1.3.0
networkx==3.3
numpy==1.26.4
opencv-python==4.11.0.86
orjson==3.10.18
packaging==25.0
pandas==2.3.0
pillow==11.0.0
proglog==0.1.12
psutil==7.0.0
pydantic==2.11.7
pydantic_core==2.33.2
pydub==0.25.1
Pygments==2.19.2
pypinyin==0.54.0
python-dateutil==2.9.0.post0
python-dotenv==1.1.0
python-multipart==0.0.20
pytz==2025.2
PyYAML==6.0.2
regex==2024.11.6
requests==2.32.4
rich==14.0.0
ruff==0.12.0
safehttpx==0.1.6
safetensors==0.5.3
semantic-version==2.10.0
sentencepiece==0.2.0
shellingham==1.5.4
six==1.17.0
sniffio==1.3.1
starlette==0.46.2
sympy==1.13.3
tokenizers==0.19.1
tomlkit==0.13.3
torch==2.1.2+cu121
torchaudio==2.1.2+cu121
torchvision==0.16.2+cu121
tqdm==4.67.1
transformers==4.43.0
triton==2.1.0
typer==0.16.0
typing-inspection==0.4.1
typing_extensions==4.14.0
tzdata==2025.2
urllib3==2.5.0
uvicorn==0.34.3
websockets==15.0.1
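The three `+cu121` pins are CUDA 12.1 wheels, which are normally resolved from PyTorch's own wheel index rather than PyPI; how this Space installs them is not stated in the commit, so treat that as an assumption. A small sanity-check sketch for the pinned stack after installation:

import gradio
import torch
import transformers

# Expected pins from requirements.txt above.
assert torch.__version__.startswith("2.1.2"), torch.__version__
assert transformers.__version__ == "4.43.0", transformers.__version__
assert gradio.__version__ == "5.25.2", gradio.__version__
print("CUDA available:", torch.cuda.is_available())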
serve/__init__.py
ADDED
File without changes

serve/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (147 Bytes).

serve/__pycache__/chat_utils.cpython-310.pyc
ADDED
Binary file (11.7 kB).

serve/__pycache__/examples.cpython-310.pyc
ADDED
Binary file (2.16 kB).

serve/__pycache__/frontend.cpython-310.pyc
ADDED
Binary file (4.02 kB).

serve/__pycache__/gradio_utils.cpython-310.pyc
ADDED
Binary file (2.56 kB).

serve/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (8.54 kB).
serve/assets/Kelpy-Codos.js
ADDED
@@ -0,0 +1,100 @@
/**
 * Copyright (c) 2023-2024 DeepSeek.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

// ==UserScript==
// @name         Kelpy Codos
// @namespace    https://github.com/Keldos-Li/Kelpy-Codos
// @version      1.0.5
// @author       Keldos; https://keldos.me/
// @description  Add copy button to PRE tags before CODE tag, for Chuanhu ChatGPT especially.
//               Based on Chuanhu ChatGPT version: ac04408 (2023-3-22)
// @license      GPL-3.0
// @grant        none
// ==/UserScript==

(function () {
  "use strict";

  function addCopyButton(pre) {
    var code = pre.querySelector("code");
    if (!code) {
      return; // if no <code> element is found, do not add the button
    }
    var firstChild = code.firstChild;
    if (!firstChild) {
      return; // if the <code> element has no child nodes, do not add the button
    }
    var button = document.createElement("button");
    button.textContent = "\uD83D\uDCCE"; // use the 📎 symbol as the "copy" button text
    button.style.position = "relative";
    button.style.float = "right";
    button.style.fontSize = "1em"; // optional: adjust the button size
    button.style.background = "none"; // optional: remove the background color
    button.style.border = "none"; // optional: remove the border
    button.style.cursor = "pointer"; // optional: show a pointer cursor
    button.addEventListener("click", function () {
      var range = document.createRange();
      range.selectNodeContents(code);
      range.setStartBefore(firstChild); // set the range to start before the first child node
      var selection = window.getSelection();
      selection.removeAllRanges();
      selection.addRange(range);

      try {
        var success = document.execCommand("copy");
        if (success) {
          button.textContent = "\u2714";
          setTimeout(function () {
            button.textContent = "\uD83D\uDCCE"; // restore the button to "copy"
          }, 2000);
        } else {
          button.textContent = "\u2716";
        }
      } catch (e) {
        console.error(e);
        button.textContent = "\u2716";
      }

      selection.removeAllRanges();
    });
    code.insertBefore(button, firstChild); // insert the button before the first child element
  }

  function handleNewElements(mutationsList, observer) {
    for (var mutation of mutationsList) {
      if (mutation.type === "childList") {
        for (var node of mutation.addedNodes) {
          if (node.nodeName === "PRE") {
            addCopyButton(node);
          }
        }
      }
    }
  }

  var observer = new MutationObserver(handleNewElements);
  observer.observe(document.documentElement, {
    childList: true,
    subtree: true,
  });

  document.querySelectorAll("pre").forEach(addCopyButton);
})();
serve/assets/avatar.png
ADDED
serve/assets/custom.css
ADDED
@@ -0,0 +1,355 @@
/**
 * Copyright (c) 2023-2024 DeepSeek.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

:root {
  --chatbot-color-light: #f3f3f3;
  --chatbot-color-dark: #121111;
}

/* status_display */
#status_display {
  display: flex;
  min-height: 2.5em;
  align-items: flex-end;
  justify-content: flex-end;
}
#status_display p {
  font-size: 0.85em;
  font-family: monospace;
  color: var(--body-text-color-subdued);
}

/* usage_display */
#usage_display {
  height: 1em;
}
#usage_display p {
  padding: 0 1em;
  font-size: 0.85em;
  font-family: monospace;
  color: var(--body-text-color-subdued);
}
/* list */
ol:not(.options),
ul:not(.options) {
  padding-inline-start: 2em !important;
}

/* Thank @Keldos-Li for fixing it */
/* Light mode (default) */
#deepseek_chatbot {
  background-color: var(--chatbot-color-light) !important;
  color: #000000 !important;
}
[data-testid="bot"] {
  background-color: #ffffff !important;
}
[data-testid="user"] {
  background-color: #95ec69 !important;
}

/* Dark mode */
.dark #deepseek_chatbot {
  background-color: var(--chatbot-color-dark) !important;
  color: #ffffff !important;
}
.dark [data-testid="bot"] {
  background-color: #2c2c2c !important;
}
.dark [data-testid="user"] {
  background-color: #26b561 !important;
}

#deepseek_chatbot {
  height: 100%;
  min-height: 800px;
  flex-grow: 1;
  overflow: auto;
}

[class*="message"] {
  border-radius: var(--radius-xl) !important;
  border: none;
  padding: var(--spacing-xl) !important;
  font-size: var(--text-md) !important;
  line-height: var(--line-md) !important;
  min-height: calc(var(--text-md) * var(--line-md) + 2 * var(--spacing-xl));
  min-width: calc(var(--text-md) * var(--line-md) + 2 * var(--spacing-xl));
}
[data-testid="bot"] {
  max-width: 85%;
  border-bottom-left-radius: 0 !important;
}
[data-testid="user"] {
  max-width: 85%;
  width: auto !important;
  border-bottom-right-radius: 0 !important;
}
/* Table */
table {
  margin: 1em 0;
  border-collapse: collapse;
  empty-cells: show;
}
td,
th {
  border: 1.2px solid var(--border-color-primary) !important;
  padding: 0.2em;
}
thead {
  background-color: rgba(175, 184, 193, 0.2);
}
thead th {
  padding: 0.5em 0.2em;
}
/* Inline code */
#deepseek_chatbot code {
  display: inline;
  white-space: break-spaces;
  border-radius: 6px;
  margin: 0 2px 0 2px;
  padding: 0.2em 0.4em 0.1em 0.4em;
  background-color: rgba(175, 184, 193, 0.2);
}
/* Code block */
#deepseek_chatbot pre code {
  display: block;
  overflow: auto;
  white-space: pre;
  background-color: #1c1d1e !important;
  border-radius: 10px;
  padding: 1.4em 1.2em 0em 1.4em;
  margin: 1.2em 2em 1.2em 0.5em;
  color: #fdf8f8;
  box-shadow: 6px 6px 16px hsla(0, 0%, 0%, 0.2);
}
/* Highlight */
#deepseek_chatbot .highlight {
  background-color: transparent;
}
#deepseek_chatbot .highlight .hll {
  background-color: #49483e;
}
#deepseek_chatbot .highlight .c {
  color: #75715e;
} /* Comment */
#deepseek_chatbot .highlight .err {
  color: #960050;
  background-color: #1e0010;
} /* Error */
#deepseek_chatbot .highlight .k {
  color: #66d9ef;
} /* Keyword */
#deepseek_chatbot .highlight .l {
  color: #ae81ff;
} /* Literal */
#deepseek_chatbot .highlight .n {
  color: #f8f8f2;
} /* Name */
#deepseek_chatbot .highlight .o {
  color: #f92672;
} /* Operator */
#deepseek_chatbot .highlight .p {
  color: #f8f8f2;
} /* Punctuation */
#deepseek_chatbot .highlight .ch {
  color: #75715e;
} /* Comment.Hashbang */
#deepseek_chatbot .highlight .cm {
  color: #75715e;
} /* Comment.Multiline */
#deepseek_chatbot .highlight .cp {
  color: #75715e;
} /* Comment.Preproc */
#deepseek_chatbot .highlight .cpf {
  color: #75715e;
} /* Comment.PreprocFile */
#deepseek_chatbot .highlight .c1 {
  color: #75715e;
} /* Comment.Single */
#deepseek_chatbot .highlight .cs {
  color: #75715e;
} /* Comment.Special */
#deepseek_chatbot .highlight .gd {
  color: #f92672;
} /* Generic.Deleted */
#deepseek_chatbot .highlight .ge {
  font-style: italic;
} /* Generic.Emph */
#deepseek_chatbot .highlight .gi {
  color: #a6e22e;
} /* Generic.Inserted */
#deepseek_chatbot .highlight .gs {
  font-weight: bold;
} /* Generic.Strong */
#deepseek_chatbot .highlight .gu {
  color: #75715e;
} /* Generic.Subheading */
#deepseek_chatbot .highlight .kc {
  color: #66d9ef;
} /* Keyword.Constant */
#deepseek_chatbot .highlight .kd {
  color: #66d9ef;
} /* Keyword.Declaration */
#deepseek_chatbot .highlight .kn {
  color: #f92672;
} /* Keyword.Namespace */
#deepseek_chatbot .highlight .kp {
  color: #66d9ef;
} /* Keyword.Pseudo */
#deepseek_chatbot .highlight .kr {
  color: #66d9ef;
} /* Keyword.Reserved */
#deepseek_chatbot .highlight .kt {
  color: #66d9ef;
} /* Keyword.Type */
#deepseek_chatbot .highlight .ld {
  color: #e6db74;
} /* Literal.Date */
#deepseek_chatbot .highlight .m {
  color: #ae81ff;
} /* Literal.Number */
#deepseek_chatbot .highlight .s {
  color: #e6db74;
} /* Literal.String */
#deepseek_chatbot .highlight .na {
  color: #a6e22e;
} /* Name.Attribute */
#deepseek_chatbot .highlight .nb {
  color: #f8f8f2;
} /* Name.Builtin */
#deepseek_chatbot .highlight .nc {
  color: #a6e22e;
} /* Name.Class */
#deepseek_chatbot .highlight .no {
  color: #66d9ef;
} /* Name.Constant */
#deepseek_chatbot .highlight .nd {
  color: #a6e22e;
} /* Name.Decorator */
#deepseek_chatbot .highlight .ni {
  color: #f8f8f2;
} /* Name.Entity */
#deepseek_chatbot .highlight .ne {
  color: #a6e22e;
} /* Name.Exception */
#deepseek_chatbot .highlight .nf {
  color: #a6e22e;
} /* Name.Function */
#deepseek_chatbot .highlight .nl {
  color: #f8f8f2;
} /* Name.Label */
#deepseek_chatbot .highlight .nn {
  color: #f8f8f2;
} /* Name.Namespace */
#deepseek_chatbot .highlight .nx {
  color: #a6e22e;
} /* Name.Other */
#deepseek_chatbot .highlight .py {
  color: #f8f8f2;
} /* Name.Property */
#deepseek_chatbot .highlight .nt {
  color: #f92672;
} /* Name.Tag */
#deepseek_chatbot .highlight .nv {
  color: #f8f8f2;
} /* Name.Variable */
#deepseek_chatbot .highlight .ow {
  color: #f92672;
} /* Operator.Word */
#deepseek_chatbot .highlight .w {
  color: #f8f8f2;
} /* Text.Whitespace */
#deepseek_chatbot .highlight .mb {
  color: #ae81ff;
} /* Literal.Number.Bin */
#deepseek_chatbot .highlight .mf {
  color: #ae81ff;
} /* Literal.Number.Float */
#deepseek_chatbot .highlight .mh {
  color: #ae81ff;
} /* Literal.Number.Hex */
#deepseek_chatbot .highlight .mi {
  color: #ae81ff;
} /* Literal.Number.Integer */
#deepseek_chatbot .highlight .mo {
  color: #ae81ff;
} /* Literal.Number.Oct */
#deepseek_chatbot .highlight .sa {
  color: #e6db74;
} /* Literal.String.Affix */
#deepseek_chatbot .highlight .sb {
  color: #e6db74;
} /* Literal.String.Backtick */
#deepseek_chatbot .highlight .sc {
  color: #e6db74;
} /* Literal.String.Char */
#deepseek_chatbot .highlight .dl {
  color: #e6db74;
} /* Literal.String.Delimiter */
#deepseek_chatbot .highlight .sd {
  color: #e6db74;
} /* Literal.String.Doc */
#deepseek_chatbot .highlight .s2 {
  color: #e6db74;
} /* Literal.String.Double */
#deepseek_chatbot .highlight .se {
  color: #ae81ff;
} /* Literal.String.Escape */
#deepseek_chatbot .highlight .sh {
  color: #e6db74;
} /* Literal.String.Heredoc */
#deepseek_chatbot .highlight .si {
  color: #e6db74;
} /* Literal.String.Interpol */
#deepseek_chatbot .highlight .sx {
  color: #e6db74;
} /* Literal.String.Other */
#deepseek_chatbot .highlight .sr {
  color: #e6db74;
} /* Literal.String.Regex */
#deepseek_chatbot .highlight .s1 {
  color: #e6db74;
} /* Literal.String.Single */
#deepseek_chatbot .highlight .ss {
  color: #e6db74;
} /* Literal.String.Symbol */
#deepseek_chatbot .highlight .bp {
  color: #f8f8f2;
} /* Name.Builtin.Pseudo */
#deepseek_chatbot .highlight .fm {
  color: #a6e22e;
} /* Name.Function.Magic */
#deepseek_chatbot .highlight .vc {
  color: #f8f8f2;
} /* Name.Variable.Class */
#deepseek_chatbot .highlight .vg {
  color: #f8f8f2;
} /* Name.Variable.Global */
#deepseek_chatbot .highlight .vi {
  color: #f8f8f2;
} /* Name.Variable.Instance */
#deepseek_chatbot .highlight .vm {
  color: #f8f8f2;
} /* Name.Variable.Magic */
#deepseek_chatbot .highlight .il {
  color: #ae81ff;
} /* Literal.Number.Integer.Long */
serve/assets/custom.js
ADDED
@@ -0,0 +1,22 @@
/**
 * Copyright (c) 2023-2024 DeepSeek.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

// custom javascript here
serve/assets/favicon.ico
ADDED
serve/chat_utils.py
ADDED
@@ -0,0 +1,497 @@
"""
From https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
"""

import dataclasses
import logging
import copy
from enum import IntEnum, auto
from typing import Dict, List
import base64

import gradio as gr
import torch
import os
from .utils import pil_to_base64
import mimetypes
IMAGE_TOKEN = "<image>"
logger = logging.getLogger("gradio_logger")

import cv2
import base64
import tempfile
import os
import imageio


def compress_video_to_base64(
    video_path: str,
    max_frames: int = 600,
    resolution: tuple = (960, 540),
    target_crf: int = 28
) -> str:
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise RuntimeError(f"Cannot open video: {video_path}")

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or None
    original_fps = cap.get(cv2.CAP_PROP_FPS) or None

    if not total_frames or not original_fps:
        cap.release()
        raise RuntimeError("Cannot get the video frame count or frame rate; please check the video file or use ffprobe.")

    step = max(1, total_frames // max_frames)
    new_fps = max(1, round(original_fps / step))

    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        tmp_path = tmp.name

    writer = imageio.get_writer(
        tmp_path,
        fps=new_fps,
        codec='libx264',
        ffmpeg_params=[
            '-crf', str(target_crf),
            '-pix_fmt', 'yuv420p'
        ]
    )

    frame_idx = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if frame_idx % step == 0:
            small = cv2.resize(frame, resolution)
            writer.append_data(cv2.cvtColor(small, cv2.COLOR_BGR2RGB))
        frame_idx += 1

    cap.release()
    writer.close()

    with open(tmp_path, "rb") as f:
        data = f.read()
    os.remove(tmp_path)
    return base64.b64encode(data).decode("utf-8")


class SeparatorStyle(IntEnum):
    """Separator styles."""

    PLAIN = auto()
    ALIGNMENT = auto()
    KIMI_VL = auto()


@dataclasses.dataclass
class Conversation:
    """A class that manages prompt templates and keeps all conversation history."""

    # The name of this template
    name: str
    # The template of the system prompt
    system_template: str = "{system_message}"
    # The system message
    system_message: str = ""
    # The names of two roles
    roles: List[str] = (("USER", "ASSISTANT"),)
    # All messages. Each item is (role, message).
    messages: List[List[str]] = ()
    # The number of few shot examples
    offset: int = 0
    # The separator style and configurations
    sep_style: SeparatorStyle = SeparatorStyle.PLAIN
    sep: str = "\n"
    sep2: str = None
    # Stop criteria (the default one is EOS token)
    stop_str: str = None
    # Stops generation if meeting any token in this list
    stop_token_ids: List[int] = None

    def get_prompt(self) -> str:
        """Get the prompt for generation."""
        system_prompt = self.system_template.format(system_message=self.system_message)
        if self.sep_style == SeparatorStyle.PLAIN:
            seps = [self.sep, self.sep2]
            ret = ""
            for i, (role, message) in enumerate(self.messages):
                if message:
                    if type(message) is tuple:
                        message = message[0]
                    if i % 2 == 0:
                        ret += message + seps[i % 2]
                    else:
                        ret += message + seps[i % 2]
                else:
                    ret += ""
            return ret
        elif self.sep_style == SeparatorStyle.ALIGNMENT:
            seps = [self.sep, self.sep2]
            ret = ""
            for i, (role, message) in enumerate(self.messages):
                if message:
                    if type(message) is tuple:
                        message, _, _ = message
                    if i % 2 == 0:
                        ret += '<image>\n' + seps[i % 2]
                    else:
                        ret += message + seps[i % 2]
                else:
                    ret += ""
            return ret
        elif self.sep_style == SeparatorStyle.KIMI_VL:
            seps = [self.sep, self.sep2]
            if system_prompt == "" or system_prompt is None:
                ret = ""
            else:
                ret = system_prompt + seps[0]
            for i, (role, message) in enumerate(self.messages):
                if message:
                    if type(message) is tuple:
                        message = message[0]

                    if role == "user":
                        ret += message + self.sep
                    else:
                        if self.sep2 is not None:
                            ret += message + self.sep2
                        else:
                            ret += message
                else:
                    ret = ret
            return ret
        else:
            raise ValueError(f"Invalid style: {self.sep_style}")

    def set_system_message(self, system_message: str):
        """Set the system message."""
        self.system_message = system_message

    def append_message(self, role: str, message: str):
        """Append a new message."""
        self.messages.append([role, message])

    def update_last_message(self, message: str):
        """Update the last output.

        The last message is typically set to be None when constructing the prompt,
        so we need to update it in-place after getting the response from a model.
        """
        self.messages[-1][1] = message

    def reset_message(self):
        """Reset a new message."""
        self.messages = []

    def to_gradio_chatbot(self):
        """Convert the conversation to gradio chatbot format."""
        ret = []
        for i, (role, msg) in enumerate(self.messages[self.offset :]):
            if i % 2 == 0:
                ret.append([msg, None])
            else:
                ret[-1][-1] = msg
        return ret

    def to_openai_api_messages(self):
        """Convert the conversation to OpenAI chat completion format."""
        system_prompt = self.system_template.format(system_message=self.system_message)
        ret = [{"role": "system", "content": system_prompt}]

        for i, (_, msg) in enumerate(self.messages[self.offset :]):
            if i % 2 == 0:
                ret.append({"role": "user", "content": msg})
            else:
                if msg is not None:
                    ret.append({"role": "assistant", "content": msg})
        return ret

    def copy(self):
        return Conversation(
            name=self.name,
            system_template=self.system_template,
            system_message=self.system_message,
            roles=self.roles,
            messages=[[x, y] for x, y in self.messages],
            offset=self.offset,
            sep_style=self.sep_style,
            sep=self.sep,
            sep2=self.sep2,
            stop_str=self.stop_str,
            stop_token_ids=self.stop_token_ids,
        )

    def dict(self):
        return {
            "template_name": self.name,
            "system_message": self.system_message,
            "roles": self.roles,
            "messages": self.messages,
            "offset": self.offset,
        }


# A global registry for all conversation templates
conv_templates: Dict[str, Conversation] = {}


def register_conv_template(template: Conversation, override: bool = False):
    """Register a new conversation template."""
    if not override:
        assert template.name not in conv_templates, f"{template.name} has been registered."

    conv_templates[template.name] = template


def get_conv_template(name: str) -> Conversation:
    """Get a conversation template."""
    return conv_templates[name].copy()


register_conv_template(
    Conversation(
        name="plain",
        system_template="",
        system_message="",
        roles=("", ""),
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.PLAIN,
        sep="",
        sep2="",
        stop_token_ids=[100001],
        stop_str=['</s>'],
    )
)


register_conv_template(
    Conversation(
        name="alignment",
        system_template="",
        system_message="",
        roles=("", ""),
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.ALIGNMENT,
        sep="",
        sep2="",
        stop_token_ids=[100001],
        stop_str=['</s>'],
    )
)

register_conv_template(
    Conversation(
        name="kimi-vl",
        system_template="{system_message}",
        system_message="You are a helpful assistant",
        roles=("user", "assistant"),
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.KIMI_VL,
        sep="<|im_end|>",
        sep2=None,
        stop_token_ids=None,
        stop_str=["<|im_end|>"],
    )
)


def new_chat_template(sft_format: str = "kimi-vl"):
    return get_conv_template(sft_format)


def get_prompt(conv: Conversation) -> str:
    """Get the prompt for generation."""
    return conv.get_prompt()


def generate_prompt_with_history(text, images, history, processor, max_length=2048):
    """
    Generate a prompt with the chat history.

    Args:
        text (str): The text prompt.
        images (list[PIL.Image.Image]): The image prompt.
        history (list): List of previous conversation messages.
        processor (KimiVLProcessor): The chat processor used for encoding the prompt.
        max_length (int): The maximum length of the prompt.
    """
    global IMAGE_TOKEN

    user_role_ind = 0
    bot_role_ind = 1

    # Initialize conversation
    conversation = new_chat_template(sft_format="kimi-vl")

    if history:
        conversation.messages = history

    if images is not None and len(images) > 0:
        # num_image_tags = text.count(IMAGE_TOKEN)
        # num_images = len(images)
        # if num_images > num_image_tags:
        #     pad_image_tags = num_images - num_image_tags
        #     image_tokens = "\n".join([IMAGE_TOKEN] * pad_image_tags)

        #     # append the <image> in a new line after the text prompt
        #     text = image_tokens + "\n" + text
        # elif num_images < num_image_tags:
        #     remove_image_tags = num_image_tags - num_images
        #     text = text.replace(IMAGE_TOKEN, "", remove_image_tags)

        print(f"prompt = {text}, len(images) = {len(images)}")
        text = (text, images)

    conversation.append_message(conversation.roles[user_role_ind], text)
    conversation.append_message(conversation.roles[bot_role_ind], "")

    # Create a copy of the conversation to avoid history truncation in the UI
    conversation_copy = conversation.copy()
    logger.info("=" * 80)
    logger.info(get_prompt(conversation))

    rounds = len(conversation.messages) // 2

    for _ in range(rounds):
        current_prompt = get_prompt(conversation)
        assert isinstance(current_prompt, str) and len(current_prompt) > 0, f"current_prompt = {current_prompt}"
        if torch.tensor(processor.tokenizer.encode(current_prompt)).size(-1) <= max_length:
            return conversation_copy

        if len(conversation.messages) % 2 != 0:
            gr.Error("The messages between user and assistant are not paired.")
            return

        try:
            for _ in range(2):  # pop out two messages in a row
                conversation.messages.pop(0)
        except IndexError:
            gr.Error("Input text processing failed, unable to respond in this round.")
            return None

    gr.Error("Prompt could not be generated within max_length limit.")
    return None


def convert_conversation_to_prompts(conversation: Conversation):
    """
    Convert the conversation to prompts.
    """
    conv_prompts = []

    last_image = None

    messages = conversation.messages
    for i in range(0, len(messages), 2):
        if isinstance(messages[i][1], tuple):
            text, images = messages[i][1]
            last_image = images[-1]
        else:
            text, images = messages[i][1], []

        prompt = {"role": messages[i][0], "content": text, "images": images}
        response = {"role": messages[i + 1][0], "content": messages[i + 1][1]}
        conv_prompts.extend([prompt, response])

    return conv_prompts, last_image


def to_gradio_chatbot2(conversation: Conversation) -> list:
    """Convert the conversation to gradio chatbot format."""
    ret = []
    for i, (_, msg) in enumerate(conversation.messages[conversation.offset :]):
        if i % 2 == 0:
            if type(msg) is tuple:
                msg, images = copy.deepcopy(msg)

                if isinstance(images, list):
                    img_str = ""
                    for j, image in enumerate(images):
                        if isinstance(image, str):
                            with open(image, "rb") as f:
                                data = f.read()
                            img_b64_str = base64.b64encode(data).decode()
                            image_str = (
                                f'<img src="data:image/png;base64,{img_b64_str}" '
                                f'alt="user upload image" style="max-width: 300px; height: auto;" />'
                            )
                        else:
                            image_str = pil_to_base64(image, f"user upload image_{j}", max_size=800, min_size=400)

                        img_str += image_str
                    msg = img_str + msg
                else:
                    pass

            ret.append([msg, None])
        else:
            ret[-1][-1] = msg
    return ret
| 437 |
+
|
| 438 |
+
def to_gradio_chatbot(conversation: Conversation) -> list:
|
| 439 |
+
"""Convert the conversation to gradio chatbot format, supporting images and video."""
|
| 440 |
+
ret = []
|
| 441 |
+
for i, (_, msg) in enumerate(conversation.messages[conversation.offset :]):
|
| 442 |
+
# User message
|
| 443 |
+
if i % 2 == 0:
|
| 444 |
+
if isinstance(msg, tuple):
|
| 445 |
+
msg_text, media = copy.deepcopy(msg)
|
| 446 |
+
media_str = ""
|
| 447 |
+
|
| 448 |
+
# Handle list of media items
|
| 449 |
+
if isinstance(media, list):
|
| 450 |
+
items = media
|
| 451 |
+
else:
|
| 452 |
+
items = [media]
|
| 453 |
+
|
| 454 |
+
for j, item in enumerate(items):
|
| 455 |
+
# If string path, determine type
|
| 456 |
+
if isinstance(item, str) and (not item.endswith((".mp4", ".mov", ".avi", ".webm"))):
|
| 457 |
+
mime, _ = mimetypes.guess_type(item)
|
| 458 |
+
with open(item, "rb") as f:
|
| 459 |
+
data = f.read()
|
| 460 |
+
b64 = base64.b64encode(data).decode()
|
| 461 |
+
|
| 462 |
+
if mime and mime.startswith("image/"):
|
| 463 |
+
media_str += (
|
| 464 |
+
f'<img src="data:{mime};base64,{b64}" '
|
| 465 |
+
f'alt="user upload image_{j}" '
|
| 466 |
+
f'style="max-width:300px;height:auto;" />'
|
| 467 |
+
)
|
| 468 |
+
else:
|
| 469 |
+
# Fallback to link
|
| 470 |
+
media_str += f'<a href="{item}" target="_blank">{item}</a>'
|
| 471 |
+
elif isinstance(item, str) and (item.endswith((".mp4", ".mov", ".avi", ".webm"))):
|
| 472 |
+
try:
|
| 473 |
+
b64 = compress_video_to_base64(item)
|
| 474 |
+
media_str += (
|
| 475 |
+
f'<video controls style="max-width:300px;height:auto;" '
|
| 476 |
+
f'src="data:video/mp4;base64,{b64}"></video>'
|
| 477 |
+
)
|
| 478 |
+
except:
|
| 479 |
+
pass
|
| 480 |
+
|
| 481 |
+
# If PIL image
|
| 482 |
+
else:
|
| 483 |
+
media_str += pil_to_base64(item, f"user upload image_{j}", max_size=800, min_size=400)
|
| 484 |
+
|
| 485 |
+
msg = media_str + msg_text
|
| 486 |
+
|
| 487 |
+
# Append user side
|
| 488 |
+
ret.append([msg, None])
|
| 489 |
+
else:
|
| 490 |
+
# Assistant side, fill previous tuple
|
| 491 |
+
ret[-1][-1] = msg
|
| 492 |
+
return ret
|
| 493 |
+
|
| 494 |
+
|
| 495 |
+
def to_gradio_history(conversation: Conversation):
|
| 496 |
+
"""Convert the conversation to gradio history format."""
|
| 497 |
+
return conversation.messages[conversation.offset :]
|
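Taken together, these helpers turn UI state into model-ready prompts. A minimal sketch of the intended flow, assuming a loaded `processor` and a PIL `user_image` (both hypothetical names, not part of this file):

# Sketch only: how the chat_utils helpers compose in a request cycle.
conversation = generate_prompt_with_history(
    "Describe this image.",   # user text
    [user_image],             # list of PIL images
    history=[],               # previous (role, message) pairs
    processor=processor,
    max_length=2048,
)
conv_prompts, last_image = convert_conversation_to_prompts(conversation)
chatbot_pairs = to_gradio_chatbot(conversation)  # HTML-ready [user, assistant] pairs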
serve/examples.py
ADDED
@@ -0,0 +1,73 @@
import os
import io
import base64
import decord
from PIL import Image

EXAMPLES_LIST = [
    [
        ["videos/demo1.mp4"],
        "What's the third criteria promoted by Nick Mortimer that a continent must meet?"
    ],
    [
        ["videos/demo2.mp4"],
        "When does the deer appear in the video? Give me the specific time range in seconds."
    ]
]


def display_example(file_list, root_dir: str = None):
    media_html = ""
    for _, file_path in enumerate(file_list):
        if root_dir is not None:
            full_file_path = os.path.join(root_dir, file_path)
        else:
            full_file_path = file_path

        file_ext = os.path.splitext(full_file_path)[1].lower()

        # For video files, extract the first frame as a preview image
        if file_ext in [".mp4", ".mov", ".avi", ".webm"]:
            try:
                # Read the video with decord
                vr = decord.VideoReader(full_file_path)
                # Extract the first frame (index 0)
                frame = vr[0].asnumpy()

                # Convert the NumPy array (RGB) to a PIL Image
                image = Image.fromarray(frame)

                buffered = io.BytesIO()
                image.save(buffered, format="PNG")
                img_b64_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

                # Display the extracted frame with an img tag
                media_str = f'<img src="data:image/png;base64,{img_b64_str}" alt="Video preview for {file_path}" style="height:80px; margin-right: 10px;" />'
            except Exception as e:
                media_str = f"<span>Error processing video {file_path}: {e}</span>"
        else:
            # Assume it's an image
            image = Image.open(full_file_path)
            buffered = io.BytesIO()
            image.save(buffered, format="PNG", quality=100)
            img_b64_str = base64.b64encode(buffered.getvalue()).decode()
            media_str = f'<img src="data:image/png;base64,{img_b64_str}" alt="{file_path}" style="height:80px; margin-right: 10px;" />'
        media_html += media_str

    result_html = f"""
    <div style="display: flex; align-items: center; margin-bottom: 10px;">
        <div style="flex: 1; margin-right: 10px;">{media_html}</div>
    </div>
    """

    return result_html


def get_examples(root_dir: str = None):
    examples = []
    for files, texts in EXAMPLES_LIST:
        examples.append([files, display_example(files, root_dir), texts])

    return examples
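Each entry returned by get_examples is a [file_list, preview_html, prompt] triple. A minimal sketch of rendering those triples in a Gradio page (the wiring below is illustrative, not how app.py necessarily consumes them):

# Sketch: rendering precomputed previews; assumes the videos/ directory exists.
import gradio as gr
from serve.examples import get_examples

examples = get_examples(root_dir=".")
with gr.Blocks() as demo:
    for files, preview_html, prompt in examples:
        gr.HTML(preview_html)                 # first-frame thumbnail(s)
        gr.Markdown(f"**Prompt:** {prompt}")

demo.launch()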
serve/frontend.py
ADDED
@@ -0,0 +1,126 @@
import logging
import os
from typing import List, Tuple

import gradio as gr

from serve.utils import convert_asis, convert_mdtext, detect_converted_mark

ROOT_PATH = os.path.dirname(os.path.abspath(__file__))


small_and_beautiful_theme = gr.themes.Soft(
    primary_hue=gr.themes.Color(
        name="nvidia-green",
        c50="#F3FAE6",
        c100="#E3F3C2",
        c200="#C9E98D",
        c300="#AFDD59",
        c400="#95D124",
        c500="#76B900",  # NVIDIA green
        c600="#6AA600",
        c700="#5C9300",
        c800="#4F8000",
        c900="#426D00",
        c950="#2E5500",
    ),
    secondary_hue=gr.themes.Color(
        c50="#d3e3d3",
        c100="#bfd6bf",
        c200="#a9c7a9",
        c300="#93b893",
        c400="#7da97d",
        c500="#689A68",
        c600="#538B53",
        c700="#3E7C3E",
        c800="#296D29",
        c900="#145E14",
        c950="#0A4A0A",
    ),
    neutral_hue=gr.themes.Color(
        name="gray",
        c50="#f6f7f8",
        c100="#F2F2F2",
        c200="#e5e7eb",
        c300="#d1d5db",
        c400="#B2B2B2",
        c500="#808080",
        c600="#636363",
        c700="#515151",
        c800="#393939",
        c900="#2B2B2B",
        c950="#171717",
    ),
    radius_size=gr.themes.sizes.radius_sm,
).set(
    button_primary_background_fill_dark="*primary_600",
    button_primary_border_color_dark="*primary_600",
    button_primary_text_color="white",
    button_primary_text_color_dark="white",
    button_secondary_background_fill="*neutral_100",
    button_secondary_background_fill_hover="*neutral_50",
    button_secondary_background_fill_dark="*neutral_900",
    button_secondary_text_color="*neutral_800",
    button_secondary_text_color_dark="white",
    block_title_background_fill_dark="*primary_900",
    block_label_background_fill_dark="*primary_900",
    input_background_fill="#F6F6F6",
)


def compact_text_chunks(self, prompt, text_chunks: List[str]) -> List[str]:
    logging.debug("Compacting text chunks...🚀🚀🚀")
    combined_str = [c.strip() for c in text_chunks if c.strip()]
    combined_str = [f"[{index+1}] {c}" for index, c in enumerate(combined_str)]
    combined_str = "\n\n".join(combined_str)
    # resplit based on self.max_chunk_overlap
    text_splitter = self.get_text_splitter_given_prompt(prompt, 1, padding=1)
    return text_splitter.split_text(combined_str)


def postprocess(y: List[Tuple[str | None, str | None]]) -> List[Tuple[str | None, str | None]]:
    """
    Parameters:
        y: List of tuples representing the message and response pairs. Each message and response should be a string, which may be in Markdown format.
    Returns:
        List of tuples representing the message and response. Each message and response will be a string of HTML.
    """
    if y is None or y == []:
        return []
    temp = []
    for x in y:
        user, bot = x
        if not detect_converted_mark(user):
            user = convert_asis(user)
        if not detect_converted_mark(bot):
            bot = convert_mdtext(bot)
        temp.append((user, bot))
    return temp


custom_js_path = os.path.join(ROOT_PATH, "assets/custom.js")
kelpy_codos_path = os.path.join(ROOT_PATH, "assets/Kelpy-Codos.js")

with (
    open(custom_js_path, "r", encoding="utf-8") as f,
    open(kelpy_codos_path, "r", encoding="utf-8") as f2,
):
    customJS = f.read()
    kelpyCodos = f2.read()


def reload_javascript():
    print("Reloading javascript...")
    js = f"<script>{customJS}</script><script>{kelpyCodos}</script>"

    def template_response(*args, **kwargs):
        res = GradioTemplateResponseOriginal(*args, **kwargs)
        res.body = res.body.replace(b"</html>", f"{js}</html>".encode("utf8"))
        res.init_headers()
        return res

    gr.routes.templates.TemplateResponse = template_response


GradioTemplateResponseOriginal = gr.routes.templates.TemplateResponse
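reload_javascript injects the custom scripts by monkey-patching Gradio's TemplateResponse so every served page gets the JS appended before </html>. A minimal sketch of the assumed call order (the Blocks body here is hypothetical; only the patch-before-serving order matters):

# Sketch: patch the template first, then build and launch the UI.
import gradio as gr
from serve.frontend import reload_javascript, small_and_beautiful_theme

reload_javascript()  # installs the JS-injecting TemplateResponse
with gr.Blocks(theme=small_and_beautiful_theme) as demo:
    gr.Markdown("Demo UI goes here")
demo.launch()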
serve/gradio_utils.py
ADDED
@@ -0,0 +1,95 @@
"""
Gradio utils for the Kimi-VL application.
"""

import functools
from typing import Callable
import traceback

import gradio as gr


IMAGE_TOKEN = "<image>"


def transfer_input(input_text, input_images):
    """
    Move the submitted text and images into the generation state and clear the input widgets.
    """
    return (input_text, input_images, gr.update(value=""), gr.update(value=None), gr.Button(visible=True))


def delete_last_conversation(chatbot, full_history):
    """
    Delete the last conversation from the chatbot and history.

    Args:
        chatbot (list): The chatbot list.
        full_history (dict): The full history state; the message list lives under "context".
    """
    history = full_history["context"]
    if len(history) % 2 != 0:
        # gr.Warning is displayed without aborting, so the state can still be returned
        gr.Warning("history length is not even")
        return (
            chatbot,
            full_history,
            "Delete Done",
        )

    if len(chatbot) > 0:
        chatbot.pop()

    if len(history) > 0 and len(history) % 2 == 0:
        history.pop()
        history.pop()

    full_history["context"] = history
    return (
        chatbot,
        full_history,
        "Delete Done",
    )


def reset_state():
    return [], {}, "Reset Done"


def reset_textbox():
    return gr.update(value=""), ""


def cancel_outputing():
    return "Stop Done"


class State:
    interrupted = False

    def interrupt(self):
        self.interrupted = True

    def recover(self):
        self.interrupted = False


shared_state = State()


def wrap_gen_fn(gen_fn: Callable):
    """
    Wrap the generator function to handle errors.
    """

    @functools.wraps(gen_fn)
    def wrapped_gen_fn(prompt, *args, **kwargs):
        try:
            yield from gen_fn(prompt, *args, **kwargs)
        except gr.Error as g_err:
            traceback.print_exc()
            raise g_err
        except Exception as e:
            traceback.print_exc()
            raise gr.Error(f"Failed to generate text: {e}") from e

    return wrapped_gen_fn
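wrap_gen_fn is meant to decorate the app's streaming handlers so any unexpected exception is re-raised as a gr.Error that Gradio can display. A minimal sketch with a hypothetical predict generator:

# Sketch: guarding a streaming handler with wrap_gen_fn.
from serve.gradio_utils import wrap_gen_fn

@wrap_gen_fn
def predict(prompt, *args, **kwargs):
    # ... run the model and stream partial outputs ...
    for chunk in ["partial ", "answer"]:
        yield chunk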
serve/inference.py
ADDED
@@ -0,0 +1,268 @@
import logging
import re
from threading import Thread
from typing import List, Optional
import os
import torch
from transformers import (
    AutoModel,
    AutoProcessor,
    AutoConfig,
    StoppingCriteria,
    StoppingCriteriaList,
    TextIteratorStreamer,
)
from PIL import Image
from .chat_utils import Conversation, get_conv_template

logger = logging.getLogger(__name__)


def load_model_from_nv(model_path: str = "nvidia/Eagle-2-8B"):
    token = os.environ.get("HF_TOKEN")
    # hotfix the model to use flash attention 2
    config = AutoConfig.from_pretrained(model_path, trust_remote_code=True, token=token)
    config._attn_implementation = "flash_attention_2"
    config.vision_config._attn_implementation = "flash_attention_2"
    config.text_config._attn_implementation = "flash_attention_2"
    print("Successfully set the attn_implementation to flash_attention_2")

    if token:
        logger.info(f"token = {token[:4]}***{token[-2:]}")
    model = AutoModel.from_pretrained(
        model_path,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,
        attn_implementation="flash_attention_2",
        token=token,
    )
    model.to("cuda")
    processor = AutoProcessor.from_pretrained(model_path, config=config, trust_remote_code=True, use_fast=True, token=token)

    return model, processor


def load_model_from_eagle(model_path: str = "NVEagle/Eagle2-8B"):
    token = os.environ.get("HF_TOKEN")
    if token:
        logger.info(f"token = {token[:4]}***{token[-2:]}")

    # hotfix the model to use flash attention 2
    config = AutoConfig.from_pretrained(model_path, trust_remote_code=True, token=token)
    config._attn_implementation = "flash_attention_2"
    config.vision_config._attn_implementation = "flash_attention_2"
    config.text_config._attn_implementation = "flash_attention_2"
    print("Successfully set the attn_implementation to flash_attention_2")

    model = AutoModel.from_pretrained(
        model_path,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,
        attn_implementation="flash_attention_2",
        token=token,
    )
    model.to("cuda")
    processor = AutoProcessor.from_pretrained(model_path, config=config, trust_remote_code=True, use_fast=True, token=token)

    return model, processor


def load_model(model_path: str = "nvidia/Eagle2-8B"):
    try:
        model, processor = load_model_from_nv(model_path)
    except Exception as e:
        logger.error(f"Failed to load model from HF, trying to load from eagle: {e}")
        model, processor = load_model_from_eagle()
    return model, processor


class StoppingCriteriaSub(StoppingCriteria):
    def __init__(self, stops=[], encounters=1):
        super().__init__()
        self.stops = [stop.to("cuda") for stop in stops]

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs):
        for stop in self.stops:
            if input_ids.shape[-1] < len(stop):
                continue
            if torch.all((stop == input_ids[0][-len(stop):])).item():
                return True

        return False


def preprocess(
    messages: list[dict],
    processor,
    video_nframes: int = 16,
):
    """
    Build messages from the conversations and images.
    """
    # system prompt prepended to every request
    results = [
        {
            "role": "system",
            "content": """You are Eagle 2, a cutting-edge large language model developed by NVIDIA. You are highly capable, efficient, and aligned, specialized in understanding complex multimodal inputs and providing expert-level responses across domains.
Always be concise, accurate, and helpful. You respond like a reliable co-pilot to researchers, developers, and engineers, offering deep technical insight, step-by-step reasoning, and practical suggestions.
You can interpret long contexts, follow nuanced instructions, and dynamically adjust your tone to match the user's intent. If the user does not specify a tone, default to a professional, technical, yet friendly style.
You understand you are Eagle 2, and may refer to yourself as such when asked."""}
    ]
    # get texts from conversations
    # converstion = get_conv_template(sft_format)
    # only use the last 3 round of messages
    # latest_messages = messages[-3:]

    all_images_num = 0
    for mid, message in enumerate(messages):
        if message["role"] == "user":
            record = {
                "role": message["role"],
                "content": [],
            }
            if "images" in message:
                per_round_images = message["images"]
                for image in per_round_images:
                    if isinstance(image, Image.Image) and all_images_num < 128:
                        record["content"].append(
                            {
                                "type": "image",
                                "image": image,
                            }
                        )
                        all_images_num += 1
                    elif isinstance(image, str) and image.endswith((".jpeg", ".jpg", ".png", ".gif")) and all_images_num < 128:
                        record["content"].append(
                            {
                                "type": "image",
                                "image": image,
                            }
                        )
                        all_images_num += 1
                    elif isinstance(image, str) and image.endswith((".mp4", ".mov", ".avi", ".webm")) and all_images_num < 128 - video_nframes:
                        record["content"].append(
                            {
                                "type": "video",
                                "video": image,
                                "nframes": video_nframes,
                            }
                        )
                        all_images_num += video_nframes
            if "content" in message:
                record["content"].append(
                    {
                        "type": "text",
                        "text": str(message["content"]).strip(),
                    }
                )
            results.append(record)
        elif message["role"] == "assistant":
            formatted_answer = message["content"].strip()
            # The model may emit a reasoning block wrapped in ◁think▷ ... ◁/think▷ before the
            # final answer (e.g. "◁think▷The user just said hello; keep the reply open and
            # friendly.◁/think▷Hello! How can I help you?"). Drop everything up to and
            # including the closing think token so only the final answer re-enters history.
            # FIXME: this is a hack to remove the thinking texts
            # formatted_answer = re.sub(r"◁think▷.*◁/think▷", "", formatted_answer)
            think_end_token = '◁/think▷'
            formatted_answer = formatted_answer.split(think_end_token)[-1]
            results.append(
                {
                    "role": message["role"],
                    "content": [
                        {
                            "type": "text",
                            "text": formatted_answer,
                        }
                    ],
                }
            )
            assert (
                formatted_answer.count(processor.image_token) == 0
            ), f"there should be no {processor.image_token} in the assistant's reply, but got {messages}"

    # print(f"messages = {results}")
    text = processor.apply_chat_template(results, add_generation_prompt=False)
    # print(f"raw text = {text}")

    image_inputs, video_inputs, video_kwargs = processor.process_vision_info(results, return_video_kwargs=True)

    inputs = processor(
        images=image_inputs,
        videos=video_inputs,
        text=[text],
        return_tensors="pt",
        padding=True,
        truncation=True,
        videos_kwargs=video_kwargs,
    )
    return inputs


@torch.no_grad()
@torch.inference_mode()
def eagle_vl_generate(
    model: torch.nn.Module,
    processor: AutoProcessor,
    conversations: list[dict],
    stop_words: list,
    max_length: int = 256,
    temperature: float = 1.0,
    top_p: float = 1.0,
    chunk_size: int = -1,
    video_nframes: int = 16,
):
    # convert conversation to inputs
    print(f"conversations = {conversations}")
    inputs = preprocess(conversations, processor=processor, video_nframes=video_nframes)
    inputs = inputs.to(model.device)

    return generate(
        model,
        processor,
        inputs,
        max_gen_len=max_length,
        temperature=temperature,
        top_p=top_p,
        stop_words=stop_words,
        chunk_size=chunk_size,
    )


def generate(
    model,
    processor,
    inputs,
    max_gen_len: int = 256,
    temperature: float = 0,
    top_p: float = 0.95,
    stop_words: List[str] = [],
    chunk_size: int = -1,
):
    """Stream the text output from the multimodality model with prompt and image inputs."""
    tokenizer = processor.tokenizer
    stop_words_ids = [torch.tensor(tokenizer.encode(stop_word)) for stop_word in stop_words]
    stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)

    kwargs = dict(
        **inputs,
        max_new_tokens=max_gen_len,
        streamer=streamer,
        stopping_criteria=stopping_criteria,
    )

    if temperature > 0:
        kwargs.update(
            {
                "do_sample": True,
                "top_p": top_p,
                "temperature": temperature,
            }
        )
    else:
        kwargs["do_sample"] = False

    # run generation on a worker thread; the streamer yields tokens as they arrive
    thread = Thread(target=model.generate, kwargs=kwargs)
    thread.start()

    yield from streamer
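End to end, the module is used by loading the model once and consuming the streamer as a generator. A minimal sketch, assuming HF_TOKEN is set, a CUDA device is available, and using an illustrative message payload shaped the way preprocess expects:

# Sketch: one-shot streaming generation with the helpers above.
model, processor = load_model("nvidia/Eagle2-8B")

messages = [
    {"role": "user", "content": "Describe the video.", "images": ["videos/demo1.mp4"]},
]
for chunk in eagle_vl_generate(
    model, processor, messages,
    stop_words=["<|im_end|>"], max_length=256, temperature=0.7, top_p=0.95,
):
    print(chunk, end="", flush=True)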
serve/utils.py
ADDED
@@ -0,0 +1,290 @@
from __future__ import annotations

import html
import logging
import io
import os
import re
import base64
import time
from PIL import Image, ImageDraw, ImageFont

import mdtex2html
from markdown import markdown
from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import ClassNotFound, get_lexer_by_name, guess_lexer


ALREADY_CONVERTED_MARK = "<!-- ALREADY CONVERTED BY PARSER. -->"
BOX2COLOR = {
    0: (255, 0, 0),
    1: (0, 255, 0),
    2: (0, 0, 255),
}
MAX_IMAGE_SIZE = 1024
MIN_IMAGE_SIZE = 1024
logger = logging.getLogger("gradio_logger")


def configure_logger(log_dir: str = "logs"):
    logger = logging.getLogger("gradio_logger")
    logger.setLevel(logging.DEBUG)

    timestr = time.strftime("%Y%m%d-%H%M%S")
    os.makedirs(log_dir, exist_ok=True)
    file_handler = logging.FileHandler(f"{log_dir}/{timestr}_gradio_log.log")
    console_handler = logging.StreamHandler()

    formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    console_handler.setFormatter(formatter)
    file_handler.setFormatter(formatter)

    console_handler.setLevel(logging.INFO)
    file_handler.setLevel(logging.INFO)

    logger.addHandler(console_handler)
    logger.addHandler(file_handler)

    return logger


def strip_stop_words(x, stop_words):
    for w in stop_words:
        if w in x:
            return x[: x.index(w)].strip()
    return x.strip()


def format_output(history, text, x):
    updated_history = history + [[text, x]]
    a = [[y[0], convert_to_markdown(y[1])] for y in updated_history]
    return a, updated_history


def markdown_to_html_with_syntax_highlight(md_str):  # deprecated
    def replacer(match):
        lang = match.group(1) or "text"
        code = match.group(2)

        try:
            lexer = get_lexer_by_name(lang, stripall=True)
        except ValueError:
            lexer = get_lexer_by_name("text", stripall=True)

        formatter = HtmlFormatter()
        highlighted_code = highlight(code, lexer, formatter)

        return f'<pre><code class="{lang}">{highlighted_code}</code></pre>'

    code_block_pattern = r"```(\w+)?\n([\s\S]+?)\n```"
    md_str = re.sub(code_block_pattern, replacer, md_str, flags=re.MULTILINE)

    html_str = markdown(md_str)
    return html_str


def normalize_markdown(md_text: str) -> str:  # deprecated
    lines = md_text.split("\n")
    normalized_lines = []
    inside_list = False

    for i, line in enumerate(lines):
        if re.match(r"^(\d+\.|-|\*|\+)\s", line.strip()):
            if not inside_list and i > 0 and lines[i - 1].strip() != "":
                normalized_lines.append("")
            inside_list = True
            normalized_lines.append(line)
        elif inside_list and line.strip() == "":
            if i < len(lines) - 1 and not re.match(r"^(\d+\.|-|\*|\+)\s", lines[i + 1].strip()):
                normalized_lines.append(line)
            continue
        else:
            inside_list = False
            normalized_lines.append(line)

    return "\n".join(normalized_lines)


def convert_mdtext(md_text):
    code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
    inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL)
    code_blocks = code_block_pattern.findall(md_text)
    non_code_parts = code_block_pattern.split(md_text)[::2]

    result = []
    for non_code, code in zip(non_code_parts, code_blocks + [""]):
        if non_code.strip():
            non_code = normalize_markdown(non_code)
            if inline_code_pattern.search(non_code):
                result.append(markdown(non_code, extensions=["tables"]))
            else:
                result.append(mdtex2html.convert(non_code, extensions=["tables"]))
        if code.strip():
            code = f"\n```{code}\n\n```"
            code = markdown_to_html_with_syntax_highlight(code)
            result.append(code)
    result = "".join(result)
    result += ALREADY_CONVERTED_MARK
    return result


def convert_asis(userinput):
    return f'<p style="white-space:pre-wrap;">{html.escape(userinput)}</p>{ALREADY_CONVERTED_MARK}'


def is_stop_word_or_prefix(s: str, stop_words: list) -> bool:
    return any(s.endswith(stop_word) for stop_word in stop_words)


def detect_converted_mark(userinput):
    return bool(userinput.endswith(ALREADY_CONVERTED_MARK))


def detect_language(code):
    first_line = "" if code.startswith("\n") else code.strip().split("\n", 1)[0]
    language = first_line.lower() if first_line else ""
    code_without_language = code[len(first_line):].lstrip() if first_line else code
    return language, code_without_language


def convert_to_markdown(text):
    text = text.replace("$", "&#36;")  # escape dollar signs so they are not rendered as LaTeX
    text = text.replace("\r\n", "\n")

    def replace_leading_tabs_and_spaces(line):
        new_line = []

        for char in line:
            if char == "\t":
                new_line.append("&#9;")
            elif char == " ":
                new_line.append("&nbsp;")
            else:
                break
        return "".join(new_line) + line[len(new_line):]

    markdown_text = ""
    lines = text.split("\n")
    in_code_block = False

    for line in lines:
        if in_code_block is False and line.startswith("```"):
            in_code_block = True
            markdown_text += f"{line}\n"
        elif in_code_block is True and line.startswith("```"):
            in_code_block = False
            markdown_text += f"{line}\n"
        elif in_code_block:
            markdown_text += f"{line}\n"
        else:
            line = replace_leading_tabs_and_spaces(line)
            line = re.sub(r"^(#)", r"\\\1", line)
            markdown_text += f"{line}  \n"

    return markdown_text


def add_language_tag(text):
    def detect_language(code_block):
        try:
            lexer = guess_lexer(code_block)
            return lexer.name.lower()
        except ClassNotFound:
            return ""

    code_block_pattern = re.compile(r"(```)(\w*\n[^`]+```)", re.MULTILINE)

    def replacement(match):
        code_block = match.group(2)
        if match.group(2).startswith("\n"):
            language = detect_language(code_block)
            return f"```{language}{code_block}```" if language else f"```\n{code_block}```"
        else:
            return match.group(1) + code_block + "```"

    text2 = code_block_pattern.sub(replacement, text)
    return text2


def is_variable_assigned(var_name: str) -> bool:
    return var_name in locals()


def pil_to_base64(
    image: Image.Image,
    alt: str = "user upload image",
    resize: bool = True,
    max_size: int = MAX_IMAGE_SIZE,
    min_size: int = MIN_IMAGE_SIZE,
    format: str = "JPEG",
    quality: int = 95,
) -> str:
    """
    Convert a PIL image to an HTML <img> tag with a base64 data URI.
    """
    if resize:
        max_hw, min_hw = max(image.size), min(image.size)
        aspect_ratio = max_hw / min_hw
        shortest_edge = int(min(max_size / aspect_ratio, min_size, min_hw))
        longest_edge = int(shortest_edge * aspect_ratio)
        W, H = image.size
        if H > W:
            H, W = longest_edge, shortest_edge
        else:
            H, W = shortest_edge, longest_edge
        image = image.resize((W, H))

    buffered = io.BytesIO()
    image.save(buffered, format=format, quality=quality)
    img_b64_str = base64.b64encode(buffered.getvalue()).decode()
    # use the actual encoding format in the data URI instead of hard-coding PNG
    img_str = f'<img src="data:image/{format.lower()};base64,{img_b64_str}" alt="{alt}" />'

    return img_str


def parse_ref_bbox(response, image: Image.Image):
    try:
        image = image.copy()
        image_w, image_h = image.size  # PIL size is (width, height)
        draw = ImageDraw.Draw(image)

        ref = re.findall(r'<\|ref\|>.*?<\|/ref\|>', response)
        bbox = re.findall(r'<\|det\|>.*?<\|/det\|>', response)
        assert len(ref) == len(bbox)

        if len(ref) == 0:
            return None

        boxes, labels = [], []
        for box, label in zip(bbox, ref):
            box = box.replace('<|det|>', '').replace('<|/det|>', '')
            label = label.replace('<|ref|>', '').replace('<|/ref|>', '')
            box = box[1:-1]
            for onebox in re.findall(r'\[.*?\]', box):
                boxes.append(eval(onebox))
                labels.append(label)

        for indice, (box, label) in enumerate(zip(boxes, labels)):
            # coordinates are normalized to [0, 999]; rescale to pixels
            box = (
                int(box[0] / 999 * image_w),
                int(box[1] / 999 * image_h),
                int(box[2] / 999 * image_w),
                int(box[3] / 999 * image_h),
            )

            box_color = BOX2COLOR[indice % len(BOX2COLOR.keys())]
            box_width = 3
            draw.rectangle(box, outline=box_color, width=box_width)

            text_x = box[0]
            text_y = box[1] - 20
            text_color = box_color
            font = ImageFont.truetype("eagle_vl/serve/assets/simsun.ttc", size=20)
            draw.text((text_x, text_y), label, font=font, fill=text_color)

        return image
    except Exception as e:
        logger.error(f"Error parsing reference bounding boxes: {e}")
        return None
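The resize contract in pil_to_base64 caps the shorter edge at min_size, shrinking further if the longer edge would exceed max_size at the same aspect ratio. A quick sketch with illustrative values:

# Sketch: embedding a PIL image as an inline <img> tag.
from PIL import Image
from serve.utils import pil_to_base64

img = Image.new("RGB", (1600, 900), color=(118, 185, 0))
tag = pil_to_base64(img, alt="demo", max_size=800, min_size=400)
# shorter edge -> min(800 / (1600/900), 400, 900) = 400 px; longer edge ≈ 711 px
print(tag[:60])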
videos/demo1.mp4
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bdd8b0474a50d8ee91daf2b203f657b54054b5177a40af5b7f8838858a633fec
size 40465978
videos/demo2.mp4
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cdfff3c21bfc92d8dee611795e79fb7a6270fa8dfe3ae87de82c0879dbc0f177
size 18403395