# mistralai/Ministral-3-3B-Instruct-2512
For now, you can only launch the model via vLLM or the Transformers-private branch:

- [vLLM](#vllm)
- [Transformers](#transformers) branch: https://github.com/mistralai/Transformers-private/pull/1/

The architecture change compared with Mistral-Small-3.2 is the use of YaRN with Llama 4 scaling.

Please note that the 3B also has tied embeddings (no separate output layer) to reduce the number of weights. This is not the case for the 8B and 14B.
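You can sanity-check both points from the model config. A minimal sketch, assuming you have already installed a build that knows the architecture (see below) and that the config follows the usual Transformers layout; the attribute names here are assumptions and may differ on the private branch:

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained("mistralai/Ministral-3-3B-Instruct-2512")
# Multimodal configs usually nest the language-model config under `text_config`.
text_config = getattr(config, "text_config", config)

print(text_config.rope_scaling)         # expected: the YaRN / Llama 4 scaling parameters
print(text_config.tie_word_embeddings)  # expected: True for 3B, False for 8B and 14B
```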
## vLLM
1. Install vLLM:

```sh
VLLM_USE_PRECOMPILED=1 uv pip install git+https://github.com/vllm-project/vllm.git
```
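To confirm the build installed correctly, you can optionally print the version:

```sh
python -c "import vllm; print(vllm.__version__)"
```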
2. Launch the server:

```sh
vllm serve mistralai/Ministral-3-3B-Instruct-2512 --tool-call-parser mistral \
  --enable-auto-tool-choice --tensor-parallel-size 1
```
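Once the server is up, a quick health check: the `/v1/models` endpoint should list the model.

```sh
curl http://localhost:8000/v1/models
```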
3. Test it:

```python
from datetime import datetime, timedelta

from huggingface_hub import hf_hub_download
from openai import OpenAI

# Modify OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"

TEMP = 0.15
MAX_TOK = 262144

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)

models = client.models.list()
model = models.data[0].id


def load_system_prompt() -> str:
    file_path = hf_hub_download(repo_id="mistralai/Ministral-3-3B-Instruct-2512", filename="SYSTEM_PROMPT.txt")
    with open(file_path, "r") as file:
        system_prompt = file.read()
    today = datetime.today().strftime("%Y-%m-%d")
    yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
    return system_prompt.format(today=today, yesterday=yesterday)


SYSTEM_PROMPT = load_system_prompt()

image_url = "https://static.wikia.nocookie.net/essentialsdocs/images/7/70/Battle.png/revision/latest?cb=20220523172438"

messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "What action do you think I should take in this situation? List all the possible actions and explain why you think they are good or bad.",
            },
            {"type": "image_url", "image_url": {"url": image_url}},
        ],
    },
]

response = client.chat.completions.create(
    model=model,
    messages=messages,
    temperature=TEMP,
    max_tokens=MAX_TOK,
)

print(response.choices[0].message.content)
```
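Since the server was launched with `--tool-call-parser mistral --enable-auto-tool-choice`, you can also exercise function calling. A minimal sketch continuing the script above; the `get_weather` tool is a hypothetical example for illustration, not part of the model card:

```python
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",  # hypothetical tool, shown only for illustration
            "description": "Get the current weather in a given city.",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string", "description": "Name of the city"},
                },
                "required": ["city"],
            },
        },
    }
]

tool_response = client.chat.completions.create(
    model=model,
    messages=[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": "What is the weather like in Paris right now?"},
    ],
    tools=tools,
    tool_choice="auto",
    temperature=TEMP,
    max_tokens=MAX_TOK,
)

# With auto tool choice, the model should emit a tool call rather than plain text.
print(tool_response.choices[0].message.tool_calls)
```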
## Transformers

1. Install Transformers:

```sh
pip install git+https://github.com/mistralai/Transformers-private@add_ministral3
```
or clone the repository:

```sh
git clone [email protected]:mistralai/Transformers-private.git
cd Transformers-private
git checkout add_ministral3
```
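After checking out the branch, an editable install should make it importable (a standard Transformers source install, assuming no extra build steps on this branch):

```sh
pip install -e .
```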
2. Test it (with mistral-common):

```sh
pip install "mistral-common[image]"
```
```python
from datetime import datetime, timedelta

import torch
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer, Mistral3ForConditionalGeneration


def load_system_prompt() -> str:
    file_path = hf_hub_download(repo_id="mistralai/Ministral-3-3B-Instruct-2512", filename="SYSTEM_PROMPT.txt")
    with open(file_path, "r") as file:
        system_prompt = file.read()
    today = datetime.today().strftime("%Y-%m-%d")
    yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
    return system_prompt.format(today=today, yesterday=yesterday)


SYSTEM_PROMPT = load_system_prompt()

tokenizer = AutoTokenizer.from_pretrained("mistralai/Ministral-3-3B-Instruct-2512", tokenizer_type="mistral")
model = Mistral3ForConditionalGeneration.from_pretrained(
    "mistralai/Ministral-3-3B-Instruct-2512", torch_dtype=torch.bfloat16, device_map="auto"
).eval()

image_url = "https://static.wikia.nocookie.net/essentialsdocs/images/7/70/Battle.png/revision/latest?cb=20220523172438"

messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "What action do you think I should take in this situation? List all the possible actions and explain why you think they are good or bad.",
            },
            {"type": "image_url", "image_url": {"url": image_url}},
        ],
    },
]

tokenized = tokenizer.apply_chat_template(messages, return_dict=True)

# Build the input tensors manually from the tokenizer output.
input_ids = torch.tensor(tokenized.input_ids, device="cuda").unsqueeze(0)
attention_mask = torch.tensor(tokenized.attention_mask, device="cuda").unsqueeze(0)
pixel_values = torch.tensor(
    tokenized.pixel_values[0], dtype=torch.bfloat16, device="cuda"
).unsqueeze(0)
image_sizes = torch.tensor(pixel_values.shape[-2:], device="cuda").unsqueeze(0)

with torch.inference_mode():
    output = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        pixel_values=pixel_values,
        image_sizes=image_sizes,
        max_new_tokens=1000,
    )[0]

decoded_output = tokenizer.decode(output, skip_special_tokens=True)
print(decoded_output)
```
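To print only the model's reply rather than the full transcript, you can slice off the prompt tokens before decoding:

```python
generated = output[input_ids.shape[-1]:]  # keep only the newly generated tokens
print(tokenizer.decode(generated, skip_special_tokens=True))
```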
3. Test it (without mistral-common):

```python
from datetime import datetime, timedelta

import torch
from huggingface_hub import hf_hub_download
from transformers import AutoProcessor, Mistral3ForConditionalGeneration


def load_system_prompt() -> str:
    file_path = hf_hub_download(repo_id="mistralai/Ministral-3-3B-Instruct-2512", filename="SYSTEM_PROMPT.txt")
    with open(file_path, "r") as file:
        system_prompt = file.read()
    today = datetime.today().strftime("%Y-%m-%d")
    yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
    return system_prompt.format(name="mistralai/Ministral-3-3B-Instruct-2512".split("/")[-1], today=today, yesterday=yesterday)


SYSTEM_PROMPT = load_system_prompt()

processor = AutoProcessor.from_pretrained("mistralai/Ministral-3-3B-Instruct-2512")
model = Mistral3ForConditionalGeneration.from_pretrained(
    "mistralai/Ministral-3-3B-Instruct-2512", torch_dtype=torch.bfloat16, device_map="auto"
).eval()

image_url = "https://static.wikia.nocookie.net/essentialsdocs/images/7/70/Battle.png/revision/latest?cb=20220523172438"

messages = [
    {
        "role": "system",
        "content": [{"type": "text", "text": SYSTEM_PROMPT}],
    },
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "What action do you think I should take in this situation? List all the possible actions and explain why you think they are good or bad.",
            },
            {"type": "image", "url": image_url},
        ],
    },
]

inputs = processor.apply_chat_template(
    messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt"
).to(device=model.device, dtype=torch.bfloat16)

with torch.inference_mode():
    output = model.generate(
        **inputs,
        max_new_tokens=1000,
    )

decoded_output = processor.batch_decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
print(decoded_output)
```
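If you want tokens printed as they are generated, Transformers ships a `TextStreamer` you can pass to `generate`. A small sketch, assuming `processor.tokenizer` exposes the underlying tokenizer:

```python
from transformers import TextStreamer

# Stream decoded tokens to stdout as they are produced, skipping the prompt echo.
streamer = TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
with torch.inference_mode():
    model.generate(**inputs, max_new_tokens=1000, streamer=streamer)
```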