Update README.md

README.md CHANGED

@@ -154,23 +154,40 @@ This model uses a specific chat format for optimal performance.

Before:

## Usage with HuggingFace transformers
The model can be used with HuggingFace's `transformers` library:
```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("OuteAI/Lite-Mistral-150M-v2-Instruct")

def generate_response(message):
    #

    # Decode the generated output
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text

message = "
response = generate_response(message)
```

After:

## Usage with HuggingFace transformers
The model can be used with HuggingFace's `transformers` library:
```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = AutoModelForCausalLM.from_pretrained("OuteAI/Lite-Mistral-150M-v2-Instruct").to(device)
tokenizer = AutoTokenizer.from_pretrained("OuteAI/Lite-Mistral-150M-v2-Instruct")

def generate_response(message: str, temperature: float = 0.4, repetition_penalty: float = 1.1) -> str:
    # Apply the chat template and convert to PyTorch tensors
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": message}
    ]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(device)

    # Generate the response
    output = model.generate(
        input_ids,
        max_length=512,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        do_sample=True
    )

    # Decode the generated output
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text

message = "I'd like to learn about language models. Can you break down the concept for me?"
response = generate_response(message)
print(response)
```
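
The `apply_chat_template` call is where the model's chat format gets injected into the prompt. A minimal sketch of how to inspect that format, assuming the `tokenizer` and `messages` objects from the example above, is to render the prompt as plain text with `tokenize=False`:

```python
# Minimal sketch: render the chat-formatted prompt as a string instead of token IDs.
# Assumes `tokenizer` and `messages` are already defined as in the example above.
prompt_text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,             # return the formatted string rather than tensors
    add_generation_prompt=True  # append the assistant-turn prefix the model completes
)
print(prompt_text)
```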
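
Note that `tokenizer.decode(output[0], ...)` returns the whole sequence, i.e. the formatted prompt followed by the model's reply. If only the reply is wanted, one common variant (a sketch assuming the `input_ids` and `output` tensors from the example above) is to slice off the prompt tokens before decoding:

```python
# Sketch: decode only the tokens generated after the prompt.
# Assumes `tokenizer`, `input_ids`, and `output` exist as in the example above.
new_tokens = output[0][input_ids.shape[-1]:]  # drop the prompt portion of the sequence
reply = tokenizer.decode(new_tokens, skip_special_tokens=True)
print(reply)
```
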
## Risk Disclaimer