metadata
{}
Typhoon Translate
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Hugging Face Hub identifier of the checkpoint used for both the tokenizer
# and the model weights.
model_id = "scb10x/Typhoon-translate-4b"

# The tokenizer is loaded from the same checkpoint so that its vocabulary and
# chat template match the model.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load the causal language model.
#   torch_dtype=torch.bfloat16 -> request the weights in bfloat16
#   device_map="auto"          -> let the library choose device placement
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
# The translation direction is selected by the system prompt; the user turn
# carries the text to translate.
#
# Example: Thai -> English (replace the pairs below to use it)
#   ("system", "Translate the following text into English.")
#   ("user", "ขอสูตรไก่ย่าง")    # Thai for "a grilled chicken recipe, please"
#
# Active example: English -> Thai
messages = [
    {"role": role, "content": content}
    for role, content in (
        ("system", "Translate the following text into Thai."),
        ("user", "Hello, how are you?"),
    )
]
# Render the conversation with the model's chat template and tokenize it.
# return_dict=True is requested explicitly so the result always contains both
# `input_ids` and `attention_mask`; generate() then receives an explicit mask
# instead of having to infer one from the token ids.
# add_generation_prompt=True appends the tokens that open the assistant turn
# so the model continues with the translation.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
).to(model.device)
input_ids = inputs["input_ids"]

# Sample up to 512 new tokens at temperature 0.3.
outputs = model.generate(
    **inputs,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.3,
)

# The generated sequence begins with the prompt tokens; slice them off so
# only the newly generated tokens are decoded and printed.
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True))