{
  "tokenizer_name": "CSUMLM Tokenizer",
  "model_name": "CSUMLM",
  "description": "Tokenizer for the CognoSphere Unified Multimodal Language Model",
  "author": "Or4cl3 AI Solutions",
  "language": "Multimodal (Text, Image, Audio)",
  "vocab_size": 32000,
  "max_sequence_length": 512,
  "special_tokens": {
    "bos_token": "<BOS>",
    "eos_token": "<EOS>",
    "pad_token": "<PAD>",
    "unk_token": "<UNK>",
    "mask_token": "<MASK>"
  },
  "tokenization_method": "Byte Pair Encoding (BPE)",
  "training_data": "Custom 1500 Example Dataset",
  "chat_template": "<BOS> {context} {user_input} {response} <EOS>",
  "pad_to_max_length": true,
  "truncation_strategy": "only_second"
}