{
  "_inserted_LatentThinkingModelSettings": {
    "add_latent_to_end": false,
    "binary_head_temp": null,
    "continue_token_id": 128258,
    "debug_mode": false,
    "detach_binary_head_inputs": false,
    "disable_checkpointing_cache_update": true,
    "disable_forward_input_embeds": true,
    "disable_input_past_key_values": false,
    "end_token_id": 128257,
    "lora_mode": false,
    "recurrent_filter_mode": "MLP",
    "start_token_id": 128256,
    "stop_token_id": 128259,
    "unused_token_ids": [
      128260,
      128261,
      128262,
      128263,
      128264,
      128265,
      128266,
      128267,
      128268,
      128269,
      128270,
      128271,
      128272,
      128273,
      128274,
      128275,
      128276,
      128277,
      128278,
      128279,
      128280,
      128281,
      128282,
      128283,
      128284,
      128285,
      128286,
      128287,
      128288,
      128289,
      128290,
      128291,
      128292,
      128293,
      128294,
      128295,
      128296,
      128297,
      128298,
      128299,
      128300,
      128301,
      128302,
      128303,
      128304,
      128305,
      128306,
      128307,
      128308,
      128309,
      128310,
      128311,
      128312,
      128313,
      128314,
      128315,
      128316,
      128317,
      128318,
      128319,
      128320,
      128321,
      128322,
      128323,
      128324,
      128325,
      128326,
      128327,
      128328,
      128329,
      128330,
      128331,
      128332,
      128333,
      128334,
      128335,
      128336,
      128337,
      128338,
      128339,
      128340,
      128341,
      128342,
      128343,
      128344,
      128345,
      128346,
      128347,
      128348,
      128349,
      128350,
      128351,
      128352,
      128353,
      128354,
      128355,
      128356,
      128357,
      128358,
      128359,
      128360,
      128361,
      128362,
      128363,
      128364,
      128365,
      128366,
      128367,
      128368,
      128369,
      128370,
      128371,
      128372,
      128373,
      128374,
      128375,
      128376,
      128377,
      128378,
      128379,
      128380,
      128381,
      128382,
      128383
    ]
  },
  "architectures": [
    "LatentThinkingModel"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 16,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 32.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_embeddings": true,
  "torch_dtype": "float32",
  "transformers_version": "4.53.0",
  "use_cache": true,
  "vocab_size": 128384
}
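The config extends the base Llama 3 vocabulary of 128256 tokens with a block of reserved IDs: four latent-control tokens (start/end/continue/stop at 128256-128259) plus the declared-unused IDs 128260-128383, which together account for the enlarged `vocab_size` of 128384. Below is a minimal sketch, using only the Python standard library, that checks this token layout against the file; the filename `config.json` is an assumption and should point at wherever this config lives in the checkpoint directory.

```python
import json

# Assumed location of the config shown above.
path = "config.json"

with open(path) as f:
    cfg = json.load(f)

latent = cfg["_inserted_LatentThinkingModelSettings"]

# The four latent-control tokens occupy IDs 128256-128259, immediately
# after the base Llama 3 vocabulary of 128256 tokens.
control_ids = [
    latent["start_token_id"],     # 128256
    latent["end_token_id"],       # 128257
    latent["continue_token_id"],  # 128258
    latent["stop_token_id"],      # 128259
]
assert sorted(control_ids) == list(range(128256, 128260))

# The remaining reserved IDs (128260-128383) are declared unused; together
# with the control tokens they fill out the enlarged vocab_size of 128384.
assert latent["unused_token_ids"] == list(range(128260, 128384))
assert cfg["vocab_size"] == 128260 + len(latent["unused_token_ids"])

print("token layout consistent with vocab_size", cfg["vocab_size"])
```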