{
  "_inserted_LatentThinkingModelSettings": {
    "add_latent_to_end": false,
    "binary_head_temp": null,
    "continue_token_id": 128258,
    "debug_mode": false,
    "detach_binary_head_inputs": false,
    "disable_checkpointing_cache_update": true,
    "disable_forward_input_embeds": true,
    "disable_input_past_key_values": false,
    "end_token_id": 128257,
    "lora_mode": false,
    "recurrent_filter_mode": "MLP",
    "start_token_id": 128256,
    "stop_token_id": 128259,
    "unused_token_ids": [
      128260,
      128261,
      128262,
      128263,
      128264,
      128265,
      128266,
      128267,
      128268,
      128269,
      128270,
      128271,
      128272,
      128273,
      128274,
      128275,
      128276,
      128277,
      128278,
      128279,
      128280,
      128281,
      128282,
      128283,
      128284,
      128285,
      128286,
      128287,
      128288,
      128289,
      128290,
      128291,
      128292,
      128293,
      128294,
      128295,
      128296,
      128297,
      128298,
      128299,
      128300,
      128301,
      128302,
      128303,
      128304,
      128305,
      128306,
      128307,
      128308,
      128309,
      128310,
      128311,
      128312,
      128313,
      128314,
      128315,
      128316,
      128317,
      128318,
      128319,
      128320,
      128321,
      128322,
      128323,
      128324,
      128325,
      128326,
      128327,
      128328,
      128329,
      128330,
      128331,
      128332,
      128333,
      128334,
      128335,
      128336,
      128337,
      128338,
      128339,
      128340,
      128341,
      128342,
      128343,
      128344,
      128345,
      128346,
      128347,
      128348,
      128349,
      128350,
      128351,
      128352,
      128353,
      128354,
      128355,
      128356,
      128357,
      128358,
      128359,
      128360,
      128361,
      128362,
      128363,
      128364,
      128365,
      128366,
      128367,
      128368,
      128369,
      128370,
      128371,
      128372,
      128373,
      128374,
      128375,
      128376,
      128377,
      128378,
      128379,
      128380,
      128381,
      128382,
      128383
    ]
  },
  "architectures": [
    "LatentThinkingModel"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 16,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 32.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_embeddings": true,
  "torch_dtype": "float32",
  "transformers_version": "4.53.0",
  "use_cache": true,
  "vocab_size": 128384
}
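
A minimal Python sketch (not part of the repository) for sanity-checking the special-token layout declared above. It assumes the file is saved locally as "config.json"; the check itself is an illustration, not upstream tooling. The four LatentThinking control tokens sit directly after the base Llama 3.2 vocabulary (ids 0-128255), and the unused ids fill the remainder up to vocab_size.

    # Hypothetical consistency check; the "config.json" path is an assumption.
    import json

    with open("config.json") as f:
        cfg = json.load(f)

    s = cfg["_inserted_LatentThinkingModelSettings"]

    # Control tokens occupy ids 128256-128259, right after the base vocabulary.
    assert [s["start_token_id"], s["end_token_id"],
            s["continue_token_id"], s["stop_token_id"]] == [128256, 128257, 128258, 128259]

    # unused_token_ids fills every remaining slot up to vocab_size (128384).
    assert s["unused_token_ids"] == list(range(128260, cfg["vocab_size"]))

    print("token layout consistent; vocab_size =", cfg["vocab_size"])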