{ "_inserted_LatentThinkingModelSettings": { "add_latent_to_end": false, "binary_head_temp": null, "continue_token_id": 128258, "debug_mode": false, "detach_binary_head_inputs": false, "disable_checkpointing_cache_update": true, "disable_forward_input_embeds": true, "disable_input_past_key_values": false, "end_token_id": 128257, "lora_mode": false, "recurrent_filter_mode": "MLP", "start_token_id": 128256, "stop_token_id": 128259, "unused_token_ids": [ 128260, 128261, 128262, 128263, 128264, 128265, 128266, 128267, 128268, 128269, 128270, 128271, 128272, 128273, 128274, 128275, 128276, 128277, 128278, 128279, 128280, 128281, 128282, 128283, 128284, 128285, 128286, 128287, 128288, 128289, 128290, 128291, 128292, 128293, 128294, 128295, 128296, 128297, 128298, 128299, 128300, 128301, 128302, 128303, 128304, 128305, 128306, 128307, 128308, 128309, 128310, 128311, 128312, 128313, 128314, 128315, 128316, 128317, 128318, 128319, 128320, 128321, 128322, 128323, 128324, 128325, 128326, 128327, 128328, 128329, 128330, 128331, 128332, 128333, 128334, 128335, 128336, 128337, 128338, 128339, 128340, 128341, 128342, 128343, 128344, 128345, 128346, 128347, 128348, 128349, 128350, 128351, 128352, 128353, 128354, 128355, 128356, 128357, 128358, 128359, 128360, 128361, 128362, 128363, 128364, 128365, 128366, 128367, 128368, 128369, 128370, 128371, 128372, 128373, 128374, 128375, 128376, 128377, 128378, 128379, 128380, 128381, 128382, 128383 ] }, "architectures": [ "LatentThinkingModel" ], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, "eos_token_id": [ 128001, 128008, 128009 ], "head_dim": 64, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "max_position_embeddings": 131072, "mlp_bias": false, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 16, "num_key_value_heads": 8, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": { "factor": 32.0, "high_freq_factor": 4.0, "low_freq_factor": 1.0, "original_max_position_embeddings": 8192, "rope_type": "llama3" }, "rope_theta": 500000.0, "tie_word_embeddings": true, "torch_dtype": "float32", "transformers_version": "4.53.0", "use_cache": true, "vocab_size": 128384 }