{
  "d_model": 256,
  "n_layer": 8,
  "d_inner": 1024,
  "vocab_size": 12,
  "resid_dropout": 0.0,
  "embed_dropout": 0.1,
  "fused_mlp": false,
  "fused_dropout_add_ln": true,
  "checkpoint_mixer": true,
  "checkpoint_mlp": true,
  "residual_in_fp32": true,
  "pad_vocab_size_multiple": 8,
  "return_hidden_state": true,
  "layer": {
    "_name_": "hyena",
    "emb_dim": 5,
    "filter_order": 64,
    "local_order": 3,
    "l_max": 1000002,
    "modulate": true,
    "w": 10,
    "lr": 6e-4,
    "wd": 0.0,
    "lr_pos_emb": 0.0
  }
}