RonanMcGovern commited on
Commit
0e56eb3
·
verified ·
1 Parent(s): 1eeaa9e

Upload via push_to_hf.py

Browse files
sft/d20/meta_000700.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "step": 700,
3
+ "val_loss": 1.078283667564392,
4
+ "mmlu_acc": 0.3486328125,
5
+ "arc_easy_acc": 0.4345703125,
6
+ "model_config": {
7
+ "sequence_len": 2048,
8
+ "vocab_size": 65536,
9
+ "n_layer": 20,
10
+ "n_head": 10,
11
+ "n_kv_head": 10,
12
+ "n_embd": 1280,
13
+ "n_prelude": 2,
14
+ "n_recur_block": 4,
15
+ "n_coda": 2,
16
+ "train_recur_mean": 4.0,
17
+ "train_recur_max": 16,
18
+ "recur_warm_start": true,
19
+ "bptt_k": 4,
20
+ "kv_cache_recur_budget": 1,
21
+ "inject_mode": "concat_linear"
22
+ }
23
+ }
sft/d20/model_000700.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52fa1b2df267e44297a8869c78249f6945d56e953bc2310f6c2423e4a46ba689
3
+ size 1145590951