ncbateman committed (verified)
Commit 5030f63 · 1 Parent(s): 9a68db5

Training in progress, step 540, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc960fca69cb5a221aa8a825eefb4fade1dfb1ede30eea2200ef4e2111d697f9
+oid sha256:f4c1a22a5ad978a253e58570d5a6e4cbeec9bfa7a8ece40f8616e2220428cc7a
 size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1abb3d05d6a92803ab2ebdda809e35b42b61b12d3dc690cd31f8f80303abdfd
+oid sha256:9081acbf7e17111ebe4f8bbdc838b421d5fd63ae6342d91e8d860439a914c398
 size 85723732
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:615b22c8080fe4e88aae663f0d769e5c8cbe504c49fe0b09ae26279486ea1ec6
+oid sha256:0a199493692cf25c16b518bf6e7216c3987b56836fc34a4b8f0b5e62bd338bbd
 size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e8969ed750a19c2f4413dcfe1605216f211d0e516f70f002601b0027cb83f9dc
+oid sha256:74615d30db2006579916bf75a005a1eafb18948a8ecb1d61a6161179b736d4f2
 size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc5385188f3d7388d500dc532b7f97173664d249e1812dfaef1013ebeee30cc1
+oid sha256:5cccf180715c4b2e0d980973df8379f3cff06745ac6beabafbc8fc4345127563
 size 1064
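
The checkpoint binaries above are stored as Git LFS pointers: each pointer records only the spec version, the SHA-256 of the tracked blob (`oid`), and its byte size, so this commit swaps the `oid` values while the sizes stay unchanged. A minimal sketch of how one might verify the resolved blobs against the new pointers, assuming the files have been fetched with `git lfs pull` (the paths and loop are illustrative; the oids are taken from this commit):

```python
import hashlib

def sha256_of(path: str) -> str:
    # Hash the file in 1 MiB chunks to avoid loading large blobs into memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected oids copied from the pointer files in this commit (after the change).
expected = {
    "last-checkpoint/adapter_model.safetensors": "f4c1a22a5ad978a253e58570d5a6e4cbeec9bfa7a8ece40f8616e2220428cc7a",
    "last-checkpoint/optimizer.pt": "9081acbf7e17111ebe4f8bbdc838b421d5fd63ae6342d91e8d860439a914c398",
}

for path, oid in expected.items():
    assert sha256_of(path) == oid, f"oid mismatch for {path}"
```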
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.674157303370786,
+  "epoch": 4.853932584269663,
   "eval_steps": 56,
-  "global_step": 520,
+  "global_step": 540,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3727,6 +3727,146 @@
       "learning_rate": 1.18052897181965e-06,
       "loss": 0.2621,
       "step": 520
+    },
+    {
+      "epoch": 4.683146067415731,
+      "grad_norm": 1.5518674850463867,
+      "learning_rate": 1.1142819925730686e-06,
+      "loss": 0.2573,
+      "step": 521
+    },
+    {
+      "epoch": 4.692134831460674,
+      "grad_norm": 1.6397637128829956,
+      "learning_rate": 1.0499269107544674e-06,
+      "loss": 0.2642,
+      "step": 522
+    },
+    {
+      "epoch": 4.701123595505618,
+      "grad_norm": 1.5932601690292358,
+      "learning_rate": 9.874662169320258e-07,
+      "loss": 0.2738,
+      "step": 523
+    },
+    {
+      "epoch": 4.710112359550562,
+      "grad_norm": 1.683666706085205,
+      "learning_rate": 9.269023283603296e-07,
+      "loss": 0.2814,
+      "step": 524
+    },
+    {
+      "epoch": 4.719101123595506,
+      "grad_norm": 1.767184853553772,
+      "learning_rate": 8.682375888868166e-07,
+      "loss": 0.2798,
+      "step": 525
+    },
+    {
+      "epoch": 4.72808988764045,
+      "grad_norm": 1.6281753778457642,
+      "learning_rate": 8.114742688610788e-07,
+      "loss": 0.2491,
+      "step": 526
+    },
+    {
+      "epoch": 4.737078651685393,
+      "grad_norm": 1.5955027341842651,
+      "learning_rate": 7.566145650469814e-07,
+      "loss": 0.2424,
+      "step": 527
+    },
+    {
+      "epoch": 4.746067415730337,
+      "grad_norm": 1.6692067384719849,
+      "learning_rate": 7.0366060053767e-07,
+      "loss": 0.29,
+      "step": 528
+    },
+    {
+      "epoch": 4.755056179775281,
+      "grad_norm": 1.8486473560333252,
+      "learning_rate": 6.526144246733868e-07,
+      "loss": 0.2945,
+      "step": 529
+    },
+    {
+      "epoch": 4.764044943820225,
+      "grad_norm": 1.6505565643310547,
+      "learning_rate": 6.034780129621664e-07,
+      "loss": 0.3064,
+      "step": 530
+    },
+    {
+      "epoch": 4.773033707865169,
+      "grad_norm": 1.8321211338043213,
+      "learning_rate": 5.562532670033871e-07,
+      "loss": 0.2854,
+      "step": 531
+    },
+    {
+      "epoch": 4.782022471910112,
+      "grad_norm": 1.6400787830352783,
+      "learning_rate": 5.109420144141786e-07,
+      "loss": 0.3191,
+      "step": 532
+    },
+    {
+      "epoch": 4.7910112359550565,
+      "grad_norm": 1.584359049797058,
+      "learning_rate": 4.675460087586847e-07,
+      "loss": 0.2786,
+      "step": 533
+    },
+    {
+      "epoch": 4.8,
+      "grad_norm": 1.66130530834198,
+      "learning_rate": 4.2606692948019556e-07,
+      "loss": 0.2825,
+      "step": 534
+    },
+    {
+      "epoch": 4.808988764044944,
+      "grad_norm": 1.7023285627365112,
+      "learning_rate": 3.8650638183617694e-07,
+      "loss": 0.3146,
+      "step": 535
+    },
+    {
+      "epoch": 4.817977528089887,
+      "grad_norm": 1.9292840957641602,
+      "learning_rate": 3.488658968361147e-07,
+      "loss": 0.3581,
+      "step": 536
+    },
+    {
+      "epoch": 4.8269662921348315,
+      "grad_norm": 1.6868337392807007,
+      "learning_rate": 3.131469311822954e-07,
+      "loss": 0.286,
+      "step": 537
+    },
+    {
+      "epoch": 4.835955056179776,
+      "grad_norm": 1.7219264507293701,
+      "learning_rate": 2.7935086721339597e-07,
+      "loss": 0.2777,
+      "step": 538
+    },
+    {
+      "epoch": 4.844943820224719,
+      "grad_norm": 1.7083749771118164,
+      "learning_rate": 2.4747901285102624e-07,
+      "loss": 0.2542,
+      "step": 539
+    },
+    {
+      "epoch": 4.853932584269663,
+      "grad_norm": 1.6217012405395508,
+      "learning_rate": 2.1753260154906973e-07,
+      "loss": 0.2876,
+      "step": 540
     }
   ],
   "logging_steps": 1,
@@ -3746,7 +3886,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.0862510996114637e+18,
+  "total_flos": 3.2049530649811354e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null