Training in progress, step 540, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f4c1a22a5ad978a253e58570d5a6e4cbeec9bfa7a8ece40f8616e2220428cc7a
 size 167832240
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9081acbf7e17111ebe4f8bbdc838b421d5fd63ae6342d91e8d860439a914c398
 size 85723732
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0a199493692cf25c16b518bf6e7216c3987b56836fc34a4b8f0b5e62bd338bbd
 size 14512
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:74615d30db2006579916bf75a005a1eafb18948a8ecb1d61a6161179b736d4f2
 size 14512
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5cccf180715c4b2e0d980973df8379f3cff06745ac6beabafbc8fc4345127563
 size 1064
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.
+  "epoch": 4.853932584269663,
   "eval_steps": 56,
-  "global_step":
+  "global_step": 540,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3727,6 +3727,146 @@
       "learning_rate": 1.18052897181965e-06,
       "loss": 0.2621,
       "step": 520
+    },
+    {
+      "epoch": 4.683146067415731,
+      "grad_norm": 1.5518674850463867,
+      "learning_rate": 1.1142819925730686e-06,
+      "loss": 0.2573,
+      "step": 521
+    },
+    {
+      "epoch": 4.692134831460674,
+      "grad_norm": 1.6397637128829956,
+      "learning_rate": 1.0499269107544674e-06,
+      "loss": 0.2642,
+      "step": 522
+    },
+    {
+      "epoch": 4.701123595505618,
+      "grad_norm": 1.5932601690292358,
+      "learning_rate": 9.874662169320258e-07,
+      "loss": 0.2738,
+      "step": 523
+    },
+    {
+      "epoch": 4.710112359550562,
+      "grad_norm": 1.683666706085205,
+      "learning_rate": 9.269023283603296e-07,
+      "loss": 0.2814,
+      "step": 524
+    },
+    {
+      "epoch": 4.719101123595506,
+      "grad_norm": 1.767184853553772,
+      "learning_rate": 8.682375888868166e-07,
+      "loss": 0.2798,
+      "step": 525
+    },
+    {
+      "epoch": 4.72808988764045,
+      "grad_norm": 1.6281753778457642,
+      "learning_rate": 8.114742688610788e-07,
+      "loss": 0.2491,
+      "step": 526
+    },
+    {
+      "epoch": 4.737078651685393,
+      "grad_norm": 1.5955027341842651,
+      "learning_rate": 7.566145650469814e-07,
+      "loss": 0.2424,
+      "step": 527
+    },
+    {
+      "epoch": 4.746067415730337,
+      "grad_norm": 1.6692067384719849,
+      "learning_rate": 7.0366060053767e-07,
+      "loss": 0.29,
+      "step": 528
+    },
+    {
+      "epoch": 4.755056179775281,
+      "grad_norm": 1.8486473560333252,
+      "learning_rate": 6.526144246733868e-07,
+      "loss": 0.2945,
+      "step": 529
+    },
+    {
+      "epoch": 4.764044943820225,
+      "grad_norm": 1.6505565643310547,
+      "learning_rate": 6.034780129621664e-07,
+      "loss": 0.3064,
+      "step": 530
+    },
+    {
+      "epoch": 4.773033707865169,
+      "grad_norm": 1.8321211338043213,
+      "learning_rate": 5.562532670033871e-07,
+      "loss": 0.2854,
+      "step": 531
+    },
+    {
+      "epoch": 4.782022471910112,
+      "grad_norm": 1.6400787830352783,
+      "learning_rate": 5.109420144141786e-07,
+      "loss": 0.3191,
+      "step": 532
+    },
+    {
+      "epoch": 4.7910112359550565,
+      "grad_norm": 1.584359049797058,
+      "learning_rate": 4.675460087586847e-07,
+      "loss": 0.2786,
+      "step": 533
+    },
+    {
+      "epoch": 4.8,
+      "grad_norm": 1.66130530834198,
+      "learning_rate": 4.2606692948019556e-07,
+      "loss": 0.2825,
+      "step": 534
+    },
+    {
+      "epoch": 4.808988764044944,
+      "grad_norm": 1.7023285627365112,
+      "learning_rate": 3.8650638183617694e-07,
+      "loss": 0.3146,
+      "step": 535
+    },
+    {
+      "epoch": 4.817977528089887,
+      "grad_norm": 1.9292840957641602,
+      "learning_rate": 3.488658968361147e-07,
+      "loss": 0.3581,
+      "step": 536
+    },
+    {
+      "epoch": 4.8269662921348315,
+      "grad_norm": 1.6868337392807007,
+      "learning_rate": 3.131469311822954e-07,
+      "loss": 0.286,
+      "step": 537
+    },
+    {
+      "epoch": 4.835955056179776,
+      "grad_norm": 1.7219264507293701,
+      "learning_rate": 2.7935086721339597e-07,
+      "loss": 0.2777,
+      "step": 538
+    },
+    {
+      "epoch": 4.844943820224719,
+      "grad_norm": 1.7083749771118164,
+      "learning_rate": 2.4747901285102624e-07,
+      "loss": 0.2542,
+      "step": 539
+    },
+    {
+      "epoch": 4.853932584269663,
+      "grad_norm": 1.6217012405395508,
+      "learning_rate": 2.1753260154906973e-07,
+      "loss": 0.2876,
+      "step": 540
     }
   ],
   "logging_steps": 1,
@@ -3746,7 +3886,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.
+  "total_flos": 3.2049530649811354e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null