rak-r05 committed
Commit 2850c12 · verified · 1 Parent(s): 6d20bf9

Training in progress, step 150, checkpoint

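This checkpoint bundles the usual Trainer artifacts: `adapter_model.safetensors` is the filename PEFT uses for (e.g. LoRA) adapter weights, while `optimizer.pt`, `scheduler.pt`, `rng_state.pth`, and `trainer_state.json` carry the optimizer, learning-rate scheduler, RNG, and logging state needed to resume. A minimal sketch of inspecting and loading the checkpoint, assuming the `transformers`/`peft` stack, a placeholder base-model ID, and that the checkpoint directory also contains the `adapter_config.json` that `peft` writes alongside the weights (unchanged files do not appear in this diff):

```python
import json

from peft import PeftModel                      # assumes the peft library is installed
from transformers import AutoModelForCausalLM   # assumes a causal-LM base model

# Read the training log that this commit updates (global_step 114 -> 150).
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)
print(state["global_step"], state["epoch"])   # 150, ~0.02355
print(state["log_history"][-1])               # final step: loss 1.649, learning_rate 0.0

# Attach the adapter to its base model for inference.
# "base-model-id" is a placeholder; the base model is not recorded in this diff.
base = AutoModelForCausalLM.from_pretrained("base-model-id")
model = PeftModel.from_pretrained(base, "last-checkpoint")
```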
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac63d2e3fb7ee7b77c89041ac59cd8119c851263d0126f83e26cc716412deb9a
+oid sha256:8da88a7160db49fe4081c9356e5298c05e77fd70559d53e7a8542d65e1336e51
 size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4f39932a98b4bb76f17b5793af74d9a31d9751b9118d7262fbae39a0ce10d6b8
+oid sha256:486b3c22c3dbf050c3395cbe4b89eb1a849ea76837f5ba8aa9fea158fcaf3ca0
 size 85723284
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e3359fd4547f68c7f451736863a509301728a1390912cbfc7bfdf510002771c2
+oid sha256:e1f6bab0e0ac3b50230cdee09f918e47b5a05cda8a318f3c0288f311159f0c6f
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d35a71cd96ab153cf58aa68810dc4405b5e1a7fbfef5501d233ae84e34aa51c4
+oid sha256:285055c203c0e51e433ff14d6fa6e0c364698ec978202e506a844a8372901f4a
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.01789428246281835,
+  "epoch": 0.023545108503708355,
   "eval_steps": 38,
-  "global_step": 114,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -837,6 +837,258 @@
       "eval_samples_per_second": 3.269,
       "eval_steps_per_second": 1.635,
       "step": 114
+    },
+    {
+      "epoch": 0.018051249852843072,
+      "grad_norm": 6.698558807373047,
+      "learning_rate": 5.857864376269051e-05,
+      "loss": 2.5849,
+      "step": 115
+    },
+    {
+      "epoch": 0.018208217242867794,
+      "grad_norm": 10.639579772949219,
+      "learning_rate": 5.544102723452171e-05,
+      "loss": 2.6,
+      "step": 116
+    },
+    {
+      "epoch": 0.018365184632892516,
+      "grad_norm": 8.183032989501953,
+      "learning_rate": 5.237620050253189e-05,
+      "loss": 2.6161,
+      "step": 117
+    },
+    {
+      "epoch": 0.018522152022917238,
+      "grad_norm": 6.063802242279053,
+      "learning_rate": 4.938570679927783e-05,
+      "loss": 1.3668,
+      "step": 118
+    },
+    {
+      "epoch": 0.018679119412941963,
+      "grad_norm": 5.778029441833496,
+      "learning_rate": 4.647105192839778e-05,
+      "loss": 1.6742,
+      "step": 119
+    },
+    {
+      "epoch": 0.018836086802966685,
+      "grad_norm": 9.222543716430664,
+      "learning_rate": 4.363370350639404e-05,
+      "loss": 2.0035,
+      "step": 120
+    },
+    {
+      "epoch": 0.018993054192991406,
+      "grad_norm": 10.651296615600586,
+      "learning_rate": 4.087509022364382e-05,
+      "loss": 2.5014,
+      "step": 121
+    },
+    {
+      "epoch": 0.019150021583016128,
+      "grad_norm": 11.831440925598145,
+      "learning_rate": 3.819660112501053e-05,
+      "loss": 2.2869,
+      "step": 122
+    },
+    {
+      "epoch": 0.01930698897304085,
+      "grad_norm": 7.712268829345703,
+      "learning_rate": 3.5599584910418035e-05,
+      "loss": 2.0851,
+      "step": 123
+    },
+    {
+      "epoch": 0.01946395636306557,
+      "grad_norm": 9.053482055664062,
+      "learning_rate": 3.3085349255739474e-05,
+      "loss": 1.8142,
+      "step": 124
+    },
+    {
+      "epoch": 0.019620923753090297,
+      "grad_norm": 8.176172256469727,
+      "learning_rate": 3.0655160154343174e-05,
+      "loss": 2.3284,
+      "step": 125
+    },
+    {
+      "epoch": 0.01977789114311502,
+      "grad_norm": 18.283300399780273,
+      "learning_rate": 2.831024127962678e-05,
+      "loss": 1.9836,
+      "step": 126
+    },
+    {
+      "epoch": 0.01993485853313974,
+      "grad_norm": 7.915673732757568,
+      "learning_rate": 2.6051773368860934e-05,
+      "loss": 2.3106,
+      "step": 127
+    },
+    {
+      "epoch": 0.020091825923164462,
+      "grad_norm": 6.272395133972168,
+      "learning_rate": 2.38808936286524e-05,
+      "loss": 1.862,
+      "step": 128
+    },
+    {
+      "epoch": 0.020248793313189184,
+      "grad_norm": 9.373351097106934,
+      "learning_rate": 2.1798695162326442e-05,
+      "loss": 2.0214,
+      "step": 129
+    },
+    {
+      "epoch": 0.020405760703213906,
+      "grad_norm": 8.452390670776367,
+      "learning_rate": 1.9806226419516192e-05,
+      "loss": 1.8931,
+      "step": 130
+    },
+    {
+      "epoch": 0.02056272809323863,
+      "grad_norm": 6.104604721069336,
+      "learning_rate": 1.790449066823683e-05,
+      "loss": 1.9672,
+      "step": 131
+    },
+    {
+      "epoch": 0.020719695483263353,
+      "grad_norm": 5.282866477966309,
+      "learning_rate": 1.6094445489709885e-05,
+      "loss": 2.1532,
+      "step": 132
+    },
+    {
+      "epoch": 0.020876662873288074,
+      "grad_norm": 8.8634672164917,
+      "learning_rate": 1.4377002296192233e-05,
+      "loss": 1.9147,
+      "step": 133
+    },
+    {
+      "epoch": 0.021033630263312796,
+      "grad_norm": 29.37977409362793,
+      "learning_rate": 1.275302587205256e-05,
+      "loss": 3.5516,
+      "step": 134
+    },
+    {
+      "epoch": 0.021190597653337518,
+      "grad_norm": 12.045405387878418,
+      "learning_rate": 1.1223333938326485e-05,
+      "loss": 2.0136,
+      "step": 135
+    },
+    {
+      "epoch": 0.02134756504336224,
+      "grad_norm": 6.3739399909973145,
+      "learning_rate": 9.788696740969295e-06,
+      "loss": 1.7538,
+      "step": 136
+    },
+    {
+      "epoch": 0.021504532433386965,
+      "grad_norm": 10.208410263061523,
+      "learning_rate": 8.44983666301391e-06,
+      "loss": 1.9149,
+      "step": 137
+    },
+    {
+      "epoch": 0.021661499823411687,
+      "grad_norm": 6.10170841217041,
+      "learning_rate": 7.2074278608293525e-06,
+      "loss": 1.7403,
+      "step": 138
+    },
+    {
+      "epoch": 0.02181846721343641,
+      "grad_norm": 11.614684104919434,
+      "learning_rate": 6.062095924662625e-06,
+      "loss": 2.1914,
+      "step": 139
+    },
+    {
+      "epoch": 0.02197543460346113,
+      "grad_norm": 13.544835090637207,
+      "learning_rate": 5.0144175636352765e-06,
+      "loss": 2.4752,
+      "step": 140
+    },
+    {
+      "epoch": 0.022132401993485852,
+      "grad_norm": 10.997184753417969,
+      "learning_rate": 4.064920315352904e-06,
+      "loss": 1.691,
+      "step": 141
+    },
+    {
+      "epoch": 0.022289369383510577,
+      "grad_norm": 5.709866046905518,
+      "learning_rate": 3.2140822802740668e-06,
+      "loss": 1.5808,
+      "step": 142
+    },
+    {
+      "epoch": 0.0224463367735353,
+      "grad_norm": 8.197881698608398,
+      "learning_rate": 2.462331880972468e-06,
+      "loss": 2.0699,
+      "step": 143
+    },
+    {
+      "epoch": 0.02260330416356002,
+      "grad_norm": 9.883748054504395,
+      "learning_rate": 1.81004764641306e-06,
+      "loss": 2.9457,
+      "step": 144
+    },
+    {
+      "epoch": 0.022760271553584743,
+      "grad_norm": 9.526391983032227,
+      "learning_rate": 1.2575580213514792e-06,
+      "loss": 2.7024,
+      "step": 145
+    },
+    {
+      "epoch": 0.022917238943609464,
+      "grad_norm": 6.334656715393066,
+      "learning_rate": 8.051412009521864e-07,
+      "loss": 2.2296,
+      "step": 146
+    },
+    {
+      "epoch": 0.023074206333634186,
+      "grad_norm": 6.532505989074707,
+      "learning_rate": 4.530249907087836e-07,
+      "loss": 2.0629,
+      "step": 147
+    },
+    {
+      "epoch": 0.02323117372365891,
+      "grad_norm": 6.112399578094482,
+      "learning_rate": 2.0138669173708213e-07,
+      "loss": 1.8538,
+      "step": 148
+    },
+    {
+      "epoch": 0.023388141113683633,
+      "grad_norm": 6.967254161834717,
+      "learning_rate": 5.035301149869387e-08,
+      "loss": 1.9779,
+      "step": 149
+    },
+    {
+      "epoch": 0.023545108503708355,
+      "grad_norm": 6.000161647796631,
+      "learning_rate": 0.0,
+      "loss": 1.649,
+      "step": 150
     }
   ],
   "logging_steps": 1,
@@ -851,12 +1103,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.251973931401216e+16,
+  "total_flos": 4.333035535623782e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null