| { | |
| "best_metric": 0.042461033910512924, | |
| "best_model_checkpoint": "./teapotllm/checkpoint-876", | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 1095, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "em_boolean": 0.5294117647058824, | |
| "em_chat": 0.03783783783783784, | |
| "em_extraction": 0.5333333333333333, | |
| "em_qa": 0.5333333333333333, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 1.0, | |
| "mean_em": 0.12075471698113208, | |
| "mean_similarity": 0.6624939311014594, | |
| "mean_word_count_diff": 11.958490566037735, | |
| "similarity_boolean": 0.649950959993636, | |
| "similarity_chat": 0.6068178282194846, | |
| "similarity_extraction": 0.8274670541286469, | |
| "similarity_qa": 0.8895131280024846, | |
| "similarity_summarization": 0.7897715005609725, | |
| "similarity_unanswerable": 0.8186558306217193, | |
| "word_count_diff_boolean": 2.764705882352941, | |
| "word_count_diff_chat": 11.275675675675675, | |
| "word_count_diff_extraction": 1.8666666666666667, | |
| "word_count_diff_qa": 2.7333333333333334, | |
| "word_count_diff_summarization": 50.05555555555556, | |
| "word_count_diff_unanswerable": 4.4 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.29226627945899963, | |
| "learning_rate": 1.314e-05, | |
| "loss": 0.0548, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.04732182249426842, | |
| "eval_runtime": 17.2425, | |
| "eval_samples_per_second": 15.369, | |
| "eval_steps_per_second": 1.972, | |
| "step": 219 | |
| }, | |
| { | |
| "em_boolean": 0.7058823529411765, | |
| "em_chat": 0.05405405405405406, | |
| "em_extraction": 0.4, | |
| "em_qa": 0.4, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 2.0, | |
| "mean_em": 0.12830188679245283, | |
| "mean_similarity": 0.6936857629426808, | |
| "mean_word_count_diff": 11.475471698113207, | |
| "similarity_boolean": 0.7671171505661571, | |
| "similarity_chat": 0.6481531772158435, | |
| "similarity_extraction": 0.7221033732096355, | |
| "similarity_qa": 0.8645438591639201, | |
| "similarity_summarization": 0.824706514676412, | |
| "similarity_unanswerable": 0.8155314723650614, | |
| "word_count_diff_boolean": 2.7058823529411766, | |
| "word_count_diff_chat": 10.556756756756757, | |
| "word_count_diff_extraction": 2.933333333333333, | |
| "word_count_diff_qa": 3.2, | |
| "word_count_diff_summarization": 50.166666666666664, | |
| "word_count_diff_unanswerable": 3.1333333333333333 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.30153563618659973, | |
| "learning_rate": 2.628e-05, | |
| "loss": 0.0445, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.04412226751446724, | |
| "eval_runtime": 17.2255, | |
| "eval_samples_per_second": 15.384, | |
| "eval_steps_per_second": 1.974, | |
| "step": 438 | |
| }, | |
| { | |
| "em_boolean": 0.5882352941176471, | |
| "em_chat": 0.05945945945945946, | |
| "em_extraction": 0.4666666666666667, | |
| "em_qa": 0.5333333333333333, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 3.0, | |
| "mean_em": 0.13584905660377358, | |
| "mean_similarity": 0.7036299928109039, | |
| "mean_word_count_diff": 11.230188679245282, | |
| "similarity_boolean": 0.702288385699777, | |
| "similarity_chat": 0.6661249467549292, | |
| "similarity_extraction": 0.7363929619391759, | |
| "similarity_qa": 0.9095388889312744, | |
| "similarity_summarization": 0.7749250100718604, | |
| "similarity_unanswerable": 0.8434868295987447, | |
| "word_count_diff_boolean": 1.8823529411764706, | |
| "word_count_diff_chat": 10.41081081081081, | |
| "word_count_diff_extraction": 1.4, | |
| "word_count_diff_qa": 2.8666666666666667, | |
| "word_count_diff_summarization": 50.111111111111114, | |
| "word_count_diff_unanswerable": 3.466666666666667 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.4190770983695984, | |
| "learning_rate": 2.9365689308796065e-05, | |
| "loss": 0.0366, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.04255302622914314, | |
| "eval_runtime": 17.2484, | |
| "eval_samples_per_second": 15.364, | |
| "eval_steps_per_second": 1.971, | |
| "step": 657 | |
| }, | |
| { | |
| "em_boolean": 0.8823529411764706, | |
| "em_chat": 0.05405405405405406, | |
| "em_extraction": 0.6666666666666666, | |
| "em_qa": 0.3333333333333333, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 4.0, | |
| "mean_em": 0.1509433962264151, | |
| "mean_similarity": 0.7214625703522338, | |
| "mean_word_count_diff": 11.275471698113208, | |
| "similarity_boolean": 0.9250823608325685, | |
| "similarity_chat": 0.6629158062407294, | |
| "similarity_extraction": 0.9287973960240682, | |
| "similarity_qa": 0.8280642042557399, | |
| "similarity_summarization": 0.7930781609482236, | |
| "similarity_unanswerable": 0.8128950635592143, | |
| "word_count_diff_boolean": 0.5882352941176471, | |
| "word_count_diff_chat": 10.556756756756757, | |
| "word_count_diff_extraction": 0.4666666666666667, | |
| "word_count_diff_qa": 3.2, | |
| "word_count_diff_summarization": 50.27777777777778, | |
| "word_count_diff_unanswerable": 4.333333333333333 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.34906384348869324, | |
| "learning_rate": 2.6482696742411827e-05, | |
| "loss": 0.0301, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.042461033910512924, | |
| "eval_runtime": 17.2294, | |
| "eval_samples_per_second": 15.381, | |
| "eval_steps_per_second": 1.973, | |
| "step": 876 | |
| }, | |
| { | |
| "em_boolean": 0.8823529411764706, | |
| "em_chat": 0.07027027027027027, | |
| "em_extraction": 0.4, | |
| "em_qa": 0.6, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 5.0, | |
| "mean_em": 0.16226415094339622, | |
| "mean_similarity": 0.7417481038367973, | |
| "mean_word_count_diff": 10.89811320754717, | |
| "similarity_boolean": 0.9421353953726151, | |
| "similarity_chat": 0.6902955391922513, | |
| "similarity_extraction": 0.7581887672344844, | |
| "similarity_qa": 0.9694860418637593, | |
| "similarity_summarization": 0.795685844288932, | |
| "similarity_unanswerable": 0.840320247411728, | |
| "word_count_diff_boolean": 0.0, | |
| "word_count_diff_chat": 10.04864864864865, | |
| "word_count_diff_extraction": 2.066666666666667, | |
| "word_count_diff_qa": 2.3333333333333335, | |
| "word_count_diff_summarization": 50.111111111111114, | |
| "word_count_diff_unanswerable": 4.066666666666666 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.2575681209564209, | |
| "learning_rate": 2.17227572135781e-05, | |
| "loss": 0.0252, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 0.043407145887613297, | |
| "eval_runtime": 17.2466, | |
| "eval_samples_per_second": 15.365, | |
| "eval_steps_per_second": 1.971, | |
| "step": 1095 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2190, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.006031210545152e+16, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |