{ "best_metric": 9.109885215759277, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.017778172848285517, "eval_steps": 25, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007111269139314207, "grad_norm": 4.551875591278076, "learning_rate": 3.3333333333333335e-05, "loss": 10.2843, "step": 1 }, { "epoch": 0.0007111269139314207, "eval_loss": 10.709673881530762, "eval_runtime": 0.4786, "eval_samples_per_second": 104.474, "eval_steps_per_second": 27.163, "step": 1 }, { "epoch": 0.0014222538278628415, "grad_norm": 4.867107391357422, "learning_rate": 6.666666666666667e-05, "loss": 10.4354, "step": 2 }, { "epoch": 0.002133380741794262, "grad_norm": 5.008926868438721, "learning_rate": 0.0001, "loss": 10.5558, "step": 3 }, { "epoch": 0.002844507655725683, "grad_norm": 4.5886006355285645, "learning_rate": 9.997376600647783e-05, "loss": 10.7391, "step": 4 }, { "epoch": 0.0035556345696571034, "grad_norm": 4.7824931144714355, "learning_rate": 9.989509461357426e-05, "loss": 10.5872, "step": 5 }, { "epoch": 0.004266761483588524, "grad_norm": 4.664742946624756, "learning_rate": 9.976407754861426e-05, "loss": 10.5323, "step": 6 }, { "epoch": 0.004977888397519945, "grad_norm": 4.836978912353516, "learning_rate": 9.958086757163489e-05, "loss": 10.5264, "step": 7 }, { "epoch": 0.005689015311451366, "grad_norm": 4.371374607086182, "learning_rate": 9.934567829727386e-05, "loss": 10.2373, "step": 8 }, { "epoch": 0.006400142225382786, "grad_norm": 4.762342929840088, "learning_rate": 9.905878394570453e-05, "loss": 10.4186, "step": 9 }, { "epoch": 0.007111269139314207, "grad_norm": 4.17842435836792, "learning_rate": 9.872051902290737e-05, "loss": 10.331, "step": 10 }, { "epoch": 0.007822396053245627, "grad_norm": 4.5304059982299805, "learning_rate": 9.833127793065098e-05, "loss": 10.4491, "step": 11 }, { "epoch": 0.008533522967177049, "grad_norm": 4.463905334472656, "learning_rate": 9.789151450663723e-05, "loss": 10.2205, "step": 12 }, { "epoch": 0.00924464988110847, "grad_norm": 3.405884027481079, "learning_rate": 9.740174149534693e-05, "loss": 9.3648, "step": 13 }, { "epoch": 0.00995577679503989, "grad_norm": 3.4769484996795654, "learning_rate": 9.686252995020249e-05, "loss": 9.4946, "step": 14 }, { "epoch": 0.01066690370897131, "grad_norm": 3.3445122241973877, "learning_rate": 9.627450856774539e-05, "loss": 9.4722, "step": 15 }, { "epoch": 0.011378030622902732, "grad_norm": 3.351897716522217, "learning_rate": 9.563836295460398e-05, "loss": 9.4804, "step": 16 }, { "epoch": 0.012089157536834151, "grad_norm": 3.094649314880371, "learning_rate": 9.495483482810688e-05, "loss": 9.5855, "step": 17 }, { "epoch": 0.012800284450765573, "grad_norm": 2.8600270748138428, "learning_rate": 9.422472115147382e-05, "loss": 9.812, "step": 18 }, { "epoch": 0.013511411364696994, "grad_norm": 2.8953499794006348, "learning_rate": 9.3448873204592e-05, "loss": 9.3379, "step": 19 }, { "epoch": 0.014222538278628414, "grad_norm": 2.8371353149414062, "learning_rate": 9.2628195591462e-05, "loss": 9.3945, "step": 20 }, { "epoch": 0.014933665192559835, "grad_norm": 2.5929179191589355, "learning_rate": 9.176364518546989e-05, "loss": 9.698, "step": 21 }, { "epoch": 0.015644792106491254, "grad_norm": 2.513587474822998, "learning_rate": 9.08562300137157e-05, "loss": 9.4719, "step": 22 }, { "epoch": 0.016355919020422677, "grad_norm": 2.3741440773010254, "learning_rate": 8.990700808169889e-05, "loss": 9.5754, "step": 23 }, { "epoch": 0.017067045934354097, "grad_norm": 2.4249138832092285, "learning_rate": 8.891708613973126e-05, "loss": 9.6962, "step": 24 }, { "epoch": 0.017778172848285517, "grad_norm": 2.4952895641326904, "learning_rate": 8.788761839251559e-05, "loss": 9.6702, "step": 25 }, { "epoch": 0.017778172848285517, "eval_loss": 9.109885215759277, "eval_runtime": 0.4854, "eval_samples_per_second": 103.018, "eval_steps_per_second": 26.785, "step": 25 } ], "logging_steps": 1, "max_steps": 95, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 116006898892800.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }