| { | |
| "best_metric": 0.8977844146136806, | |
| "best_model_checkpoint": "../../checkpoint/unlearn/nlvr2/vilt/salun/6.0/42/checkpoint-13500", | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 13500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.18518518518518517, | |
| "grad_norm": 3.2584547996520996, | |
| "learning_rate": 9.62962962962963e-06, | |
| "loss": 0.3282, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.37037037037037035, | |
| "grad_norm": 4.812547206878662, | |
| "learning_rate": 9.25925925925926e-06, | |
| "loss": 0.3197, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5555555555555556, | |
| "grad_norm": 4.050705909729004, | |
| "learning_rate": 8.888888888888888e-06, | |
| "loss": 0.3219, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.7407407407407407, | |
| "grad_norm": 3.562553882598877, | |
| "learning_rate": 8.518518518518519e-06, | |
| "loss": 0.3194, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.9259259259259259, | |
| "grad_norm": 5.053567409515381, | |
| "learning_rate": 8.148148148148148e-06, | |
| "loss": 0.3079, | |
| "step": 2500 | |
| }, | |
| { | |
| "df_accuracy": 0.854215003866976, | |
| "dt_accuracy": 0.7513606416499571, | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.7513606416499571, | |
| "eval_loss": 0.5662743449211121, | |
| "eval_runtime": 113.7346, | |
| "eval_samples_per_second": 61.389, | |
| "eval_steps_per_second": 0.967, | |
| "eval_unlearn_overall_accuracy": 0.6449160678015313, | |
| "step": 2700, | |
| "unlearn_overall_accuracy": 0.6449160678015313, | |
| "unlearn_time": null | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 1.7998104095458984, | |
| "learning_rate": 7.77777777777778e-06, | |
| "loss": 0.2708, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.2962962962962963, | |
| "grad_norm": 2.322847366333008, | |
| "learning_rate": 7.4074074074074075e-06, | |
| "loss": 0.2454, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.4814814814814814, | |
| "grad_norm": 2.8383140563964844, | |
| "learning_rate": 7.0370370370370375e-06, | |
| "loss": 0.2361, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 2.5046894550323486, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.2486, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.8518518518518519, | |
| "grad_norm": 4.192043781280518, | |
| "learning_rate": 6.296296296296297e-06, | |
| "loss": 0.2432, | |
| "step": 5000 | |
| }, | |
| { | |
| "df_accuracy": 0.7016627996906419, | |
| "dt_accuracy": 0.7433400171870524, | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.7433400171870524, | |
| "eval_loss": 0.6402536630630493, | |
| "eval_runtime": 108.1015, | |
| "eval_samples_per_second": 64.587, | |
| "eval_steps_per_second": 1.018, | |
| "eval_unlearn_overall_accuracy": 0.7676276016043365, | |
| "step": 5400, | |
| "unlearn_overall_accuracy": 0.7676276016043365, | |
| "unlearn_time": null | |
| }, | |
| { | |
| "epoch": 2.037037037037037, | |
| "grad_norm": 4.436009407043457, | |
| "learning_rate": 5.925925925925926e-06, | |
| "loss": 0.2304, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 3.5263144969940186, | |
| "learning_rate": 5.555555555555557e-06, | |
| "loss": 0.1807, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.4074074074074074, | |
| "grad_norm": 3.2321693897247314, | |
| "learning_rate": 5.185185185185185e-06, | |
| "loss": 0.188, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.5925925925925926, | |
| "grad_norm": 3.6127853393554688, | |
| "learning_rate": 4.814814814814815e-06, | |
| "loss": 0.1931, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.7777777777777777, | |
| "grad_norm": 4.856908798217773, | |
| "learning_rate": 4.444444444444444e-06, | |
| "loss": 0.1923, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.962962962962963, | |
| "grad_norm": 8.728678703308105, | |
| "learning_rate": 4.074074074074074e-06, | |
| "loss": 0.1905, | |
| "step": 8000 | |
| }, | |
| { | |
| "df_accuracy": 0.5862335653518949, | |
| "dt_accuracy": 0.7396161558292753, | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.7396161558292753, | |
| "eval_loss": 0.7980125546455383, | |
| "eval_runtime": 102.947, | |
| "eval_samples_per_second": 67.821, | |
| "eval_steps_per_second": 1.069, | |
| "eval_unlearn_overall_accuracy": 0.845758066951303, | |
| "step": 8100, | |
| "unlearn_overall_accuracy": 0.845758066951303, | |
| "unlearn_time": null | |
| }, | |
| { | |
| "epoch": 3.148148148148148, | |
| "grad_norm": 0.8636759519577026, | |
| "learning_rate": 3.7037037037037037e-06, | |
| "loss": 0.1548, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 4.8238606452941895, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.1483, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 3.5185185185185186, | |
| "grad_norm": 3.636707067489624, | |
| "learning_rate": 2.962962962962963e-06, | |
| "loss": 0.1477, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 3.7037037037037037, | |
| "grad_norm": 0.8585140705108643, | |
| "learning_rate": 2.5925925925925925e-06, | |
| "loss": 0.1434, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 3.888888888888889, | |
| "grad_norm": 3.986074209213257, | |
| "learning_rate": 2.222222222222222e-06, | |
| "loss": 0.1569, | |
| "step": 10500 | |
| }, | |
| { | |
| "df_accuracy": 0.5175947409126064, | |
| "dt_accuracy": 0.7354626181609853, | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.7354626181609853, | |
| "eval_loss": 0.8833993673324585, | |
| "eval_runtime": 109.8215, | |
| "eval_samples_per_second": 63.576, | |
| "eval_steps_per_second": 1.002, | |
| "eval_unlearn_overall_accuracy": 0.8863248174250627, | |
| "step": 10800, | |
| "unlearn_overall_accuracy": 0.8863248174250627, | |
| "unlearn_time": null | |
| }, | |
| { | |
| "epoch": 4.074074074074074, | |
| "grad_norm": 2.4611103534698486, | |
| "learning_rate": 1.8518518518518519e-06, | |
| "loss": 0.1421, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 4.2592592592592595, | |
| "grad_norm": 3.0824742317199707, | |
| "learning_rate": 1.4814814814814815e-06, | |
| "loss": 0.1283, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 4.444444444444445, | |
| "grad_norm": 2.9113352298736572, | |
| "learning_rate": 1.111111111111111e-06, | |
| "loss": 0.1198, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 4.62962962962963, | |
| "grad_norm": 0.5261613130569458, | |
| "learning_rate": 7.407407407407407e-07, | |
| "loss": 0.1196, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 4.814814814814815, | |
| "grad_norm": 4.700664520263672, | |
| "learning_rate": 3.7037037037037036e-07, | |
| "loss": 0.1336, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 4.13147497177124, | |
| "learning_rate": 0.0, | |
| "loss": 0.1273, | |
| "step": 13500 | |
| }, | |
| { | |
| "df_accuracy": 0.49593967517401394, | |
| "dt_accuracy": 0.7327413348610713, | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.7327413348610713, | |
| "eval_loss": 0.9442616701126099, | |
| "eval_runtime": 113.4763, | |
| "eval_samples_per_second": 61.528, | |
| "eval_steps_per_second": 0.969, | |
| "eval_unlearn_overall_accuracy": 0.8977844146136806, | |
| "step": 13500, | |
| "unlearn_overall_accuracy": 0.8977844146136806, | |
| "unlearn_time": null | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 13500, | |
| "total_flos": 8453448251631240.0, | |
| "train_loss": 0.20511870829264323, | |
| "train_runtime": 11255.3092, | |
| "train_samples_per_second": 38.37, | |
| "train_steps_per_second": 1.199 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 13500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8453448251631240.0, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |