{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9832402234636871,
  "eval_steps": 50,
  "global_step": 44,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0223463687150838,
      "grad_norm": 2577.74755859375,
      "learning_rate": 2e-05,
      "loss": 50.7069,
      "step": 1
    },
    {
      "epoch": 0.0223463687150838,
      "eval_loss": 3.2835566997528076,
      "eval_runtime": 2.3872,
      "eval_samples_per_second": 63.255,
      "eval_steps_per_second": 15.918,
      "step": 1
    },
    {
      "epoch": 0.0446927374301676,
      "grad_norm": 6546.3173828125,
      "learning_rate": 4e-05,
      "loss": 52.4505,
      "step": 2
    },
    {
      "epoch": 0.0670391061452514,
      "grad_norm": 2879.14501953125,
      "learning_rate": 6e-05,
      "loss": 52.5919,
      "step": 3
    },
    {
      "epoch": 0.0893854748603352,
      "grad_norm": 2090.73974609375,
      "learning_rate": 8e-05,
      "loss": 52.7518,
      "step": 4
    },
    {
      "epoch": 0.11173184357541899,
      "grad_norm": 1940.23193359375,
      "learning_rate": 0.0001,
      "loss": 51.2834,
      "step": 5
    },
    {
      "epoch": 0.1340782122905028,
      "grad_norm": 2605.8798828125,
      "learning_rate": 0.00012,
      "loss": 52.4079,
      "step": 6
    },
    {
      "epoch": 0.1564245810055866,
      "grad_norm": 6682.8388671875,
      "learning_rate": 0.00014,
      "loss": 51.4155,
      "step": 7
    },
    {
      "epoch": 0.1787709497206704,
      "grad_norm": 2514.529296875,
      "learning_rate": 0.00016,
      "loss": 50.5981,
      "step": 8
    },
    {
      "epoch": 0.2011173184357542,
      "grad_norm": 2855.07421875,
      "learning_rate": 0.00018,
      "loss": 49.7885,
      "step": 9
    },
    {
      "epoch": 0.22346368715083798,
      "grad_norm": 2247.414794921875,
      "learning_rate": 0.0002,
      "loss": 51.4268,
      "step": 10
    },
    {
      "epoch": 0.24581005586592178,
      "grad_norm": 3424.6533203125,
      "learning_rate": 0.00019957341762950344,
      "loss": 51.0585,
      "step": 11
    },
    {
      "epoch": 0.2681564245810056,
      "grad_norm": 3296.65185546875,
      "learning_rate": 0.0001982973099683902,
      "loss": 50.0642,
      "step": 12
    },
    {
      "epoch": 0.2905027932960894,
      "grad_norm": 1766.2926025390625,
      "learning_rate": 0.00019618256431728194,
      "loss": 49.1072,
      "step": 13
    },
    {
      "epoch": 0.3128491620111732,
      "grad_norm": 2249.23876953125,
      "learning_rate": 0.00019324722294043558,
      "loss": 49.0022,
      "step": 14
    },
    {
      "epoch": 0.33519553072625696,
      "grad_norm": 2969.0947265625,
      "learning_rate": 0.00018951632913550626,
      "loss": 48.8406,
      "step": 15
    },
    {
      "epoch": 0.3575418994413408,
      "grad_norm": 2778.11328125,
      "learning_rate": 0.00018502171357296144,
      "loss": 47.0118,
      "step": 16
    },
    {
      "epoch": 0.37988826815642457,
      "grad_norm": 3201.85693359375,
      "learning_rate": 0.000179801722728024,
      "loss": 47.7868,
      "step": 17
    },
    {
      "epoch": 0.4022346368715084,
      "grad_norm": 4596.56591796875,
      "learning_rate": 0.00017390089172206592,
      "loss": 46.4997,
      "step": 18
    },
    {
      "epoch": 0.4245810055865922,
      "grad_norm": 3318.73828125,
      "learning_rate": 0.00016736956436465573,
      "loss": 47.2409,
      "step": 19
    },
    {
      "epoch": 0.44692737430167595,
      "grad_norm": 2099.513671875,
      "learning_rate": 0.00016026346363792567,
      "loss": 48.2933,
      "step": 20
    },
    {
      "epoch": 0.4692737430167598,
      "grad_norm": 4453.8125,
      "learning_rate": 0.0001526432162877356,
      "loss": 48.2358,
      "step": 21
    },
    {
      "epoch": 0.49162011173184356,
      "grad_norm": 3568.524658203125,
      "learning_rate": 0.00014457383557765386,
      "loss": 46.9575,
      "step": 22
    },
    {
      "epoch": 0.5139664804469274,
      "grad_norm": 4010.7314453125,
      "learning_rate": 0.00013612416661871533,
      "loss": 46.9159,
      "step": 23
    },
    {
      "epoch": 0.5363128491620112,
      "grad_norm": 2880.6123046875,
      "learning_rate": 0.0001273662990072083,
      "loss": 45.0099,
      "step": 24
    },
    {
      "epoch": 0.5586592178770949,
      "grad_norm": 3565.14404296875,
      "learning_rate": 0.00011837495178165706,
      "loss": 46.3621,
      "step": 25
    },
    {
      "epoch": 0.5810055865921788,
      "grad_norm": 3099.607177734375,
      "learning_rate": 0.00010922683594633021,
      "loss": 45.2878,
      "step": 26
    },
    {
      "epoch": 0.6033519553072626,
      "grad_norm": 2318.313720703125,
      "learning_rate": 0.0001,
      "loss": 46.6766,
      "step": 27
    },
    {
      "epoch": 0.6256983240223464,
      "grad_norm": 3364.091552734375,
      "learning_rate": 9.077316405366981e-05,
      "loss": 45.6603,
      "step": 28
    },
    {
      "epoch": 0.6480446927374302,
      "grad_norm": 3509.09619140625,
      "learning_rate": 8.162504821834295e-05,
      "loss": 46.9604,
      "step": 29
    },
    {
      "epoch": 0.6703910614525139,
      "grad_norm": 3948.4111328125,
      "learning_rate": 7.263370099279172e-05,
      "loss": 45.7947,
      "step": 30
    },
    {
      "epoch": 0.6927374301675978,
      "grad_norm": 3185.07861328125,
      "learning_rate": 6.387583338128471e-05,
      "loss": 47.6725,
      "step": 31
    },
    {
      "epoch": 0.7150837988826816,
      "grad_norm": 3412.48486328125,
      "learning_rate": 5.542616442234618e-05,
      "loss": 45.6106,
      "step": 32
    },
    {
      "epoch": 0.7374301675977654,
      "grad_norm": 2618.543701171875,
      "learning_rate": 4.735678371226441e-05,
      "loss": 45.2124,
      "step": 33
    },
    {
      "epoch": 0.7597765363128491,
      "grad_norm": 3468.01318359375,
      "learning_rate": 3.973653636207437e-05,
      "loss": 45.141,
      "step": 34
    },
    {
      "epoch": 0.7821229050279329,
      "grad_norm": 2938.23681640625,
      "learning_rate": 3.263043563534428e-05,
      "loss": 42.9445,
      "step": 35
    },
    {
      "epoch": 0.8044692737430168,
      "grad_norm": 4326.49169921875,
      "learning_rate": 2.6099108277934103e-05,
      "loss": 47.6381,
      "step": 36
    },
    {
      "epoch": 0.8268156424581006,
      "grad_norm": 4710.1123046875,
      "learning_rate": 2.0198277271976052e-05,
      "loss": 45.6724,
      "step": 37
    },
    {
      "epoch": 0.8491620111731844,
      "grad_norm": 2800.705322265625,
      "learning_rate": 1.4978286427038601e-05,
      "loss": 45.8585,
      "step": 38
    },
    {
      "epoch": 0.8715083798882681,
      "grad_norm": 4585.00244140625,
      "learning_rate": 1.0483670864493778e-05,
      "loss": 46.6726,
      "step": 39
    },
    {
      "epoch": 0.8938547486033519,
      "grad_norm": 4168.4462890625,
      "learning_rate": 6.75277705956443e-06,
      "loss": 44.954,
      "step": 40
    },
    {
      "epoch": 0.9162011173184358,
      "grad_norm": 3351.935791015625,
      "learning_rate": 3.817435682718096e-06,
      "loss": 46.8225,
      "step": 41
    },
    {
      "epoch": 0.9385474860335196,
      "grad_norm": 3822.043212890625,
      "learning_rate": 1.7026900316098215e-06,
      "loss": 46.403,
      "step": 42
    },
    {
      "epoch": 0.9608938547486033,
      "grad_norm": 5116.40283203125,
      "learning_rate": 4.2658237049655323e-07,
      "loss": 46.336,
      "step": 43
    },
    {
      "epoch": 0.9832402234636871,
      "grad_norm": 4652.48291015625,
      "learning_rate": 0.0,
      "loss": 48.1853,
      "step": 44
    }
  ],
  "logging_steps": 1,
  "max_steps": 44,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 30,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2180833263747072.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}