{ "entropy": 1.2591145833333333, "epoch": 1.0, "mean_token_accuracy": 0.9201805492242178, "num_tokens": 416460333.0, "total_flos": 665167385886720.0, "train_loss": 0.7855377271239742, "train_runtime": 16972.5987, "train_samples": 993477, "train_samples_per_second": 24.303, "train_steps_per_second": 0.19, "weight_norm": 258.9971042309161 }