{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 918,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0010893246187363835,
"grad_norm": 22.107685089111328,
"learning_rate": 0.0002,
"loss": 11.1826,
"step": 1
},
{
"epoch": 0.002178649237472767,
"grad_norm": 17.847023010253906,
"learning_rate": 0.00019978213507625275,
"loss": 10.5937,
"step": 2
},
{
"epoch": 0.0032679738562091504,
"grad_norm": 13.80118179321289,
"learning_rate": 0.00019956427015250546,
"loss": 8.5333,
"step": 3
},
{
"epoch": 0.004357298474945534,
"grad_norm": 8.288287162780762,
"learning_rate": 0.0001993464052287582,
"loss": 7.8234,
"step": 4
},
{
"epoch": 0.0054466230936819175,
"grad_norm": 6.827712059020996,
"learning_rate": 0.0001991285403050109,
"loss": 7.6672,
"step": 5
},
{
"epoch": 0.006535947712418301,
"grad_norm": 4.654490947723389,
"learning_rate": 0.00019891067538126362,
"loss": 6.8996,
"step": 6
},
{
"epoch": 0.007625272331154684,
"grad_norm": 5.752150535583496,
"learning_rate": 0.00019869281045751635,
"loss": 6.6784,
"step": 7
},
{
"epoch": 0.008714596949891068,
"grad_norm": 5.701947212219238,
"learning_rate": 0.00019847494553376906,
"loss": 6.4123,
"step": 8
},
{
"epoch": 0.00980392156862745,
"grad_norm": 3.7915384769439697,
"learning_rate": 0.0001982570806100218,
"loss": 6.2328,
"step": 9
},
{
"epoch": 0.010893246187363835,
"grad_norm": 4.150002479553223,
"learning_rate": 0.00019803921568627454,
"loss": 5.7886,
"step": 10
},
{
"epoch": 0.011982570806100218,
"grad_norm": 3.281799077987671,
"learning_rate": 0.00019782135076252725,
"loss": 6.0742,
"step": 11
},
{
"epoch": 0.013071895424836602,
"grad_norm": 4.8937907218933105,
"learning_rate": 0.00019760348583877996,
"loss": 5.9607,
"step": 12
},
{
"epoch": 0.014161220043572984,
"grad_norm": 5.1620306968688965,
"learning_rate": 0.0001973856209150327,
"loss": 6.1127,
"step": 13
},
{
"epoch": 0.015250544662309368,
"grad_norm": 4.81100606918335,
"learning_rate": 0.0001971677559912854,
"loss": 5.6241,
"step": 14
},
{
"epoch": 0.016339869281045753,
"grad_norm": 3.468261480331421,
"learning_rate": 0.00019694989106753814,
"loss": 5.4747,
"step": 15
},
{
"epoch": 0.017429193899782137,
"grad_norm": 3.9843785762786865,
"learning_rate": 0.00019673202614379085,
"loss": 5.7857,
"step": 16
},
{
"epoch": 0.018518518518518517,
"grad_norm": 4.957090854644775,
"learning_rate": 0.0001965141612200436,
"loss": 5.7103,
"step": 17
},
{
"epoch": 0.0196078431372549,
"grad_norm": 3.296520233154297,
"learning_rate": 0.0001962962962962963,
"loss": 5.5697,
"step": 18
},
{
"epoch": 0.020697167755991286,
"grad_norm": 5.7215094566345215,
"learning_rate": 0.000196078431372549,
"loss": 5.5114,
"step": 19
},
{
"epoch": 0.02178649237472767,
"grad_norm": 4.517280101776123,
"learning_rate": 0.00019586056644880175,
"loss": 5.2544,
"step": 20
},
{
"epoch": 0.02287581699346405,
"grad_norm": 4.512917995452881,
"learning_rate": 0.00019564270152505449,
"loss": 5.3694,
"step": 21
},
{
"epoch": 0.023965141612200435,
"grad_norm": 4.867700576782227,
"learning_rate": 0.0001954248366013072,
"loss": 5.3096,
"step": 22
},
{
"epoch": 0.02505446623093682,
"grad_norm": 4.500598907470703,
"learning_rate": 0.00019520697167755993,
"loss": 5.388,
"step": 23
},
{
"epoch": 0.026143790849673203,
"grad_norm": 4.624905586242676,
"learning_rate": 0.00019498910675381264,
"loss": 5.6716,
"step": 24
},
{
"epoch": 0.027233115468409588,
"grad_norm": 4.971169471740723,
"learning_rate": 0.00019477124183006535,
"loss": 4.8913,
"step": 25
},
{
"epoch": 0.02832244008714597,
"grad_norm": 4.383894920349121,
"learning_rate": 0.0001945533769063181,
"loss": 4.7567,
"step": 26
},
{
"epoch": 0.029411764705882353,
"grad_norm": 5.65884256362915,
"learning_rate": 0.0001943355119825708,
"loss": 5.5943,
"step": 27
},
{
"epoch": 0.030501089324618737,
"grad_norm": 4.529474258422852,
"learning_rate": 0.00019411764705882354,
"loss": 5.2028,
"step": 28
},
{
"epoch": 0.03159041394335512,
"grad_norm": 5.929969310760498,
"learning_rate": 0.00019389978213507628,
"loss": 5.1979,
"step": 29
},
{
"epoch": 0.032679738562091505,
"grad_norm": 4.275257110595703,
"learning_rate": 0.000193681917211329,
"loss": 5.0697,
"step": 30
},
{
"epoch": 0.03376906318082789,
"grad_norm": 4.4108991622924805,
"learning_rate": 0.0001934640522875817,
"loss": 5.5351,
"step": 31
},
{
"epoch": 0.034858387799564274,
"grad_norm": 3.474432945251465,
"learning_rate": 0.00019324618736383443,
"loss": 5.4141,
"step": 32
},
{
"epoch": 0.03594771241830065,
"grad_norm": 3.6491479873657227,
"learning_rate": 0.00019302832244008715,
"loss": 5.4325,
"step": 33
},
{
"epoch": 0.037037037037037035,
"grad_norm": 3.54929256439209,
"learning_rate": 0.00019281045751633988,
"loss": 5.4501,
"step": 34
},
{
"epoch": 0.03812636165577342,
"grad_norm": 3.0727927684783936,
"learning_rate": 0.0001925925925925926,
"loss": 5.4118,
"step": 35
},
{
"epoch": 0.0392156862745098,
"grad_norm": 3.3980045318603516,
"learning_rate": 0.00019237472766884533,
"loss": 5.4405,
"step": 36
},
{
"epoch": 0.04030501089324619,
"grad_norm": 4.579554080963135,
"learning_rate": 0.00019215686274509807,
"loss": 5.1187,
"step": 37
},
{
"epoch": 0.04139433551198257,
"grad_norm": 3.4333720207214355,
"learning_rate": 0.00019193899782135075,
"loss": 5.1245,
"step": 38
},
{
"epoch": 0.042483660130718956,
"grad_norm": 3.6624631881713867,
"learning_rate": 0.0001917211328976035,
"loss": 5.2321,
"step": 39
},
{
"epoch": 0.04357298474945534,
"grad_norm": 6.108523368835449,
"learning_rate": 0.00019150326797385623,
"loss": 5.2154,
"step": 40
},
{
"epoch": 0.044662309368191724,
"grad_norm": 3.8794407844543457,
"learning_rate": 0.00019128540305010894,
"loss": 5.1781,
"step": 41
},
{
"epoch": 0.0457516339869281,
"grad_norm": 3.7800469398498535,
"learning_rate": 0.00019106753812636167,
"loss": 5.1992,
"step": 42
},
{
"epoch": 0.046840958605664486,
"grad_norm": 3.764031410217285,
"learning_rate": 0.0001908496732026144,
"loss": 5.0885,
"step": 43
},
{
"epoch": 0.04793028322440087,
"grad_norm": 4.050832271575928,
"learning_rate": 0.00019063180827886712,
"loss": 5.1076,
"step": 44
},
{
"epoch": 0.049019607843137254,
"grad_norm": 5.691143989562988,
"learning_rate": 0.00019041394335511983,
"loss": 5.1453,
"step": 45
},
{
"epoch": 0.05010893246187364,
"grad_norm": 4.449873924255371,
"learning_rate": 0.00019019607843137254,
"loss": 5.5097,
"step": 46
},
{
"epoch": 0.05119825708061002,
"grad_norm": 2.906421661376953,
"learning_rate": 0.00018997821350762528,
"loss": 5.2703,
"step": 47
},
{
"epoch": 0.05228758169934641,
"grad_norm": 4.265420913696289,
"learning_rate": 0.00018976034858387802,
"loss": 4.9613,
"step": 48
},
{
"epoch": 0.05337690631808279,
"grad_norm": 3.084545612335205,
"learning_rate": 0.00018954248366013073,
"loss": 5.2874,
"step": 49
},
{
"epoch": 0.054466230936819175,
"grad_norm": 4.474928855895996,
"learning_rate": 0.00018932461873638346,
"loss": 4.9916,
"step": 50
},
{
"epoch": 0.05555555555555555,
"grad_norm": 3.9959335327148438,
"learning_rate": 0.00018910675381263617,
"loss": 4.9839,
"step": 51
},
{
"epoch": 0.05664488017429194,
"grad_norm": 6.50009298324585,
"learning_rate": 0.00018888888888888888,
"loss": 5.0163,
"step": 52
},
{
"epoch": 0.05773420479302832,
"grad_norm": 4.5923566818237305,
"learning_rate": 0.00018867102396514162,
"loss": 5.0997,
"step": 53
},
{
"epoch": 0.058823529411764705,
"grad_norm": 3.1194558143615723,
"learning_rate": 0.00018845315904139433,
"loss": 5.0717,
"step": 54
},
{
"epoch": 0.05991285403050109,
"grad_norm": 4.631853103637695,
"learning_rate": 0.00018823529411764707,
"loss": 5.103,
"step": 55
},
{
"epoch": 0.06100217864923747,
"grad_norm": 5.220804214477539,
"learning_rate": 0.0001880174291938998,
"loss": 5.1003,
"step": 56
},
{
"epoch": 0.06209150326797386,
"grad_norm": 5.17025089263916,
"learning_rate": 0.00018779956427015252,
"loss": 5.188,
"step": 57
},
{
"epoch": 0.06318082788671024,
"grad_norm": 3.0653014183044434,
"learning_rate": 0.00018758169934640523,
"loss": 5.0984,
"step": 58
},
{
"epoch": 0.06427015250544663,
"grad_norm": 4.2605156898498535,
"learning_rate": 0.00018736383442265796,
"loss": 5.0787,
"step": 59
},
{
"epoch": 0.06535947712418301,
"grad_norm": 3.5389158725738525,
"learning_rate": 0.00018714596949891068,
"loss": 5.0133,
"step": 60
},
{
"epoch": 0.0664488017429194,
"grad_norm": 4.469093322753906,
"learning_rate": 0.0001869281045751634,
"loss": 5.0993,
"step": 61
},
{
"epoch": 0.06753812636165578,
"grad_norm": 5.053121089935303,
"learning_rate": 0.00018671023965141615,
"loss": 4.9451,
"step": 62
},
{
"epoch": 0.06862745098039216,
"grad_norm": 3.385946035385132,
"learning_rate": 0.00018649237472766886,
"loss": 5.1747,
"step": 63
},
{
"epoch": 0.06971677559912855,
"grad_norm": 3.2187914848327637,
"learning_rate": 0.00018627450980392157,
"loss": 4.9015,
"step": 64
},
{
"epoch": 0.07080610021786492,
"grad_norm": 4.75279426574707,
"learning_rate": 0.00018605664488017428,
"loss": 4.5069,
"step": 65
},
{
"epoch": 0.0718954248366013,
"grad_norm": 4.536505699157715,
"learning_rate": 0.00018583877995642702,
"loss": 4.8886,
"step": 66
},
{
"epoch": 0.07298474945533769,
"grad_norm": 4.806969165802002,
"learning_rate": 0.00018562091503267976,
"loss": 5.0439,
"step": 67
},
{
"epoch": 0.07407407407407407,
"grad_norm": 5.019186496734619,
"learning_rate": 0.00018540305010893247,
"loss": 4.9022,
"step": 68
},
{
"epoch": 0.07516339869281045,
"grad_norm": 5.320457458496094,
"learning_rate": 0.0001851851851851852,
"loss": 5.0808,
"step": 69
},
{
"epoch": 0.07625272331154684,
"grad_norm": 3.816904306411743,
"learning_rate": 0.0001849673202614379,
"loss": 5.3904,
"step": 70
},
{
"epoch": 0.07734204793028322,
"grad_norm": 4.3827667236328125,
"learning_rate": 0.00018474945533769062,
"loss": 5.0717,
"step": 71
},
{
"epoch": 0.0784313725490196,
"grad_norm": 3.4211671352386475,
"learning_rate": 0.00018453159041394336,
"loss": 5.2476,
"step": 72
},
{
"epoch": 0.07952069716775599,
"grad_norm": 3.2783992290496826,
"learning_rate": 0.00018431372549019607,
"loss": 5.1173,
"step": 73
},
{
"epoch": 0.08061002178649238,
"grad_norm": 4.760554790496826,
"learning_rate": 0.0001840958605664488,
"loss": 4.8323,
"step": 74
},
{
"epoch": 0.08169934640522876,
"grad_norm": 5.011927604675293,
"learning_rate": 0.00018387799564270155,
"loss": 4.986,
"step": 75
},
{
"epoch": 0.08278867102396514,
"grad_norm": 4.87539005279541,
"learning_rate": 0.00018366013071895426,
"loss": 5.0816,
"step": 76
},
{
"epoch": 0.08387799564270153,
"grad_norm": 4.076656818389893,
"learning_rate": 0.00018344226579520697,
"loss": 5.0201,
"step": 77
},
{
"epoch": 0.08496732026143791,
"grad_norm": 4.8921613693237305,
"learning_rate": 0.0001832244008714597,
"loss": 4.8971,
"step": 78
},
{
"epoch": 0.0860566448801743,
"grad_norm": 4.011205196380615,
"learning_rate": 0.00018300653594771241,
"loss": 5.1548,
"step": 79
},
{
"epoch": 0.08714596949891068,
"grad_norm": 4.264740467071533,
"learning_rate": 0.00018278867102396515,
"loss": 5.0265,
"step": 80
},
{
"epoch": 0.08823529411764706,
"grad_norm": 4.830214977264404,
"learning_rate": 0.0001825708061002179,
"loss": 4.9071,
"step": 81
},
{
"epoch": 0.08932461873638345,
"grad_norm": 4.2021942138671875,
"learning_rate": 0.0001823529411764706,
"loss": 4.8129,
"step": 82
},
{
"epoch": 0.09041394335511982,
"grad_norm": 3.46781587600708,
"learning_rate": 0.00018213507625272334,
"loss": 5.0446,
"step": 83
},
{
"epoch": 0.0915032679738562,
"grad_norm": 3.6687159538269043,
"learning_rate": 0.00018191721132897605,
"loss": 5.3979,
"step": 84
},
{
"epoch": 0.09259259259259259,
"grad_norm": 2.563159704208374,
"learning_rate": 0.00018169934640522876,
"loss": 5.2423,
"step": 85
},
{
"epoch": 0.09368191721132897,
"grad_norm": 4.911458492279053,
"learning_rate": 0.0001814814814814815,
"loss": 4.7199,
"step": 86
},
{
"epoch": 0.09477124183006536,
"grad_norm": 4.559939384460449,
"learning_rate": 0.0001812636165577342,
"loss": 5.2483,
"step": 87
},
{
"epoch": 0.09586056644880174,
"grad_norm": 4.162410259246826,
"learning_rate": 0.00018104575163398694,
"loss": 5.151,
"step": 88
},
{
"epoch": 0.09694989106753812,
"grad_norm": 3.32928466796875,
"learning_rate": 0.00018082788671023968,
"loss": 5.0597,
"step": 89
},
{
"epoch": 0.09803921568627451,
"grad_norm": 4.435458660125732,
"learning_rate": 0.0001806100217864924,
"loss": 5.3589,
"step": 90
},
{
"epoch": 0.09912854030501089,
"grad_norm": 3.7495155334472656,
"learning_rate": 0.0001803921568627451,
"loss": 5.0074,
"step": 91
},
{
"epoch": 0.10021786492374728,
"grad_norm": 3.2599384784698486,
"learning_rate": 0.0001801742919389978,
"loss": 5.0846,
"step": 92
},
{
"epoch": 0.10130718954248366,
"grad_norm": 2.7934536933898926,
"learning_rate": 0.00017995642701525055,
"loss": 5.388,
"step": 93
},
{
"epoch": 0.10239651416122005,
"grad_norm": 4.288060188293457,
"learning_rate": 0.00017973856209150329,
"loss": 5.1296,
"step": 94
},
{
"epoch": 0.10348583877995643,
"grad_norm": 3.235417366027832,
"learning_rate": 0.000179520697167756,
"loss": 5.011,
"step": 95
},
{
"epoch": 0.10457516339869281,
"grad_norm": 2.9654250144958496,
"learning_rate": 0.00017930283224400873,
"loss": 5.001,
"step": 96
},
{
"epoch": 0.1056644880174292,
"grad_norm": 2.7460479736328125,
"learning_rate": 0.00017908496732026144,
"loss": 5.0625,
"step": 97
},
{
"epoch": 0.10675381263616558,
"grad_norm": 3.7850425243377686,
"learning_rate": 0.00017886710239651415,
"loss": 4.9888,
"step": 98
},
{
"epoch": 0.10784313725490197,
"grad_norm": 2.826519250869751,
"learning_rate": 0.0001786492374727669,
"loss": 4.8845,
"step": 99
},
{
"epoch": 0.10893246187363835,
"grad_norm": 3.151165723800659,
"learning_rate": 0.00017843137254901963,
"loss": 5.0597,
"step": 100
},
{
"epoch": 0.11002178649237472,
"grad_norm": 3.682316303253174,
"learning_rate": 0.00017821350762527234,
"loss": 5.3154,
"step": 101
},
{
"epoch": 0.1111111111111111,
"grad_norm": 2.2441463470458984,
"learning_rate": 0.00017799564270152508,
"loss": 5.2023,
"step": 102
},
{
"epoch": 0.11220043572984749,
"grad_norm": 3.245762825012207,
"learning_rate": 0.00017777777777777779,
"loss": 5.1972,
"step": 103
},
{
"epoch": 0.11328976034858387,
"grad_norm": 3.5761213302612305,
"learning_rate": 0.0001775599128540305,
"loss": 5.2941,
"step": 104
},
{
"epoch": 0.11437908496732026,
"grad_norm": 3.046706199645996,
"learning_rate": 0.00017734204793028323,
"loss": 5.0266,
"step": 105
},
{
"epoch": 0.11546840958605664,
"grad_norm": 4.248337268829346,
"learning_rate": 0.00017712418300653594,
"loss": 5.3828,
"step": 106
},
{
"epoch": 0.11655773420479303,
"grad_norm": 3.2203943729400635,
"learning_rate": 0.00017690631808278868,
"loss": 4.7748,
"step": 107
},
{
"epoch": 0.11764705882352941,
"grad_norm": 3.3694331645965576,
"learning_rate": 0.00017668845315904142,
"loss": 5.0062,
"step": 108
},
{
"epoch": 0.1187363834422658,
"grad_norm": 3.9723663330078125,
"learning_rate": 0.00017647058823529413,
"loss": 5.151,
"step": 109
},
{
"epoch": 0.11982570806100218,
"grad_norm": 4.699850559234619,
"learning_rate": 0.00017625272331154684,
"loss": 5.3586,
"step": 110
},
{
"epoch": 0.12091503267973856,
"grad_norm": 4.248794078826904,
"learning_rate": 0.00017603485838779955,
"loss": 4.9772,
"step": 111
},
{
"epoch": 0.12200435729847495,
"grad_norm": 3.284792184829712,
"learning_rate": 0.0001758169934640523,
"loss": 5.0481,
"step": 112
},
{
"epoch": 0.12309368191721133,
"grad_norm": 3.2022602558135986,
"learning_rate": 0.00017559912854030502,
"loss": 4.8391,
"step": 113
},
{
"epoch": 0.12418300653594772,
"grad_norm": 3.369584560394287,
"learning_rate": 0.00017538126361655773,
"loss": 4.8921,
"step": 114
},
{
"epoch": 0.12527233115468409,
"grad_norm": 2.8831474781036377,
"learning_rate": 0.00017516339869281047,
"loss": 5.0684,
"step": 115
},
{
"epoch": 0.12636165577342048,
"grad_norm": 3.45534610748291,
"learning_rate": 0.0001749455337690632,
"loss": 4.9971,
"step": 116
},
{
"epoch": 0.12745098039215685,
"grad_norm": 4.895359039306641,
"learning_rate": 0.0001747276688453159,
"loss": 5.1069,
"step": 117
},
{
"epoch": 0.12854030501089325,
"grad_norm": 3.229856252670288,
"learning_rate": 0.00017450980392156863,
"loss": 5.032,
"step": 118
},
{
"epoch": 0.12962962962962962,
"grad_norm": 3.38332200050354,
"learning_rate": 0.00017429193899782137,
"loss": 4.9788,
"step": 119
},
{
"epoch": 0.13071895424836602,
"grad_norm": 4.4389967918396,
"learning_rate": 0.00017407407407407408,
"loss": 4.9471,
"step": 120
},
{
"epoch": 0.1318082788671024,
"grad_norm": 4.08867073059082,
"learning_rate": 0.00017385620915032682,
"loss": 4.9688,
"step": 121
},
{
"epoch": 0.1328976034858388,
"grad_norm": 3.1234259605407715,
"learning_rate": 0.00017363834422657953,
"loss": 5.275,
"step": 122
},
{
"epoch": 0.13398692810457516,
"grad_norm": 3.3533644676208496,
"learning_rate": 0.00017342047930283226,
"loss": 4.8402,
"step": 123
},
{
"epoch": 0.13507625272331156,
"grad_norm": 4.490350246429443,
"learning_rate": 0.00017320261437908497,
"loss": 4.8748,
"step": 124
},
{
"epoch": 0.13616557734204793,
"grad_norm": 3.3828914165496826,
"learning_rate": 0.00017298474945533768,
"loss": 4.9867,
"step": 125
},
{
"epoch": 0.13725490196078433,
"grad_norm": 2.948503017425537,
"learning_rate": 0.00017276688453159042,
"loss": 5.0548,
"step": 126
},
{
"epoch": 0.1383442265795207,
"grad_norm": 3.690495491027832,
"learning_rate": 0.00017254901960784316,
"loss": 4.46,
"step": 127
},
{
"epoch": 0.1394335511982571,
"grad_norm": 4.029051303863525,
"learning_rate": 0.00017233115468409587,
"loss": 5.2837,
"step": 128
},
{
"epoch": 0.14052287581699346,
"grad_norm": 4.606770992279053,
"learning_rate": 0.0001721132897603486,
"loss": 4.2575,
"step": 129
},
{
"epoch": 0.14161220043572983,
"grad_norm": 3.888659715652466,
"learning_rate": 0.00017189542483660132,
"loss": 4.9312,
"step": 130
},
{
"epoch": 0.14270152505446623,
"grad_norm": 3.3389322757720947,
"learning_rate": 0.00017167755991285403,
"loss": 4.7385,
"step": 131
},
{
"epoch": 0.1437908496732026,
"grad_norm": 4.276157379150391,
"learning_rate": 0.00017145969498910676,
"loss": 5.0385,
"step": 132
},
{
"epoch": 0.144880174291939,
"grad_norm": 2.9289348125457764,
"learning_rate": 0.00017124183006535947,
"loss": 5.1092,
"step": 133
},
{
"epoch": 0.14596949891067537,
"grad_norm": 2.7905993461608887,
"learning_rate": 0.0001710239651416122,
"loss": 5.0769,
"step": 134
},
{
"epoch": 0.14705882352941177,
"grad_norm": 2.67677640914917,
"learning_rate": 0.00017080610021786495,
"loss": 4.9621,
"step": 135
},
{
"epoch": 0.14814814814814814,
"grad_norm": 3.1127524375915527,
"learning_rate": 0.00017058823529411766,
"loss": 5.0922,
"step": 136
},
{
"epoch": 0.14923747276688454,
"grad_norm": 4.093358516693115,
"learning_rate": 0.00017037037037037037,
"loss": 4.8967,
"step": 137
},
{
"epoch": 0.1503267973856209,
"grad_norm": 4.0308427810668945,
"learning_rate": 0.0001701525054466231,
"loss": 4.9229,
"step": 138
},
{
"epoch": 0.1514161220043573,
"grad_norm": 3.8967127799987793,
"learning_rate": 0.00016993464052287582,
"loss": 4.9517,
"step": 139
},
{
"epoch": 0.15250544662309368,
"grad_norm": 3.387913942337036,
"learning_rate": 0.00016971677559912855,
"loss": 4.9562,
"step": 140
},
{
"epoch": 0.15359477124183007,
"grad_norm": 4.264173984527588,
"learning_rate": 0.00016949891067538126,
"loss": 5.0844,
"step": 141
},
{
"epoch": 0.15468409586056645,
"grad_norm": 16.368194580078125,
"learning_rate": 0.000169281045751634,
"loss": 5.635,
"step": 142
},
{
"epoch": 0.15577342047930284,
"grad_norm": 3.6264078617095947,
"learning_rate": 0.0001690631808278867,
"loss": 4.8617,
"step": 143
},
{
"epoch": 0.1568627450980392,
"grad_norm": 4.278415203094482,
"learning_rate": 0.00016884531590413942,
"loss": 5.2121,
"step": 144
},
{
"epoch": 0.1579520697167756,
"grad_norm": 2.9585821628570557,
"learning_rate": 0.00016862745098039216,
"loss": 4.8859,
"step": 145
},
{
"epoch": 0.15904139433551198,
"grad_norm": 3.371652364730835,
"learning_rate": 0.0001684095860566449,
"loss": 4.7501,
"step": 146
},
{
"epoch": 0.16013071895424835,
"grad_norm": 3.98421573638916,
"learning_rate": 0.0001681917211328976,
"loss": 4.9839,
"step": 147
},
{
"epoch": 0.16122004357298475,
"grad_norm": 3.6334128379821777,
"learning_rate": 0.00016797385620915035,
"loss": 5.2846,
"step": 148
},
{
"epoch": 0.16230936819172112,
"grad_norm": 4.04555082321167,
"learning_rate": 0.00016775599128540308,
"loss": 4.9533,
"step": 149
},
{
"epoch": 0.16339869281045752,
"grad_norm": 4.256022930145264,
"learning_rate": 0.00016753812636165577,
"loss": 5.2371,
"step": 150
},
{
"epoch": 0.1644880174291939,
"grad_norm": 2.657806634902954,
"learning_rate": 0.0001673202614379085,
"loss": 5.2251,
"step": 151
},
{
"epoch": 0.1655773420479303,
"grad_norm": 3.7488322257995605,
"learning_rate": 0.0001671023965141612,
"loss": 5.0817,
"step": 152
},
{
"epoch": 0.16666666666666666,
"grad_norm": 3.3456156253814697,
"learning_rate": 0.00016688453159041395,
"loss": 5.1065,
"step": 153
},
{
"epoch": 0.16775599128540306,
"grad_norm": 3.5670382976531982,
"learning_rate": 0.0001666666666666667,
"loss": 4.6952,
"step": 154
},
{
"epoch": 0.16884531590413943,
"grad_norm": 3.841510534286499,
"learning_rate": 0.0001664488017429194,
"loss": 4.9876,
"step": 155
},
{
"epoch": 0.16993464052287582,
"grad_norm": 3.7906384468078613,
"learning_rate": 0.00016623093681917214,
"loss": 4.9622,
"step": 156
},
{
"epoch": 0.1710239651416122,
"grad_norm": 2.9914352893829346,
"learning_rate": 0.00016601307189542485,
"loss": 4.7751,
"step": 157
},
{
"epoch": 0.1721132897603486,
"grad_norm": 3.1363089084625244,
"learning_rate": 0.00016579520697167756,
"loss": 4.761,
"step": 158
},
{
"epoch": 0.17320261437908496,
"grad_norm": 3.877608299255371,
"learning_rate": 0.0001655773420479303,
"loss": 4.9925,
"step": 159
},
{
"epoch": 0.17429193899782136,
"grad_norm": 2.659391164779663,
"learning_rate": 0.000165359477124183,
"loss": 4.8792,
"step": 160
},
{
"epoch": 0.17538126361655773,
"grad_norm": 3.540314197540283,
"learning_rate": 0.00016514161220043574,
"loss": 4.7221,
"step": 161
},
{
"epoch": 0.17647058823529413,
"grad_norm": 3.9212710857391357,
"learning_rate": 0.00016492374727668848,
"loss": 4.7605,
"step": 162
},
{
"epoch": 0.1775599128540305,
"grad_norm": 3.4231927394866943,
"learning_rate": 0.0001647058823529412,
"loss": 4.8396,
"step": 163
},
{
"epoch": 0.1786492374727669,
"grad_norm": 2.970974922180176,
"learning_rate": 0.0001644880174291939,
"loss": 5.2121,
"step": 164
},
{
"epoch": 0.17973856209150327,
"grad_norm": 4.227552890777588,
"learning_rate": 0.00016427015250544664,
"loss": 4.9817,
"step": 165
},
{
"epoch": 0.18082788671023964,
"grad_norm": 4.467804908752441,
"learning_rate": 0.00016405228758169935,
"loss": 4.8429,
"step": 166
},
{
"epoch": 0.18191721132897604,
"grad_norm": 3.249866008758545,
"learning_rate": 0.00016383442265795208,
"loss": 4.7504,
"step": 167
},
{
"epoch": 0.1830065359477124,
"grad_norm": 3.1638054847717285,
"learning_rate": 0.00016361655773420482,
"loss": 4.9806,
"step": 168
},
{
"epoch": 0.1840958605664488,
"grad_norm": 3.232707977294922,
"learning_rate": 0.00016339869281045753,
"loss": 4.8697,
"step": 169
},
{
"epoch": 0.18518518518518517,
"grad_norm": 3.216461181640625,
"learning_rate": 0.00016318082788671024,
"loss": 5.2147,
"step": 170
},
{
"epoch": 0.18627450980392157,
"grad_norm": 2.564060688018799,
"learning_rate": 0.00016296296296296295,
"loss": 4.9599,
"step": 171
},
{
"epoch": 0.18736383442265794,
"grad_norm": 3.218874216079712,
"learning_rate": 0.0001627450980392157,
"loss": 5.0425,
"step": 172
},
{
"epoch": 0.18845315904139434,
"grad_norm": 2.866109848022461,
"learning_rate": 0.00016252723311546843,
"loss": 4.8431,
"step": 173
},
{
"epoch": 0.1895424836601307,
"grad_norm": 4.1832075119018555,
"learning_rate": 0.00016230936819172114,
"loss": 5.1622,
"step": 174
},
{
"epoch": 0.1906318082788671,
"grad_norm": 2.445281744003296,
"learning_rate": 0.00016209150326797388,
"loss": 5.0674,
"step": 175
},
{
"epoch": 0.19172113289760348,
"grad_norm": 2.9621212482452393,
"learning_rate": 0.00016187363834422659,
"loss": 4.9052,
"step": 176
},
{
"epoch": 0.19281045751633988,
"grad_norm": 2.8743746280670166,
"learning_rate": 0.0001616557734204793,
"loss": 4.9333,
"step": 177
},
{
"epoch": 0.19389978213507625,
"grad_norm": 2.8737668991088867,
"learning_rate": 0.00016143790849673203,
"loss": 4.9724,
"step": 178
},
{
"epoch": 0.19498910675381265,
"grad_norm": 2.8740956783294678,
"learning_rate": 0.00016122004357298474,
"loss": 4.9445,
"step": 179
},
{
"epoch": 0.19607843137254902,
"grad_norm": 2.64280104637146,
"learning_rate": 0.00016100217864923748,
"loss": 5.0494,
"step": 180
},
{
"epoch": 0.19716775599128541,
"grad_norm": 2.543142795562744,
"learning_rate": 0.00016078431372549022,
"loss": 4.7056,
"step": 181
},
{
"epoch": 0.19825708061002179,
"grad_norm": 3.3072926998138428,
"learning_rate": 0.00016056644880174293,
"loss": 4.6861,
"step": 182
},
{
"epoch": 0.19934640522875818,
"grad_norm": 3.1435420513153076,
"learning_rate": 0.00016034858387799564,
"loss": 5.5872,
"step": 183
},
{
"epoch": 0.20043572984749455,
"grad_norm": 2.7487738132476807,
"learning_rate": 0.00016013071895424838,
"loss": 4.8017,
"step": 184
},
{
"epoch": 0.20152505446623092,
"grad_norm": 3.8782405853271484,
"learning_rate": 0.00015991285403050109,
"loss": 5.0304,
"step": 185
},
{
"epoch": 0.20261437908496732,
"grad_norm": 2.4751572608947754,
"learning_rate": 0.00015969498910675382,
"loss": 5.1402,
"step": 186
},
{
"epoch": 0.2037037037037037,
"grad_norm": 3.057116746902466,
"learning_rate": 0.00015947712418300656,
"loss": 5.0166,
"step": 187
},
{
"epoch": 0.2047930283224401,
"grad_norm": 3.6688828468322754,
"learning_rate": 0.00015925925925925927,
"loss": 4.7331,
"step": 188
},
{
"epoch": 0.20588235294117646,
"grad_norm": 2.466041088104248,
"learning_rate": 0.00015904139433551198,
"loss": 4.84,
"step": 189
},
{
"epoch": 0.20697167755991286,
"grad_norm": 3.641002893447876,
"learning_rate": 0.0001588235294117647,
"loss": 4.7463,
"step": 190
},
{
"epoch": 0.20806100217864923,
"grad_norm": 4.757946968078613,
"learning_rate": 0.00015860566448801743,
"loss": 5.1682,
"step": 191
},
{
"epoch": 0.20915032679738563,
"grad_norm": 4.83253288269043,
"learning_rate": 0.00015838779956427017,
"loss": 5.076,
"step": 192
},
{
"epoch": 0.210239651416122,
"grad_norm": 3.409478187561035,
"learning_rate": 0.00015816993464052288,
"loss": 4.9811,
"step": 193
},
{
"epoch": 0.2113289760348584,
"grad_norm": 4.051446437835693,
"learning_rate": 0.00015795206971677561,
"loss": 4.8564,
"step": 194
},
{
"epoch": 0.21241830065359477,
"grad_norm": 4.4804205894470215,
"learning_rate": 0.00015773420479302835,
"loss": 4.4211,
"step": 195
},
{
"epoch": 0.21350762527233116,
"grad_norm": 4.66760778427124,
"learning_rate": 0.00015751633986928106,
"loss": 4.9098,
"step": 196
},
{
"epoch": 0.21459694989106753,
"grad_norm": 4.293209552764893,
"learning_rate": 0.00015729847494553377,
"loss": 5.0321,
"step": 197
},
{
"epoch": 0.21568627450980393,
"grad_norm": 4.768712520599365,
"learning_rate": 0.00015708061002178648,
"loss": 4.651,
"step": 198
},
{
"epoch": 0.2167755991285403,
"grad_norm": 2.8598990440368652,
"learning_rate": 0.00015686274509803922,
"loss": 4.7659,
"step": 199
},
{
"epoch": 0.2178649237472767,
"grad_norm": 3.9935877323150635,
"learning_rate": 0.00015664488017429196,
"loss": 4.9602,
"step": 200
},
{
"epoch": 0.21895424836601307,
"grad_norm": 2.618224859237671,
"learning_rate": 0.00015642701525054467,
"loss": 4.9664,
"step": 201
},
{
"epoch": 0.22004357298474944,
"grad_norm": 3.4870638847351074,
"learning_rate": 0.0001562091503267974,
"loss": 4.7686,
"step": 202
},
{
"epoch": 0.22113289760348584,
"grad_norm": 2.9350011348724365,
"learning_rate": 0.00015599128540305012,
"loss": 4.7934,
"step": 203
},
{
"epoch": 0.2222222222222222,
"grad_norm": 1.970569372177124,
"learning_rate": 0.00015577342047930283,
"loss": 5.0841,
"step": 204
},
{
"epoch": 0.2233115468409586,
"grad_norm": 4.244380474090576,
"learning_rate": 0.00015555555555555556,
"loss": 4.7992,
"step": 205
},
{
"epoch": 0.22440087145969498,
"grad_norm": 2.9138362407684326,
"learning_rate": 0.0001553376906318083,
"loss": 4.6564,
"step": 206
},
{
"epoch": 0.22549019607843138,
"grad_norm": 3.677152156829834,
"learning_rate": 0.000155119825708061,
"loss": 5.3402,
"step": 207
},
{
"epoch": 0.22657952069716775,
"grad_norm": 3.5209450721740723,
"learning_rate": 0.00015490196078431375,
"loss": 4.4968,
"step": 208
},
{
"epoch": 0.22766884531590414,
"grad_norm": 3.3340156078338623,
"learning_rate": 0.00015468409586056646,
"loss": 4.8884,
"step": 209
},
{
"epoch": 0.22875816993464052,
"grad_norm": 3.2705368995666504,
"learning_rate": 0.00015446623093681917,
"loss": 4.7711,
"step": 210
},
{
"epoch": 0.2298474945533769,
"grad_norm": 3.3847126960754395,
"learning_rate": 0.0001542483660130719,
"loss": 5.2646,
"step": 211
},
{
"epoch": 0.23093681917211328,
"grad_norm": 4.557519912719727,
"learning_rate": 0.00015403050108932462,
"loss": 5.265,
"step": 212
},
{
"epoch": 0.23202614379084968,
"grad_norm": 3.7597949504852295,
"learning_rate": 0.00015381263616557735,
"loss": 4.9571,
"step": 213
},
{
"epoch": 0.23311546840958605,
"grad_norm": 5.981929302215576,
"learning_rate": 0.0001535947712418301,
"loss": 4.77,
"step": 214
},
{
"epoch": 0.23420479302832245,
"grad_norm": 3.622166395187378,
"learning_rate": 0.0001533769063180828,
"loss": 4.9361,
"step": 215
},
{
"epoch": 0.23529411764705882,
"grad_norm": 4.268712520599365,
"learning_rate": 0.0001531590413943355,
"loss": 4.5784,
"step": 216
},
{
"epoch": 0.23638344226579522,
"grad_norm": 3.211376190185547,
"learning_rate": 0.00015294117647058822,
"loss": 4.9316,
"step": 217
},
{
"epoch": 0.2374727668845316,
"grad_norm": 3.7996368408203125,
"learning_rate": 0.00015272331154684096,
"loss": 5.2139,
"step": 218
},
{
"epoch": 0.238562091503268,
"grad_norm": 4.062546253204346,
"learning_rate": 0.0001525054466230937,
"loss": 4.2757,
"step": 219
},
{
"epoch": 0.23965141612200436,
"grad_norm": 3.065821886062622,
"learning_rate": 0.0001522875816993464,
"loss": 4.7295,
"step": 220
},
{
"epoch": 0.24074074074074073,
"grad_norm": 3.3586819171905518,
"learning_rate": 0.00015206971677559914,
"loss": 5.0229,
"step": 221
},
{
"epoch": 0.24183006535947713,
"grad_norm": 2.5833959579467773,
"learning_rate": 0.00015185185185185185,
"loss": 4.9391,
"step": 222
},
{
"epoch": 0.2429193899782135,
"grad_norm": 4.792468070983887,
"learning_rate": 0.00015163398692810456,
"loss": 5.0563,
"step": 223
},
{
"epoch": 0.2440087145969499,
"grad_norm": 2.994100332260132,
"learning_rate": 0.0001514161220043573,
"loss": 4.6381,
"step": 224
},
{
"epoch": 0.24509803921568626,
"grad_norm": 8.28174877166748,
"learning_rate": 0.00015119825708061004,
"loss": 5.2376,
"step": 225
},
{
"epoch": 0.24618736383442266,
"grad_norm": 2.4422318935394287,
"learning_rate": 0.00015098039215686275,
"loss": 5.009,
"step": 226
},
{
"epoch": 0.24727668845315903,
"grad_norm": 3.0637447834014893,
"learning_rate": 0.0001507625272331155,
"loss": 5.0266,
"step": 227
},
{
"epoch": 0.24836601307189543,
"grad_norm": 4.186681270599365,
"learning_rate": 0.0001505446623093682,
"loss": 4.8132,
"step": 228
},
{
"epoch": 0.2494553376906318,
"grad_norm": 3.8071372509002686,
"learning_rate": 0.0001503267973856209,
"loss": 4.7549,
"step": 229
},
{
"epoch": 0.25054466230936817,
"grad_norm": 4.450962066650391,
"learning_rate": 0.00015010893246187365,
"loss": 4.61,
"step": 230
},
{
"epoch": 0.25163398692810457,
"grad_norm": 3.4045302867889404,
"learning_rate": 0.00014989106753812636,
"loss": 4.5865,
"step": 231
},
{
"epoch": 0.25272331154684097,
"grad_norm": 3.5248048305511475,
"learning_rate": 0.0001496732026143791,
"loss": 4.8463,
"step": 232
},
{
"epoch": 0.25381263616557737,
"grad_norm": 2.9256203174591064,
"learning_rate": 0.00014945533769063183,
"loss": 4.9082,
"step": 233
},
{
"epoch": 0.2549019607843137,
"grad_norm": 3.887249708175659,
"learning_rate": 0.00014923747276688454,
"loss": 4.6206,
"step": 234
},
{
"epoch": 0.2559912854030501,
"grad_norm": 3.5457653999328613,
"learning_rate": 0.00014901960784313728,
"loss": 4.7352,
"step": 235
},
{
"epoch": 0.2570806100217865,
"grad_norm": 2.6026275157928467,
"learning_rate": 0.00014880174291939,
"loss": 5.0141,
"step": 236
},
{
"epoch": 0.2581699346405229,
"grad_norm": 2.7921388149261475,
"learning_rate": 0.0001485838779956427,
"loss": 4.9637,
"step": 237
},
{
"epoch": 0.25925925925925924,
"grad_norm": 7.660345554351807,
"learning_rate": 0.00014836601307189544,
"loss": 4.7919,
"step": 238
},
{
"epoch": 0.26034858387799564,
"grad_norm": 2.67083740234375,
"learning_rate": 0.00014814814814814815,
"loss": 5.0893,
"step": 239
},
{
"epoch": 0.26143790849673204,
"grad_norm": 4.7418036460876465,
"learning_rate": 0.00014793028322440088,
"loss": 4.6364,
"step": 240
},
{
"epoch": 0.2625272331154684,
"grad_norm": 3.2569048404693604,
"learning_rate": 0.00014771241830065362,
"loss": 4.7866,
"step": 241
},
{
"epoch": 0.2636165577342048,
"grad_norm": 3.1199724674224854,
"learning_rate": 0.00014749455337690633,
"loss": 4.8152,
"step": 242
},
{
"epoch": 0.2647058823529412,
"grad_norm": 4.056507110595703,
"learning_rate": 0.00014727668845315904,
"loss": 5.1888,
"step": 243
},
{
"epoch": 0.2657952069716776,
"grad_norm": 2.9097986221313477,
"learning_rate": 0.00014705882352941178,
"loss": 5.0211,
"step": 244
},
{
"epoch": 0.2668845315904139,
"grad_norm": 2.2584147453308105,
"learning_rate": 0.0001468409586056645,
"loss": 4.9742,
"step": 245
},
{
"epoch": 0.2679738562091503,
"grad_norm": 5.296759605407715,
"learning_rate": 0.00014662309368191723,
"loss": 4.9444,
"step": 246
},
{
"epoch": 0.2690631808278867,
"grad_norm": 2.9959006309509277,
"learning_rate": 0.00014640522875816994,
"loss": 5.061,
"step": 247
},
{
"epoch": 0.2701525054466231,
"grad_norm": 5.515058994293213,
"learning_rate": 0.00014618736383442267,
"loss": 4.8149,
"step": 248
},
{
"epoch": 0.27124183006535946,
"grad_norm": 2.8291754722595215,
"learning_rate": 0.00014596949891067538,
"loss": 4.9713,
"step": 249
},
{
"epoch": 0.27233115468409586,
"grad_norm": 2.9711527824401855,
"learning_rate": 0.0001457516339869281,
"loss": 4.703,
"step": 250
},
{
"epoch": 0.27342047930283225,
"grad_norm": 2.836789131164551,
"learning_rate": 0.00014553376906318083,
"loss": 4.7403,
"step": 251
},
{
"epoch": 0.27450980392156865,
"grad_norm": 4.1027350425720215,
"learning_rate": 0.00014531590413943357,
"loss": 5.2626,
"step": 252
},
{
"epoch": 0.275599128540305,
"grad_norm": 3.8692238330841064,
"learning_rate": 0.00014509803921568628,
"loss": 4.8325,
"step": 253
},
{
"epoch": 0.2766884531590414,
"grad_norm": 2.9437451362609863,
"learning_rate": 0.00014488017429193902,
"loss": 4.5083,
"step": 254
},
{
"epoch": 0.2777777777777778,
"grad_norm": 3.134019374847412,
"learning_rate": 0.00014466230936819173,
"loss": 4.4554,
"step": 255
},
{
"epoch": 0.2788671023965142,
"grad_norm": 2.429337501525879,
"learning_rate": 0.00014444444444444444,
"loss": 4.7566,
"step": 256
},
{
"epoch": 0.27995642701525053,
"grad_norm": 3.900141954421997,
"learning_rate": 0.00014422657952069718,
"loss": 4.4774,
"step": 257
},
{
"epoch": 0.28104575163398693,
"grad_norm": 2.6738038063049316,
"learning_rate": 0.00014400871459694989,
"loss": 5.0235,
"step": 258
},
{
"epoch": 0.2821350762527233,
"grad_norm": 3.294783353805542,
"learning_rate": 0.00014379084967320262,
"loss": 5.1043,
"step": 259
},
{
"epoch": 0.28322440087145967,
"grad_norm": 3.346564531326294,
"learning_rate": 0.00014357298474945536,
"loss": 4.9083,
"step": 260
},
{
"epoch": 0.28431372549019607,
"grad_norm": 4.410298824310303,
"learning_rate": 0.00014335511982570807,
"loss": 5.2007,
"step": 261
},
{
"epoch": 0.28540305010893247,
"grad_norm": 4.235734939575195,
"learning_rate": 0.00014313725490196078,
"loss": 4.446,
"step": 262
},
{
"epoch": 0.28649237472766886,
"grad_norm": 4.334876537322998,
"learning_rate": 0.00014291938997821352,
"loss": 4.8909,
"step": 263
},
{
"epoch": 0.2875816993464052,
"grad_norm": 2.46726393699646,
"learning_rate": 0.00014270152505446623,
"loss": 4.7269,
"step": 264
},
{
"epoch": 0.2886710239651416,
"grad_norm": 3.194774866104126,
"learning_rate": 0.00014248366013071897,
"loss": 4.8303,
"step": 265
},
{
"epoch": 0.289760348583878,
"grad_norm": 3.8563270568847656,
"learning_rate": 0.00014226579520697168,
"loss": 5.0194,
"step": 266
},
{
"epoch": 0.2908496732026144,
"grad_norm": 2.417151927947998,
"learning_rate": 0.0001420479302832244,
"loss": 5.0109,
"step": 267
},
{
"epoch": 0.29193899782135074,
"grad_norm": 3.7174580097198486,
"learning_rate": 0.00014183006535947715,
"loss": 4.864,
"step": 268
},
{
"epoch": 0.29302832244008714,
"grad_norm": 3.464721441268921,
"learning_rate": 0.00014161220043572983,
"loss": 4.4532,
"step": 269
},
{
"epoch": 0.29411764705882354,
"grad_norm": 3.013181686401367,
"learning_rate": 0.00014139433551198257,
"loss": 4.6633,
"step": 270
},
{
"epoch": 0.29520697167755994,
"grad_norm": 2.9733364582061768,
"learning_rate": 0.0001411764705882353,
"loss": 4.8566,
"step": 271
},
{
"epoch": 0.2962962962962963,
"grad_norm": 3.807645797729492,
"learning_rate": 0.00014095860566448802,
"loss": 5.0846,
"step": 272
},
{
"epoch": 0.2973856209150327,
"grad_norm": 2.4930531978607178,
"learning_rate": 0.00014074074074074076,
"loss": 4.8018,
"step": 273
},
{
"epoch": 0.2984749455337691,
"grad_norm": 3.7660248279571533,
"learning_rate": 0.00014052287581699347,
"loss": 4.6897,
"step": 274
},
{
"epoch": 0.2995642701525055,
"grad_norm": 2.6249687671661377,
"learning_rate": 0.0001403050108932462,
"loss": 4.8823,
"step": 275
},
{
"epoch": 0.3006535947712418,
"grad_norm": 4.599347114562988,
"learning_rate": 0.00014008714596949891,
"loss": 4.6467,
"step": 276
},
{
"epoch": 0.3017429193899782,
"grad_norm": 3.233173131942749,
"learning_rate": 0.00013986928104575162,
"loss": 4.3214,
"step": 277
},
{
"epoch": 0.3028322440087146,
"grad_norm": 3.9185855388641357,
"learning_rate": 0.00013965141612200436,
"loss": 5.0813,
"step": 278
},
{
"epoch": 0.30392156862745096,
"grad_norm": 3.683941125869751,
"learning_rate": 0.0001394335511982571,
"loss": 5.1563,
"step": 279
},
{
"epoch": 0.30501089324618735,
"grad_norm": 3.983316659927368,
"learning_rate": 0.0001392156862745098,
"loss": 4.7226,
"step": 280
},
{
"epoch": 0.30610021786492375,
"grad_norm": 3.2122104167938232,
"learning_rate": 0.00013899782135076255,
"loss": 4.8329,
"step": 281
},
{
"epoch": 0.30718954248366015,
"grad_norm": 4.021376132965088,
"learning_rate": 0.00013877995642701526,
"loss": 4.8389,
"step": 282
},
{
"epoch": 0.3082788671023965,
"grad_norm": 5.099759101867676,
"learning_rate": 0.00013856209150326797,
"loss": 4.8081,
"step": 283
},
{
"epoch": 0.3093681917211329,
"grad_norm": 2.6392247676849365,
"learning_rate": 0.0001383442265795207,
"loss": 4.7018,
"step": 284
},
{
"epoch": 0.3104575163398693,
"grad_norm": 2.628859519958496,
"learning_rate": 0.00013812636165577342,
"loss": 4.9003,
"step": 285
},
{
"epoch": 0.3115468409586057,
"grad_norm": 9.484817504882812,
"learning_rate": 0.00013790849673202615,
"loss": 5.015,
"step": 286
},
{
"epoch": 0.31263616557734203,
"grad_norm": 5.714864253997803,
"learning_rate": 0.0001376906318082789,
"loss": 4.9453,
"step": 287
},
{
"epoch": 0.3137254901960784,
"grad_norm": 4.550031661987305,
"learning_rate": 0.0001374727668845316,
"loss": 5.7539,
"step": 288
},
{
"epoch": 0.3148148148148148,
"grad_norm": 3.3271303176879883,
"learning_rate": 0.0001372549019607843,
"loss": 5.2101,
"step": 289
},
{
"epoch": 0.3159041394335512,
"grad_norm": 4.195096492767334,
"learning_rate": 0.00013703703703703705,
"loss": 4.8807,
"step": 290
},
{
"epoch": 0.31699346405228757,
"grad_norm": 3.0190374851226807,
"learning_rate": 0.00013681917211328976,
"loss": 4.8295,
"step": 291
},
{
"epoch": 0.31808278867102396,
"grad_norm": 3.7823071479797363,
"learning_rate": 0.0001366013071895425,
"loss": 5.084,
"step": 292
},
{
"epoch": 0.31917211328976036,
"grad_norm": 3.7246618270874023,
"learning_rate": 0.0001363834422657952,
"loss": 4.9432,
"step": 293
},
{
"epoch": 0.3202614379084967,
"grad_norm": 3.8362350463867188,
"learning_rate": 0.00013616557734204794,
"loss": 4.836,
"step": 294
},
{
"epoch": 0.3213507625272331,
"grad_norm": 3.490386486053467,
"learning_rate": 0.00013594771241830065,
"loss": 4.614,
"step": 295
},
{
"epoch": 0.3224400871459695,
"grad_norm": 3.012450695037842,
"learning_rate": 0.00013572984749455336,
"loss": 4.7246,
"step": 296
},
{
"epoch": 0.3235294117647059,
"grad_norm": 3.188887357711792,
"learning_rate": 0.0001355119825708061,
"loss": 4.9364,
"step": 297
},
{
"epoch": 0.32461873638344224,
"grad_norm": 3.366766929626465,
"learning_rate": 0.00013529411764705884,
"loss": 5.071,
"step": 298
},
{
"epoch": 0.32570806100217864,
"grad_norm": 4.414743900299072,
"learning_rate": 0.00013507625272331155,
"loss": 4.6582,
"step": 299
},
{
"epoch": 0.32679738562091504,
"grad_norm": 4.645547389984131,
"learning_rate": 0.00013485838779956429,
"loss": 4.7929,
"step": 300
},
{
"epoch": 0.32788671023965144,
"grad_norm": 3.6290218830108643,
"learning_rate": 0.000134640522875817,
"loss": 5.1134,
"step": 301
},
{
"epoch": 0.3289760348583878,
"grad_norm": 2.694545030593872,
"learning_rate": 0.0001344226579520697,
"loss": 5.0862,
"step": 302
},
{
"epoch": 0.3300653594771242,
"grad_norm": 4.14566707611084,
"learning_rate": 0.00013420479302832244,
"loss": 5.0835,
"step": 303
},
{
"epoch": 0.3311546840958606,
"grad_norm": 4.463272571563721,
"learning_rate": 0.00013398692810457515,
"loss": 4.4719,
"step": 304
},
{
"epoch": 0.332244008714597,
"grad_norm": 3.1239616870880127,
"learning_rate": 0.0001337690631808279,
"loss": 5.1211,
"step": 305
},
{
"epoch": 0.3333333333333333,
"grad_norm": 2.634913206100464,
"learning_rate": 0.00013355119825708063,
"loss": 5.2042,
"step": 306
},
{
"epoch": 0.3344226579520697,
"grad_norm": 4.657975673675537,
"learning_rate": 0.00013333333333333334,
"loss": 4.7762,
"step": 307
},
{
"epoch": 0.3355119825708061,
"grad_norm": 2.70094633102417,
"learning_rate": 0.00013311546840958608,
"loss": 4.9501,
"step": 308
},
{
"epoch": 0.3366013071895425,
"grad_norm": 2.618452787399292,
"learning_rate": 0.0001328976034858388,
"loss": 4.7893,
"step": 309
},
{
"epoch": 0.33769063180827885,
"grad_norm": 2.8765547275543213,
"learning_rate": 0.0001326797385620915,
"loss": 4.4451,
"step": 310
},
{
"epoch": 0.33877995642701525,
"grad_norm": 4.317745208740234,
"learning_rate": 0.00013246187363834424,
"loss": 4.4262,
"step": 311
},
{
"epoch": 0.33986928104575165,
"grad_norm": 4.650866508483887,
"learning_rate": 0.00013224400871459695,
"loss": 4.53,
"step": 312
},
{
"epoch": 0.340958605664488,
"grad_norm": 2.9868412017822266,
"learning_rate": 0.00013202614379084968,
"loss": 4.4741,
"step": 313
},
{
"epoch": 0.3420479302832244,
"grad_norm": 4.8326826095581055,
"learning_rate": 0.00013180827886710242,
"loss": 4.9097,
"step": 314
},
{
"epoch": 0.3431372549019608,
"grad_norm": 3.1381747722625732,
"learning_rate": 0.00013159041394335513,
"loss": 4.4471,
"step": 315
},
{
"epoch": 0.3442265795206972,
"grad_norm": 2.8750381469726562,
"learning_rate": 0.00013137254901960784,
"loss": 4.812,
"step": 316
},
{
"epoch": 0.3453159041394335,
"grad_norm": 4.262397766113281,
"learning_rate": 0.00013115468409586058,
"loss": 4.317,
"step": 317
},
{
"epoch": 0.3464052287581699,
"grad_norm": 3.056037425994873,
"learning_rate": 0.0001309368191721133,
"loss": 5.0436,
"step": 318
},
{
"epoch": 0.3474945533769063,
"grad_norm": 2.79681134223938,
"learning_rate": 0.00013071895424836603,
"loss": 4.6181,
"step": 319
},
{
"epoch": 0.3485838779956427,
"grad_norm": 3.8306972980499268,
"learning_rate": 0.00013050108932461876,
"loss": 4.5201,
"step": 320
},
{
"epoch": 0.34967320261437906,
"grad_norm": 3.9447734355926514,
"learning_rate": 0.00013028322440087147,
"loss": 4.4423,
"step": 321
},
{
"epoch": 0.35076252723311546,
"grad_norm": 3.210547685623169,
"learning_rate": 0.00013006535947712418,
"loss": 4.4836,
"step": 322
},
{
"epoch": 0.35185185185185186,
"grad_norm": 3.065279006958008,
"learning_rate": 0.0001298474945533769,
"loss": 4.9008,
"step": 323
},
{
"epoch": 0.35294117647058826,
"grad_norm": 3.705817222595215,
"learning_rate": 0.00012962962962962963,
"loss": 5.5698,
"step": 324
},
{
"epoch": 0.3540305010893246,
"grad_norm": 3.503516912460327,
"learning_rate": 0.00012941176470588237,
"loss": 4.8409,
"step": 325
},
{
"epoch": 0.355119825708061,
"grad_norm": 3.5819544792175293,
"learning_rate": 0.00012919389978213508,
"loss": 4.8196,
"step": 326
},
{
"epoch": 0.3562091503267974,
"grad_norm": 5.018744945526123,
"learning_rate": 0.00012897603485838782,
"loss": 4.7224,
"step": 327
},
{
"epoch": 0.3572984749455338,
"grad_norm": 4.197869300842285,
"learning_rate": 0.00012875816993464053,
"loss": 4.3352,
"step": 328
},
{
"epoch": 0.35838779956427014,
"grad_norm": 3.5066583156585693,
"learning_rate": 0.00012854030501089324,
"loss": 4.5001,
"step": 329
},
{
"epoch": 0.35947712418300654,
"grad_norm": 2.969836950302124,
"learning_rate": 0.00012832244008714597,
"loss": 4.6297,
"step": 330
},
{
"epoch": 0.36056644880174293,
"grad_norm": 3.936361789703369,
"learning_rate": 0.00012810457516339868,
"loss": 4.6566,
"step": 331
},
{
"epoch": 0.3616557734204793,
"grad_norm": 3.509218692779541,
"learning_rate": 0.00012788671023965142,
"loss": 4.9033,
"step": 332
},
{
"epoch": 0.3627450980392157,
"grad_norm": 3.586121082305908,
"learning_rate": 0.00012766884531590416,
"loss": 4.7765,
"step": 333
},
{
"epoch": 0.3638344226579521,
"grad_norm": 4.253020763397217,
"learning_rate": 0.00012745098039215687,
"loss": 5.0747,
"step": 334
},
{
"epoch": 0.36492374727668847,
"grad_norm": 2.871368408203125,
"learning_rate": 0.00012723311546840958,
"loss": 4.6841,
"step": 335
},
{
"epoch": 0.3660130718954248,
"grad_norm": 3.784471273422241,
"learning_rate": 0.00012701525054466232,
"loss": 4.9569,
"step": 336
},
{
"epoch": 0.3671023965141612,
"grad_norm": 4.249979496002197,
"learning_rate": 0.00012679738562091503,
"loss": 4.6959,
"step": 337
},
{
"epoch": 0.3681917211328976,
"grad_norm": 4.012763500213623,
"learning_rate": 0.00012657952069716776,
"loss": 5.4651,
"step": 338
},
{
"epoch": 0.369281045751634,
"grad_norm": 4.050015926361084,
"learning_rate": 0.0001263616557734205,
"loss": 4.5269,
"step": 339
},
{
"epoch": 0.37037037037037035,
"grad_norm": 3.3827953338623047,
"learning_rate": 0.0001261437908496732,
"loss": 4.6672,
"step": 340
},
{
"epoch": 0.37145969498910675,
"grad_norm": 3.166588544845581,
"learning_rate": 0.00012592592592592592,
"loss": 4.8382,
"step": 341
},
{
"epoch": 0.37254901960784315,
"grad_norm": 3.722085952758789,
"learning_rate": 0.00012570806100217863,
"loss": 4.59,
"step": 342
},
{
"epoch": 0.37363834422657954,
"grad_norm": 3.9224953651428223,
"learning_rate": 0.00012549019607843137,
"loss": 4.6477,
"step": 343
},
{
"epoch": 0.3747276688453159,
"grad_norm": 3.167107582092285,
"learning_rate": 0.0001252723311546841,
"loss": 5.0107,
"step": 344
},
{
"epoch": 0.3758169934640523,
"grad_norm": 3.407217264175415,
"learning_rate": 0.00012505446623093682,
"loss": 5.09,
"step": 345
},
{
"epoch": 0.3769063180827887,
"grad_norm": 3.8298895359039307,
"learning_rate": 0.00012483660130718956,
"loss": 5.0545,
"step": 346
},
{
"epoch": 0.3779956427015251,
"grad_norm": 6.0477800369262695,
"learning_rate": 0.0001246187363834423,
"loss": 4.9539,
"step": 347
},
{
"epoch": 0.3790849673202614,
"grad_norm": 3.269374132156372,
"learning_rate": 0.000124400871459695,
"loss": 4.8141,
"step": 348
},
{
"epoch": 0.3801742919389978,
"grad_norm": 3.4903624057769775,
"learning_rate": 0.00012418300653594771,
"loss": 4.5177,
"step": 349
},
{
"epoch": 0.3812636165577342,
"grad_norm": 3.221745491027832,
"learning_rate": 0.00012396514161220045,
"loss": 4.7741,
"step": 350
},
{
"epoch": 0.38235294117647056,
"grad_norm": 3.368844985961914,
"learning_rate": 0.00012374727668845316,
"loss": 4.7539,
"step": 351
},
{
"epoch": 0.38344226579520696,
"grad_norm": 3.398777723312378,
"learning_rate": 0.0001235294117647059,
"loss": 4.8483,
"step": 352
},
{
"epoch": 0.38453159041394336,
"grad_norm": 3.093210220336914,
"learning_rate": 0.0001233115468409586,
"loss": 5.1118,
"step": 353
},
{
"epoch": 0.38562091503267976,
"grad_norm": 2.6734519004821777,
"learning_rate": 0.00012309368191721135,
"loss": 4.7035,
"step": 354
},
{
"epoch": 0.3867102396514161,
"grad_norm": 2.951702356338501,
"learning_rate": 0.00012287581699346406,
"loss": 4.6663,
"step": 355
},
{
"epoch": 0.3877995642701525,
"grad_norm": 2.9217047691345215,
"learning_rate": 0.00012265795206971677,
"loss": 4.7074,
"step": 356
},
{
"epoch": 0.3888888888888889,
"grad_norm": 3.06427001953125,
"learning_rate": 0.0001224400871459695,
"loss": 4.7311,
"step": 357
},
{
"epoch": 0.3899782135076253,
"grad_norm": 4.140756130218506,
"learning_rate": 0.00012222222222222224,
"loss": 4.816,
"step": 358
},
{
"epoch": 0.39106753812636164,
"grad_norm": 4.553076267242432,
"learning_rate": 0.00012200435729847495,
"loss": 5.5209,
"step": 359
},
{
"epoch": 0.39215686274509803,
"grad_norm": 3.7939252853393555,
"learning_rate": 0.00012178649237472768,
"loss": 4.7103,
"step": 360
},
{
"epoch": 0.39324618736383443,
"grad_norm": 3.0563583374023438,
"learning_rate": 0.00012156862745098039,
"loss": 5.1193,
"step": 361
},
{
"epoch": 0.39433551198257083,
"grad_norm": 3.1910042762756348,
"learning_rate": 0.00012135076252723312,
"loss": 4.7778,
"step": 362
},
{
"epoch": 0.3954248366013072,
"grad_norm": 3.6231610774993896,
"learning_rate": 0.00012113289760348585,
"loss": 4.5034,
"step": 363
},
{
"epoch": 0.39651416122004357,
"grad_norm": 3.0576488971710205,
"learning_rate": 0.00012091503267973856,
"loss": 4.7385,
"step": 364
},
{
"epoch": 0.39760348583877997,
"grad_norm": 7.9060821533203125,
"learning_rate": 0.0001206971677559913,
"loss": 4.7028,
"step": 365
},
{
"epoch": 0.39869281045751637,
"grad_norm": 4.208837985992432,
"learning_rate": 0.00012047930283224402,
"loss": 4.9435,
"step": 366
},
{
"epoch": 0.3997821350762527,
"grad_norm": 2.8220272064208984,
"learning_rate": 0.00012026143790849673,
"loss": 4.6085,
"step": 367
},
{
"epoch": 0.4008714596949891,
"grad_norm": 2.2707273960113525,
"learning_rate": 0.00012004357298474947,
"loss": 4.5269,
"step": 368
},
{
"epoch": 0.4019607843137255,
"grad_norm": 4.206027507781982,
"learning_rate": 0.00011982570806100219,
"loss": 4.2867,
"step": 369
},
{
"epoch": 0.40305010893246185,
"grad_norm": 2.74226450920105,
"learning_rate": 0.0001196078431372549,
"loss": 4.9979,
"step": 370
},
{
"epoch": 0.40413943355119825,
"grad_norm": 2.58813214302063,
"learning_rate": 0.00011938997821350764,
"loss": 4.8099,
"step": 371
},
{
"epoch": 0.40522875816993464,
"grad_norm": 2.26802396774292,
"learning_rate": 0.00011917211328976035,
"loss": 5.2378,
"step": 372
},
{
"epoch": 0.40631808278867104,
"grad_norm": 3.560227155685425,
"learning_rate": 0.00011895424836601307,
"loss": 4.8105,
"step": 373
},
{
"epoch": 0.4074074074074074,
"grad_norm": 3.6395950317382812,
"learning_rate": 0.00011873638344226581,
"loss": 4.4134,
"step": 374
},
{
"epoch": 0.4084967320261438,
"grad_norm": 2.737851619720459,
"learning_rate": 0.00011851851851851852,
"loss": 4.9353,
"step": 375
},
{
"epoch": 0.4095860566448802,
"grad_norm": 2.9248199462890625,
"learning_rate": 0.00011830065359477124,
"loss": 4.6186,
"step": 376
},
{
"epoch": 0.4106753812636166,
"grad_norm": 3.741907835006714,
"learning_rate": 0.00011808278867102398,
"loss": 4.678,
"step": 377
},
{
"epoch": 0.4117647058823529,
"grad_norm": 4.954297065734863,
"learning_rate": 0.00011786492374727669,
"loss": 4.7581,
"step": 378
},
{
"epoch": 0.4128540305010893,
"grad_norm": 2.864030122756958,
"learning_rate": 0.00011764705882352942,
"loss": 5.0273,
"step": 379
},
{
"epoch": 0.4139433551198257,
"grad_norm": 2.821183919906616,
"learning_rate": 0.00011742919389978213,
"loss": 4.7005,
"step": 380
},
{
"epoch": 0.4150326797385621,
"grad_norm": 2.887479782104492,
"learning_rate": 0.00011721132897603486,
"loss": 4.8337,
"step": 381
},
{
"epoch": 0.41612200435729846,
"grad_norm": 3.6127638816833496,
"learning_rate": 0.0001169934640522876,
"loss": 4.0269,
"step": 382
},
{
"epoch": 0.41721132897603486,
"grad_norm": 3.419294834136963,
"learning_rate": 0.0001167755991285403,
"loss": 4.9576,
"step": 383
},
{
"epoch": 0.41830065359477125,
"grad_norm": 4.136377334594727,
"learning_rate": 0.00011655773420479303,
"loss": 4.5996,
"step": 384
},
{
"epoch": 0.41938997821350765,
"grad_norm": 3.0449485778808594,
"learning_rate": 0.00011633986928104577,
"loss": 4.8109,
"step": 385
},
{
"epoch": 0.420479302832244,
"grad_norm": 2.4126100540161133,
"learning_rate": 0.00011612200435729847,
"loss": 4.6888,
"step": 386
},
{
"epoch": 0.4215686274509804,
"grad_norm": 3.2058181762695312,
"learning_rate": 0.0001159041394335512,
"loss": 5.0234,
"step": 387
},
{
"epoch": 0.4226579520697168,
"grad_norm": 2.861661672592163,
"learning_rate": 0.00011568627450980394,
"loss": 4.5451,
"step": 388
},
{
"epoch": 0.42374727668845313,
"grad_norm": 3.904843807220459,
"learning_rate": 0.00011546840958605665,
"loss": 4.6205,
"step": 389
},
{
"epoch": 0.42483660130718953,
"grad_norm": 2.517266273498535,
"learning_rate": 0.00011525054466230938,
"loss": 4.7441,
"step": 390
},
{
"epoch": 0.42592592592592593,
"grad_norm": 4.648169994354248,
"learning_rate": 0.00011503267973856209,
"loss": 4.7592,
"step": 391
},
{
"epoch": 0.42701525054466233,
"grad_norm": 5.185862064361572,
"learning_rate": 0.00011481481481481482,
"loss": 5.187,
"step": 392
},
{
"epoch": 0.42810457516339867,
"grad_norm": 3.9632956981658936,
"learning_rate": 0.00011459694989106755,
"loss": 4.3847,
"step": 393
},
{
"epoch": 0.42919389978213507,
"grad_norm": 3.6875765323638916,
"learning_rate": 0.00011437908496732026,
"loss": 4.7651,
"step": 394
},
{
"epoch": 0.43028322440087147,
"grad_norm": 5.440472602844238,
"learning_rate": 0.000114161220043573,
"loss": 5.3146,
"step": 395
},
{
"epoch": 0.43137254901960786,
"grad_norm": 2.51242995262146,
"learning_rate": 0.00011394335511982572,
"loss": 4.6351,
"step": 396
},
{
"epoch": 0.4324618736383442,
"grad_norm": 2.9834821224212646,
"learning_rate": 0.00011372549019607843,
"loss": 4.7093,
"step": 397
},
{
"epoch": 0.4335511982570806,
"grad_norm": 2.766350507736206,
"learning_rate": 0.00011350762527233117,
"loss": 4.5507,
"step": 398
},
{
"epoch": 0.434640522875817,
"grad_norm": 2.832143783569336,
"learning_rate": 0.00011328976034858388,
"loss": 4.5613,
"step": 399
},
{
"epoch": 0.4357298474945534,
"grad_norm": 2.6001861095428467,
"learning_rate": 0.0001130718954248366,
"loss": 4.6082,
"step": 400
},
{
"epoch": 0.43681917211328974,
"grad_norm": 2.4849538803100586,
"learning_rate": 0.00011285403050108934,
"loss": 4.6671,
"step": 401
},
{
"epoch": 0.43790849673202614,
"grad_norm": 3.406707286834717,
"learning_rate": 0.00011263616557734205,
"loss": 4.7908,
"step": 402
},
{
"epoch": 0.43899782135076254,
"grad_norm": 3.0030910968780518,
"learning_rate": 0.00011241830065359477,
"loss": 4.8129,
"step": 403
},
{
"epoch": 0.4400871459694989,
"grad_norm": 2.924572467803955,
"learning_rate": 0.00011220043572984751,
"loss": 4.719,
"step": 404
},
{
"epoch": 0.4411764705882353,
"grad_norm": 2.3402299880981445,
"learning_rate": 0.00011198257080610022,
"loss": 4.9935,
"step": 405
},
{
"epoch": 0.4422657952069717,
"grad_norm": 3.6391263008117676,
"learning_rate": 0.00011176470588235294,
"loss": 4.3632,
"step": 406
},
{
"epoch": 0.4433551198257081,
"grad_norm": 3.7474288940429688,
"learning_rate": 0.00011154684095860568,
"loss": 4.7672,
"step": 407
},
{
"epoch": 0.4444444444444444,
"grad_norm": 3.856614351272583,
"learning_rate": 0.00011132897603485839,
"loss": 4.4382,
"step": 408
},
{
"epoch": 0.4455337690631808,
"grad_norm": 3.2774574756622314,
"learning_rate": 0.00011111111111111112,
"loss": 4.5095,
"step": 409
},
{
"epoch": 0.4466230936819172,
"grad_norm": 4.317390441894531,
"learning_rate": 0.00011089324618736383,
"loss": 4.9977,
"step": 410
},
{
"epoch": 0.4477124183006536,
"grad_norm": 2.799553394317627,
"learning_rate": 0.00011067538126361656,
"loss": 4.6128,
"step": 411
},
{
"epoch": 0.44880174291938996,
"grad_norm": 3.6662847995758057,
"learning_rate": 0.00011045751633986929,
"loss": 4.8917,
"step": 412
},
{
"epoch": 0.44989106753812635,
"grad_norm": 4.539546489715576,
"learning_rate": 0.000110239651416122,
"loss": 4.7873,
"step": 413
},
{
"epoch": 0.45098039215686275,
"grad_norm": 3.262467861175537,
"learning_rate": 0.00011002178649237474,
"loss": 5.2149,
"step": 414
},
{
"epoch": 0.45206971677559915,
"grad_norm": 4.247523307800293,
"learning_rate": 0.00010980392156862746,
"loss": 4.7652,
"step": 415
},
{
"epoch": 0.4531590413943355,
"grad_norm": 4.986163139343262,
"learning_rate": 0.00010958605664488017,
"loss": 4.7074,
"step": 416
},
{
"epoch": 0.4542483660130719,
"grad_norm": 2.9192981719970703,
"learning_rate": 0.00010936819172113291,
"loss": 4.798,
"step": 417
},
{
"epoch": 0.4553376906318083,
"grad_norm": 2.6577565670013428,
"learning_rate": 0.00010915032679738562,
"loss": 4.8006,
"step": 418
},
{
"epoch": 0.4564270152505447,
"grad_norm": 2.3315024375915527,
"learning_rate": 0.00010893246187363834,
"loss": 4.9458,
"step": 419
},
{
"epoch": 0.45751633986928103,
"grad_norm": 2.9295709133148193,
"learning_rate": 0.00010871459694989108,
"loss": 5.2466,
"step": 420
},
{
"epoch": 0.45860566448801743,
"grad_norm": 2.850202798843384,
"learning_rate": 0.00010849673202614379,
"loss": 4.7409,
"step": 421
},
{
"epoch": 0.4596949891067538,
"grad_norm": 3.31477689743042,
"learning_rate": 0.00010827886710239653,
"loss": 4.7718,
"step": 422
},
{
"epoch": 0.46078431372549017,
"grad_norm": 2.7019755840301514,
"learning_rate": 0.00010806100217864925,
"loss": 4.8779,
"step": 423
},
{
"epoch": 0.46187363834422657,
"grad_norm": 4.396770477294922,
"learning_rate": 0.00010784313725490196,
"loss": 4.7548,
"step": 424
},
{
"epoch": 0.46296296296296297,
"grad_norm": 2.4405012130737305,
"learning_rate": 0.0001076252723311547,
"loss": 4.9687,
"step": 425
},
{
"epoch": 0.46405228758169936,
"grad_norm": 4.194379806518555,
"learning_rate": 0.00010740740740740742,
"loss": 4.5067,
"step": 426
},
{
"epoch": 0.4651416122004357,
"grad_norm": 2.688394784927368,
"learning_rate": 0.00010718954248366013,
"loss": 4.7729,
"step": 427
},
{
"epoch": 0.4662309368191721,
"grad_norm": 2.670652389526367,
"learning_rate": 0.00010697167755991287,
"loss": 4.5563,
"step": 428
},
{
"epoch": 0.4673202614379085,
"grad_norm": 3.3149876594543457,
"learning_rate": 0.00010675381263616558,
"loss": 4.8264,
"step": 429
},
{
"epoch": 0.4684095860566449,
"grad_norm": 9.103561401367188,
"learning_rate": 0.0001065359477124183,
"loss": 4.642,
"step": 430
},
{
"epoch": 0.46949891067538124,
"grad_norm": 2.6664645671844482,
"learning_rate": 0.00010631808278867104,
"loss": 4.9578,
"step": 431
},
{
"epoch": 0.47058823529411764,
"grad_norm": 4.866668701171875,
"learning_rate": 0.00010610021786492375,
"loss": 5.1092,
"step": 432
},
{
"epoch": 0.47167755991285404,
"grad_norm": 2.7630739212036133,
"learning_rate": 0.00010588235294117647,
"loss": 4.6703,
"step": 433
},
{
"epoch": 0.47276688453159044,
"grad_norm": 2.743724822998047,
"learning_rate": 0.00010566448801742921,
"loss": 4.9822,
"step": 434
},
{
"epoch": 0.4738562091503268,
"grad_norm": 2.2937724590301514,
"learning_rate": 0.00010544662309368192,
"loss": 4.6221,
"step": 435
},
{
"epoch": 0.4749455337690632,
"grad_norm": 4.061140537261963,
"learning_rate": 0.00010522875816993465,
"loss": 4.6573,
"step": 436
},
{
"epoch": 0.4760348583877996,
"grad_norm": 3.9372758865356445,
"learning_rate": 0.00010501089324618736,
"loss": 4.4482,
"step": 437
},
{
"epoch": 0.477124183006536,
"grad_norm": 5.1214776039123535,
"learning_rate": 0.0001047930283224401,
"loss": 5.1205,
"step": 438
},
{
"epoch": 0.4782135076252723,
"grad_norm": 4.609157085418701,
"learning_rate": 0.00010457516339869282,
"loss": 4.6272,
"step": 439
},
{
"epoch": 0.4793028322440087,
"grad_norm": 2.4764328002929688,
"learning_rate": 0.00010435729847494553,
"loss": 4.8355,
"step": 440
},
{
"epoch": 0.4803921568627451,
"grad_norm": 3.3244357109069824,
"learning_rate": 0.00010413943355119827,
"loss": 4.7154,
"step": 441
},
{
"epoch": 0.48148148148148145,
"grad_norm": 2.6419830322265625,
"learning_rate": 0.00010392156862745099,
"loss": 4.5512,
"step": 442
},
{
"epoch": 0.48257080610021785,
"grad_norm": 2.7745046615600586,
"learning_rate": 0.0001037037037037037,
"loss": 4.4104,
"step": 443
},
{
"epoch": 0.48366013071895425,
"grad_norm": 3.4725728034973145,
"learning_rate": 0.00010348583877995644,
"loss": 4.068,
"step": 444
},
{
"epoch": 0.48474945533769065,
"grad_norm": 3.839707851409912,
"learning_rate": 0.00010326797385620916,
"loss": 4.6368,
"step": 445
},
{
"epoch": 0.485838779956427,
"grad_norm": 3.2895514965057373,
"learning_rate": 0.00010305010893246187,
"loss": 4.7474,
"step": 446
},
{
"epoch": 0.4869281045751634,
"grad_norm": 5.714195728302002,
"learning_rate": 0.00010283224400871461,
"loss": 4.229,
"step": 447
},
{
"epoch": 0.4880174291938998,
"grad_norm": 4.010753154754639,
"learning_rate": 0.00010261437908496732,
"loss": 5.1063,
"step": 448
},
{
"epoch": 0.4891067538126362,
"grad_norm": 4.544241905212402,
"learning_rate": 0.00010239651416122004,
"loss": 5.0706,
"step": 449
},
{
"epoch": 0.49019607843137253,
"grad_norm": 3.7243716716766357,
"learning_rate": 0.00010217864923747278,
"loss": 4.8247,
"step": 450
},
{
"epoch": 0.4912854030501089,
"grad_norm": 3.6378531455993652,
"learning_rate": 0.00010196078431372549,
"loss": 4.9122,
"step": 451
},
{
"epoch": 0.4923747276688453,
"grad_norm": 2.5549800395965576,
"learning_rate": 0.00010174291938997821,
"loss": 4.4748,
"step": 452
},
{
"epoch": 0.4934640522875817,
"grad_norm": 4.132662296295166,
"learning_rate": 0.00010152505446623095,
"loss": 4.5698,
"step": 453
},
{
"epoch": 0.49455337690631807,
"grad_norm": 3.922738790512085,
"learning_rate": 0.00010130718954248366,
"loss": 5.0917,
"step": 454
},
{
"epoch": 0.49564270152505446,
"grad_norm": 3.2766265869140625,
"learning_rate": 0.00010108932461873639,
"loss": 4.4083,
"step": 455
},
{
"epoch": 0.49673202614379086,
"grad_norm": 3.297292709350586,
"learning_rate": 0.0001008714596949891,
"loss": 4.7993,
"step": 456
},
{
"epoch": 0.49782135076252726,
"grad_norm": 3.353689193725586,
"learning_rate": 0.00010065359477124183,
"loss": 4.6677,
"step": 457
},
{
"epoch": 0.4989106753812636,
"grad_norm": 2.691397190093994,
"learning_rate": 0.00010043572984749457,
"loss": 4.5091,
"step": 458
},
{
"epoch": 0.5,
"grad_norm": 2.4701926708221436,
"learning_rate": 0.00010021786492374727,
"loss": 4.674,
"step": 459
},
{
"epoch": 0.5010893246187363,
"grad_norm": 2.686195135116577,
"learning_rate": 0.0001,
"loss": 4.5644,
"step": 460
},
{
"epoch": 0.5021786492374728,
"grad_norm": 4.287258625030518,
"learning_rate": 9.978213507625273e-05,
"loss": 4.4426,
"step": 461
},
{
"epoch": 0.5032679738562091,
"grad_norm": 2.4832026958465576,
"learning_rate": 9.956427015250545e-05,
"loss": 4.664,
"step": 462
},
{
"epoch": 0.5043572984749455,
"grad_norm": 2.9472968578338623,
"learning_rate": 9.934640522875818e-05,
"loss": 4.534,
"step": 463
},
{
"epoch": 0.5054466230936819,
"grad_norm": 2.76766300201416,
"learning_rate": 9.91285403050109e-05,
"loss": 4.5646,
"step": 464
},
{
"epoch": 0.5065359477124183,
"grad_norm": 3.476855516433716,
"learning_rate": 9.891067538126362e-05,
"loss": 4.2473,
"step": 465
},
{
"epoch": 0.5076252723311547,
"grad_norm": 2.654572010040283,
"learning_rate": 9.869281045751635e-05,
"loss": 4.7984,
"step": 466
},
{
"epoch": 0.5087145969498911,
"grad_norm": 2.742908239364624,
"learning_rate": 9.847494553376907e-05,
"loss": 4.7206,
"step": 467
},
{
"epoch": 0.5098039215686274,
"grad_norm": 4.9197611808776855,
"learning_rate": 9.82570806100218e-05,
"loss": 4.147,
"step": 468
},
{
"epoch": 0.5108932461873639,
"grad_norm": 2.5709521770477295,
"learning_rate": 9.80392156862745e-05,
"loss": 4.9892,
"step": 469
},
{
"epoch": 0.5119825708061002,
"grad_norm": 2.8320975303649902,
"learning_rate": 9.782135076252724e-05,
"loss": 5.088,
"step": 470
},
{
"epoch": 0.5130718954248366,
"grad_norm": 2.810145378112793,
"learning_rate": 9.760348583877997e-05,
"loss": 4.6947,
"step": 471
},
{
"epoch": 0.514161220043573,
"grad_norm": 3.477754592895508,
"learning_rate": 9.738562091503268e-05,
"loss": 4.6429,
"step": 472
},
{
"epoch": 0.5152505446623094,
"grad_norm": 2.9826571941375732,
"learning_rate": 9.71677559912854e-05,
"loss": 4.4291,
"step": 473
},
{
"epoch": 0.5163398692810458,
"grad_norm": 4.387795925140381,
"learning_rate": 9.694989106753814e-05,
"loss": 5.3528,
"step": 474
},
{
"epoch": 0.5174291938997821,
"grad_norm": 4.61776876449585,
"learning_rate": 9.673202614379085e-05,
"loss": 4.3188,
"step": 475
},
{
"epoch": 0.5185185185185185,
"grad_norm": 2.3165104389190674,
"learning_rate": 9.651416122004357e-05,
"loss": 4.7733,
"step": 476
},
{
"epoch": 0.5196078431372549,
"grad_norm": 2.644317150115967,
"learning_rate": 9.62962962962963e-05,
"loss": 4.5561,
"step": 477
},
{
"epoch": 0.5206971677559913,
"grad_norm": 3.5700933933258057,
"learning_rate": 9.607843137254903e-05,
"loss": 4.6602,
"step": 478
},
{
"epoch": 0.5217864923747276,
"grad_norm": 2.1070199012756348,
"learning_rate": 9.586056644880174e-05,
"loss": 4.8822,
"step": 479
},
{
"epoch": 0.5228758169934641,
"grad_norm": 3.0725560188293457,
"learning_rate": 9.564270152505447e-05,
"loss": 4.7155,
"step": 480
},
{
"epoch": 0.5239651416122004,
"grad_norm": 3.228832960128784,
"learning_rate": 9.54248366013072e-05,
"loss": 4.4635,
"step": 481
},
{
"epoch": 0.5250544662309368,
"grad_norm": 3.046309232711792,
"learning_rate": 9.520697167755992e-05,
"loss": 4.5285,
"step": 482
},
{
"epoch": 0.5261437908496732,
"grad_norm": 4.328576564788818,
"learning_rate": 9.498910675381264e-05,
"loss": 4.8138,
"step": 483
},
{
"epoch": 0.5272331154684096,
"grad_norm": 2.9554691314697266,
"learning_rate": 9.477124183006536e-05,
"loss": 4.5337,
"step": 484
},
{
"epoch": 0.528322440087146,
"grad_norm": 2.700284242630005,
"learning_rate": 9.455337690631809e-05,
"loss": 4.3948,
"step": 485
},
{
"epoch": 0.5294117647058824,
"grad_norm": 2.9909026622772217,
"learning_rate": 9.433551198257081e-05,
"loss": 4.4274,
"step": 486
},
{
"epoch": 0.5305010893246187,
"grad_norm": 3.623758316040039,
"learning_rate": 9.411764705882353e-05,
"loss": 4.6817,
"step": 487
},
{
"epoch": 0.5315904139433552,
"grad_norm": 4.6507415771484375,
"learning_rate": 9.389978213507626e-05,
"loss": 4.4477,
"step": 488
},
{
"epoch": 0.5326797385620915,
"grad_norm": 2.5566675662994385,
"learning_rate": 9.368191721132898e-05,
"loss": 4.8413,
"step": 489
},
{
"epoch": 0.5337690631808278,
"grad_norm": 3.6067495346069336,
"learning_rate": 9.34640522875817e-05,
"loss": 4.5247,
"step": 490
},
{
"epoch": 0.5348583877995643,
"grad_norm": 2.9623374938964844,
"learning_rate": 9.324618736383443e-05,
"loss": 4.7552,
"step": 491
},
{
"epoch": 0.5359477124183006,
"grad_norm": 3.302311420440674,
"learning_rate": 9.302832244008714e-05,
"loss": 4.4709,
"step": 492
},
{
"epoch": 0.5370370370370371,
"grad_norm": 3.787867546081543,
"learning_rate": 9.281045751633988e-05,
"loss": 4.5812,
"step": 493
},
{
"epoch": 0.5381263616557734,
"grad_norm": 2.7494025230407715,
"learning_rate": 9.25925925925926e-05,
"loss": 4.6292,
"step": 494
},
{
"epoch": 0.5392156862745098,
"grad_norm": 2.9627022743225098,
"learning_rate": 9.237472766884531e-05,
"loss": 4.859,
"step": 495
},
{
"epoch": 0.5403050108932462,
"grad_norm": 2.661006212234497,
"learning_rate": 9.215686274509804e-05,
"loss": 4.5307,
"step": 496
},
{
"epoch": 0.5413943355119826,
"grad_norm": 2.73852276802063,
"learning_rate": 9.193899782135077e-05,
"loss": 4.7416,
"step": 497
},
{
"epoch": 0.5424836601307189,
"grad_norm": 3.0369839668273926,
"learning_rate": 9.172113289760348e-05,
"loss": 4.4017,
"step": 498
},
{
"epoch": 0.5435729847494554,
"grad_norm": 3.2155401706695557,
"learning_rate": 9.150326797385621e-05,
"loss": 4.6235,
"step": 499
},
{
"epoch": 0.5446623093681917,
"grad_norm": 4.628570556640625,
"learning_rate": 9.128540305010894e-05,
"loss": 4.3991,
"step": 500
},
{
"epoch": 0.545751633986928,
"grad_norm": 3.397460460662842,
"learning_rate": 9.106753812636167e-05,
"loss": 4.4024,
"step": 501
},
{
"epoch": 0.5468409586056645,
"grad_norm": 3.134575128555298,
"learning_rate": 9.084967320261438e-05,
"loss": 4.4325,
"step": 502
},
{
"epoch": 0.5479302832244008,
"grad_norm": 4.651434421539307,
"learning_rate": 9.06318082788671e-05,
"loss": 4.5204,
"step": 503
},
{
"epoch": 0.5490196078431373,
"grad_norm": 4.166567325592041,
"learning_rate": 9.041394335511984e-05,
"loss": 4.679,
"step": 504
},
{
"epoch": 0.5501089324618736,
"grad_norm": 2.6662089824676514,
"learning_rate": 9.019607843137255e-05,
"loss": 4.2818,
"step": 505
},
{
"epoch": 0.55119825708061,
"grad_norm": 2.519526958465576,
"learning_rate": 8.997821350762527e-05,
"loss": 4.5338,
"step": 506
},
{
"epoch": 0.5522875816993464,
"grad_norm": 2.561030149459839,
"learning_rate": 8.9760348583878e-05,
"loss": 4.5726,
"step": 507
},
{
"epoch": 0.5533769063180828,
"grad_norm": 3.1477291584014893,
"learning_rate": 8.954248366013072e-05,
"loss": 4.4923,
"step": 508
},
{
"epoch": 0.5544662309368191,
"grad_norm": 3.5873420238494873,
"learning_rate": 8.932461873638345e-05,
"loss": 4.3401,
"step": 509
},
{
"epoch": 0.5555555555555556,
"grad_norm": 3.926547050476074,
"learning_rate": 8.910675381263617e-05,
"loss": 5.1414,
"step": 510
},
{
"epoch": 0.5566448801742919,
"grad_norm": 3.5044329166412354,
"learning_rate": 8.888888888888889e-05,
"loss": 4.3844,
"step": 511
},
{
"epoch": 0.5577342047930284,
"grad_norm": 3.4362680912017822,
"learning_rate": 8.867102396514162e-05,
"loss": 4.4701,
"step": 512
},
{
"epoch": 0.5588235294117647,
"grad_norm": 3.804070472717285,
"learning_rate": 8.845315904139434e-05,
"loss": 4.5234,
"step": 513
},
{
"epoch": 0.5599128540305011,
"grad_norm": 3.2715303897857666,
"learning_rate": 8.823529411764706e-05,
"loss": 5.3599,
"step": 514
},
{
"epoch": 0.5610021786492375,
"grad_norm": 4.5119123458862305,
"learning_rate": 8.801742919389977e-05,
"loss": 4.3403,
"step": 515
},
{
"epoch": 0.5620915032679739,
"grad_norm": 2.9930667877197266,
"learning_rate": 8.779956427015251e-05,
"loss": 4.6728,
"step": 516
},
{
"epoch": 0.5631808278867102,
"grad_norm": 3.3414700031280518,
"learning_rate": 8.758169934640524e-05,
"loss": 4.2796,
"step": 517
},
{
"epoch": 0.5642701525054467,
"grad_norm": 3.454094409942627,
"learning_rate": 8.736383442265795e-05,
"loss": 4.6207,
"step": 518
},
{
"epoch": 0.565359477124183,
"grad_norm": 2.343726634979248,
"learning_rate": 8.714596949891068e-05,
"loss": 4.6071,
"step": 519
},
{
"epoch": 0.5664488017429193,
"grad_norm": 4.373025894165039,
"learning_rate": 8.692810457516341e-05,
"loss": 4.8911,
"step": 520
},
{
"epoch": 0.5675381263616558,
"grad_norm": 3.511699676513672,
"learning_rate": 8.671023965141613e-05,
"loss": 4.6699,
"step": 521
},
{
"epoch": 0.5686274509803921,
"grad_norm": 3.171043872833252,
"learning_rate": 8.649237472766884e-05,
"loss": 4.9787,
"step": 522
},
{
"epoch": 0.5697167755991286,
"grad_norm": 4.076540470123291,
"learning_rate": 8.627450980392158e-05,
"loss": 4.9232,
"step": 523
},
{
"epoch": 0.5708061002178649,
"grad_norm": 3.0852832794189453,
"learning_rate": 8.60566448801743e-05,
"loss": 4.7163,
"step": 524
},
{
"epoch": 0.5718954248366013,
"grad_norm": 2.6939470767974854,
"learning_rate": 8.583877995642701e-05,
"loss": 4.4997,
"step": 525
},
{
"epoch": 0.5729847494553377,
"grad_norm": 3.620074510574341,
"learning_rate": 8.562091503267974e-05,
"loss": 4.8771,
"step": 526
},
{
"epoch": 0.5740740740740741,
"grad_norm": 2.4933581352233887,
"learning_rate": 8.540305010893247e-05,
"loss": 4.5819,
"step": 527
},
{
"epoch": 0.5751633986928104,
"grad_norm": 3.1998159885406494,
"learning_rate": 8.518518518518518e-05,
"loss": 4.4039,
"step": 528
},
{
"epoch": 0.5762527233115469,
"grad_norm": 2.7315926551818848,
"learning_rate": 8.496732026143791e-05,
"loss": 4.9051,
"step": 529
},
{
"epoch": 0.5773420479302832,
"grad_norm": 3.002873420715332,
"learning_rate": 8.474945533769063e-05,
"loss": 4.4435,
"step": 530
},
{
"epoch": 0.5784313725490197,
"grad_norm": 3.9929301738739014,
"learning_rate": 8.453159041394336e-05,
"loss": 4.6196,
"step": 531
},
{
"epoch": 0.579520697167756,
"grad_norm": 4.095264434814453,
"learning_rate": 8.431372549019608e-05,
"loss": 4.6282,
"step": 532
},
{
"epoch": 0.5806100217864923,
"grad_norm": 2.285243272781372,
"learning_rate": 8.40958605664488e-05,
"loss": 4.9182,
"step": 533
},
{
"epoch": 0.5816993464052288,
"grad_norm": 2.6674435138702393,
"learning_rate": 8.387799564270154e-05,
"loss": 4.5317,
"step": 534
},
{
"epoch": 0.5827886710239651,
"grad_norm": 2.477482557296753,
"learning_rate": 8.366013071895425e-05,
"loss": 4.7032,
"step": 535
},
{
"epoch": 0.5838779956427015,
"grad_norm": 4.01880407333374,
"learning_rate": 8.344226579520698e-05,
"loss": 4.4193,
"step": 536
},
{
"epoch": 0.5849673202614379,
"grad_norm": 2.3387510776519775,
"learning_rate": 8.32244008714597e-05,
"loss": 4.7034,
"step": 537
},
{
"epoch": 0.5860566448801743,
"grad_norm": 2.9223763942718506,
"learning_rate": 8.300653594771242e-05,
"loss": 4.7173,
"step": 538
},
{
"epoch": 0.5871459694989106,
"grad_norm": 2.537396192550659,
"learning_rate": 8.278867102396515e-05,
"loss": 4.4837,
"step": 539
},
{
"epoch": 0.5882352941176471,
"grad_norm": 2.9067773818969727,
"learning_rate": 8.257080610021787e-05,
"loss": 4.7819,
"step": 540
},
{
"epoch": 0.5893246187363834,
"grad_norm": 3.539597988128662,
"learning_rate": 8.23529411764706e-05,
"loss": 4.2396,
"step": 541
},
{
"epoch": 0.5904139433551199,
"grad_norm": 3.219714641571045,
"learning_rate": 8.213507625272332e-05,
"loss": 4.8601,
"step": 542
},
{
"epoch": 0.5915032679738562,
"grad_norm": 4.1988325119018555,
"learning_rate": 8.191721132897604e-05,
"loss": 4.4636,
"step": 543
},
{
"epoch": 0.5925925925925926,
"grad_norm": 3.404759407043457,
"learning_rate": 8.169934640522877e-05,
"loss": 4.6973,
"step": 544
},
{
"epoch": 0.593681917211329,
"grad_norm": 3.848076820373535,
"learning_rate": 8.148148148148148e-05,
"loss": 4.5368,
"step": 545
},
{
"epoch": 0.5947712418300654,
"grad_norm": 3.773057460784912,
"learning_rate": 8.126361655773421e-05,
"loss": 4.5394,
"step": 546
},
{
"epoch": 0.5958605664488017,
"grad_norm": 3.8611278533935547,
"learning_rate": 8.104575163398694e-05,
"loss": 4.8637,
"step": 547
},
{
"epoch": 0.5969498910675382,
"grad_norm": 2.950388193130493,
"learning_rate": 8.082788671023965e-05,
"loss": 4.3669,
"step": 548
},
{
"epoch": 0.5980392156862745,
"grad_norm": 2.911773443222046,
"learning_rate": 8.061002178649237e-05,
"loss": 4.9239,
"step": 549
},
{
"epoch": 0.599128540305011,
"grad_norm": 2.4563143253326416,
"learning_rate": 8.039215686274511e-05,
"loss": 4.9816,
"step": 550
},
{
"epoch": 0.6002178649237473,
"grad_norm": 3.053799867630005,
"learning_rate": 8.017429193899782e-05,
"loss": 4.4451,
"step": 551
},
{
"epoch": 0.6013071895424836,
"grad_norm": 4.562179088592529,
"learning_rate": 7.995642701525054e-05,
"loss": 4.3851,
"step": 552
},
{
"epoch": 0.6023965141612201,
"grad_norm": 3.585679054260254,
"learning_rate": 7.973856209150328e-05,
"loss": 4.4014,
"step": 553
},
{
"epoch": 0.6034858387799564,
"grad_norm": 3.0385072231292725,
"learning_rate": 7.952069716775599e-05,
"loss": 4.753,
"step": 554
},
{
"epoch": 0.6045751633986928,
"grad_norm": 1.95540189743042,
"learning_rate": 7.930283224400871e-05,
"loss": 4.6713,
"step": 555
},
{
"epoch": 0.6056644880174292,
"grad_norm": 2.5057144165039062,
"learning_rate": 7.908496732026144e-05,
"loss": 4.5111,
"step": 556
},
{
"epoch": 0.6067538126361656,
"grad_norm": 2.4981019496917725,
"learning_rate": 7.886710239651418e-05,
"loss": 4.5696,
"step": 557
},
{
"epoch": 0.6078431372549019,
"grad_norm": 3.182044506072998,
"learning_rate": 7.864923747276689e-05,
"loss": 4.6675,
"step": 558
},
{
"epoch": 0.6089324618736384,
"grad_norm": 3.999967575073242,
"learning_rate": 7.843137254901961e-05,
"loss": 4.557,
"step": 559
},
{
"epoch": 0.6100217864923747,
"grad_norm": 2.430976152420044,
"learning_rate": 7.821350762527233e-05,
"loss": 4.6301,
"step": 560
},
{
"epoch": 0.6111111111111112,
"grad_norm": 4.943197250366211,
"learning_rate": 7.799564270152506e-05,
"loss": 5.0166,
"step": 561
},
{
"epoch": 0.6122004357298475,
"grad_norm": 2.988447427749634,
"learning_rate": 7.777777777777778e-05,
"loss": 4.5796,
"step": 562
},
{
"epoch": 0.6132897603485838,
"grad_norm": 2.7854208946228027,
"learning_rate": 7.75599128540305e-05,
"loss": 4.814,
"step": 563
},
{
"epoch": 0.6143790849673203,
"grad_norm": 3.3578221797943115,
"learning_rate": 7.734204793028323e-05,
"loss": 4.5014,
"step": 564
},
{
"epoch": 0.6154684095860566,
"grad_norm": 2.4927005767822266,
"learning_rate": 7.712418300653595e-05,
"loss": 4.3526,
"step": 565
},
{
"epoch": 0.616557734204793,
"grad_norm": 3.176056385040283,
"learning_rate": 7.690631808278868e-05,
"loss": 4.5428,
"step": 566
},
{
"epoch": 0.6176470588235294,
"grad_norm": 2.8541767597198486,
"learning_rate": 7.66884531590414e-05,
"loss": 4.6105,
"step": 567
},
{
"epoch": 0.6187363834422658,
"grad_norm": 4.8937153816223145,
"learning_rate": 7.647058823529411e-05,
"loss": 3.912,
"step": 568
},
{
"epoch": 0.6198257080610022,
"grad_norm": 4.21392297744751,
"learning_rate": 7.625272331154685e-05,
"loss": 4.1306,
"step": 569
},
{
"epoch": 0.6209150326797386,
"grad_norm": 2.5804617404937744,
"learning_rate": 7.603485838779957e-05,
"loss": 4.7057,
"step": 570
},
{
"epoch": 0.6220043572984749,
"grad_norm": 2.3549726009368896,
"learning_rate": 7.581699346405228e-05,
"loss": 4.6464,
"step": 571
},
{
"epoch": 0.6230936819172114,
"grad_norm": 3.9271023273468018,
"learning_rate": 7.559912854030502e-05,
"loss": 4.1135,
"step": 572
},
{
"epoch": 0.6241830065359477,
"grad_norm": 3.2713518142700195,
"learning_rate": 7.538126361655774e-05,
"loss": 4.7463,
"step": 573
},
{
"epoch": 0.6252723311546841,
"grad_norm": 2.842008113861084,
"learning_rate": 7.516339869281045e-05,
"loss": 4.6107,
"step": 574
},
{
"epoch": 0.6263616557734205,
"grad_norm": 2.7107882499694824,
"learning_rate": 7.494553376906318e-05,
"loss": 4.7955,
"step": 575
},
{
"epoch": 0.6274509803921569,
"grad_norm": 3.076648235321045,
"learning_rate": 7.472766884531592e-05,
"loss": 4.8382,
"step": 576
},
{
"epoch": 0.6285403050108932,
"grad_norm": 2.7458837032318115,
"learning_rate": 7.450980392156864e-05,
"loss": 4.4344,
"step": 577
},
{
"epoch": 0.6296296296296297,
"grad_norm": 2.543877124786377,
"learning_rate": 7.429193899782135e-05,
"loss": 4.3536,
"step": 578
},
{
"epoch": 0.630718954248366,
"grad_norm": 3.883885145187378,
"learning_rate": 7.407407407407407e-05,
"loss": 4.8026,
"step": 579
},
{
"epoch": 0.6318082788671024,
"grad_norm": 2.9665493965148926,
"learning_rate": 7.385620915032681e-05,
"loss": 4.779,
"step": 580
},
{
"epoch": 0.6328976034858388,
"grad_norm": 3.1739346981048584,
"learning_rate": 7.363834422657952e-05,
"loss": 4.7859,
"step": 581
},
{
"epoch": 0.6339869281045751,
"grad_norm": 3.4699697494506836,
"learning_rate": 7.342047930283224e-05,
"loss": 4.2899,
"step": 582
},
{
"epoch": 0.6350762527233116,
"grad_norm": 4.120363235473633,
"learning_rate": 7.320261437908497e-05,
"loss": 4.3786,
"step": 583
},
{
"epoch": 0.6361655773420479,
"grad_norm": 2.930485248565674,
"learning_rate": 7.298474945533769e-05,
"loss": 4.4614,
"step": 584
},
{
"epoch": 0.6372549019607843,
"grad_norm": 2.5551388263702393,
"learning_rate": 7.276688453159042e-05,
"loss": 4.5241,
"step": 585
},
{
"epoch": 0.6383442265795207,
"grad_norm": 2.2362239360809326,
"learning_rate": 7.254901960784314e-05,
"loss": 4.763,
"step": 586
},
{
"epoch": 0.6394335511982571,
"grad_norm": 2.7569496631622314,
"learning_rate": 7.233115468409586e-05,
"loss": 4.3938,
"step": 587
},
{
"epoch": 0.6405228758169934,
"grad_norm": 2.8717620372772217,
"learning_rate": 7.211328976034859e-05,
"loss": 4.4601,
"step": 588
},
{
"epoch": 0.6416122004357299,
"grad_norm": 4.488802433013916,
"learning_rate": 7.189542483660131e-05,
"loss": 4.3694,
"step": 589
},
{
"epoch": 0.6427015250544662,
"grad_norm": 3.735525131225586,
"learning_rate": 7.167755991285404e-05,
"loss": 5.0611,
"step": 590
},
{
"epoch": 0.6437908496732027,
"grad_norm": 3.061038017272949,
"learning_rate": 7.145969498910676e-05,
"loss": 4.5695,
"step": 591
},
{
"epoch": 0.644880174291939,
"grad_norm": 4.214951992034912,
"learning_rate": 7.124183006535948e-05,
"loss": 4.5174,
"step": 592
},
{
"epoch": 0.6459694989106753,
"grad_norm": 3.226206064224243,
"learning_rate": 7.10239651416122e-05,
"loss": 4.9365,
"step": 593
},
{
"epoch": 0.6470588235294118,
"grad_norm": 2.9899861812591553,
"learning_rate": 7.080610021786492e-05,
"loss": 4.2227,
"step": 594
},
{
"epoch": 0.6481481481481481,
"grad_norm": 2.587507963180542,
"learning_rate": 7.058823529411765e-05,
"loss": 4.8616,
"step": 595
},
{
"epoch": 0.6492374727668845,
"grad_norm": 3.3234870433807373,
"learning_rate": 7.037037037037038e-05,
"loss": 5.0433,
"step": 596
},
{
"epoch": 0.6503267973856209,
"grad_norm": 2.378889560699463,
"learning_rate": 7.01525054466231e-05,
"loss": 4.658,
"step": 597
},
{
"epoch": 0.6514161220043573,
"grad_norm": 3.2097010612487793,
"learning_rate": 6.993464052287581e-05,
"loss": 4.1424,
"step": 598
},
{
"epoch": 0.6525054466230937,
"grad_norm": 2.9745841026306152,
"learning_rate": 6.971677559912855e-05,
"loss": 4.3658,
"step": 599
},
{
"epoch": 0.6535947712418301,
"grad_norm": 3.336416482925415,
"learning_rate": 6.949891067538127e-05,
"loss": 4.5247,
"step": 600
},
{
"epoch": 0.6546840958605664,
"grad_norm": 3.381309986114502,
"learning_rate": 6.928104575163398e-05,
"loss": 4.9413,
"step": 601
},
{
"epoch": 0.6557734204793029,
"grad_norm": 2.6095030307769775,
"learning_rate": 6.906318082788671e-05,
"loss": 4.5653,
"step": 602
},
{
"epoch": 0.6568627450980392,
"grad_norm": 2.8048579692840576,
"learning_rate": 6.884531590413945e-05,
"loss": 4.5627,
"step": 603
},
{
"epoch": 0.6579520697167756,
"grad_norm": 3.750295400619507,
"learning_rate": 6.862745098039216e-05,
"loss": 4.6294,
"step": 604
},
{
"epoch": 0.659041394335512,
"grad_norm": 2.7482926845550537,
"learning_rate": 6.840958605664488e-05,
"loss": 4.821,
"step": 605
},
{
"epoch": 0.6601307189542484,
"grad_norm": 2.6860432624816895,
"learning_rate": 6.81917211328976e-05,
"loss": 4.5562,
"step": 606
},
{
"epoch": 0.6612200435729847,
"grad_norm": 3.9789106845855713,
"learning_rate": 6.797385620915033e-05,
"loss": 4.7817,
"step": 607
},
{
"epoch": 0.6623093681917211,
"grad_norm": 2.963273048400879,
"learning_rate": 6.775599128540305e-05,
"loss": 4.8488,
"step": 608
},
{
"epoch": 0.6633986928104575,
"grad_norm": 3.09425687789917,
"learning_rate": 6.753812636165577e-05,
"loss": 4.5239,
"step": 609
},
{
"epoch": 0.664488017429194,
"grad_norm": 2.897761106491089,
"learning_rate": 6.73202614379085e-05,
"loss": 4.6252,
"step": 610
},
{
"epoch": 0.6655773420479303,
"grad_norm": 2.7389283180236816,
"learning_rate": 6.710239651416122e-05,
"loss": 4.4114,
"step": 611
},
{
"epoch": 0.6666666666666666,
"grad_norm": 3.7740859985351562,
"learning_rate": 6.688453159041395e-05,
"loss": 5.1252,
"step": 612
},
{
"epoch": 0.6677559912854031,
"grad_norm": 2.69930362701416,
"learning_rate": 6.666666666666667e-05,
"loss": 4.8494,
"step": 613
},
{
"epoch": 0.6688453159041394,
"grad_norm": 3.3595452308654785,
"learning_rate": 6.64488017429194e-05,
"loss": 4.5464,
"step": 614
},
{
"epoch": 0.6699346405228758,
"grad_norm": 3.3550498485565186,
"learning_rate": 6.623093681917212e-05,
"loss": 4.0987,
"step": 615
},
{
"epoch": 0.6710239651416122,
"grad_norm": 2.9367904663085938,
"learning_rate": 6.601307189542484e-05,
"loss": 4.5163,
"step": 616
},
{
"epoch": 0.6721132897603486,
"grad_norm": 3.16866135597229,
"learning_rate": 6.579520697167757e-05,
"loss": 4.7276,
"step": 617
},
{
"epoch": 0.673202614379085,
"grad_norm": 2.5311694145202637,
"learning_rate": 6.557734204793029e-05,
"loss": 4.5561,
"step": 618
},
{
"epoch": 0.6742919389978214,
"grad_norm": 3.9656856060028076,
"learning_rate": 6.535947712418301e-05,
"loss": 4.7085,
"step": 619
},
{
"epoch": 0.6753812636165577,
"grad_norm": 3.392267942428589,
"learning_rate": 6.514161220043574e-05,
"loss": 4.4498,
"step": 620
},
{
"epoch": 0.6764705882352942,
"grad_norm": 3.2795276641845703,
"learning_rate": 6.492374727668845e-05,
"loss": 5.0139,
"step": 621
},
{
"epoch": 0.6775599128540305,
"grad_norm": 3.897383451461792,
"learning_rate": 6.470588235294118e-05,
"loss": 4.4428,
"step": 622
},
{
"epoch": 0.6786492374727668,
"grad_norm": 3.054150104522705,
"learning_rate": 6.448801742919391e-05,
"loss": 4.574,
"step": 623
},
{
"epoch": 0.6797385620915033,
"grad_norm": 3.345979690551758,
"learning_rate": 6.427015250544662e-05,
"loss": 4.7765,
"step": 624
},
{
"epoch": 0.6808278867102396,
"grad_norm": 2.7274982929229736,
"learning_rate": 6.405228758169934e-05,
"loss": 4.435,
"step": 625
},
{
"epoch": 0.681917211328976,
"grad_norm": 4.5814032554626465,
"learning_rate": 6.383442265795208e-05,
"loss": 4.1487,
"step": 626
},
{
"epoch": 0.6830065359477124,
"grad_norm": 4.058572292327881,
"learning_rate": 6.361655773420479e-05,
"loss": 4.386,
"step": 627
},
{
"epoch": 0.6840958605664488,
"grad_norm": 3.2232651710510254,
"learning_rate": 6.339869281045751e-05,
"loss": 4.8899,
"step": 628
},
{
"epoch": 0.6851851851851852,
"grad_norm": 3.23541259765625,
"learning_rate": 6.318082788671025e-05,
"loss": 4.6912,
"step": 629
},
{
"epoch": 0.6862745098039216,
"grad_norm": 4.35893440246582,
"learning_rate": 6.296296296296296e-05,
"loss": 4.6744,
"step": 630
},
{
"epoch": 0.6873638344226579,
"grad_norm": 2.809992790222168,
"learning_rate": 6.274509803921569e-05,
"loss": 4.4697,
"step": 631
},
{
"epoch": 0.6884531590413944,
"grad_norm": 3.3621973991394043,
"learning_rate": 6.252723311546841e-05,
"loss": 4.5625,
"step": 632
},
{
"epoch": 0.6895424836601307,
"grad_norm": 3.5378973484039307,
"learning_rate": 6.230936819172115e-05,
"loss": 4.4222,
"step": 633
},
{
"epoch": 0.690631808278867,
"grad_norm": 4.445308208465576,
"learning_rate": 6.209150326797386e-05,
"loss": 4.4816,
"step": 634
},
{
"epoch": 0.6917211328976035,
"grad_norm": 4.621172904968262,
"learning_rate": 6.187363834422658e-05,
"loss": 4.8582,
"step": 635
},
{
"epoch": 0.6928104575163399,
"grad_norm": 2.6876468658447266,
"learning_rate": 6.16557734204793e-05,
"loss": 5.1406,
"step": 636
},
{
"epoch": 0.6938997821350763,
"grad_norm": 2.912463665008545,
"learning_rate": 6.143790849673203e-05,
"loss": 4.3075,
"step": 637
},
{
"epoch": 0.6949891067538126,
"grad_norm": 3.2914223670959473,
"learning_rate": 6.122004357298475e-05,
"loss": 4.5181,
"step": 638
},
{
"epoch": 0.696078431372549,
"grad_norm": 3.1938445568084717,
"learning_rate": 6.1002178649237476e-05,
"loss": 4.2892,
"step": 639
},
{
"epoch": 0.6971677559912854,
"grad_norm": 3.2888052463531494,
"learning_rate": 6.078431372549019e-05,
"loss": 4.3035,
"step": 640
},
{
"epoch": 0.6982570806100218,
"grad_norm": 3.7282330989837646,
"learning_rate": 6.0566448801742924e-05,
"loss": 4.5092,
"step": 641
},
{
"epoch": 0.6993464052287581,
"grad_norm": 3.121951103210449,
"learning_rate": 6.034858387799565e-05,
"loss": 4.41,
"step": 642
},
{
"epoch": 0.7004357298474946,
"grad_norm": 3.6562163829803467,
"learning_rate": 6.0130718954248365e-05,
"loss": 4.5502,
"step": 643
},
{
"epoch": 0.7015250544662309,
"grad_norm": 4.061103343963623,
"learning_rate": 5.9912854030501095e-05,
"loss": 4.4169,
"step": 644
},
{
"epoch": 0.7026143790849673,
"grad_norm": 2.988943338394165,
"learning_rate": 5.969498910675382e-05,
"loss": 4.4949,
"step": 645
},
{
"epoch": 0.7037037037037037,
"grad_norm": 3.66359281539917,
"learning_rate": 5.9477124183006536e-05,
"loss": 4.3983,
"step": 646
},
{
"epoch": 0.7047930283224401,
"grad_norm": 2.7517478466033936,
"learning_rate": 5.925925925925926e-05,
"loss": 4.5619,
"step": 647
},
{
"epoch": 0.7058823529411765,
"grad_norm": 3.325645923614502,
"learning_rate": 5.904139433551199e-05,
"loss": 4.3944,
"step": 648
},
{
"epoch": 0.7069716775599129,
"grad_norm": 2.393507242202759,
"learning_rate": 5.882352941176471e-05,
"loss": 4.3706,
"step": 649
},
{
"epoch": 0.7080610021786492,
"grad_norm": 2.5588436126708984,
"learning_rate": 5.860566448801743e-05,
"loss": 4.5016,
"step": 650
},
{
"epoch": 0.7091503267973857,
"grad_norm": 2.5094637870788574,
"learning_rate": 5.838779956427015e-05,
"loss": 4.3259,
"step": 651
},
{
"epoch": 0.710239651416122,
"grad_norm": 2.8639650344848633,
"learning_rate": 5.8169934640522886e-05,
"loss": 4.3741,
"step": 652
},
{
"epoch": 0.7113289760348583,
"grad_norm": 2.851107358932495,
"learning_rate": 5.79520697167756e-05,
"loss": 4.5406,
"step": 653
},
{
"epoch": 0.7124183006535948,
"grad_norm": 2.6943624019622803,
"learning_rate": 5.773420479302833e-05,
"loss": 4.7667,
"step": 654
},
{
"epoch": 0.7135076252723311,
"grad_norm": 3.8529598712921143,
"learning_rate": 5.7516339869281044e-05,
"loss": 4.1106,
"step": 655
},
{
"epoch": 0.7145969498910676,
"grad_norm": 3.688046932220459,
"learning_rate": 5.7298474945533774e-05,
"loss": 4.872,
"step": 656
},
{
"epoch": 0.7156862745098039,
"grad_norm": 6.292950630187988,
"learning_rate": 5.70806100217865e-05,
"loss": 4.9386,
"step": 657
},
{
"epoch": 0.7167755991285403,
"grad_norm": 2.9463226795196533,
"learning_rate": 5.6862745098039215e-05,
"loss": 4.3529,
"step": 658
},
{
"epoch": 0.7178649237472767,
"grad_norm": 3.75313138961792,
"learning_rate": 5.664488017429194e-05,
"loss": 4.3738,
"step": 659
},
{
"epoch": 0.7189542483660131,
"grad_norm": 3.0259487628936768,
"learning_rate": 5.642701525054467e-05,
"loss": 4.517,
"step": 660
},
{
"epoch": 0.7200435729847494,
"grad_norm": 3.362027168273926,
"learning_rate": 5.620915032679739e-05,
"loss": 4.5903,
"step": 661
},
{
"epoch": 0.7211328976034859,
"grad_norm": 3.0693953037261963,
"learning_rate": 5.599128540305011e-05,
"loss": 4.7722,
"step": 662
},
{
"epoch": 0.7222222222222222,
"grad_norm": 4.313173770904541,
"learning_rate": 5.577342047930284e-05,
"loss": 4.705,
"step": 663
},
{
"epoch": 0.7233115468409586,
"grad_norm": 3.1795902252197266,
"learning_rate": 5.555555555555556e-05,
"loss": 4.6037,
"step": 664
},
{
"epoch": 0.724400871459695,
"grad_norm": 2.650792360305786,
"learning_rate": 5.533769063180828e-05,
"loss": 4.5835,
"step": 665
},
{
"epoch": 0.7254901960784313,
"grad_norm": 3.9786386489868164,
"learning_rate": 5.5119825708061e-05,
"loss": 5.0628,
"step": 666
},
{
"epoch": 0.7265795206971678,
"grad_norm": 2.3720548152923584,
"learning_rate": 5.490196078431373e-05,
"loss": 4.4845,
"step": 667
},
{
"epoch": 0.7276688453159041,
"grad_norm": 4.014529705047607,
"learning_rate": 5.4684095860566454e-05,
"loss": 4.8008,
"step": 668
},
{
"epoch": 0.7287581699346405,
"grad_norm": 3.989478826522827,
"learning_rate": 5.446623093681917e-05,
"loss": 4.353,
"step": 669
},
{
"epoch": 0.7298474945533769,
"grad_norm": 2.8843913078308105,
"learning_rate": 5.4248366013071894e-05,
"loss": 4.5807,
"step": 670
},
{
"epoch": 0.7309368191721133,
"grad_norm": 4.555778980255127,
"learning_rate": 5.4030501089324625e-05,
"loss": 4.3632,
"step": 671
},
{
"epoch": 0.7320261437908496,
"grad_norm": 2.7697739601135254,
"learning_rate": 5.381263616557735e-05,
"loss": 4.7757,
"step": 672
},
{
"epoch": 0.7331154684095861,
"grad_norm": 6.817968845367432,
"learning_rate": 5.3594771241830066e-05,
"loss": 4.3715,
"step": 673
},
{
"epoch": 0.7342047930283224,
"grad_norm": 3.6644465923309326,
"learning_rate": 5.337690631808279e-05,
"loss": 4.3353,
"step": 674
},
{
"epoch": 0.7352941176470589,
"grad_norm": 2.378873586654663,
"learning_rate": 5.315904139433552e-05,
"loss": 4.3944,
"step": 675
},
{
"epoch": 0.7363834422657952,
"grad_norm": 1.8505460023880005,
"learning_rate": 5.294117647058824e-05,
"loss": 4.8842,
"step": 676
},
{
"epoch": 0.7374727668845316,
"grad_norm": 2.434199333190918,
"learning_rate": 5.272331154684096e-05,
"loss": 4.6757,
"step": 677
},
{
"epoch": 0.738562091503268,
"grad_norm": 3.9741463661193848,
"learning_rate": 5.250544662309368e-05,
"loss": 4.5491,
"step": 678
},
{
"epoch": 0.7396514161220044,
"grad_norm": 2.569467306137085,
"learning_rate": 5.228758169934641e-05,
"loss": 4.7716,
"step": 679
},
{
"epoch": 0.7407407407407407,
"grad_norm": 3.0343027114868164,
"learning_rate": 5.206971677559913e-05,
"loss": 4.8048,
"step": 680
},
{
"epoch": 0.7418300653594772,
"grad_norm": 1.8422495126724243,
"learning_rate": 5.185185185185185e-05,
"loss": 4.5785,
"step": 681
},
{
"epoch": 0.7429193899782135,
"grad_norm": 3.4610087871551514,
"learning_rate": 5.163398692810458e-05,
"loss": 4.5243,
"step": 682
},
{
"epoch": 0.7440087145969498,
"grad_norm": 3.4240050315856934,
"learning_rate": 5.1416122004357304e-05,
"loss": 4.4241,
"step": 683
},
{
"epoch": 0.7450980392156863,
"grad_norm": 3.1401493549346924,
"learning_rate": 5.119825708061002e-05,
"loss": 4.5411,
"step": 684
},
{
"epoch": 0.7461873638344226,
"grad_norm": 2.829155921936035,
"learning_rate": 5.0980392156862745e-05,
"loss": 4.9659,
"step": 685
},
{
"epoch": 0.7472766884531591,
"grad_norm": 4.25803279876709,
"learning_rate": 5.0762527233115476e-05,
"loss": 4.3894,
"step": 686
},
{
"epoch": 0.7483660130718954,
"grad_norm": 3.9126133918762207,
"learning_rate": 5.054466230936819e-05,
"loss": 3.9024,
"step": 687
},
{
"epoch": 0.7494553376906318,
"grad_norm": 3.5430266857147217,
"learning_rate": 5.032679738562092e-05,
"loss": 4.282,
"step": 688
},
{
"epoch": 0.7505446623093682,
"grad_norm": 3.804924249649048,
"learning_rate": 5.0108932461873634e-05,
"loss": 4.1057,
"step": 689
},
{
"epoch": 0.7516339869281046,
"grad_norm": 3.732877492904663,
"learning_rate": 4.9891067538126364e-05,
"loss": 4.4652,
"step": 690
},
{
"epoch": 0.7527233115468409,
"grad_norm": 2.9085025787353516,
"learning_rate": 4.967320261437909e-05,
"loss": 4.5023,
"step": 691
},
{
"epoch": 0.7538126361655774,
"grad_norm": 3.6516687870025635,
"learning_rate": 4.945533769063181e-05,
"loss": 4.8358,
"step": 692
},
{
"epoch": 0.7549019607843137,
"grad_norm": 4.306848526000977,
"learning_rate": 4.9237472766884536e-05,
"loss": 4.2137,
"step": 693
},
{
"epoch": 0.7559912854030502,
"grad_norm": 3.881239652633667,
"learning_rate": 4.901960784313725e-05,
"loss": 4.2181,
"step": 694
},
{
"epoch": 0.7570806100217865,
"grad_norm": 2.368997097015381,
"learning_rate": 4.8801742919389983e-05,
"loss": 4.6,
"step": 695
},
{
"epoch": 0.7581699346405228,
"grad_norm": 3.4072535037994385,
"learning_rate": 4.85838779956427e-05,
"loss": 4.4402,
"step": 696
},
{
"epoch": 0.7592592592592593,
"grad_norm": 2.909001588821411,
"learning_rate": 4.8366013071895424e-05,
"loss": 4.2182,
"step": 697
},
{
"epoch": 0.7603485838779956,
"grad_norm": 3.1234962940216064,
"learning_rate": 4.814814814814815e-05,
"loss": 4.2472,
"step": 698
},
{
"epoch": 0.761437908496732,
"grad_norm": 2.94049334526062,
"learning_rate": 4.793028322440087e-05,
"loss": 4.6319,
"step": 699
},
{
"epoch": 0.7625272331154684,
"grad_norm": 2.517805576324463,
"learning_rate": 4.77124183006536e-05,
"loss": 4.5978,
"step": 700
},
{
"epoch": 0.7636165577342048,
"grad_norm": 3.0102155208587646,
"learning_rate": 4.749455337690632e-05,
"loss": 4.4812,
"step": 701
},
{
"epoch": 0.7647058823529411,
"grad_norm": 3.5447139739990234,
"learning_rate": 4.7276688453159044e-05,
"loss": 4.8397,
"step": 702
},
{
"epoch": 0.7657952069716776,
"grad_norm": 2.5676867961883545,
"learning_rate": 4.705882352941177e-05,
"loss": 4.3917,
"step": 703
},
{
"epoch": 0.7668845315904139,
"grad_norm": 5.074711322784424,
"learning_rate": 4.684095860566449e-05,
"loss": 4.8466,
"step": 704
},
{
"epoch": 0.7679738562091504,
"grad_norm": 3.212968587875366,
"learning_rate": 4.6623093681917215e-05,
"loss": 4.6356,
"step": 705
},
{
"epoch": 0.7690631808278867,
"grad_norm": 2.5076026916503906,
"learning_rate": 4.640522875816994e-05,
"loss": 4.3863,
"step": 706
},
{
"epoch": 0.7701525054466231,
"grad_norm": 3.6537904739379883,
"learning_rate": 4.6187363834422656e-05,
"loss": 4.1982,
"step": 707
},
{
"epoch": 0.7712418300653595,
"grad_norm": 3.170794725418091,
"learning_rate": 4.5969498910675387e-05,
"loss": 4.4426,
"step": 708
},
{
"epoch": 0.7723311546840959,
"grad_norm": 3.2717745304107666,
"learning_rate": 4.5751633986928104e-05,
"loss": 4.923,
"step": 709
},
{
"epoch": 0.7734204793028322,
"grad_norm": 3.9620261192321777,
"learning_rate": 4.5533769063180834e-05,
"loss": 4.2521,
"step": 710
},
{
"epoch": 0.7745098039215687,
"grad_norm": 4.35859489440918,
"learning_rate": 4.531590413943355e-05,
"loss": 3.9256,
"step": 711
},
{
"epoch": 0.775599128540305,
"grad_norm": 3.554739236831665,
"learning_rate": 4.5098039215686275e-05,
"loss": 4.2547,
"step": 712
},
{
"epoch": 0.7766884531590414,
"grad_norm": 2.5886950492858887,
"learning_rate": 4.4880174291939e-05,
"loss": 4.3773,
"step": 713
},
{
"epoch": 0.7777777777777778,
"grad_norm": 2.9419705867767334,
"learning_rate": 4.466230936819172e-05,
"loss": 4.4662,
"step": 714
},
{
"epoch": 0.7788671023965141,
"grad_norm": 3.4473655223846436,
"learning_rate": 4.4444444444444447e-05,
"loss": 3.8291,
"step": 715
},
{
"epoch": 0.7799564270152506,
"grad_norm": 2.6257734298706055,
"learning_rate": 4.422657952069717e-05,
"loss": 4.5269,
"step": 716
},
{
"epoch": 0.7810457516339869,
"grad_norm": 4.320521831512451,
"learning_rate": 4.400871459694989e-05,
"loss": 4.5898,
"step": 717
},
{
"epoch": 0.7821350762527233,
"grad_norm": 3.2959165573120117,
"learning_rate": 4.379084967320262e-05,
"loss": 4.4443,
"step": 718
},
{
"epoch": 0.7832244008714597,
"grad_norm": 3.6437246799468994,
"learning_rate": 4.357298474945534e-05,
"loss": 4.5023,
"step": 719
},
{
"epoch": 0.7843137254901961,
"grad_norm": 2.935994863510132,
"learning_rate": 4.3355119825708066e-05,
"loss": 4.4349,
"step": 720
},
{
"epoch": 0.7854030501089324,
"grad_norm": 3.684237241744995,
"learning_rate": 4.313725490196079e-05,
"loss": 4.0764,
"step": 721
},
{
"epoch": 0.7864923747276689,
"grad_norm": 2.6446266174316406,
"learning_rate": 4.291938997821351e-05,
"loss": 4.483,
"step": 722
},
{
"epoch": 0.7875816993464052,
"grad_norm": 3.3432328701019287,
"learning_rate": 4.270152505446624e-05,
"loss": 4.1032,
"step": 723
},
{
"epoch": 0.7886710239651417,
"grad_norm": 2.900771379470825,
"learning_rate": 4.2483660130718954e-05,
"loss": 4.128,
"step": 724
},
{
"epoch": 0.789760348583878,
"grad_norm": 3.470869302749634,
"learning_rate": 4.226579520697168e-05,
"loss": 4.1844,
"step": 725
},
{
"epoch": 0.7908496732026143,
"grad_norm": 3.150548219680786,
"learning_rate": 4.20479302832244e-05,
"loss": 4.4777,
"step": 726
},
{
"epoch": 0.7919389978213508,
"grad_norm": 3.979973077774048,
"learning_rate": 4.1830065359477126e-05,
"loss": 4.5166,
"step": 727
},
{
"epoch": 0.7930283224400871,
"grad_norm": 3.882768392562866,
"learning_rate": 4.161220043572985e-05,
"loss": 4.3409,
"step": 728
},
{
"epoch": 0.7941176470588235,
"grad_norm": 3.7101409435272217,
"learning_rate": 4.1394335511982573e-05,
"loss": 4.6513,
"step": 729
},
{
"epoch": 0.7952069716775599,
"grad_norm": 3.294309616088867,
"learning_rate": 4.11764705882353e-05,
"loss": 4.6719,
"step": 730
},
{
"epoch": 0.7962962962962963,
"grad_norm": 3.4256606101989746,
"learning_rate": 4.095860566448802e-05,
"loss": 4.277,
"step": 731
},
{
"epoch": 0.7973856209150327,
"grad_norm": 3.583495616912842,
"learning_rate": 4.074074074074074e-05,
"loss": 4.2184,
"step": 732
},
{
"epoch": 0.7984749455337691,
"grad_norm": 2.8277370929718018,
"learning_rate": 4.052287581699347e-05,
"loss": 4.4167,
"step": 733
},
{
"epoch": 0.7995642701525054,
"grad_norm": 4.443241596221924,
"learning_rate": 4.0305010893246186e-05,
"loss": 4.1597,
"step": 734
},
{
"epoch": 0.8006535947712419,
"grad_norm": 3.9684319496154785,
"learning_rate": 4.008714596949891e-05,
"loss": 4.5253,
"step": 735
},
{
"epoch": 0.8017429193899782,
"grad_norm": 3.9552969932556152,
"learning_rate": 3.986928104575164e-05,
"loss": 4.4641,
"step": 736
},
{
"epoch": 0.8028322440087146,
"grad_norm": 2.4811275005340576,
"learning_rate": 3.965141612200436e-05,
"loss": 4.8492,
"step": 737
},
{
"epoch": 0.803921568627451,
"grad_norm": 2.727994918823242,
"learning_rate": 3.943355119825709e-05,
"loss": 4.7617,
"step": 738
},
{
"epoch": 0.8050108932461874,
"grad_norm": 3.1672701835632324,
"learning_rate": 3.9215686274509805e-05,
"loss": 4.341,
"step": 739
},
{
"epoch": 0.8061002178649237,
"grad_norm": 5.139643669128418,
"learning_rate": 3.899782135076253e-05,
"loss": 4.3594,
"step": 740
},
{
"epoch": 0.8071895424836601,
"grad_norm": 2.6922364234924316,
"learning_rate": 3.877995642701525e-05,
"loss": 4.786,
"step": 741
},
{
"epoch": 0.8082788671023965,
"grad_norm": 4.037826061248779,
"learning_rate": 3.8562091503267977e-05,
"loss": 4.7951,
"step": 742
},
{
"epoch": 0.809368191721133,
"grad_norm": 4.6529927253723145,
"learning_rate": 3.83442265795207e-05,
"loss": 4.7118,
"step": 743
},
{
"epoch": 0.8104575163398693,
"grad_norm": 4.143136978149414,
"learning_rate": 3.8126361655773424e-05,
"loss": 4.0313,
"step": 744
},
{
"epoch": 0.8115468409586056,
"grad_norm": 3.8727409839630127,
"learning_rate": 3.790849673202614e-05,
"loss": 4.9551,
"step": 745
},
{
"epoch": 0.8126361655773421,
"grad_norm": 3.256863594055176,
"learning_rate": 3.769063180827887e-05,
"loss": 4.4688,
"step": 746
},
{
"epoch": 0.8137254901960784,
"grad_norm": 3.701784610748291,
"learning_rate": 3.747276688453159e-05,
"loss": 4.3752,
"step": 747
},
{
"epoch": 0.8148148148148148,
"grad_norm": 3.1670970916748047,
"learning_rate": 3.725490196078432e-05,
"loss": 4.5925,
"step": 748
},
{
"epoch": 0.8159041394335512,
"grad_norm": 2.430621385574341,
"learning_rate": 3.7037037037037037e-05,
"loss": 4.497,
"step": 749
},
{
"epoch": 0.8169934640522876,
"grad_norm": 5.46519660949707,
"learning_rate": 3.681917211328976e-05,
"loss": 4.4218,
"step": 750
},
{
"epoch": 0.818082788671024,
"grad_norm": 2.524573564529419,
"learning_rate": 3.6601307189542484e-05,
"loss": 4.5478,
"step": 751
},
{
"epoch": 0.8191721132897604,
"grad_norm": 3.3287127017974854,
"learning_rate": 3.638344226579521e-05,
"loss": 4.3598,
"step": 752
},
{
"epoch": 0.8202614379084967,
"grad_norm": 3.003361940383911,
"learning_rate": 3.616557734204793e-05,
"loss": 4.2639,
"step": 753
},
{
"epoch": 0.8213507625272332,
"grad_norm": 3.4090774059295654,
"learning_rate": 3.5947712418300656e-05,
"loss": 4.3133,
"step": 754
},
{
"epoch": 0.8224400871459695,
"grad_norm": 2.570692300796509,
"learning_rate": 3.572984749455338e-05,
"loss": 4.4859,
"step": 755
},
{
"epoch": 0.8235294117647058,
"grad_norm": 3.2437076568603516,
"learning_rate": 3.55119825708061e-05,
"loss": 4.9471,
"step": 756
},
{
"epoch": 0.8246187363834423,
"grad_norm": 2.33784556388855,
"learning_rate": 3.529411764705883e-05,
"loss": 4.3232,
"step": 757
},
{
"epoch": 0.8257080610021786,
"grad_norm": 3.095179319381714,
"learning_rate": 3.507625272331155e-05,
"loss": 4.2713,
"step": 758
},
{
"epoch": 0.826797385620915,
"grad_norm": 2.924217462539673,
"learning_rate": 3.4858387799564275e-05,
"loss": 4.3772,
"step": 759
},
{
"epoch": 0.8278867102396514,
"grad_norm": 4.275852203369141,
"learning_rate": 3.464052287581699e-05,
"loss": 4.6822,
"step": 760
},
{
"epoch": 0.8289760348583878,
"grad_norm": 3.064005136489868,
"learning_rate": 3.442265795206972e-05,
"loss": 4.8159,
"step": 761
},
{
"epoch": 0.8300653594771242,
"grad_norm": 2.3928287029266357,
"learning_rate": 3.420479302832244e-05,
"loss": 4.4421,
"step": 762
},
{
"epoch": 0.8311546840958606,
"grad_norm": 3.7825465202331543,
"learning_rate": 3.3986928104575163e-05,
"loss": 4.3476,
"step": 763
},
{
"epoch": 0.8322440087145969,
"grad_norm": 3.0275168418884277,
"learning_rate": 3.376906318082789e-05,
"loss": 4.6799,
"step": 764
},
{
"epoch": 0.8333333333333334,
"grad_norm": 2.415347099304199,
"learning_rate": 3.355119825708061e-05,
"loss": 4.5567,
"step": 765
},
{
"epoch": 0.8344226579520697,
"grad_norm": 4.181748390197754,
"learning_rate": 3.3333333333333335e-05,
"loss": 4.8625,
"step": 766
},
{
"epoch": 0.835511982570806,
"grad_norm": 2.8122055530548096,
"learning_rate": 3.311546840958606e-05,
"loss": 4.4339,
"step": 767
},
{
"epoch": 0.8366013071895425,
"grad_norm": 5.145498752593994,
"learning_rate": 3.289760348583878e-05,
"loss": 4.4479,
"step": 768
},
{
"epoch": 0.8376906318082789,
"grad_norm": 2.8015735149383545,
"learning_rate": 3.2679738562091506e-05,
"loss": 4.4831,
"step": 769
},
{
"epoch": 0.8387799564270153,
"grad_norm": 2.6297879219055176,
"learning_rate": 3.2461873638344223e-05,
"loss": 4.3064,
"step": 770
},
{
"epoch": 0.8398692810457516,
"grad_norm": 3.4533751010894775,
"learning_rate": 3.2244008714596954e-05,
"loss": 4.3691,
"step": 771
},
{
"epoch": 0.840958605664488,
"grad_norm": 3.411375045776367,
"learning_rate": 3.202614379084967e-05,
"loss": 4.8371,
"step": 772
},
{
"epoch": 0.8420479302832244,
"grad_norm": 3.217210531234741,
"learning_rate": 3.1808278867102395e-05,
"loss": 4.521,
"step": 773
},
{
"epoch": 0.8431372549019608,
"grad_norm": 3.2574751377105713,
"learning_rate": 3.1590413943355126e-05,
"loss": 4.3908,
"step": 774
},
{
"epoch": 0.8442265795206971,
"grad_norm": 3.3532497882843018,
"learning_rate": 3.137254901960784e-05,
"loss": 4.4482,
"step": 775
},
{
"epoch": 0.8453159041394336,
"grad_norm": 3.3096394538879395,
"learning_rate": 3.115468409586057e-05,
"loss": 4.6897,
"step": 776
},
{
"epoch": 0.8464052287581699,
"grad_norm": 2.804602861404419,
"learning_rate": 3.093681917211329e-05,
"loss": 4.4341,
"step": 777
},
{
"epoch": 0.8474945533769063,
"grad_norm": 2.5430572032928467,
"learning_rate": 3.0718954248366014e-05,
"loss": 4.4146,
"step": 778
},
{
"epoch": 0.8485838779956427,
"grad_norm": 3.859699249267578,
"learning_rate": 3.0501089324618738e-05,
"loss": 4.3238,
"step": 779
},
{
"epoch": 0.8496732026143791,
"grad_norm": 3.4429585933685303,
"learning_rate": 3.0283224400871462e-05,
"loss": 4.1805,
"step": 780
},
{
"epoch": 0.8507625272331155,
"grad_norm": 3.5958237648010254,
"learning_rate": 3.0065359477124182e-05,
"loss": 4.6116,
"step": 781
},
{
"epoch": 0.8518518518518519,
"grad_norm": 4.12628173828125,
"learning_rate": 2.984749455337691e-05,
"loss": 4.0494,
"step": 782
},
{
"epoch": 0.8529411764705882,
"grad_norm": 3.068495988845825,
"learning_rate": 2.962962962962963e-05,
"loss": 4.236,
"step": 783
},
{
"epoch": 0.8540305010893247,
"grad_norm": 4.414676189422607,
"learning_rate": 2.9411764705882354e-05,
"loss": 4.2029,
"step": 784
},
{
"epoch": 0.855119825708061,
"grad_norm": 2.343167543411255,
"learning_rate": 2.9193899782135074e-05,
"loss": 4.6678,
"step": 785
},
{
"epoch": 0.8562091503267973,
"grad_norm": 2.415961980819702,
"learning_rate": 2.89760348583878e-05,
"loss": 4.4856,
"step": 786
},
{
"epoch": 0.8572984749455338,
"grad_norm": 2.7041404247283936,
"learning_rate": 2.8758169934640522e-05,
"loss": 4.2077,
"step": 787
},
{
"epoch": 0.8583877995642701,
"grad_norm": 3.7065887451171875,
"learning_rate": 2.854030501089325e-05,
"loss": 4.24,
"step": 788
},
{
"epoch": 0.8594771241830066,
"grad_norm": 2.7964565753936768,
"learning_rate": 2.832244008714597e-05,
"loss": 4.4061,
"step": 789
},
{
"epoch": 0.8605664488017429,
"grad_norm": 2.7362513542175293,
"learning_rate": 2.8104575163398693e-05,
"loss": 4.6079,
"step": 790
},
{
"epoch": 0.8616557734204793,
"grad_norm": 3.7887816429138184,
"learning_rate": 2.788671023965142e-05,
"loss": 4.8322,
"step": 791
},
{
"epoch": 0.8627450980392157,
"grad_norm": 2.9420385360717773,
"learning_rate": 2.766884531590414e-05,
"loss": 4.4393,
"step": 792
},
{
"epoch": 0.8638344226579521,
"grad_norm": 2.545464515686035,
"learning_rate": 2.7450980392156865e-05,
"loss": 4.1903,
"step": 793
},
{
"epoch": 0.8649237472766884,
"grad_norm": 2.869817018508911,
"learning_rate": 2.7233115468409585e-05,
"loss": 4.5073,
"step": 794
},
{
"epoch": 0.8660130718954249,
"grad_norm": 4.065328121185303,
"learning_rate": 2.7015250544662313e-05,
"loss": 4.131,
"step": 795
},
{
"epoch": 0.8671023965141612,
"grad_norm": 4.19705867767334,
"learning_rate": 2.6797385620915033e-05,
"loss": 3.9346,
"step": 796
},
{
"epoch": 0.8681917211328976,
"grad_norm": 2.0968496799468994,
"learning_rate": 2.657952069716776e-05,
"loss": 4.6387,
"step": 797
},
{
"epoch": 0.869281045751634,
"grad_norm": 3.3844027519226074,
"learning_rate": 2.636165577342048e-05,
"loss": 4.1972,
"step": 798
},
{
"epoch": 0.8703703703703703,
"grad_norm": 4.317149639129639,
"learning_rate": 2.6143790849673204e-05,
"loss": 3.8641,
"step": 799
},
{
"epoch": 0.8714596949891068,
"grad_norm": 2.7906928062438965,
"learning_rate": 2.5925925925925925e-05,
"loss": 4.895,
"step": 800
},
{
"epoch": 0.8725490196078431,
"grad_norm": 5.625737190246582,
"learning_rate": 2.5708061002178652e-05,
"loss": 4.1917,
"step": 801
},
{
"epoch": 0.8736383442265795,
"grad_norm": 3.180407762527466,
"learning_rate": 2.5490196078431373e-05,
"loss": 4.4799,
"step": 802
},
{
"epoch": 0.8747276688453159,
"grad_norm": 2.444420576095581,
"learning_rate": 2.5272331154684096e-05,
"loss": 4.4138,
"step": 803
},
{
"epoch": 0.8758169934640523,
"grad_norm": 3.7271342277526855,
"learning_rate": 2.5054466230936817e-05,
"loss": 4.5968,
"step": 804
},
{
"epoch": 0.8769063180827886,
"grad_norm": 2.372236967086792,
"learning_rate": 2.4836601307189544e-05,
"loss": 4.6474,
"step": 805
},
{
"epoch": 0.8779956427015251,
"grad_norm": 3.3021676540374756,
"learning_rate": 2.4618736383442268e-05,
"loss": 4.3627,
"step": 806
},
{
"epoch": 0.8790849673202614,
"grad_norm": 5.0191779136657715,
"learning_rate": 2.4400871459694992e-05,
"loss": 4.4277,
"step": 807
},
{
"epoch": 0.8801742919389978,
"grad_norm": 2.840336322784424,
"learning_rate": 2.4183006535947712e-05,
"loss": 4.821,
"step": 808
},
{
"epoch": 0.8812636165577342,
"grad_norm": 2.553668260574341,
"learning_rate": 2.3965141612200436e-05,
"loss": 4.2192,
"step": 809
},
{
"epoch": 0.8823529411764706,
"grad_norm": 3.1289873123168945,
"learning_rate": 2.374727668845316e-05,
"loss": 4.6784,
"step": 810
},
{
"epoch": 0.883442265795207,
"grad_norm": 2.4871625900268555,
"learning_rate": 2.3529411764705884e-05,
"loss": 4.2971,
"step": 811
},
{
"epoch": 0.8845315904139434,
"grad_norm": 3.015580654144287,
"learning_rate": 2.3311546840958608e-05,
"loss": 4.1916,
"step": 812
},
{
"epoch": 0.8856209150326797,
"grad_norm": 3.12770676612854,
"learning_rate": 2.3093681917211328e-05,
"loss": 4.8488,
"step": 813
},
{
"epoch": 0.8867102396514162,
"grad_norm": 4.185814380645752,
"learning_rate": 2.2875816993464052e-05,
"loss": 4.1931,
"step": 814
},
{
"epoch": 0.8877995642701525,
"grad_norm": 3.2363178730010986,
"learning_rate": 2.2657952069716776e-05,
"loss": 3.9761,
"step": 815
},
{
"epoch": 0.8888888888888888,
"grad_norm": 3.264817953109741,
"learning_rate": 2.24400871459695e-05,
"loss": 3.9483,
"step": 816
},
{
"epoch": 0.8899782135076253,
"grad_norm": 2.8902573585510254,
"learning_rate": 2.2222222222222223e-05,
"loss": 4.3474,
"step": 817
},
{
"epoch": 0.8910675381263616,
"grad_norm": 4.015153408050537,
"learning_rate": 2.2004357298474944e-05,
"loss": 5.0101,
"step": 818
},
{
"epoch": 0.8921568627450981,
"grad_norm": 2.7316837310791016,
"learning_rate": 2.178649237472767e-05,
"loss": 4.6326,
"step": 819
},
{
"epoch": 0.8932461873638344,
"grad_norm": 3.2079977989196777,
"learning_rate": 2.1568627450980395e-05,
"loss": 4.6429,
"step": 820
},
{
"epoch": 0.8943355119825708,
"grad_norm": 3.246037006378174,
"learning_rate": 2.135076252723312e-05,
"loss": 4.6208,
"step": 821
},
{
"epoch": 0.8954248366013072,
"grad_norm": 3.2515430450439453,
"learning_rate": 2.113289760348584e-05,
"loss": 4.5181,
"step": 822
},
{
"epoch": 0.8965141612200436,
"grad_norm": 3.3833346366882324,
"learning_rate": 2.0915032679738563e-05,
"loss": 4.4344,
"step": 823
},
{
"epoch": 0.8976034858387799,
"grad_norm": 3.3735289573669434,
"learning_rate": 2.0697167755991287e-05,
"loss": 4.184,
"step": 824
},
{
"epoch": 0.8986928104575164,
"grad_norm": 3.0778541564941406,
"learning_rate": 2.047930283224401e-05,
"loss": 4.3511,
"step": 825
},
{
"epoch": 0.8997821350762527,
"grad_norm": 3.2135345935821533,
"learning_rate": 2.0261437908496734e-05,
"loss": 4.2923,
"step": 826
},
{
"epoch": 0.900871459694989,
"grad_norm": 3.828021287918091,
"learning_rate": 2.0043572984749455e-05,
"loss": 4.0934,
"step": 827
},
{
"epoch": 0.9019607843137255,
"grad_norm": 2.8927841186523438,
"learning_rate": 1.982570806100218e-05,
"loss": 4.4145,
"step": 828
},
{
"epoch": 0.9030501089324618,
"grad_norm": 2.8813278675079346,
"learning_rate": 1.9607843137254903e-05,
"loss": 4.2101,
"step": 829
},
{
"epoch": 0.9041394335511983,
"grad_norm": 3.247687816619873,
"learning_rate": 1.9389978213507626e-05,
"loss": 4.449,
"step": 830
},
{
"epoch": 0.9052287581699346,
"grad_norm": 2.423374652862549,
"learning_rate": 1.917211328976035e-05,
"loss": 4.2909,
"step": 831
},
{
"epoch": 0.906318082788671,
"grad_norm": 2.915576934814453,
"learning_rate": 1.895424836601307e-05,
"loss": 4.2515,
"step": 832
},
{
"epoch": 0.9074074074074074,
"grad_norm": 3.332075595855713,
"learning_rate": 1.8736383442265794e-05,
"loss": 4.1769,
"step": 833
},
{
"epoch": 0.9084967320261438,
"grad_norm": 3.268340587615967,
"learning_rate": 1.8518518518518518e-05,
"loss": 4.0638,
"step": 834
},
{
"epoch": 0.9095860566448801,
"grad_norm": 4.253492832183838,
"learning_rate": 1.8300653594771242e-05,
"loss": 4.6748,
"step": 835
},
{
"epoch": 0.9106753812636166,
"grad_norm": 3.4260220527648926,
"learning_rate": 1.8082788671023966e-05,
"loss": 4.1754,
"step": 836
},
{
"epoch": 0.9117647058823529,
"grad_norm": 4.5677266120910645,
"learning_rate": 1.786492374727669e-05,
"loss": 4.4002,
"step": 837
},
{
"epoch": 0.9128540305010894,
"grad_norm": 2.8352746963500977,
"learning_rate": 1.7647058823529414e-05,
"loss": 4.1695,
"step": 838
},
{
"epoch": 0.9139433551198257,
"grad_norm": 3.011715888977051,
"learning_rate": 1.7429193899782137e-05,
"loss": 4.2904,
"step": 839
},
{
"epoch": 0.9150326797385621,
"grad_norm": 3.083839178085327,
"learning_rate": 1.721132897603486e-05,
"loss": 4.4036,
"step": 840
},
{
"epoch": 0.9161220043572985,
"grad_norm": 4.099680423736572,
"learning_rate": 1.6993464052287582e-05,
"loss": 5.0838,
"step": 841
},
{
"epoch": 0.9172113289760349,
"grad_norm": 2.89890193939209,
"learning_rate": 1.6775599128540306e-05,
"loss": 4.235,
"step": 842
},
{
"epoch": 0.9183006535947712,
"grad_norm": 3.054748296737671,
"learning_rate": 1.655773420479303e-05,
"loss": 4.7427,
"step": 843
},
{
"epoch": 0.9193899782135077,
"grad_norm": 2.4290452003479004,
"learning_rate": 1.6339869281045753e-05,
"loss": 4.5623,
"step": 844
},
{
"epoch": 0.920479302832244,
"grad_norm": 3.0590555667877197,
"learning_rate": 1.6122004357298477e-05,
"loss": 4.617,
"step": 845
},
{
"epoch": 0.9215686274509803,
"grad_norm": 3.4882876873016357,
"learning_rate": 1.5904139433551197e-05,
"loss": 4.4919,
"step": 846
},
{
"epoch": 0.9226579520697168,
"grad_norm": 2.5416266918182373,
"learning_rate": 1.568627450980392e-05,
"loss": 4.2478,
"step": 847
},
{
"epoch": 0.9237472766884531,
"grad_norm": 3.034259796142578,
"learning_rate": 1.5468409586056645e-05,
"loss": 4.334,
"step": 848
},
{
"epoch": 0.9248366013071896,
"grad_norm": 3.5231168270111084,
"learning_rate": 1.5250544662309369e-05,
"loss": 4.4518,
"step": 849
},
{
"epoch": 0.9259259259259259,
"grad_norm": 3.0768980979919434,
"learning_rate": 1.5032679738562091e-05,
"loss": 4.2201,
"step": 850
},
{
"epoch": 0.9270152505446623,
"grad_norm": 2.9910175800323486,
"learning_rate": 1.4814814814814815e-05,
"loss": 4.5128,
"step": 851
},
{
"epoch": 0.9281045751633987,
"grad_norm": 4.108496189117432,
"learning_rate": 1.4596949891067537e-05,
"loss": 4.472,
"step": 852
},
{
"epoch": 0.9291938997821351,
"grad_norm": 2.6037635803222656,
"learning_rate": 1.4379084967320261e-05,
"loss": 4.4553,
"step": 853
},
{
"epoch": 0.9302832244008714,
"grad_norm": 3.0736937522888184,
"learning_rate": 1.4161220043572985e-05,
"loss": 4.226,
"step": 854
},
{
"epoch": 0.9313725490196079,
"grad_norm": 2.5338780879974365,
"learning_rate": 1.394335511982571e-05,
"loss": 4.4091,
"step": 855
},
{
"epoch": 0.9324618736383442,
"grad_norm": 3.7258126735687256,
"learning_rate": 1.3725490196078432e-05,
"loss": 4.6917,
"step": 856
},
{
"epoch": 0.9335511982570807,
"grad_norm": 4.166601181030273,
"learning_rate": 1.3507625272331156e-05,
"loss": 4.143,
"step": 857
},
{
"epoch": 0.934640522875817,
"grad_norm": 2.4743645191192627,
"learning_rate": 1.328976034858388e-05,
"loss": 4.1835,
"step": 858
},
{
"epoch": 0.9357298474945533,
"grad_norm": 2.661558151245117,
"learning_rate": 1.3071895424836602e-05,
"loss": 4.5051,
"step": 859
},
{
"epoch": 0.9368191721132898,
"grad_norm": 2.705817699432373,
"learning_rate": 1.2854030501089326e-05,
"loss": 4.4325,
"step": 860
},
{
"epoch": 0.9379084967320261,
"grad_norm": 2.885896921157837,
"learning_rate": 1.2636165577342048e-05,
"loss": 4.5767,
"step": 861
},
{
"epoch": 0.9389978213507625,
"grad_norm": 3.1418848037719727,
"learning_rate": 1.2418300653594772e-05,
"loss": 4.2073,
"step": 862
},
{
"epoch": 0.9400871459694989,
"grad_norm": 2.982358694076538,
"learning_rate": 1.2200435729847496e-05,
"loss": 4.4629,
"step": 863
},
{
"epoch": 0.9411764705882353,
"grad_norm": 2.5018253326416016,
"learning_rate": 1.1982570806100218e-05,
"loss": 4.3306,
"step": 864
},
{
"epoch": 0.9422657952069716,
"grad_norm": 2.8917834758758545,
"learning_rate": 1.1764705882352942e-05,
"loss": 4.096,
"step": 865
},
{
"epoch": 0.9433551198257081,
"grad_norm": 4.460819721221924,
"learning_rate": 1.1546840958605664e-05,
"loss": 4.4547,
"step": 866
},
{
"epoch": 0.9444444444444444,
"grad_norm": 3.612231492996216,
"learning_rate": 1.1328976034858388e-05,
"loss": 4.4797,
"step": 867
},
{
"epoch": 0.9455337690631809,
"grad_norm": 3.5711090564727783,
"learning_rate": 1.1111111111111112e-05,
"loss": 4.5295,
"step": 868
},
{
"epoch": 0.9466230936819172,
"grad_norm": 3.783479928970337,
"learning_rate": 1.0893246187363835e-05,
"loss": 3.9625,
"step": 869
},
{
"epoch": 0.9477124183006536,
"grad_norm": 2.980947256088257,
"learning_rate": 1.067538126361656e-05,
"loss": 4.487,
"step": 870
},
{
"epoch": 0.94880174291939,
"grad_norm": 3.743603229522705,
"learning_rate": 1.0457516339869281e-05,
"loss": 4.4444,
"step": 871
},
{
"epoch": 0.9498910675381264,
"grad_norm": 2.9100987911224365,
"learning_rate": 1.0239651416122005e-05,
"loss": 4.4357,
"step": 872
},
{
"epoch": 0.9509803921568627,
"grad_norm": 3.2935433387756348,
"learning_rate": 1.0021786492374727e-05,
"loss": 4.6886,
"step": 873
},
{
"epoch": 0.9520697167755992,
"grad_norm": 3.8862123489379883,
"learning_rate": 9.803921568627451e-06,
"loss": 4.1871,
"step": 874
},
{
"epoch": 0.9531590413943355,
"grad_norm": 3.0392003059387207,
"learning_rate": 9.586056644880175e-06,
"loss": 4.7113,
"step": 875
},
{
"epoch": 0.954248366013072,
"grad_norm": 2.377678871154785,
"learning_rate": 9.368191721132897e-06,
"loss": 4.7133,
"step": 876
},
{
"epoch": 0.9553376906318083,
"grad_norm": 4.4908223152160645,
"learning_rate": 9.150326797385621e-06,
"loss": 4.5903,
"step": 877
},
{
"epoch": 0.9564270152505446,
"grad_norm": 4.116386413574219,
"learning_rate": 8.932461873638345e-06,
"loss": 4.3146,
"step": 878
},
{
"epoch": 0.9575163398692811,
"grad_norm": 3.787520408630371,
"learning_rate": 8.714596949891069e-06,
"loss": 4.7772,
"step": 879
},
{
"epoch": 0.9586056644880174,
"grad_norm": 4.713115692138672,
"learning_rate": 8.496732026143791e-06,
"loss": 4.077,
"step": 880
},
{
"epoch": 0.9596949891067538,
"grad_norm": 3.4946200847625732,
"learning_rate": 8.278867102396515e-06,
"loss": 4.4494,
"step": 881
},
{
"epoch": 0.9607843137254902,
"grad_norm": 7.297133922576904,
"learning_rate": 8.061002178649239e-06,
"loss": 3.8255,
"step": 882
},
{
"epoch": 0.9618736383442266,
"grad_norm": 3.2210330963134766,
"learning_rate": 7.84313725490196e-06,
"loss": 4.3539,
"step": 883
},
{
"epoch": 0.9629629629629629,
"grad_norm": 3.9630517959594727,
"learning_rate": 7.6252723311546845e-06,
"loss": 4.9081,
"step": 884
},
{
"epoch": 0.9640522875816994,
"grad_norm": 3.165914297103882,
"learning_rate": 7.4074074074074075e-06,
"loss": 4.2638,
"step": 885
},
{
"epoch": 0.9651416122004357,
"grad_norm": 2.8624045848846436,
"learning_rate": 7.1895424836601305e-06,
"loss": 4.3948,
"step": 886
},
{
"epoch": 0.9662309368191722,
"grad_norm": 3.314714193344116,
"learning_rate": 6.971677559912855e-06,
"loss": 4.7292,
"step": 887
},
{
"epoch": 0.9673202614379085,
"grad_norm": 2.2992618083953857,
"learning_rate": 6.753812636165578e-06,
"loss": 4.4612,
"step": 888
},
{
"epoch": 0.9684095860566448,
"grad_norm": 4.193181991577148,
"learning_rate": 6.535947712418301e-06,
"loss": 4.3218,
"step": 889
},
{
"epoch": 0.9694989106753813,
"grad_norm": 2.4635162353515625,
"learning_rate": 6.318082788671024e-06,
"loss": 4.5963,
"step": 890
},
{
"epoch": 0.9705882352941176,
"grad_norm": 2.4746878147125244,
"learning_rate": 6.100217864923748e-06,
"loss": 4.3439,
"step": 891
},
{
"epoch": 0.971677559912854,
"grad_norm": 2.770826578140259,
"learning_rate": 5.882352941176471e-06,
"loss": 4.1497,
"step": 892
},
{
"epoch": 0.9727668845315904,
"grad_norm": 2.483689546585083,
"learning_rate": 5.664488017429194e-06,
"loss": 4.3472,
"step": 893
},
{
"epoch": 0.9738562091503268,
"grad_norm": 3.953700542449951,
"learning_rate": 5.446623093681918e-06,
"loss": 4.1945,
"step": 894
},
{
"epoch": 0.9749455337690632,
"grad_norm": 3.3949203491210938,
"learning_rate": 5.228758169934641e-06,
"loss": 4.2244,
"step": 895
},
{
"epoch": 0.9760348583877996,
"grad_norm": 3.2721428871154785,
"learning_rate": 5.010893246187364e-06,
"loss": 5.1877,
"step": 896
},
{
"epoch": 0.9771241830065359,
"grad_norm": 3.0004539489746094,
"learning_rate": 4.7930283224400875e-06,
"loss": 4.4266,
"step": 897
},
{
"epoch": 0.9782135076252724,
"grad_norm": 2.427356243133545,
"learning_rate": 4.5751633986928105e-06,
"loss": 4.3152,
"step": 898
},
{
"epoch": 0.9793028322440087,
"grad_norm": 2.8808891773223877,
"learning_rate": 4.357298474945534e-06,
"loss": 4.5186,
"step": 899
},
{
"epoch": 0.9803921568627451,
"grad_norm": 2.924720287322998,
"learning_rate": 4.139433551198257e-06,
"loss": 4.4141,
"step": 900
},
{
"epoch": 0.9814814814814815,
"grad_norm": 2.356206178665161,
"learning_rate": 3.92156862745098e-06,
"loss": 4.6122,
"step": 901
},
{
"epoch": 0.9825708061002179,
"grad_norm": 2.7468602657318115,
"learning_rate": 3.7037037037037037e-06,
"loss": 4.3972,
"step": 902
},
{
"epoch": 0.9836601307189542,
"grad_norm": 3.2754995822906494,
"learning_rate": 3.4858387799564276e-06,
"loss": 4.2037,
"step": 903
},
{
"epoch": 0.9847494553376906,
"grad_norm": 3.5675883293151855,
"learning_rate": 3.2679738562091506e-06,
"loss": 4.1336,
"step": 904
},
{
"epoch": 0.985838779956427,
"grad_norm": 4.010306358337402,
"learning_rate": 3.050108932461874e-06,
"loss": 4.7152,
"step": 905
},
{
"epoch": 0.9869281045751634,
"grad_norm": 3.2009243965148926,
"learning_rate": 2.832244008714597e-06,
"loss": 4.3139,
"step": 906
},
{
"epoch": 0.9880174291938998,
"grad_norm": 3.407482624053955,
"learning_rate": 2.6143790849673204e-06,
"loss": 4.4083,
"step": 907
},
{
"epoch": 0.9891067538126361,
"grad_norm": 2.407134532928467,
"learning_rate": 2.3965141612200438e-06,
"loss": 4.3022,
"step": 908
},
{
"epoch": 0.9901960784313726,
"grad_norm": 2.962719202041626,
"learning_rate": 2.178649237472767e-06,
"loss": 4.9264,
"step": 909
},
{
"epoch": 0.9912854030501089,
"grad_norm": 2.8014609813690186,
"learning_rate": 1.96078431372549e-06,
"loss": 4.5361,
"step": 910
},
{
"epoch": 0.9923747276688453,
"grad_norm": 3.681654453277588,
"learning_rate": 1.7429193899782138e-06,
"loss": 4.358,
"step": 911
},
{
"epoch": 0.9934640522875817,
"grad_norm": 2.6658272743225098,
"learning_rate": 1.525054466230937e-06,
"loss": 4.4824,
"step": 912
},
{
"epoch": 0.9945533769063181,
"grad_norm": 3.949403762817383,
"learning_rate": 1.3071895424836602e-06,
"loss": 4.1765,
"step": 913
},
{
"epoch": 0.9956427015250545,
"grad_norm": 2.6979477405548096,
"learning_rate": 1.0893246187363836e-06,
"loss": 4.34,
"step": 914
},
{
"epoch": 0.9967320261437909,
"grad_norm": 3.2136738300323486,
"learning_rate": 8.714596949891069e-07,
"loss": 4.4514,
"step": 915
},
{
"epoch": 0.9978213507625272,
"grad_norm": 3.675879955291748,
"learning_rate": 6.535947712418301e-07,
"loss": 4.4091,
"step": 916
},
{
"epoch": 0.9989106753812637,
"grad_norm": 4.200655460357666,
"learning_rate": 4.3572984749455345e-07,
"loss": 4.7659,
"step": 917
},
{
"epoch": 1.0,
"grad_norm": 5.2691826820373535,
"learning_rate": 2.1786492374727672e-07,
"loss": 4.521,
"step": 918
}
],
"logging_steps": 1,
"max_steps": 918,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 921405048453120.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}