{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 8.0,
  "eval_steps": 500,
  "global_step": 504,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07976071784646062,
      "grad_norm": 8.951485022406759,
      "learning_rate": 3.1372549019607846e-06,
      "loss": 1.6286,
      "step": 5
    },
    {
      "epoch": 0.15952143569292124,
      "grad_norm": 1.7911944426834843,
      "learning_rate": 7.058823529411766e-06,
      "loss": 1.4082,
      "step": 10
    },
    {
      "epoch": 0.23928215353938184,
      "grad_norm": 1.188948888919745,
      "learning_rate": 1.0980392156862747e-05,
      "loss": 1.2277,
      "step": 15
    },
    {
      "epoch": 0.3190428713858425,
      "grad_norm": 0.6895721187833729,
      "learning_rate": 1.4901960784313726e-05,
      "loss": 1.1183,
      "step": 20
    },
    {
      "epoch": 0.3988035892323031,
      "grad_norm": 0.5800787932266839,
      "learning_rate": 1.8823529411764708e-05,
      "loss": 1.065,
      "step": 25
    },
    {
      "epoch": 0.4785643070787637,
      "grad_norm": 0.5106042574703776,
      "learning_rate": 2.274509803921569e-05,
      "loss": 1.0376,
      "step": 30
    },
    {
      "epoch": 0.5583250249252243,
      "grad_norm": 0.5130368281162043,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 1.0193,
      "step": 35
    },
    {
      "epoch": 0.638085742771685,
      "grad_norm": 0.5048169792373989,
      "learning_rate": 3.0588235294117644e-05,
      "loss": 1.0101,
      "step": 40
    },
    {
      "epoch": 0.7178464606181456,
      "grad_norm": 0.49100483855991833,
      "learning_rate": 3.450980392156863e-05,
      "loss": 0.9855,
      "step": 45
    },
    {
      "epoch": 0.7976071784646062,
      "grad_norm": 0.5082070499580861,
      "learning_rate": 3.8431372549019614e-05,
      "loss": 0.9472,
      "step": 50
    },
    {
      "epoch": 0.8773678963110668,
      "grad_norm": 0.45791310359750687,
      "learning_rate": 3.999567157212646e-05,
      "loss": 0.9543,
      "step": 55
    },
    {
      "epoch": 0.9571286141575274,
      "grad_norm": 0.45379188386242486,
      "learning_rate": 3.996922685294587e-05,
      "loss": 0.9476,
      "step": 60
    },
    {
      "epoch": 1.0319042871385842,
      "grad_norm": 0.6240889944377968,
      "learning_rate": 3.991877385171789e-05,
      "loss": 0.897,
      "step": 65
    },
    {
      "epoch": 1.111665004985045,
      "grad_norm": 0.5440466161961147,
      "learning_rate": 3.9844373226268305e-05,
      "loss": 0.8082,
      "step": 70
    },
    {
      "epoch": 1.1914257228315055,
      "grad_norm": 0.5107070923561905,
      "learning_rate": 3.97461144257888e-05,
      "loss": 0.8094,
      "step": 75
    },
    {
      "epoch": 1.271186440677966,
      "grad_norm": 0.4651202387725849,
      "learning_rate": 3.9624115583295375e-05,
      "loss": 0.7905,
      "step": 80
    },
    {
      "epoch": 1.3509471585244266,
      "grad_norm": 0.4774453312775275,
      "learning_rate": 3.9478523373601325e-05,
      "loss": 0.8298,
      "step": 85
    },
    {
      "epoch": 1.4307078763708874,
      "grad_norm": 0.4556406153190642,
      "learning_rate": 3.930951283697534e-05,
      "loss": 0.7867,
      "step": 90
    },
    {
      "epoch": 1.510468594217348,
      "grad_norm": 0.4830106629278,
      "learning_rate": 3.9117287168696956e-05,
      "loss": 0.8198,
      "step": 95
    },
    {
      "epoch": 1.5902293120638085,
      "grad_norm": 0.5022923875137891,
      "learning_rate": 3.8902077474762155e-05,
      "loss": 0.7834,
      "step": 100
    },
    {
      "epoch": 1.6699900299102692,
      "grad_norm": 0.44123948565588444,
      "learning_rate": 3.866414249403295e-05,
      "loss": 0.8185,
      "step": 105
    },
    {
      "epoch": 1.7497507477567298,
      "grad_norm": 0.4430352770475653,
      "learning_rate": 3.840376828716499e-05,
      "loss": 0.8124,
      "step": 110
    },
    {
      "epoch": 1.8295114656031903,
      "grad_norm": 0.4924851273312515,
      "learning_rate": 3.812126789268712e-05,
      "loss": 0.8057,
      "step": 115
    },
    {
      "epoch": 1.909272183449651,
      "grad_norm": 0.46057083006170463,
      "learning_rate": 3.781698095064647e-05,
      "loss": 0.8145,
      "step": 120
    },
    {
      "epoch": 1.9890329012961117,
      "grad_norm": 0.4611804285939624,
      "learning_rate": 3.7491273294271386e-05,
      "loss": 0.7913,
      "step": 125
    },
    {
      "epoch": 2.0638085742771684,
      "grad_norm": 0.6959783330087537,
      "learning_rate": 3.7144536510143436e-05,
      "loss": 0.6452,
      "step": 130
    },
    {
      "epoch": 2.143569292123629,
      "grad_norm": 0.5166517982510289,
      "learning_rate": 3.6777187467406857e-05,
      "loss": 0.5773,
      "step": 135
    },
    {
      "epoch": 2.22333000997009,
      "grad_norm": 0.571267388696848,
      "learning_rate": 3.638966781658187e-05,
      "loss": 0.5875,
      "step": 140
    },
    {
      "epoch": 2.30309072781655,
      "grad_norm": 0.5939430686190696,
      "learning_rate": 3.598244345858412e-05,
      "loss": 0.5823,
      "step": 145
    },
    {
      "epoch": 2.382851445663011,
      "grad_norm": 0.5778600291078902,
      "learning_rate": 3.555600398458885e-05,
      "loss": 0.582,
      "step": 150
    },
    {
      "epoch": 2.4626121635094718,
      "grad_norm": 0.5529898865422133,
      "learning_rate": 3.511086208741303e-05,
      "loss": 0.5911,
      "step": 155
    },
    {
      "epoch": 2.542372881355932,
      "grad_norm": 0.5081756947326533,
      "learning_rate": 3.464755294512325e-05,
      "loss": 0.5844,
      "step": 160
    },
    {
      "epoch": 2.622133599202393,
      "grad_norm": 0.4848619192082748,
      "learning_rate": 3.4166633577610425e-05,
      "loss": 0.5893,
      "step": 165
    },
    {
      "epoch": 2.701894317048853,
      "grad_norm": 0.5322251164040472,
      "learning_rate": 3.366868217690482e-05,
      "loss": 0.5912,
      "step": 170
    },
    {
      "epoch": 2.781655034895314,
      "grad_norm": 0.48557366731521723,
      "learning_rate": 3.315429741203666e-05,
      "loss": 0.5813,
      "step": 175
    },
    {
      "epoch": 2.8614157527417747,
      "grad_norm": 0.48422520390869345,
      "learning_rate": 3.2624097709277855e-05,
      "loss": 0.5943,
      "step": 180
    },
    {
      "epoch": 2.9411764705882355,
      "grad_norm": 0.447236488520265,
      "learning_rate": 3.2078720508630427e-05,
      "loss": 0.5924,
      "step": 185
    },
    {
      "epoch": 3.015952143569292,
      "grad_norm": 1.2170595347725597,
      "learning_rate": 3.1518821497455326e-05,
      "loss": 0.5568,
      "step": 190
    },
    {
      "epoch": 3.0957128614157527,
      "grad_norm": 0.9128571202802258,
      "learning_rate": 3.094507382216312e-05,
      "loss": 0.3975,
      "step": 195
    },
    {
      "epoch": 3.1754735792622135,
      "grad_norm": 0.6352191401696539,
      "learning_rate": 3.0358167278914387e-05,
      "loss": 0.3864,
      "step": 200
    },
    {
      "epoch": 3.255234297108674,
      "grad_norm": 0.5562946553628527,
      "learning_rate": 2.9758807484302566e-05,
      "loss": 0.3815,
      "step": 205
    },
    {
      "epoch": 3.3349950149551346,
      "grad_norm": 0.5968079984333592,
      "learning_rate": 2.9147715027016593e-05,
      "loss": 0.3902,
      "step": 210
    },
    {
      "epoch": 3.4147557328015954,
      "grad_norm": 0.5987175013101326,
      "learning_rate": 2.8525624601503055e-05,
      "loss": 0.3884,
      "step": 215
    },
    {
      "epoch": 3.4945164506480557,
      "grad_norm": 0.582626716390902,
      "learning_rate": 2.789328412466953e-05,
      "loss": 0.4097,
      "step": 220
    },
    {
      "epoch": 3.5742771684945165,
      "grad_norm": 0.5696325846022255,
      "learning_rate": 2.725145383669106e-05,
      "loss": 0.3813,
      "step": 225
    },
    {
      "epoch": 3.6540378863409773,
      "grad_norm": 0.6291341426490248,
      "learning_rate": 2.6600905387000716e-05,
      "loss": 0.3874,
      "step": 230
    },
    {
      "epoch": 3.7337986041874376,
      "grad_norm": 0.5512196870000226,
      "learning_rate": 2.594242090656335e-05,
      "loss": 0.3864,
      "step": 235
    },
    {
      "epoch": 3.8135593220338984,
      "grad_norm": 0.5172511656987956,
      "learning_rate": 2.5276792067547672e-05,
      "loss": 0.4047,
      "step": 240
    },
    {
      "epoch": 3.8933200398803587,
      "grad_norm": 0.5434124466868848,
      "learning_rate": 2.460481913152734e-05,
      "loss": 0.3849,
      "step": 245
    },
    {
      "epoch": 3.9730807577268195,
      "grad_norm": 0.5364334555823091,
      "learning_rate": 2.392730998735529e-05,
      "loss": 0.3827,
      "step": 250
    },
    {
      "epoch": 4.047856430707876,
      "grad_norm": 0.6202233480341806,
      "learning_rate": 2.3245079179868054e-05,
      "loss": 0.2996,
      "step": 255
    },
    {
      "epoch": 4.127617148554337,
      "grad_norm": 0.675034415488875,
      "learning_rate": 2.2558946930587907e-05,
      "loss": 0.2318,
      "step": 260
    },
    {
      "epoch": 4.2073778664007975,
      "grad_norm": 0.6378422283616842,
      "learning_rate": 2.18697381516e-05,
      "loss": 0.2264,
      "step": 265
    },
    {
      "epoch": 4.287138584247258,
      "grad_norm": 0.5828333823927044,
      "learning_rate": 2.1178281453790358e-05,
      "loss": 0.2249,
      "step": 270
    },
    {
      "epoch": 4.366899302093719,
      "grad_norm": 0.5645506561383866,
      "learning_rate": 2.0485408150636804e-05,
      "loss": 0.2257,
      "step": 275
    },
    {
      "epoch": 4.44666001994018,
      "grad_norm": 0.5286311194195279,
      "learning_rate": 1.979195125875072e-05,
      "loss": 0.2258,
      "step": 280
    },
    {
      "epoch": 4.526420737786641,
      "grad_norm": 0.5354213636574895,
      "learning_rate": 1.909874449637122e-05,
      "loss": 0.2277,
      "step": 285
    },
    {
      "epoch": 4.6061814556331,
      "grad_norm": 0.5132597119160957,
      "learning_rate": 1.84066212810157e-05,
      "loss": 0.2263,
      "step": 290
    },
    {
      "epoch": 4.685942173479561,
      "grad_norm": 0.4992674146867158,
      "learning_rate": 1.7716413727492035e-05,
      "loss": 0.2309,
      "step": 295
    },
    {
      "epoch": 4.765702891326022,
      "grad_norm": 0.5871955206074188,
      "learning_rate": 1.7028951647476862e-05,
      "loss": 0.2256,
      "step": 300
    },
    {
      "epoch": 4.845463609172483,
      "grad_norm": 0.5483461557535811,
      "learning_rate": 1.634506155186295e-05,
      "loss": 0.2366,
      "step": 305
    },
    {
      "epoch": 4.9252243270189435,
      "grad_norm": 0.5615291707528146,
      "learning_rate": 1.5665565657074874e-05,
      "loss": 0.2177,
      "step": 310
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.5560610126898546,
      "learning_rate": 1.4991280896547893e-05,
      "loss": 0.217,
      "step": 315
    },
    {
      "epoch": 5.079760717846461,
      "grad_norm": 0.63285574638701,
      "learning_rate": 1.4323017938558245e-05,
      "loss": 0.1192,
      "step": 320
    },
    {
      "epoch": 5.1595214356929215,
      "grad_norm": 0.51986072558017,
      "learning_rate": 1.3661580211585947e-05,
      "loss": 0.1174,
      "step": 325
    },
    {
      "epoch": 5.239282153539381,
      "grad_norm": 0.4103698636395131,
      "learning_rate": 1.3007762938381619e-05,
      "loss": 0.1121,
      "step": 330
    },
    {
      "epoch": 5.319042871385842,
      "grad_norm": 0.47784813487742994,
      "learning_rate": 1.2362352179898855e-05,
      "loss": 0.1191,
      "step": 335
    },
    {
      "epoch": 5.398803589232303,
      "grad_norm": 0.424232714663781,
      "learning_rate": 1.1726123890241439e-05,
      "loss": 0.1128,
      "step": 340
    },
    {
      "epoch": 5.478564307078764,
      "grad_norm": 0.48412806315859497,
      "learning_rate": 1.1099842983761712e-05,
      "loss": 0.1144,
      "step": 345
    },
    {
      "epoch": 5.5583250249252245,
      "grad_norm": 0.4266976645996111,
      "learning_rate": 1.0484262415431536e-05,
      "loss": 0.1056,
      "step": 350
    },
    {
      "epoch": 5.638085742771685,
      "grad_norm": 0.3812408369839083,
      "learning_rate": 9.880122275591752e-06,
      "loss": 0.1106,
      "step": 355
    },
    {
      "epoch": 5.717846460618146,
      "grad_norm": 0.422453041096817,
      "learning_rate": 9.288148900168122e-06,
      "loss": 0.1056,
      "step": 360
    },
    {
      "epoch": 5.797607178464606,
      "grad_norm": 0.4322042305854276,
      "learning_rate": 8.70905399742389e-06,
      "loss": 0.112,
      "step": 365
    },
    {
      "epoch": 5.877367896311067,
      "grad_norm": 0.41261441396205645,
      "learning_rate": 8.143533792298545e-06,
      "loss": 0.1055,
      "step": 370
    },
    {
      "epoch": 5.9571286141575275,
      "grad_norm": 0.4218939861247821,
      "learning_rate": 7.59226818936166e-06,
      "loss": 0.1054,
      "step": 375
    },
    {
      "epoch": 6.031904287138584,
      "grad_norm": 0.4601781395305167,
      "learning_rate": 7.055919955388122e-06,
      "loss": 0.0903,
      "step": 380
    },
    {
      "epoch": 6.111665004985045,
      "grad_norm": 0.42572965163147103,
      "learning_rate": 6.535133922537513e-06,
      "loss": 0.056,
      "step": 385
    },
    {
      "epoch": 6.1914257228315055,
      "grad_norm": 0.39199127923177135,
      "learning_rate": 6.0305362130956504e-06,
      "loss": 0.0553,
      "step": 390
    },
    {
      "epoch": 6.271186440677966,
      "grad_norm": 0.30614111878955885,
      "learning_rate": 5.542733486710299e-06,
      "loss": 0.0538,
      "step": 395
    },
    {
      "epoch": 6.350947158524427,
      "grad_norm": 0.2838511806116735,
      "learning_rate": 5.072312211026125e-06,
      "loss": 0.0533,
      "step": 400
    },
    {
      "epoch": 6.430707876370887,
      "grad_norm": 0.3527112901949325,
      "learning_rate": 4.619837956595825e-06,
      "loss": 0.0579,
      "step": 405
    },
    {
      "epoch": 6.510468594217348,
      "grad_norm": 0.2802030569387254,
      "learning_rate": 4.185854716914952e-06,
      "loss": 0.0516,
      "step": 410
    },
    {
      "epoch": 6.5902293120638085,
      "grad_norm": 0.28919103613267777,
      "learning_rate": 3.7708842543981928e-06,
      "loss": 0.0522,
      "step": 415
    },
    {
      "epoch": 6.669990029910269,
      "grad_norm": 0.28238569460732055,
      "learning_rate": 3.375425473083185e-06,
      "loss": 0.0554,
      "step": 420
    },
    {
      "epoch": 6.74975074775673,
      "grad_norm": 0.35381160740706563,
      "learning_rate": 2.9999538188161705e-06,
      "loss": 0.0536,
      "step": 425
    },
    {
      "epoch": 6.829511465603191,
      "grad_norm": 0.27756519386816786,
      "learning_rate": 2.6449207076405857e-06,
      "loss": 0.0511,
      "step": 430
    },
    {
      "epoch": 6.909272183449651,
      "grad_norm": 0.3349698709554607,
      "learning_rate": 2.310752983075819e-06,
      "loss": 0.0531,
      "step": 435
    },
    {
      "epoch": 6.989032901296111,
      "grad_norm": 0.3120620614394932,
      "learning_rate": 1.9978524029386026e-06,
      "loss": 0.0538,
      "step": 440
    },
    {
      "epoch": 7.063808574277169,
      "grad_norm": 0.2631675945808927,
      "learning_rate": 1.7065951563241022e-06,
      "loss": 0.0479,
      "step": 445
    },
    {
      "epoch": 7.14356929212363,
      "grad_norm": 0.26021291176981515,
      "learning_rate": 1.437331411327274e-06,
      "loss": 0.0451,
      "step": 450
    },
    {
      "epoch": 7.2233300099700894,
      "grad_norm": 0.2211041691097816,
      "learning_rate": 1.1903848940484241e-06,
      "loss": 0.0373,
      "step": 455
    },
    {
      "epoch": 7.30309072781655,
      "grad_norm": 0.30062035959934313,
      "learning_rate": 9.660524993889386e-07,
      "loss": 0.0389,
      "step": 460
    },
    {
      "epoch": 7.382851445663011,
      "grad_norm": 0.24836493024466927,
      "learning_rate": 7.646039341052747e-07,
      "loss": 0.0345,
      "step": 465
    },
    {
      "epoch": 7.462612163509472,
      "grad_norm": 0.2128207343872171,
      "learning_rate": 5.862813925502209e-07,
      "loss": 0.0335,
      "step": 470
    },
    {
      "epoch": 7.5423728813559325,
      "grad_norm": 0.22526753960255325,
      "learning_rate": 4.3129926549136057e-07,
      "loss": 0.0356,
      "step": 475
    },
    {
      "epoch": 7.622133599202392,
      "grad_norm": 0.21640137907033566,
      "learning_rate": 2.99843882356774e-07,
      "loss": 0.0352,
      "step": 480
    },
    {
      "epoch": 7.701894317048853,
      "grad_norm": 0.22957439567149518,
      "learning_rate": 1.9207328721788653e-07,
      "loss": 0.0366,
      "step": 485
    },
    {
      "epoch": 7.781655034895314,
      "grad_norm": 0.20281851671161966,
      "learning_rate": 1.0811704877875528e-07,
      "loss": 0.0355,
      "step": 490
    },
    {
      "epoch": 7.861415752741775,
      "grad_norm": 0.2399792418034644,
      "learning_rate": 4.807610460030976e-08,
      "loss": 0.0345,
      "step": 495
    },
    {
      "epoch": 7.9411764705882355,
      "grad_norm": 0.2284150095647677,
      "learning_rate": 1.202263974674045e-08,
      "loss": 0.0367,
      "step": 500
    },
    {
      "epoch": 8.0,
      "step": 504,
      "total_flos": 718744629805056.0,
      "train_loss": 0.414989875806939,
      "train_runtime": 39904.848,
      "train_samples_per_second": 1.608,
      "train_steps_per_second": 0.013
    }
  ],
  "logging_steps": 5,
  "max_steps": 504,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 8,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 718744629805056.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}