| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 918, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0010893246187363835, | |
| "grad_norm": 22.107685089111328, | |
| "learning_rate": 0.0002, | |
| "loss": 11.1826, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.002178649237472767, | |
| "grad_norm": 17.847023010253906, | |
| "learning_rate": 0.00019978213507625275, | |
| "loss": 10.5937, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0032679738562091504, | |
| "grad_norm": 13.80118179321289, | |
| "learning_rate": 0.00019956427015250546, | |
| "loss": 8.5333, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.004357298474945534, | |
| "grad_norm": 8.288287162780762, | |
| "learning_rate": 0.0001993464052287582, | |
| "loss": 7.8234, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0054466230936819175, | |
| "grad_norm": 6.827712059020996, | |
| "learning_rate": 0.0001991285403050109, | |
| "loss": 7.6672, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.006535947712418301, | |
| "grad_norm": 4.654490947723389, | |
| "learning_rate": 0.00019891067538126362, | |
| "loss": 6.8996, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.007625272331154684, | |
| "grad_norm": 5.752150535583496, | |
| "learning_rate": 0.00019869281045751635, | |
| "loss": 6.6784, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.008714596949891068, | |
| "grad_norm": 5.701947212219238, | |
| "learning_rate": 0.00019847494553376906, | |
| "loss": 6.4123, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.00980392156862745, | |
| "grad_norm": 3.7915384769439697, | |
| "learning_rate": 0.0001982570806100218, | |
| "loss": 6.2328, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.010893246187363835, | |
| "grad_norm": 4.150002479553223, | |
| "learning_rate": 0.00019803921568627454, | |
| "loss": 5.7886, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.011982570806100218, | |
| "grad_norm": 3.281799077987671, | |
| "learning_rate": 0.00019782135076252725, | |
| "loss": 6.0742, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.013071895424836602, | |
| "grad_norm": 4.8937907218933105, | |
| "learning_rate": 0.00019760348583877996, | |
| "loss": 5.9607, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.014161220043572984, | |
| "grad_norm": 5.1620306968688965, | |
| "learning_rate": 0.0001973856209150327, | |
| "loss": 6.1127, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.015250544662309368, | |
| "grad_norm": 4.81100606918335, | |
| "learning_rate": 0.0001971677559912854, | |
| "loss": 5.6241, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.016339869281045753, | |
| "grad_norm": 3.468261480331421, | |
| "learning_rate": 0.00019694989106753814, | |
| "loss": 5.4747, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.017429193899782137, | |
| "grad_norm": 3.9843785762786865, | |
| "learning_rate": 0.00019673202614379085, | |
| "loss": 5.7857, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.018518518518518517, | |
| "grad_norm": 4.957090854644775, | |
| "learning_rate": 0.0001965141612200436, | |
| "loss": 5.7103, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0196078431372549, | |
| "grad_norm": 3.296520233154297, | |
| "learning_rate": 0.0001962962962962963, | |
| "loss": 5.5697, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.020697167755991286, | |
| "grad_norm": 5.7215094566345215, | |
| "learning_rate": 0.000196078431372549, | |
| "loss": 5.5114, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.02178649237472767, | |
| "grad_norm": 4.517280101776123, | |
| "learning_rate": 0.00019586056644880175, | |
| "loss": 5.2544, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02287581699346405, | |
| "grad_norm": 4.512917995452881, | |
| "learning_rate": 0.00019564270152505449, | |
| "loss": 5.3694, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.023965141612200435, | |
| "grad_norm": 4.867700576782227, | |
| "learning_rate": 0.0001954248366013072, | |
| "loss": 5.3096, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.02505446623093682, | |
| "grad_norm": 4.500598907470703, | |
| "learning_rate": 0.00019520697167755993, | |
| "loss": 5.388, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.026143790849673203, | |
| "grad_norm": 4.624905586242676, | |
| "learning_rate": 0.00019498910675381264, | |
| "loss": 5.6716, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.027233115468409588, | |
| "grad_norm": 4.971169471740723, | |
| "learning_rate": 0.00019477124183006535, | |
| "loss": 4.8913, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.02832244008714597, | |
| "grad_norm": 4.383894920349121, | |
| "learning_rate": 0.0001945533769063181, | |
| "loss": 4.7567, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.029411764705882353, | |
| "grad_norm": 5.65884256362915, | |
| "learning_rate": 0.0001943355119825708, | |
| "loss": 5.5943, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.030501089324618737, | |
| "grad_norm": 4.529474258422852, | |
| "learning_rate": 0.00019411764705882354, | |
| "loss": 5.2028, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.03159041394335512, | |
| "grad_norm": 5.929969310760498, | |
| "learning_rate": 0.00019389978213507628, | |
| "loss": 5.1979, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.032679738562091505, | |
| "grad_norm": 4.275257110595703, | |
| "learning_rate": 0.000193681917211329, | |
| "loss": 5.0697, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03376906318082789, | |
| "grad_norm": 4.4108991622924805, | |
| "learning_rate": 0.0001934640522875817, | |
| "loss": 5.5351, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.034858387799564274, | |
| "grad_norm": 3.474432945251465, | |
| "learning_rate": 0.00019324618736383443, | |
| "loss": 5.4141, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.03594771241830065, | |
| "grad_norm": 3.6491479873657227, | |
| "learning_rate": 0.00019302832244008715, | |
| "loss": 5.4325, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.037037037037037035, | |
| "grad_norm": 3.54929256439209, | |
| "learning_rate": 0.00019281045751633988, | |
| "loss": 5.4501, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.03812636165577342, | |
| "grad_norm": 3.0727927684783936, | |
| "learning_rate": 0.0001925925925925926, | |
| "loss": 5.4118, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0392156862745098, | |
| "grad_norm": 3.3980045318603516, | |
| "learning_rate": 0.00019237472766884533, | |
| "loss": 5.4405, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.04030501089324619, | |
| "grad_norm": 4.579554080963135, | |
| "learning_rate": 0.00019215686274509807, | |
| "loss": 5.1187, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.04139433551198257, | |
| "grad_norm": 3.4333720207214355, | |
| "learning_rate": 0.00019193899782135075, | |
| "loss": 5.1245, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.042483660130718956, | |
| "grad_norm": 3.6624631881713867, | |
| "learning_rate": 0.0001917211328976035, | |
| "loss": 5.2321, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.04357298474945534, | |
| "grad_norm": 6.108523368835449, | |
| "learning_rate": 0.00019150326797385623, | |
| "loss": 5.2154, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.044662309368191724, | |
| "grad_norm": 3.8794407844543457, | |
| "learning_rate": 0.00019128540305010894, | |
| "loss": 5.1781, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.0457516339869281, | |
| "grad_norm": 3.7800469398498535, | |
| "learning_rate": 0.00019106753812636167, | |
| "loss": 5.1992, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.046840958605664486, | |
| "grad_norm": 3.764031410217285, | |
| "learning_rate": 0.0001908496732026144, | |
| "loss": 5.0885, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.04793028322440087, | |
| "grad_norm": 4.050832271575928, | |
| "learning_rate": 0.00019063180827886712, | |
| "loss": 5.1076, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.049019607843137254, | |
| "grad_norm": 5.691143989562988, | |
| "learning_rate": 0.00019041394335511983, | |
| "loss": 5.1453, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.05010893246187364, | |
| "grad_norm": 4.449873924255371, | |
| "learning_rate": 0.00019019607843137254, | |
| "loss": 5.5097, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.05119825708061002, | |
| "grad_norm": 2.906421661376953, | |
| "learning_rate": 0.00018997821350762528, | |
| "loss": 5.2703, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.05228758169934641, | |
| "grad_norm": 4.265420913696289, | |
| "learning_rate": 0.00018976034858387802, | |
| "loss": 4.9613, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.05337690631808279, | |
| "grad_norm": 3.084545612335205, | |
| "learning_rate": 0.00018954248366013073, | |
| "loss": 5.2874, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.054466230936819175, | |
| "grad_norm": 4.474928855895996, | |
| "learning_rate": 0.00018932461873638346, | |
| "loss": 4.9916, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05555555555555555, | |
| "grad_norm": 3.9959335327148438, | |
| "learning_rate": 0.00018910675381263617, | |
| "loss": 4.9839, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.05664488017429194, | |
| "grad_norm": 6.50009298324585, | |
| "learning_rate": 0.00018888888888888888, | |
| "loss": 5.0163, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.05773420479302832, | |
| "grad_norm": 4.5923566818237305, | |
| "learning_rate": 0.00018867102396514162, | |
| "loss": 5.0997, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.058823529411764705, | |
| "grad_norm": 3.1194558143615723, | |
| "learning_rate": 0.00018845315904139433, | |
| "loss": 5.0717, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.05991285403050109, | |
| "grad_norm": 4.631853103637695, | |
| "learning_rate": 0.00018823529411764707, | |
| "loss": 5.103, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.06100217864923747, | |
| "grad_norm": 5.220804214477539, | |
| "learning_rate": 0.0001880174291938998, | |
| "loss": 5.1003, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.06209150326797386, | |
| "grad_norm": 5.17025089263916, | |
| "learning_rate": 0.00018779956427015252, | |
| "loss": 5.188, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.06318082788671024, | |
| "grad_norm": 3.0653014183044434, | |
| "learning_rate": 0.00018758169934640523, | |
| "loss": 5.0984, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.06427015250544663, | |
| "grad_norm": 4.2605156898498535, | |
| "learning_rate": 0.00018736383442265796, | |
| "loss": 5.0787, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.06535947712418301, | |
| "grad_norm": 3.5389158725738525, | |
| "learning_rate": 0.00018714596949891068, | |
| "loss": 5.0133, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0664488017429194, | |
| "grad_norm": 4.469093322753906, | |
| "learning_rate": 0.0001869281045751634, | |
| "loss": 5.0993, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.06753812636165578, | |
| "grad_norm": 5.053121089935303, | |
| "learning_rate": 0.00018671023965141615, | |
| "loss": 4.9451, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.06862745098039216, | |
| "grad_norm": 3.385946035385132, | |
| "learning_rate": 0.00018649237472766886, | |
| "loss": 5.1747, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.06971677559912855, | |
| "grad_norm": 3.2187914848327637, | |
| "learning_rate": 0.00018627450980392157, | |
| "loss": 4.9015, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.07080610021786492, | |
| "grad_norm": 4.75279426574707, | |
| "learning_rate": 0.00018605664488017428, | |
| "loss": 4.5069, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.0718954248366013, | |
| "grad_norm": 4.536505699157715, | |
| "learning_rate": 0.00018583877995642702, | |
| "loss": 4.8886, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.07298474945533769, | |
| "grad_norm": 4.806969165802002, | |
| "learning_rate": 0.00018562091503267976, | |
| "loss": 5.0439, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.07407407407407407, | |
| "grad_norm": 5.019186496734619, | |
| "learning_rate": 0.00018540305010893247, | |
| "loss": 4.9022, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.07516339869281045, | |
| "grad_norm": 5.320457458496094, | |
| "learning_rate": 0.0001851851851851852, | |
| "loss": 5.0808, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.07625272331154684, | |
| "grad_norm": 3.816904306411743, | |
| "learning_rate": 0.0001849673202614379, | |
| "loss": 5.3904, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07734204793028322, | |
| "grad_norm": 4.3827667236328125, | |
| "learning_rate": 0.00018474945533769062, | |
| "loss": 5.0717, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.0784313725490196, | |
| "grad_norm": 3.4211671352386475, | |
| "learning_rate": 0.00018453159041394336, | |
| "loss": 5.2476, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.07952069716775599, | |
| "grad_norm": 3.2783992290496826, | |
| "learning_rate": 0.00018431372549019607, | |
| "loss": 5.1173, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.08061002178649238, | |
| "grad_norm": 4.760554790496826, | |
| "learning_rate": 0.0001840958605664488, | |
| "loss": 4.8323, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.08169934640522876, | |
| "grad_norm": 5.011927604675293, | |
| "learning_rate": 0.00018387799564270155, | |
| "loss": 4.986, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.08278867102396514, | |
| "grad_norm": 4.87539005279541, | |
| "learning_rate": 0.00018366013071895426, | |
| "loss": 5.0816, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.08387799564270153, | |
| "grad_norm": 4.076656818389893, | |
| "learning_rate": 0.00018344226579520697, | |
| "loss": 5.0201, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.08496732026143791, | |
| "grad_norm": 4.8921613693237305, | |
| "learning_rate": 0.0001832244008714597, | |
| "loss": 4.8971, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.0860566448801743, | |
| "grad_norm": 4.011205196380615, | |
| "learning_rate": 0.00018300653594771241, | |
| "loss": 5.1548, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.08714596949891068, | |
| "grad_norm": 4.264740467071533, | |
| "learning_rate": 0.00018278867102396515, | |
| "loss": 5.0265, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08823529411764706, | |
| "grad_norm": 4.830214977264404, | |
| "learning_rate": 0.0001825708061002179, | |
| "loss": 4.9071, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.08932461873638345, | |
| "grad_norm": 4.2021942138671875, | |
| "learning_rate": 0.0001823529411764706, | |
| "loss": 4.8129, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.09041394335511982, | |
| "grad_norm": 3.46781587600708, | |
| "learning_rate": 0.00018213507625272334, | |
| "loss": 5.0446, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.0915032679738562, | |
| "grad_norm": 3.6687159538269043, | |
| "learning_rate": 0.00018191721132897605, | |
| "loss": 5.3979, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.09259259259259259, | |
| "grad_norm": 2.563159704208374, | |
| "learning_rate": 0.00018169934640522876, | |
| "loss": 5.2423, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.09368191721132897, | |
| "grad_norm": 4.911458492279053, | |
| "learning_rate": 0.0001814814814814815, | |
| "loss": 4.7199, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.09477124183006536, | |
| "grad_norm": 4.559939384460449, | |
| "learning_rate": 0.0001812636165577342, | |
| "loss": 5.2483, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.09586056644880174, | |
| "grad_norm": 4.162410259246826, | |
| "learning_rate": 0.00018104575163398694, | |
| "loss": 5.151, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.09694989106753812, | |
| "grad_norm": 3.32928466796875, | |
| "learning_rate": 0.00018082788671023968, | |
| "loss": 5.0597, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.09803921568627451, | |
| "grad_norm": 4.435458660125732, | |
| "learning_rate": 0.0001806100217864924, | |
| "loss": 5.3589, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.09912854030501089, | |
| "grad_norm": 3.7495155334472656, | |
| "learning_rate": 0.0001803921568627451, | |
| "loss": 5.0074, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.10021786492374728, | |
| "grad_norm": 3.2599384784698486, | |
| "learning_rate": 0.0001801742919389978, | |
| "loss": 5.0846, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.10130718954248366, | |
| "grad_norm": 2.7934536933898926, | |
| "learning_rate": 0.00017995642701525055, | |
| "loss": 5.388, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.10239651416122005, | |
| "grad_norm": 4.288060188293457, | |
| "learning_rate": 0.00017973856209150329, | |
| "loss": 5.1296, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.10348583877995643, | |
| "grad_norm": 3.235417366027832, | |
| "learning_rate": 0.000179520697167756, | |
| "loss": 5.011, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.10457516339869281, | |
| "grad_norm": 2.9654250144958496, | |
| "learning_rate": 0.00017930283224400873, | |
| "loss": 5.001, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.1056644880174292, | |
| "grad_norm": 2.7460479736328125, | |
| "learning_rate": 0.00017908496732026144, | |
| "loss": 5.0625, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.10675381263616558, | |
| "grad_norm": 3.7850425243377686, | |
| "learning_rate": 0.00017886710239651415, | |
| "loss": 4.9888, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.10784313725490197, | |
| "grad_norm": 2.826519250869751, | |
| "learning_rate": 0.0001786492374727669, | |
| "loss": 4.8845, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.10893246187363835, | |
| "grad_norm": 3.151165723800659, | |
| "learning_rate": 0.00017843137254901963, | |
| "loss": 5.0597, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.11002178649237472, | |
| "grad_norm": 3.682316303253174, | |
| "learning_rate": 0.00017821350762527234, | |
| "loss": 5.3154, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.1111111111111111, | |
| "grad_norm": 2.2441463470458984, | |
| "learning_rate": 0.00017799564270152508, | |
| "loss": 5.2023, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.11220043572984749, | |
| "grad_norm": 3.245762825012207, | |
| "learning_rate": 0.00017777777777777779, | |
| "loss": 5.1972, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.11328976034858387, | |
| "grad_norm": 3.5761213302612305, | |
| "learning_rate": 0.0001775599128540305, | |
| "loss": 5.2941, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.11437908496732026, | |
| "grad_norm": 3.046706199645996, | |
| "learning_rate": 0.00017734204793028323, | |
| "loss": 5.0266, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.11546840958605664, | |
| "grad_norm": 4.248337268829346, | |
| "learning_rate": 0.00017712418300653594, | |
| "loss": 5.3828, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.11655773420479303, | |
| "grad_norm": 3.2203943729400635, | |
| "learning_rate": 0.00017690631808278868, | |
| "loss": 4.7748, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.11764705882352941, | |
| "grad_norm": 3.3694331645965576, | |
| "learning_rate": 0.00017668845315904142, | |
| "loss": 5.0062, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.1187363834422658, | |
| "grad_norm": 3.9723663330078125, | |
| "learning_rate": 0.00017647058823529413, | |
| "loss": 5.151, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.11982570806100218, | |
| "grad_norm": 4.699850559234619, | |
| "learning_rate": 0.00017625272331154684, | |
| "loss": 5.3586, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.12091503267973856, | |
| "grad_norm": 4.248794078826904, | |
| "learning_rate": 0.00017603485838779955, | |
| "loss": 4.9772, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.12200435729847495, | |
| "grad_norm": 3.284792184829712, | |
| "learning_rate": 0.0001758169934640523, | |
| "loss": 5.0481, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.12309368191721133, | |
| "grad_norm": 3.2022602558135986, | |
| "learning_rate": 0.00017559912854030502, | |
| "loss": 4.8391, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.12418300653594772, | |
| "grad_norm": 3.369584560394287, | |
| "learning_rate": 0.00017538126361655773, | |
| "loss": 4.8921, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.12527233115468409, | |
| "grad_norm": 2.8831474781036377, | |
| "learning_rate": 0.00017516339869281047, | |
| "loss": 5.0684, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.12636165577342048, | |
| "grad_norm": 3.45534610748291, | |
| "learning_rate": 0.0001749455337690632, | |
| "loss": 4.9971, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.12745098039215685, | |
| "grad_norm": 4.895359039306641, | |
| "learning_rate": 0.0001747276688453159, | |
| "loss": 5.1069, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.12854030501089325, | |
| "grad_norm": 3.229856252670288, | |
| "learning_rate": 0.00017450980392156863, | |
| "loss": 5.032, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.12962962962962962, | |
| "grad_norm": 3.38332200050354, | |
| "learning_rate": 0.00017429193899782137, | |
| "loss": 4.9788, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.13071895424836602, | |
| "grad_norm": 4.4389967918396, | |
| "learning_rate": 0.00017407407407407408, | |
| "loss": 4.9471, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1318082788671024, | |
| "grad_norm": 4.08867073059082, | |
| "learning_rate": 0.00017385620915032682, | |
| "loss": 4.9688, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.1328976034858388, | |
| "grad_norm": 3.1234259605407715, | |
| "learning_rate": 0.00017363834422657953, | |
| "loss": 5.275, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.13398692810457516, | |
| "grad_norm": 3.3533644676208496, | |
| "learning_rate": 0.00017342047930283226, | |
| "loss": 4.8402, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.13507625272331156, | |
| "grad_norm": 4.490350246429443, | |
| "learning_rate": 0.00017320261437908497, | |
| "loss": 4.8748, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.13616557734204793, | |
| "grad_norm": 3.3828914165496826, | |
| "learning_rate": 0.00017298474945533768, | |
| "loss": 4.9867, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.13725490196078433, | |
| "grad_norm": 2.948503017425537, | |
| "learning_rate": 0.00017276688453159042, | |
| "loss": 5.0548, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.1383442265795207, | |
| "grad_norm": 3.690495491027832, | |
| "learning_rate": 0.00017254901960784316, | |
| "loss": 4.46, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.1394335511982571, | |
| "grad_norm": 4.029051303863525, | |
| "learning_rate": 0.00017233115468409587, | |
| "loss": 5.2837, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.14052287581699346, | |
| "grad_norm": 4.606770992279053, | |
| "learning_rate": 0.0001721132897603486, | |
| "loss": 4.2575, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.14161220043572983, | |
| "grad_norm": 3.888659715652466, | |
| "learning_rate": 0.00017189542483660132, | |
| "loss": 4.9312, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.14270152505446623, | |
| "grad_norm": 3.3389322757720947, | |
| "learning_rate": 0.00017167755991285403, | |
| "loss": 4.7385, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.1437908496732026, | |
| "grad_norm": 4.276157379150391, | |
| "learning_rate": 0.00017145969498910676, | |
| "loss": 5.0385, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.144880174291939, | |
| "grad_norm": 2.9289348125457764, | |
| "learning_rate": 0.00017124183006535947, | |
| "loss": 5.1092, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.14596949891067537, | |
| "grad_norm": 2.7905993461608887, | |
| "learning_rate": 0.0001710239651416122, | |
| "loss": 5.0769, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.14705882352941177, | |
| "grad_norm": 2.67677640914917, | |
| "learning_rate": 0.00017080610021786495, | |
| "loss": 4.9621, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.14814814814814814, | |
| "grad_norm": 3.1127524375915527, | |
| "learning_rate": 0.00017058823529411766, | |
| "loss": 5.0922, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.14923747276688454, | |
| "grad_norm": 4.093358516693115, | |
| "learning_rate": 0.00017037037037037037, | |
| "loss": 4.8967, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.1503267973856209, | |
| "grad_norm": 4.0308427810668945, | |
| "learning_rate": 0.0001701525054466231, | |
| "loss": 4.9229, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.1514161220043573, | |
| "grad_norm": 3.8967127799987793, | |
| "learning_rate": 0.00016993464052287582, | |
| "loss": 4.9517, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.15250544662309368, | |
| "grad_norm": 3.387913942337036, | |
| "learning_rate": 0.00016971677559912855, | |
| "loss": 4.9562, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.15359477124183007, | |
| "grad_norm": 4.264173984527588, | |
| "learning_rate": 0.00016949891067538126, | |
| "loss": 5.0844, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.15468409586056645, | |
| "grad_norm": 16.368194580078125, | |
| "learning_rate": 0.000169281045751634, | |
| "loss": 5.635, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.15577342047930284, | |
| "grad_norm": 3.6264078617095947, | |
| "learning_rate": 0.0001690631808278867, | |
| "loss": 4.8617, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.1568627450980392, | |
| "grad_norm": 4.278415203094482, | |
| "learning_rate": 0.00016884531590413942, | |
| "loss": 5.2121, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.1579520697167756, | |
| "grad_norm": 2.9585821628570557, | |
| "learning_rate": 0.00016862745098039216, | |
| "loss": 4.8859, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.15904139433551198, | |
| "grad_norm": 3.371652364730835, | |
| "learning_rate": 0.0001684095860566449, | |
| "loss": 4.7501, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.16013071895424835, | |
| "grad_norm": 3.98421573638916, | |
| "learning_rate": 0.0001681917211328976, | |
| "loss": 4.9839, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.16122004357298475, | |
| "grad_norm": 3.6334128379821777, | |
| "learning_rate": 0.00016797385620915035, | |
| "loss": 5.2846, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.16230936819172112, | |
| "grad_norm": 4.04555082321167, | |
| "learning_rate": 0.00016775599128540308, | |
| "loss": 4.9533, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.16339869281045752, | |
| "grad_norm": 4.256022930145264, | |
| "learning_rate": 0.00016753812636165577, | |
| "loss": 5.2371, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1644880174291939, | |
| "grad_norm": 2.657806634902954, | |
| "learning_rate": 0.0001673202614379085, | |
| "loss": 5.2251, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.1655773420479303, | |
| "grad_norm": 3.7488322257995605, | |
| "learning_rate": 0.0001671023965141612, | |
| "loss": 5.0817, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 3.3456156253814697, | |
| "learning_rate": 0.00016688453159041395, | |
| "loss": 5.1065, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.16775599128540306, | |
| "grad_norm": 3.5670382976531982, | |
| "learning_rate": 0.0001666666666666667, | |
| "loss": 4.6952, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.16884531590413943, | |
| "grad_norm": 3.841510534286499, | |
| "learning_rate": 0.0001664488017429194, | |
| "loss": 4.9876, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.16993464052287582, | |
| "grad_norm": 3.7906384468078613, | |
| "learning_rate": 0.00016623093681917214, | |
| "loss": 4.9622, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.1710239651416122, | |
| "grad_norm": 2.9914352893829346, | |
| "learning_rate": 0.00016601307189542485, | |
| "loss": 4.7751, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.1721132897603486, | |
| "grad_norm": 3.1363089084625244, | |
| "learning_rate": 0.00016579520697167756, | |
| "loss": 4.761, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.17320261437908496, | |
| "grad_norm": 3.877608299255371, | |
| "learning_rate": 0.0001655773420479303, | |
| "loss": 4.9925, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.17429193899782136, | |
| "grad_norm": 2.659391164779663, | |
| "learning_rate": 0.000165359477124183, | |
| "loss": 4.8792, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.17538126361655773, | |
| "grad_norm": 3.540314197540283, | |
| "learning_rate": 0.00016514161220043574, | |
| "loss": 4.7221, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.17647058823529413, | |
| "grad_norm": 3.9212710857391357, | |
| "learning_rate": 0.00016492374727668848, | |
| "loss": 4.7605, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.1775599128540305, | |
| "grad_norm": 3.4231927394866943, | |
| "learning_rate": 0.0001647058823529412, | |
| "loss": 4.8396, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.1786492374727669, | |
| "grad_norm": 2.970974922180176, | |
| "learning_rate": 0.0001644880174291939, | |
| "loss": 5.2121, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.17973856209150327, | |
| "grad_norm": 4.227552890777588, | |
| "learning_rate": 0.00016427015250544664, | |
| "loss": 4.9817, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.18082788671023964, | |
| "grad_norm": 4.467804908752441, | |
| "learning_rate": 0.00016405228758169935, | |
| "loss": 4.8429, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.18191721132897604, | |
| "grad_norm": 3.249866008758545, | |
| "learning_rate": 0.00016383442265795208, | |
| "loss": 4.7504, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.1830065359477124, | |
| "grad_norm": 3.1638054847717285, | |
| "learning_rate": 0.00016361655773420482, | |
| "loss": 4.9806, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.1840958605664488, | |
| "grad_norm": 3.232707977294922, | |
| "learning_rate": 0.00016339869281045753, | |
| "loss": 4.8697, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.18518518518518517, | |
| "grad_norm": 3.216461181640625, | |
| "learning_rate": 0.00016318082788671024, | |
| "loss": 5.2147, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.18627450980392157, | |
| "grad_norm": 2.564060688018799, | |
| "learning_rate": 0.00016296296296296295, | |
| "loss": 4.9599, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.18736383442265794, | |
| "grad_norm": 3.218874216079712, | |
| "learning_rate": 0.0001627450980392157, | |
| "loss": 5.0425, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.18845315904139434, | |
| "grad_norm": 2.866109848022461, | |
| "learning_rate": 0.00016252723311546843, | |
| "loss": 4.8431, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.1895424836601307, | |
| "grad_norm": 4.1832075119018555, | |
| "learning_rate": 0.00016230936819172114, | |
| "loss": 5.1622, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.1906318082788671, | |
| "grad_norm": 2.445281744003296, | |
| "learning_rate": 0.00016209150326797388, | |
| "loss": 5.0674, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.19172113289760348, | |
| "grad_norm": 2.9621212482452393, | |
| "learning_rate": 0.00016187363834422659, | |
| "loss": 4.9052, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.19281045751633988, | |
| "grad_norm": 2.8743746280670166, | |
| "learning_rate": 0.0001616557734204793, | |
| "loss": 4.9333, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.19389978213507625, | |
| "grad_norm": 2.8737668991088867, | |
| "learning_rate": 0.00016143790849673203, | |
| "loss": 4.9724, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.19498910675381265, | |
| "grad_norm": 2.8740956783294678, | |
| "learning_rate": 0.00016122004357298474, | |
| "loss": 4.9445, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.19607843137254902, | |
| "grad_norm": 2.64280104637146, | |
| "learning_rate": 0.00016100217864923748, | |
| "loss": 5.0494, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.19716775599128541, | |
| "grad_norm": 2.543142795562744, | |
| "learning_rate": 0.00016078431372549022, | |
| "loss": 4.7056, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.19825708061002179, | |
| "grad_norm": 3.3072926998138428, | |
| "learning_rate": 0.00016056644880174293, | |
| "loss": 4.6861, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.19934640522875818, | |
| "grad_norm": 3.1435420513153076, | |
| "learning_rate": 0.00016034858387799564, | |
| "loss": 5.5872, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.20043572984749455, | |
| "grad_norm": 2.7487738132476807, | |
| "learning_rate": 0.00016013071895424838, | |
| "loss": 4.8017, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.20152505446623092, | |
| "grad_norm": 3.8782405853271484, | |
| "learning_rate": 0.00015991285403050109, | |
| "loss": 5.0304, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.20261437908496732, | |
| "grad_norm": 2.4751572608947754, | |
| "learning_rate": 0.00015969498910675382, | |
| "loss": 5.1402, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.2037037037037037, | |
| "grad_norm": 3.057116746902466, | |
| "learning_rate": 0.00015947712418300656, | |
| "loss": 5.0166, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.2047930283224401, | |
| "grad_norm": 3.6688828468322754, | |
| "learning_rate": 0.00015925925925925927, | |
| "loss": 4.7331, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.20588235294117646, | |
| "grad_norm": 2.466041088104248, | |
| "learning_rate": 0.00015904139433551198, | |
| "loss": 4.84, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.20697167755991286, | |
| "grad_norm": 3.641002893447876, | |
| "learning_rate": 0.0001588235294117647, | |
| "loss": 4.7463, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.20806100217864923, | |
| "grad_norm": 4.757946968078613, | |
| "learning_rate": 0.00015860566448801743, | |
| "loss": 5.1682, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.20915032679738563, | |
| "grad_norm": 4.83253288269043, | |
| "learning_rate": 0.00015838779956427017, | |
| "loss": 5.076, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.210239651416122, | |
| "grad_norm": 3.409478187561035, | |
| "learning_rate": 0.00015816993464052288, | |
| "loss": 4.9811, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.2113289760348584, | |
| "grad_norm": 4.051446437835693, | |
| "learning_rate": 0.00015795206971677561, | |
| "loss": 4.8564, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.21241830065359477, | |
| "grad_norm": 4.4804205894470215, | |
| "learning_rate": 0.00015773420479302835, | |
| "loss": 4.4211, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.21350762527233116, | |
| "grad_norm": 4.66760778427124, | |
| "learning_rate": 0.00015751633986928106, | |
| "loss": 4.9098, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.21459694989106753, | |
| "grad_norm": 4.293209552764893, | |
| "learning_rate": 0.00015729847494553377, | |
| "loss": 5.0321, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.21568627450980393, | |
| "grad_norm": 4.768712520599365, | |
| "learning_rate": 0.00015708061002178648, | |
| "loss": 4.651, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.2167755991285403, | |
| "grad_norm": 2.8598990440368652, | |
| "learning_rate": 0.00015686274509803922, | |
| "loss": 4.7659, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.2178649237472767, | |
| "grad_norm": 3.9935877323150635, | |
| "learning_rate": 0.00015664488017429196, | |
| "loss": 4.9602, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.21895424836601307, | |
| "grad_norm": 2.618224859237671, | |
| "learning_rate": 0.00015642701525054467, | |
| "loss": 4.9664, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.22004357298474944, | |
| "grad_norm": 3.4870638847351074, | |
| "learning_rate": 0.0001562091503267974, | |
| "loss": 4.7686, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.22113289760348584, | |
| "grad_norm": 2.9350011348724365, | |
| "learning_rate": 0.00015599128540305012, | |
| "loss": 4.7934, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 1.970569372177124, | |
| "learning_rate": 0.00015577342047930283, | |
| "loss": 5.0841, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.2233115468409586, | |
| "grad_norm": 4.244380474090576, | |
| "learning_rate": 0.00015555555555555556, | |
| "loss": 4.7992, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.22440087145969498, | |
| "grad_norm": 2.9138362407684326, | |
| "learning_rate": 0.0001553376906318083, | |
| "loss": 4.6564, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.22549019607843138, | |
| "grad_norm": 3.677152156829834, | |
| "learning_rate": 0.000155119825708061, | |
| "loss": 5.3402, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.22657952069716775, | |
| "grad_norm": 3.5209450721740723, | |
| "learning_rate": 0.00015490196078431375, | |
| "loss": 4.4968, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.22766884531590414, | |
| "grad_norm": 3.3340156078338623, | |
| "learning_rate": 0.00015468409586056646, | |
| "loss": 4.8884, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.22875816993464052, | |
| "grad_norm": 3.2705368995666504, | |
| "learning_rate": 0.00015446623093681917, | |
| "loss": 4.7711, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2298474945533769, | |
| "grad_norm": 3.3847126960754395, | |
| "learning_rate": 0.0001542483660130719, | |
| "loss": 5.2646, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.23093681917211328, | |
| "grad_norm": 4.557519912719727, | |
| "learning_rate": 0.00015403050108932462, | |
| "loss": 5.265, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.23202614379084968, | |
| "grad_norm": 3.7597949504852295, | |
| "learning_rate": 0.00015381263616557735, | |
| "loss": 4.9571, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.23311546840958605, | |
| "grad_norm": 5.981929302215576, | |
| "learning_rate": 0.0001535947712418301, | |
| "loss": 4.77, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.23420479302832245, | |
| "grad_norm": 3.622166395187378, | |
| "learning_rate": 0.0001533769063180828, | |
| "loss": 4.9361, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.23529411764705882, | |
| "grad_norm": 4.268712520599365, | |
| "learning_rate": 0.0001531590413943355, | |
| "loss": 4.5784, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.23638344226579522, | |
| "grad_norm": 3.211376190185547, | |
| "learning_rate": 0.00015294117647058822, | |
| "loss": 4.9316, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.2374727668845316, | |
| "grad_norm": 3.7996368408203125, | |
| "learning_rate": 0.00015272331154684096, | |
| "loss": 5.2139, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.238562091503268, | |
| "grad_norm": 4.062546253204346, | |
| "learning_rate": 0.0001525054466230937, | |
| "loss": 4.2757, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.23965141612200436, | |
| "grad_norm": 3.065821886062622, | |
| "learning_rate": 0.0001522875816993464, | |
| "loss": 4.7295, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.24074074074074073, | |
| "grad_norm": 3.3586819171905518, | |
| "learning_rate": 0.00015206971677559914, | |
| "loss": 5.0229, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.24183006535947713, | |
| "grad_norm": 2.5833959579467773, | |
| "learning_rate": 0.00015185185185185185, | |
| "loss": 4.9391, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.2429193899782135, | |
| "grad_norm": 4.792468070983887, | |
| "learning_rate": 0.00015163398692810456, | |
| "loss": 5.0563, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.2440087145969499, | |
| "grad_norm": 2.994100332260132, | |
| "learning_rate": 0.0001514161220043573, | |
| "loss": 4.6381, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.24509803921568626, | |
| "grad_norm": 8.28174877166748, | |
| "learning_rate": 0.00015119825708061004, | |
| "loss": 5.2376, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.24618736383442266, | |
| "grad_norm": 2.4422318935394287, | |
| "learning_rate": 0.00015098039215686275, | |
| "loss": 5.009, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.24727668845315903, | |
| "grad_norm": 3.0637447834014893, | |
| "learning_rate": 0.0001507625272331155, | |
| "loss": 5.0266, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.24836601307189543, | |
| "grad_norm": 4.186681270599365, | |
| "learning_rate": 0.0001505446623093682, | |
| "loss": 4.8132, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.2494553376906318, | |
| "grad_norm": 3.8071372509002686, | |
| "learning_rate": 0.0001503267973856209, | |
| "loss": 4.7549, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.25054466230936817, | |
| "grad_norm": 4.450962066650391, | |
| "learning_rate": 0.00015010893246187365, | |
| "loss": 4.61, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.25163398692810457, | |
| "grad_norm": 3.4045302867889404, | |
| "learning_rate": 0.00014989106753812636, | |
| "loss": 4.5865, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.25272331154684097, | |
| "grad_norm": 3.5248048305511475, | |
| "learning_rate": 0.0001496732026143791, | |
| "loss": 4.8463, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.25381263616557737, | |
| "grad_norm": 2.9256203174591064, | |
| "learning_rate": 0.00014945533769063183, | |
| "loss": 4.9082, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.2549019607843137, | |
| "grad_norm": 3.887249708175659, | |
| "learning_rate": 0.00014923747276688454, | |
| "loss": 4.6206, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.2559912854030501, | |
| "grad_norm": 3.5457653999328613, | |
| "learning_rate": 0.00014901960784313728, | |
| "loss": 4.7352, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.2570806100217865, | |
| "grad_norm": 2.6026275157928467, | |
| "learning_rate": 0.00014880174291939, | |
| "loss": 5.0141, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.2581699346405229, | |
| "grad_norm": 2.7921388149261475, | |
| "learning_rate": 0.0001485838779956427, | |
| "loss": 4.9637, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.25925925925925924, | |
| "grad_norm": 7.660345554351807, | |
| "learning_rate": 0.00014836601307189544, | |
| "loss": 4.7919, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.26034858387799564, | |
| "grad_norm": 2.67083740234375, | |
| "learning_rate": 0.00014814814814814815, | |
| "loss": 5.0893, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.26143790849673204, | |
| "grad_norm": 4.7418036460876465, | |
| "learning_rate": 0.00014793028322440088, | |
| "loss": 4.6364, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2625272331154684, | |
| "grad_norm": 3.2569048404693604, | |
| "learning_rate": 0.00014771241830065362, | |
| "loss": 4.7866, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.2636165577342048, | |
| "grad_norm": 3.1199724674224854, | |
| "learning_rate": 0.00014749455337690633, | |
| "loss": 4.8152, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.2647058823529412, | |
| "grad_norm": 4.056507110595703, | |
| "learning_rate": 0.00014727668845315904, | |
| "loss": 5.1888, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.2657952069716776, | |
| "grad_norm": 2.9097986221313477, | |
| "learning_rate": 0.00014705882352941178, | |
| "loss": 5.0211, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.2668845315904139, | |
| "grad_norm": 2.2584147453308105, | |
| "learning_rate": 0.0001468409586056645, | |
| "loss": 4.9742, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.2679738562091503, | |
| "grad_norm": 5.296759605407715, | |
| "learning_rate": 0.00014662309368191723, | |
| "loss": 4.9444, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.2690631808278867, | |
| "grad_norm": 2.9959006309509277, | |
| "learning_rate": 0.00014640522875816994, | |
| "loss": 5.061, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.2701525054466231, | |
| "grad_norm": 5.515058994293213, | |
| "learning_rate": 0.00014618736383442267, | |
| "loss": 4.8149, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.27124183006535946, | |
| "grad_norm": 2.8291754722595215, | |
| "learning_rate": 0.00014596949891067538, | |
| "loss": 4.9713, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.27233115468409586, | |
| "grad_norm": 2.9711527824401855, | |
| "learning_rate": 0.0001457516339869281, | |
| "loss": 4.703, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.27342047930283225, | |
| "grad_norm": 2.836789131164551, | |
| "learning_rate": 0.00014553376906318083, | |
| "loss": 4.7403, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.27450980392156865, | |
| "grad_norm": 4.1027350425720215, | |
| "learning_rate": 0.00014531590413943357, | |
| "loss": 5.2626, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.275599128540305, | |
| "grad_norm": 3.8692238330841064, | |
| "learning_rate": 0.00014509803921568628, | |
| "loss": 4.8325, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.2766884531590414, | |
| "grad_norm": 2.9437451362609863, | |
| "learning_rate": 0.00014488017429193902, | |
| "loss": 4.5083, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.2777777777777778, | |
| "grad_norm": 3.134019374847412, | |
| "learning_rate": 0.00014466230936819173, | |
| "loss": 4.4554, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.2788671023965142, | |
| "grad_norm": 2.429337501525879, | |
| "learning_rate": 0.00014444444444444444, | |
| "loss": 4.7566, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.27995642701525053, | |
| "grad_norm": 3.900141954421997, | |
| "learning_rate": 0.00014422657952069718, | |
| "loss": 4.4774, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.28104575163398693, | |
| "grad_norm": 2.6738038063049316, | |
| "learning_rate": 0.00014400871459694989, | |
| "loss": 5.0235, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.2821350762527233, | |
| "grad_norm": 3.294783353805542, | |
| "learning_rate": 0.00014379084967320262, | |
| "loss": 5.1043, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.28322440087145967, | |
| "grad_norm": 3.346564531326294, | |
| "learning_rate": 0.00014357298474945536, | |
| "loss": 4.9083, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.28431372549019607, | |
| "grad_norm": 4.410298824310303, | |
| "learning_rate": 0.00014335511982570807, | |
| "loss": 5.2007, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.28540305010893247, | |
| "grad_norm": 4.235734939575195, | |
| "learning_rate": 0.00014313725490196078, | |
| "loss": 4.446, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.28649237472766886, | |
| "grad_norm": 4.334876537322998, | |
| "learning_rate": 0.00014291938997821352, | |
| "loss": 4.8909, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.2875816993464052, | |
| "grad_norm": 2.46726393699646, | |
| "learning_rate": 0.00014270152505446623, | |
| "loss": 4.7269, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.2886710239651416, | |
| "grad_norm": 3.194774866104126, | |
| "learning_rate": 0.00014248366013071897, | |
| "loss": 4.8303, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.289760348583878, | |
| "grad_norm": 3.8563270568847656, | |
| "learning_rate": 0.00014226579520697168, | |
| "loss": 5.0194, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.2908496732026144, | |
| "grad_norm": 2.417151927947998, | |
| "learning_rate": 0.0001420479302832244, | |
| "loss": 5.0109, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.29193899782135074, | |
| "grad_norm": 3.7174580097198486, | |
| "learning_rate": 0.00014183006535947715, | |
| "loss": 4.864, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.29302832244008714, | |
| "grad_norm": 3.464721441268921, | |
| "learning_rate": 0.00014161220043572983, | |
| "loss": 4.4532, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.29411764705882354, | |
| "grad_norm": 3.013181686401367, | |
| "learning_rate": 0.00014139433551198257, | |
| "loss": 4.6633, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.29520697167755994, | |
| "grad_norm": 2.9733364582061768, | |
| "learning_rate": 0.0001411764705882353, | |
| "loss": 4.8566, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.2962962962962963, | |
| "grad_norm": 3.807645797729492, | |
| "learning_rate": 0.00014095860566448802, | |
| "loss": 5.0846, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.2973856209150327, | |
| "grad_norm": 2.4930531978607178, | |
| "learning_rate": 0.00014074074074074076, | |
| "loss": 4.8018, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.2984749455337691, | |
| "grad_norm": 3.7660248279571533, | |
| "learning_rate": 0.00014052287581699347, | |
| "loss": 4.6897, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.2995642701525055, | |
| "grad_norm": 2.6249687671661377, | |
| "learning_rate": 0.0001403050108932462, | |
| "loss": 4.8823, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.3006535947712418, | |
| "grad_norm": 4.599347114562988, | |
| "learning_rate": 0.00014008714596949891, | |
| "loss": 4.6467, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.3017429193899782, | |
| "grad_norm": 3.233173131942749, | |
| "learning_rate": 0.00013986928104575162, | |
| "loss": 4.3214, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.3028322440087146, | |
| "grad_norm": 3.9185855388641357, | |
| "learning_rate": 0.00013965141612200436, | |
| "loss": 5.0813, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.30392156862745096, | |
| "grad_norm": 3.683941125869751, | |
| "learning_rate": 0.0001394335511982571, | |
| "loss": 5.1563, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.30501089324618735, | |
| "grad_norm": 3.983316659927368, | |
| "learning_rate": 0.0001392156862745098, | |
| "loss": 4.7226, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.30610021786492375, | |
| "grad_norm": 3.2122104167938232, | |
| "learning_rate": 0.00013899782135076255, | |
| "loss": 4.8329, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.30718954248366015, | |
| "grad_norm": 4.021376132965088, | |
| "learning_rate": 0.00013877995642701526, | |
| "loss": 4.8389, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.3082788671023965, | |
| "grad_norm": 5.099759101867676, | |
| "learning_rate": 0.00013856209150326797, | |
| "loss": 4.8081, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.3093681917211329, | |
| "grad_norm": 2.6392247676849365, | |
| "learning_rate": 0.0001383442265795207, | |
| "loss": 4.7018, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.3104575163398693, | |
| "grad_norm": 2.628859519958496, | |
| "learning_rate": 0.00013812636165577342, | |
| "loss": 4.9003, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.3115468409586057, | |
| "grad_norm": 9.484817504882812, | |
| "learning_rate": 0.00013790849673202615, | |
| "loss": 5.015, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.31263616557734203, | |
| "grad_norm": 5.714864253997803, | |
| "learning_rate": 0.0001376906318082789, | |
| "loss": 4.9453, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.3137254901960784, | |
| "grad_norm": 4.550031661987305, | |
| "learning_rate": 0.0001374727668845316, | |
| "loss": 5.7539, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.3148148148148148, | |
| "grad_norm": 3.3271303176879883, | |
| "learning_rate": 0.0001372549019607843, | |
| "loss": 5.2101, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.3159041394335512, | |
| "grad_norm": 4.195096492767334, | |
| "learning_rate": 0.00013703703703703705, | |
| "loss": 4.8807, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.31699346405228757, | |
| "grad_norm": 3.0190374851226807, | |
| "learning_rate": 0.00013681917211328976, | |
| "loss": 4.8295, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.31808278867102396, | |
| "grad_norm": 3.7823071479797363, | |
| "learning_rate": 0.0001366013071895425, | |
| "loss": 5.084, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.31917211328976036, | |
| "grad_norm": 3.7246618270874023, | |
| "learning_rate": 0.0001363834422657952, | |
| "loss": 4.9432, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.3202614379084967, | |
| "grad_norm": 3.8362350463867188, | |
| "learning_rate": 0.00013616557734204794, | |
| "loss": 4.836, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.3213507625272331, | |
| "grad_norm": 3.490386486053467, | |
| "learning_rate": 0.00013594771241830065, | |
| "loss": 4.614, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.3224400871459695, | |
| "grad_norm": 3.012450695037842, | |
| "learning_rate": 0.00013572984749455336, | |
| "loss": 4.7246, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.3235294117647059, | |
| "grad_norm": 3.188887357711792, | |
| "learning_rate": 0.0001355119825708061, | |
| "loss": 4.9364, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.32461873638344224, | |
| "grad_norm": 3.366766929626465, | |
| "learning_rate": 0.00013529411764705884, | |
| "loss": 5.071, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.32570806100217864, | |
| "grad_norm": 4.414743900299072, | |
| "learning_rate": 0.00013507625272331155, | |
| "loss": 4.6582, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.32679738562091504, | |
| "grad_norm": 4.645547389984131, | |
| "learning_rate": 0.00013485838779956429, | |
| "loss": 4.7929, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.32788671023965144, | |
| "grad_norm": 3.6290218830108643, | |
| "learning_rate": 0.000134640522875817, | |
| "loss": 5.1134, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.3289760348583878, | |
| "grad_norm": 2.694545030593872, | |
| "learning_rate": 0.0001344226579520697, | |
| "loss": 5.0862, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.3300653594771242, | |
| "grad_norm": 4.14566707611084, | |
| "learning_rate": 0.00013420479302832244, | |
| "loss": 5.0835, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.3311546840958606, | |
| "grad_norm": 4.463272571563721, | |
| "learning_rate": 0.00013398692810457515, | |
| "loss": 4.4719, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.332244008714597, | |
| "grad_norm": 3.1239616870880127, | |
| "learning_rate": 0.0001337690631808279, | |
| "loss": 5.1211, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 2.634913206100464, | |
| "learning_rate": 0.00013355119825708063, | |
| "loss": 5.2042, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.3344226579520697, | |
| "grad_norm": 4.657975673675537, | |
| "learning_rate": 0.00013333333333333334, | |
| "loss": 4.7762, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.3355119825708061, | |
| "grad_norm": 2.70094633102417, | |
| "learning_rate": 0.00013311546840958608, | |
| "loss": 4.9501, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.3366013071895425, | |
| "grad_norm": 2.618452787399292, | |
| "learning_rate": 0.0001328976034858388, | |
| "loss": 4.7893, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.33769063180827885, | |
| "grad_norm": 2.8765547275543213, | |
| "learning_rate": 0.0001326797385620915, | |
| "loss": 4.4451, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.33877995642701525, | |
| "grad_norm": 4.317745208740234, | |
| "learning_rate": 0.00013246187363834424, | |
| "loss": 4.4262, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.33986928104575165, | |
| "grad_norm": 4.650866508483887, | |
| "learning_rate": 0.00013224400871459695, | |
| "loss": 4.53, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.340958605664488, | |
| "grad_norm": 2.9868412017822266, | |
| "learning_rate": 0.00013202614379084968, | |
| "loss": 4.4741, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.3420479302832244, | |
| "grad_norm": 4.8326826095581055, | |
| "learning_rate": 0.00013180827886710242, | |
| "loss": 4.9097, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.3431372549019608, | |
| "grad_norm": 3.1381747722625732, | |
| "learning_rate": 0.00013159041394335513, | |
| "loss": 4.4471, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.3442265795206972, | |
| "grad_norm": 2.8750381469726562, | |
| "learning_rate": 0.00013137254901960784, | |
| "loss": 4.812, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.3453159041394335, | |
| "grad_norm": 4.262397766113281, | |
| "learning_rate": 0.00013115468409586058, | |
| "loss": 4.317, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.3464052287581699, | |
| "grad_norm": 3.056037425994873, | |
| "learning_rate": 0.0001309368191721133, | |
| "loss": 5.0436, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.3474945533769063, | |
| "grad_norm": 2.79681134223938, | |
| "learning_rate": 0.00013071895424836603, | |
| "loss": 4.6181, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.3485838779956427, | |
| "grad_norm": 3.8306972980499268, | |
| "learning_rate": 0.00013050108932461876, | |
| "loss": 4.5201, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.34967320261437906, | |
| "grad_norm": 3.9447734355926514, | |
| "learning_rate": 0.00013028322440087147, | |
| "loss": 4.4423, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.35076252723311546, | |
| "grad_norm": 3.210547685623169, | |
| "learning_rate": 0.00013006535947712418, | |
| "loss": 4.4836, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.35185185185185186, | |
| "grad_norm": 3.065279006958008, | |
| "learning_rate": 0.0001298474945533769, | |
| "loss": 4.9008, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.35294117647058826, | |
| "grad_norm": 3.705817222595215, | |
| "learning_rate": 0.00012962962962962963, | |
| "loss": 5.5698, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.3540305010893246, | |
| "grad_norm": 3.503516912460327, | |
| "learning_rate": 0.00012941176470588237, | |
| "loss": 4.8409, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.355119825708061, | |
| "grad_norm": 3.5819544792175293, | |
| "learning_rate": 0.00012919389978213508, | |
| "loss": 4.8196, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.3562091503267974, | |
| "grad_norm": 5.018744945526123, | |
| "learning_rate": 0.00012897603485838782, | |
| "loss": 4.7224, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.3572984749455338, | |
| "grad_norm": 4.197869300842285, | |
| "learning_rate": 0.00012875816993464053, | |
| "loss": 4.3352, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.35838779956427014, | |
| "grad_norm": 3.5066583156585693, | |
| "learning_rate": 0.00012854030501089324, | |
| "loss": 4.5001, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.35947712418300654, | |
| "grad_norm": 2.969836950302124, | |
| "learning_rate": 0.00012832244008714597, | |
| "loss": 4.6297, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.36056644880174293, | |
| "grad_norm": 3.936361789703369, | |
| "learning_rate": 0.00012810457516339868, | |
| "loss": 4.6566, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.3616557734204793, | |
| "grad_norm": 3.509218692779541, | |
| "learning_rate": 0.00012788671023965142, | |
| "loss": 4.9033, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.3627450980392157, | |
| "grad_norm": 3.586121082305908, | |
| "learning_rate": 0.00012766884531590416, | |
| "loss": 4.7765, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.3638344226579521, | |
| "grad_norm": 4.253020763397217, | |
| "learning_rate": 0.00012745098039215687, | |
| "loss": 5.0747, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.36492374727668847, | |
| "grad_norm": 2.871368408203125, | |
| "learning_rate": 0.00012723311546840958, | |
| "loss": 4.6841, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.3660130718954248, | |
| "grad_norm": 3.784471273422241, | |
| "learning_rate": 0.00012701525054466232, | |
| "loss": 4.9569, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.3671023965141612, | |
| "grad_norm": 4.249979496002197, | |
| "learning_rate": 0.00012679738562091503, | |
| "loss": 4.6959, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.3681917211328976, | |
| "grad_norm": 4.012763500213623, | |
| "learning_rate": 0.00012657952069716776, | |
| "loss": 5.4651, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.369281045751634, | |
| "grad_norm": 4.050015926361084, | |
| "learning_rate": 0.0001263616557734205, | |
| "loss": 4.5269, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.37037037037037035, | |
| "grad_norm": 3.3827953338623047, | |
| "learning_rate": 0.0001261437908496732, | |
| "loss": 4.6672, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.37145969498910675, | |
| "grad_norm": 3.166588544845581, | |
| "learning_rate": 0.00012592592592592592, | |
| "loss": 4.8382, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.37254901960784315, | |
| "grad_norm": 3.722085952758789, | |
| "learning_rate": 0.00012570806100217863, | |
| "loss": 4.59, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.37363834422657954, | |
| "grad_norm": 3.9224953651428223, | |
| "learning_rate": 0.00012549019607843137, | |
| "loss": 4.6477, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.3747276688453159, | |
| "grad_norm": 3.167107582092285, | |
| "learning_rate": 0.0001252723311546841, | |
| "loss": 5.0107, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.3758169934640523, | |
| "grad_norm": 3.407217264175415, | |
| "learning_rate": 0.00012505446623093682, | |
| "loss": 5.09, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.3769063180827887, | |
| "grad_norm": 3.8298895359039307, | |
| "learning_rate": 0.00012483660130718956, | |
| "loss": 5.0545, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.3779956427015251, | |
| "grad_norm": 6.0477800369262695, | |
| "learning_rate": 0.0001246187363834423, | |
| "loss": 4.9539, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.3790849673202614, | |
| "grad_norm": 3.269374132156372, | |
| "learning_rate": 0.000124400871459695, | |
| "loss": 4.8141, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.3801742919389978, | |
| "grad_norm": 3.4903624057769775, | |
| "learning_rate": 0.00012418300653594771, | |
| "loss": 4.5177, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.3812636165577342, | |
| "grad_norm": 3.221745491027832, | |
| "learning_rate": 0.00012396514161220045, | |
| "loss": 4.7741, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.38235294117647056, | |
| "grad_norm": 3.368844985961914, | |
| "learning_rate": 0.00012374727668845316, | |
| "loss": 4.7539, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.38344226579520696, | |
| "grad_norm": 3.398777723312378, | |
| "learning_rate": 0.0001235294117647059, | |
| "loss": 4.8483, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.38453159041394336, | |
| "grad_norm": 3.093210220336914, | |
| "learning_rate": 0.0001233115468409586, | |
| "loss": 5.1118, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.38562091503267976, | |
| "grad_norm": 2.6734519004821777, | |
| "learning_rate": 0.00012309368191721135, | |
| "loss": 4.7035, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.3867102396514161, | |
| "grad_norm": 2.951702356338501, | |
| "learning_rate": 0.00012287581699346406, | |
| "loss": 4.6663, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.3877995642701525, | |
| "grad_norm": 2.9217047691345215, | |
| "learning_rate": 0.00012265795206971677, | |
| "loss": 4.7074, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.3888888888888889, | |
| "grad_norm": 3.06427001953125, | |
| "learning_rate": 0.0001224400871459695, | |
| "loss": 4.7311, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.3899782135076253, | |
| "grad_norm": 4.140756130218506, | |
| "learning_rate": 0.00012222222222222224, | |
| "loss": 4.816, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.39106753812636164, | |
| "grad_norm": 4.553076267242432, | |
| "learning_rate": 0.00012200435729847495, | |
| "loss": 5.5209, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.39215686274509803, | |
| "grad_norm": 3.7939252853393555, | |
| "learning_rate": 0.00012178649237472768, | |
| "loss": 4.7103, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.39324618736383443, | |
| "grad_norm": 3.0563583374023438, | |
| "learning_rate": 0.00012156862745098039, | |
| "loss": 5.1193, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.39433551198257083, | |
| "grad_norm": 3.1910042762756348, | |
| "learning_rate": 0.00012135076252723312, | |
| "loss": 4.7778, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.3954248366013072, | |
| "grad_norm": 3.6231610774993896, | |
| "learning_rate": 0.00012113289760348585, | |
| "loss": 4.5034, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.39651416122004357, | |
| "grad_norm": 3.0576488971710205, | |
| "learning_rate": 0.00012091503267973856, | |
| "loss": 4.7385, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.39760348583877997, | |
| "grad_norm": 7.9060821533203125, | |
| "learning_rate": 0.0001206971677559913, | |
| "loss": 4.7028, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.39869281045751637, | |
| "grad_norm": 4.208837985992432, | |
| "learning_rate": 0.00012047930283224402, | |
| "loss": 4.9435, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.3997821350762527, | |
| "grad_norm": 2.8220272064208984, | |
| "learning_rate": 0.00012026143790849673, | |
| "loss": 4.6085, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.4008714596949891, | |
| "grad_norm": 2.2707273960113525, | |
| "learning_rate": 0.00012004357298474947, | |
| "loss": 4.5269, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.4019607843137255, | |
| "grad_norm": 4.206027507781982, | |
| "learning_rate": 0.00011982570806100219, | |
| "loss": 4.2867, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.40305010893246185, | |
| "grad_norm": 2.74226450920105, | |
| "learning_rate": 0.0001196078431372549, | |
| "loss": 4.9979, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.40413943355119825, | |
| "grad_norm": 2.58813214302063, | |
| "learning_rate": 0.00011938997821350764, | |
| "loss": 4.8099, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.40522875816993464, | |
| "grad_norm": 2.26802396774292, | |
| "learning_rate": 0.00011917211328976035, | |
| "loss": 5.2378, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.40631808278867104, | |
| "grad_norm": 3.560227155685425, | |
| "learning_rate": 0.00011895424836601307, | |
| "loss": 4.8105, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.4074074074074074, | |
| "grad_norm": 3.6395950317382812, | |
| "learning_rate": 0.00011873638344226581, | |
| "loss": 4.4134, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.4084967320261438, | |
| "grad_norm": 2.737851619720459, | |
| "learning_rate": 0.00011851851851851852, | |
| "loss": 4.9353, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.4095860566448802, | |
| "grad_norm": 2.9248199462890625, | |
| "learning_rate": 0.00011830065359477124, | |
| "loss": 4.6186, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.4106753812636166, | |
| "grad_norm": 3.741907835006714, | |
| "learning_rate": 0.00011808278867102398, | |
| "loss": 4.678, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.4117647058823529, | |
| "grad_norm": 4.954297065734863, | |
| "learning_rate": 0.00011786492374727669, | |
| "loss": 4.7581, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.4128540305010893, | |
| "grad_norm": 2.864030122756958, | |
| "learning_rate": 0.00011764705882352942, | |
| "loss": 5.0273, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.4139433551198257, | |
| "grad_norm": 2.821183919906616, | |
| "learning_rate": 0.00011742919389978213, | |
| "loss": 4.7005, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.4150326797385621, | |
| "grad_norm": 2.887479782104492, | |
| "learning_rate": 0.00011721132897603486, | |
| "loss": 4.8337, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.41612200435729846, | |
| "grad_norm": 3.6127638816833496, | |
| "learning_rate": 0.0001169934640522876, | |
| "loss": 4.0269, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.41721132897603486, | |
| "grad_norm": 3.419294834136963, | |
| "learning_rate": 0.0001167755991285403, | |
| "loss": 4.9576, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.41830065359477125, | |
| "grad_norm": 4.136377334594727, | |
| "learning_rate": 0.00011655773420479303, | |
| "loss": 4.5996, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.41938997821350765, | |
| "grad_norm": 3.0449485778808594, | |
| "learning_rate": 0.00011633986928104577, | |
| "loss": 4.8109, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.420479302832244, | |
| "grad_norm": 2.4126100540161133, | |
| "learning_rate": 0.00011612200435729847, | |
| "loss": 4.6888, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.4215686274509804, | |
| "grad_norm": 3.2058181762695312, | |
| "learning_rate": 0.0001159041394335512, | |
| "loss": 5.0234, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.4226579520697168, | |
| "grad_norm": 2.861661672592163, | |
| "learning_rate": 0.00011568627450980394, | |
| "loss": 4.5451, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.42374727668845313, | |
| "grad_norm": 3.904843807220459, | |
| "learning_rate": 0.00011546840958605665, | |
| "loss": 4.6205, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.42483660130718953, | |
| "grad_norm": 2.517266273498535, | |
| "learning_rate": 0.00011525054466230938, | |
| "loss": 4.7441, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.42592592592592593, | |
| "grad_norm": 4.648169994354248, | |
| "learning_rate": 0.00011503267973856209, | |
| "loss": 4.7592, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.42701525054466233, | |
| "grad_norm": 5.185862064361572, | |
| "learning_rate": 0.00011481481481481482, | |
| "loss": 5.187, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.42810457516339867, | |
| "grad_norm": 3.9632956981658936, | |
| "learning_rate": 0.00011459694989106755, | |
| "loss": 4.3847, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.42919389978213507, | |
| "grad_norm": 3.6875765323638916, | |
| "learning_rate": 0.00011437908496732026, | |
| "loss": 4.7651, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.43028322440087147, | |
| "grad_norm": 5.440472602844238, | |
| "learning_rate": 0.000114161220043573, | |
| "loss": 5.3146, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.43137254901960786, | |
| "grad_norm": 2.51242995262146, | |
| "learning_rate": 0.00011394335511982572, | |
| "loss": 4.6351, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.4324618736383442, | |
| "grad_norm": 2.9834821224212646, | |
| "learning_rate": 0.00011372549019607843, | |
| "loss": 4.7093, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.4335511982570806, | |
| "grad_norm": 2.766350507736206, | |
| "learning_rate": 0.00011350762527233117, | |
| "loss": 4.5507, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.434640522875817, | |
| "grad_norm": 2.832143783569336, | |
| "learning_rate": 0.00011328976034858388, | |
| "loss": 4.5613, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.4357298474945534, | |
| "grad_norm": 2.6001861095428467, | |
| "learning_rate": 0.0001130718954248366, | |
| "loss": 4.6082, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.43681917211328974, | |
| "grad_norm": 2.4849538803100586, | |
| "learning_rate": 0.00011285403050108934, | |
| "loss": 4.6671, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.43790849673202614, | |
| "grad_norm": 3.406707286834717, | |
| "learning_rate": 0.00011263616557734205, | |
| "loss": 4.7908, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.43899782135076254, | |
| "grad_norm": 3.0030910968780518, | |
| "learning_rate": 0.00011241830065359477, | |
| "loss": 4.8129, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.4400871459694989, | |
| "grad_norm": 2.924572467803955, | |
| "learning_rate": 0.00011220043572984751, | |
| "loss": 4.719, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.4411764705882353, | |
| "grad_norm": 2.3402299880981445, | |
| "learning_rate": 0.00011198257080610022, | |
| "loss": 4.9935, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.4422657952069717, | |
| "grad_norm": 3.6391263008117676, | |
| "learning_rate": 0.00011176470588235294, | |
| "loss": 4.3632, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.4433551198257081, | |
| "grad_norm": 3.7474288940429688, | |
| "learning_rate": 0.00011154684095860568, | |
| "loss": 4.7672, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 3.856614351272583, | |
| "learning_rate": 0.00011132897603485839, | |
| "loss": 4.4382, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.4455337690631808, | |
| "grad_norm": 3.2774574756622314, | |
| "learning_rate": 0.00011111111111111112, | |
| "loss": 4.5095, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.4466230936819172, | |
| "grad_norm": 4.317390441894531, | |
| "learning_rate": 0.00011089324618736383, | |
| "loss": 4.9977, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.4477124183006536, | |
| "grad_norm": 2.799553394317627, | |
| "learning_rate": 0.00011067538126361656, | |
| "loss": 4.6128, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.44880174291938996, | |
| "grad_norm": 3.6662847995758057, | |
| "learning_rate": 0.00011045751633986929, | |
| "loss": 4.8917, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.44989106753812635, | |
| "grad_norm": 4.539546489715576, | |
| "learning_rate": 0.000110239651416122, | |
| "loss": 4.7873, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.45098039215686275, | |
| "grad_norm": 3.262467861175537, | |
| "learning_rate": 0.00011002178649237474, | |
| "loss": 5.2149, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.45206971677559915, | |
| "grad_norm": 4.247523307800293, | |
| "learning_rate": 0.00010980392156862746, | |
| "loss": 4.7652, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.4531590413943355, | |
| "grad_norm": 4.986163139343262, | |
| "learning_rate": 0.00010958605664488017, | |
| "loss": 4.7074, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.4542483660130719, | |
| "grad_norm": 2.9192981719970703, | |
| "learning_rate": 0.00010936819172113291, | |
| "loss": 4.798, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.4553376906318083, | |
| "grad_norm": 2.6577565670013428, | |
| "learning_rate": 0.00010915032679738562, | |
| "loss": 4.8006, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.4564270152505447, | |
| "grad_norm": 2.3315024375915527, | |
| "learning_rate": 0.00010893246187363834, | |
| "loss": 4.9458, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.45751633986928103, | |
| "grad_norm": 2.9295709133148193, | |
| "learning_rate": 0.00010871459694989108, | |
| "loss": 5.2466, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.45860566448801743, | |
| "grad_norm": 2.850202798843384, | |
| "learning_rate": 0.00010849673202614379, | |
| "loss": 4.7409, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.4596949891067538, | |
| "grad_norm": 3.31477689743042, | |
| "learning_rate": 0.00010827886710239653, | |
| "loss": 4.7718, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.46078431372549017, | |
| "grad_norm": 2.7019755840301514, | |
| "learning_rate": 0.00010806100217864925, | |
| "loss": 4.8779, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.46187363834422657, | |
| "grad_norm": 4.396770477294922, | |
| "learning_rate": 0.00010784313725490196, | |
| "loss": 4.7548, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.46296296296296297, | |
| "grad_norm": 2.4405012130737305, | |
| "learning_rate": 0.0001076252723311547, | |
| "loss": 4.9687, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.46405228758169936, | |
| "grad_norm": 4.194379806518555, | |
| "learning_rate": 0.00010740740740740742, | |
| "loss": 4.5067, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.4651416122004357, | |
| "grad_norm": 2.688394784927368, | |
| "learning_rate": 0.00010718954248366013, | |
| "loss": 4.7729, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.4662309368191721, | |
| "grad_norm": 2.670652389526367, | |
| "learning_rate": 0.00010697167755991287, | |
| "loss": 4.5563, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.4673202614379085, | |
| "grad_norm": 3.3149876594543457, | |
| "learning_rate": 0.00010675381263616558, | |
| "loss": 4.8264, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.4684095860566449, | |
| "grad_norm": 9.103561401367188, | |
| "learning_rate": 0.0001065359477124183, | |
| "loss": 4.642, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.46949891067538124, | |
| "grad_norm": 2.6664645671844482, | |
| "learning_rate": 0.00010631808278867104, | |
| "loss": 4.9578, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.47058823529411764, | |
| "grad_norm": 4.866668701171875, | |
| "learning_rate": 0.00010610021786492375, | |
| "loss": 5.1092, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.47167755991285404, | |
| "grad_norm": 2.7630739212036133, | |
| "learning_rate": 0.00010588235294117647, | |
| "loss": 4.6703, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.47276688453159044, | |
| "grad_norm": 2.743724822998047, | |
| "learning_rate": 0.00010566448801742921, | |
| "loss": 4.9822, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.4738562091503268, | |
| "grad_norm": 2.2937724590301514, | |
| "learning_rate": 0.00010544662309368192, | |
| "loss": 4.6221, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.4749455337690632, | |
| "grad_norm": 4.061140537261963, | |
| "learning_rate": 0.00010522875816993465, | |
| "loss": 4.6573, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.4760348583877996, | |
| "grad_norm": 3.9372758865356445, | |
| "learning_rate": 0.00010501089324618736, | |
| "loss": 4.4482, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.477124183006536, | |
| "grad_norm": 5.1214776039123535, | |
| "learning_rate": 0.0001047930283224401, | |
| "loss": 5.1205, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.4782135076252723, | |
| "grad_norm": 4.609157085418701, | |
| "learning_rate": 0.00010457516339869282, | |
| "loss": 4.6272, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.4793028322440087, | |
| "grad_norm": 2.4764328002929688, | |
| "learning_rate": 0.00010435729847494553, | |
| "loss": 4.8355, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.4803921568627451, | |
| "grad_norm": 3.3244357109069824, | |
| "learning_rate": 0.00010413943355119827, | |
| "loss": 4.7154, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.48148148148148145, | |
| "grad_norm": 2.6419830322265625, | |
| "learning_rate": 0.00010392156862745099, | |
| "loss": 4.5512, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.48257080610021785, | |
| "grad_norm": 2.7745046615600586, | |
| "learning_rate": 0.0001037037037037037, | |
| "loss": 4.4104, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.48366013071895425, | |
| "grad_norm": 3.4725728034973145, | |
| "learning_rate": 0.00010348583877995644, | |
| "loss": 4.068, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.48474945533769065, | |
| "grad_norm": 3.839707851409912, | |
| "learning_rate": 0.00010326797385620916, | |
| "loss": 4.6368, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.485838779956427, | |
| "grad_norm": 3.2895514965057373, | |
| "learning_rate": 0.00010305010893246187, | |
| "loss": 4.7474, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.4869281045751634, | |
| "grad_norm": 5.714195728302002, | |
| "learning_rate": 0.00010283224400871461, | |
| "loss": 4.229, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.4880174291938998, | |
| "grad_norm": 4.010753154754639, | |
| "learning_rate": 0.00010261437908496732, | |
| "loss": 5.1063, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.4891067538126362, | |
| "grad_norm": 4.544241905212402, | |
| "learning_rate": 0.00010239651416122004, | |
| "loss": 5.0706, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.49019607843137253, | |
| "grad_norm": 3.7243716716766357, | |
| "learning_rate": 0.00010217864923747278, | |
| "loss": 4.8247, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.4912854030501089, | |
| "grad_norm": 3.6378531455993652, | |
| "learning_rate": 0.00010196078431372549, | |
| "loss": 4.9122, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.4923747276688453, | |
| "grad_norm": 2.5549800395965576, | |
| "learning_rate": 0.00010174291938997821, | |
| "loss": 4.4748, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.4934640522875817, | |
| "grad_norm": 4.132662296295166, | |
| "learning_rate": 0.00010152505446623095, | |
| "loss": 4.5698, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.49455337690631807, | |
| "grad_norm": 3.922738790512085, | |
| "learning_rate": 0.00010130718954248366, | |
| "loss": 5.0917, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.49564270152505446, | |
| "grad_norm": 3.2766265869140625, | |
| "learning_rate": 0.00010108932461873639, | |
| "loss": 4.4083, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.49673202614379086, | |
| "grad_norm": 3.297292709350586, | |
| "learning_rate": 0.0001008714596949891, | |
| "loss": 4.7993, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.49782135076252726, | |
| "grad_norm": 3.353689193725586, | |
| "learning_rate": 0.00010065359477124183, | |
| "loss": 4.6677, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.4989106753812636, | |
| "grad_norm": 2.691397190093994, | |
| "learning_rate": 0.00010043572984749457, | |
| "loss": 4.5091, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 2.4701926708221436, | |
| "learning_rate": 0.00010021786492374727, | |
| "loss": 4.674, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.5010893246187363, | |
| "grad_norm": 2.686195135116577, | |
| "learning_rate": 0.0001, | |
| "loss": 4.5644, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5021786492374728, | |
| "grad_norm": 4.287258625030518, | |
| "learning_rate": 9.978213507625273e-05, | |
| "loss": 4.4426, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.5032679738562091, | |
| "grad_norm": 2.4832026958465576, | |
| "learning_rate": 9.956427015250545e-05, | |
| "loss": 4.664, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.5043572984749455, | |
| "grad_norm": 2.9472968578338623, | |
| "learning_rate": 9.934640522875818e-05, | |
| "loss": 4.534, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.5054466230936819, | |
| "grad_norm": 2.76766300201416, | |
| "learning_rate": 9.91285403050109e-05, | |
| "loss": 4.5646, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.5065359477124183, | |
| "grad_norm": 3.476855516433716, | |
| "learning_rate": 9.891067538126362e-05, | |
| "loss": 4.2473, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.5076252723311547, | |
| "grad_norm": 2.654572010040283, | |
| "learning_rate": 9.869281045751635e-05, | |
| "loss": 4.7984, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.5087145969498911, | |
| "grad_norm": 2.742908239364624, | |
| "learning_rate": 9.847494553376907e-05, | |
| "loss": 4.7206, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.5098039215686274, | |
| "grad_norm": 4.9197611808776855, | |
| "learning_rate": 9.82570806100218e-05, | |
| "loss": 4.147, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.5108932461873639, | |
| "grad_norm": 2.5709521770477295, | |
| "learning_rate": 9.80392156862745e-05, | |
| "loss": 4.9892, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.5119825708061002, | |
| "grad_norm": 2.8320975303649902, | |
| "learning_rate": 9.782135076252724e-05, | |
| "loss": 5.088, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5130718954248366, | |
| "grad_norm": 2.810145378112793, | |
| "learning_rate": 9.760348583877997e-05, | |
| "loss": 4.6947, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.514161220043573, | |
| "grad_norm": 3.477754592895508, | |
| "learning_rate": 9.738562091503268e-05, | |
| "loss": 4.6429, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.5152505446623094, | |
| "grad_norm": 2.9826571941375732, | |
| "learning_rate": 9.71677559912854e-05, | |
| "loss": 4.4291, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.5163398692810458, | |
| "grad_norm": 4.387795925140381, | |
| "learning_rate": 9.694989106753814e-05, | |
| "loss": 5.3528, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.5174291938997821, | |
| "grad_norm": 4.61776876449585, | |
| "learning_rate": 9.673202614379085e-05, | |
| "loss": 4.3188, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.5185185185185185, | |
| "grad_norm": 2.3165104389190674, | |
| "learning_rate": 9.651416122004357e-05, | |
| "loss": 4.7733, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.5196078431372549, | |
| "grad_norm": 2.644317150115967, | |
| "learning_rate": 9.62962962962963e-05, | |
| "loss": 4.5561, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.5206971677559913, | |
| "grad_norm": 3.5700933933258057, | |
| "learning_rate": 9.607843137254903e-05, | |
| "loss": 4.6602, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.5217864923747276, | |
| "grad_norm": 2.1070199012756348, | |
| "learning_rate": 9.586056644880174e-05, | |
| "loss": 4.8822, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.5228758169934641, | |
| "grad_norm": 3.0725560188293457, | |
| "learning_rate": 9.564270152505447e-05, | |
| "loss": 4.7155, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5239651416122004, | |
| "grad_norm": 3.228832960128784, | |
| "learning_rate": 9.54248366013072e-05, | |
| "loss": 4.4635, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.5250544662309368, | |
| "grad_norm": 3.046309232711792, | |
| "learning_rate": 9.520697167755992e-05, | |
| "loss": 4.5285, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.5261437908496732, | |
| "grad_norm": 4.328576564788818, | |
| "learning_rate": 9.498910675381264e-05, | |
| "loss": 4.8138, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.5272331154684096, | |
| "grad_norm": 2.9554691314697266, | |
| "learning_rate": 9.477124183006536e-05, | |
| "loss": 4.5337, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.528322440087146, | |
| "grad_norm": 2.700284242630005, | |
| "learning_rate": 9.455337690631809e-05, | |
| "loss": 4.3948, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.5294117647058824, | |
| "grad_norm": 2.9909026622772217, | |
| "learning_rate": 9.433551198257081e-05, | |
| "loss": 4.4274, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.5305010893246187, | |
| "grad_norm": 3.623758316040039, | |
| "learning_rate": 9.411764705882353e-05, | |
| "loss": 4.6817, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.5315904139433552, | |
| "grad_norm": 4.6507415771484375, | |
| "learning_rate": 9.389978213507626e-05, | |
| "loss": 4.4477, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.5326797385620915, | |
| "grad_norm": 2.5566675662994385, | |
| "learning_rate": 9.368191721132898e-05, | |
| "loss": 4.8413, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.5337690631808278, | |
| "grad_norm": 3.6067495346069336, | |
| "learning_rate": 9.34640522875817e-05, | |
| "loss": 4.5247, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.5348583877995643, | |
| "grad_norm": 2.9623374938964844, | |
| "learning_rate": 9.324618736383443e-05, | |
| "loss": 4.7552, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.5359477124183006, | |
| "grad_norm": 3.302311420440674, | |
| "learning_rate": 9.302832244008714e-05, | |
| "loss": 4.4709, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.5370370370370371, | |
| "grad_norm": 3.787867546081543, | |
| "learning_rate": 9.281045751633988e-05, | |
| "loss": 4.5812, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.5381263616557734, | |
| "grad_norm": 2.7494025230407715, | |
| "learning_rate": 9.25925925925926e-05, | |
| "loss": 4.6292, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.5392156862745098, | |
| "grad_norm": 2.9627022743225098, | |
| "learning_rate": 9.237472766884531e-05, | |
| "loss": 4.859, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.5403050108932462, | |
| "grad_norm": 2.661006212234497, | |
| "learning_rate": 9.215686274509804e-05, | |
| "loss": 4.5307, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.5413943355119826, | |
| "grad_norm": 2.73852276802063, | |
| "learning_rate": 9.193899782135077e-05, | |
| "loss": 4.7416, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.5424836601307189, | |
| "grad_norm": 3.0369839668273926, | |
| "learning_rate": 9.172113289760348e-05, | |
| "loss": 4.4017, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.5435729847494554, | |
| "grad_norm": 3.2155401706695557, | |
| "learning_rate": 9.150326797385621e-05, | |
| "loss": 4.6235, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.5446623093681917, | |
| "grad_norm": 4.628570556640625, | |
| "learning_rate": 9.128540305010894e-05, | |
| "loss": 4.3991, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.545751633986928, | |
| "grad_norm": 3.397460460662842, | |
| "learning_rate": 9.106753812636167e-05, | |
| "loss": 4.4024, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.5468409586056645, | |
| "grad_norm": 3.134575128555298, | |
| "learning_rate": 9.084967320261438e-05, | |
| "loss": 4.4325, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.5479302832244008, | |
| "grad_norm": 4.651434421539307, | |
| "learning_rate": 9.06318082788671e-05, | |
| "loss": 4.5204, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.5490196078431373, | |
| "grad_norm": 4.166567325592041, | |
| "learning_rate": 9.041394335511984e-05, | |
| "loss": 4.679, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.5501089324618736, | |
| "grad_norm": 2.6662089824676514, | |
| "learning_rate": 9.019607843137255e-05, | |
| "loss": 4.2818, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.55119825708061, | |
| "grad_norm": 2.519526958465576, | |
| "learning_rate": 8.997821350762527e-05, | |
| "loss": 4.5338, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.5522875816993464, | |
| "grad_norm": 2.561030149459839, | |
| "learning_rate": 8.9760348583878e-05, | |
| "loss": 4.5726, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.5533769063180828, | |
| "grad_norm": 3.1477291584014893, | |
| "learning_rate": 8.954248366013072e-05, | |
| "loss": 4.4923, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.5544662309368191, | |
| "grad_norm": 3.5873420238494873, | |
| "learning_rate": 8.932461873638345e-05, | |
| "loss": 4.3401, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.5555555555555556, | |
| "grad_norm": 3.926547050476074, | |
| "learning_rate": 8.910675381263617e-05, | |
| "loss": 5.1414, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.5566448801742919, | |
| "grad_norm": 3.5044329166412354, | |
| "learning_rate": 8.888888888888889e-05, | |
| "loss": 4.3844, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.5577342047930284, | |
| "grad_norm": 3.4362680912017822, | |
| "learning_rate": 8.867102396514162e-05, | |
| "loss": 4.4701, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.5588235294117647, | |
| "grad_norm": 3.804070472717285, | |
| "learning_rate": 8.845315904139434e-05, | |
| "loss": 4.5234, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.5599128540305011, | |
| "grad_norm": 3.2715303897857666, | |
| "learning_rate": 8.823529411764706e-05, | |
| "loss": 5.3599, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.5610021786492375, | |
| "grad_norm": 4.5119123458862305, | |
| "learning_rate": 8.801742919389977e-05, | |
| "loss": 4.3403, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.5620915032679739, | |
| "grad_norm": 2.9930667877197266, | |
| "learning_rate": 8.779956427015251e-05, | |
| "loss": 4.6728, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.5631808278867102, | |
| "grad_norm": 3.3414700031280518, | |
| "learning_rate": 8.758169934640524e-05, | |
| "loss": 4.2796, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.5642701525054467, | |
| "grad_norm": 3.454094409942627, | |
| "learning_rate": 8.736383442265795e-05, | |
| "loss": 4.6207, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.565359477124183, | |
| "grad_norm": 2.343726634979248, | |
| "learning_rate": 8.714596949891068e-05, | |
| "loss": 4.6071, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.5664488017429193, | |
| "grad_norm": 4.373025894165039, | |
| "learning_rate": 8.692810457516341e-05, | |
| "loss": 4.8911, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5675381263616558, | |
| "grad_norm": 3.511699676513672, | |
| "learning_rate": 8.671023965141613e-05, | |
| "loss": 4.6699, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.5686274509803921, | |
| "grad_norm": 3.171043872833252, | |
| "learning_rate": 8.649237472766884e-05, | |
| "loss": 4.9787, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.5697167755991286, | |
| "grad_norm": 4.076540470123291, | |
| "learning_rate": 8.627450980392158e-05, | |
| "loss": 4.9232, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.5708061002178649, | |
| "grad_norm": 3.0852832794189453, | |
| "learning_rate": 8.60566448801743e-05, | |
| "loss": 4.7163, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.5718954248366013, | |
| "grad_norm": 2.6939470767974854, | |
| "learning_rate": 8.583877995642701e-05, | |
| "loss": 4.4997, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.5729847494553377, | |
| "grad_norm": 3.620074510574341, | |
| "learning_rate": 8.562091503267974e-05, | |
| "loss": 4.8771, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.5740740740740741, | |
| "grad_norm": 2.4933581352233887, | |
| "learning_rate": 8.540305010893247e-05, | |
| "loss": 4.5819, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.5751633986928104, | |
| "grad_norm": 3.1998159885406494, | |
| "learning_rate": 8.518518518518518e-05, | |
| "loss": 4.4039, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.5762527233115469, | |
| "grad_norm": 2.7315926551818848, | |
| "learning_rate": 8.496732026143791e-05, | |
| "loss": 4.9051, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.5773420479302832, | |
| "grad_norm": 3.002873420715332, | |
| "learning_rate": 8.474945533769063e-05, | |
| "loss": 4.4435, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.5784313725490197, | |
| "grad_norm": 3.9929301738739014, | |
| "learning_rate": 8.453159041394336e-05, | |
| "loss": 4.6196, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.579520697167756, | |
| "grad_norm": 4.095264434814453, | |
| "learning_rate": 8.431372549019608e-05, | |
| "loss": 4.6282, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.5806100217864923, | |
| "grad_norm": 2.285243272781372, | |
| "learning_rate": 8.40958605664488e-05, | |
| "loss": 4.9182, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.5816993464052288, | |
| "grad_norm": 2.6674435138702393, | |
| "learning_rate": 8.387799564270154e-05, | |
| "loss": 4.5317, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.5827886710239651, | |
| "grad_norm": 2.477482557296753, | |
| "learning_rate": 8.366013071895425e-05, | |
| "loss": 4.7032, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.5838779956427015, | |
| "grad_norm": 4.01880407333374, | |
| "learning_rate": 8.344226579520698e-05, | |
| "loss": 4.4193, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.5849673202614379, | |
| "grad_norm": 2.3387510776519775, | |
| "learning_rate": 8.32244008714597e-05, | |
| "loss": 4.7034, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.5860566448801743, | |
| "grad_norm": 2.9223763942718506, | |
| "learning_rate": 8.300653594771242e-05, | |
| "loss": 4.7173, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.5871459694989106, | |
| "grad_norm": 2.537396192550659, | |
| "learning_rate": 8.278867102396515e-05, | |
| "loss": 4.4837, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": 2.9067773818969727, | |
| "learning_rate": 8.257080610021787e-05, | |
| "loss": 4.7819, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5893246187363834, | |
| "grad_norm": 3.539597988128662, | |
| "learning_rate": 8.23529411764706e-05, | |
| "loss": 4.2396, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.5904139433551199, | |
| "grad_norm": 3.219714641571045, | |
| "learning_rate": 8.213507625272332e-05, | |
| "loss": 4.8601, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.5915032679738562, | |
| "grad_norm": 4.1988325119018555, | |
| "learning_rate": 8.191721132897604e-05, | |
| "loss": 4.4636, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.5925925925925926, | |
| "grad_norm": 3.404759407043457, | |
| "learning_rate": 8.169934640522877e-05, | |
| "loss": 4.6973, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.593681917211329, | |
| "grad_norm": 3.848076820373535, | |
| "learning_rate": 8.148148148148148e-05, | |
| "loss": 4.5368, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.5947712418300654, | |
| "grad_norm": 3.773057460784912, | |
| "learning_rate": 8.126361655773421e-05, | |
| "loss": 4.5394, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.5958605664488017, | |
| "grad_norm": 3.8611278533935547, | |
| "learning_rate": 8.104575163398694e-05, | |
| "loss": 4.8637, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.5969498910675382, | |
| "grad_norm": 2.950388193130493, | |
| "learning_rate": 8.082788671023965e-05, | |
| "loss": 4.3669, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.5980392156862745, | |
| "grad_norm": 2.911773443222046, | |
| "learning_rate": 8.061002178649237e-05, | |
| "loss": 4.9239, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.599128540305011, | |
| "grad_norm": 2.4563143253326416, | |
| "learning_rate": 8.039215686274511e-05, | |
| "loss": 4.9816, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.6002178649237473, | |
| "grad_norm": 3.053799867630005, | |
| "learning_rate": 8.017429193899782e-05, | |
| "loss": 4.4451, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.6013071895424836, | |
| "grad_norm": 4.562179088592529, | |
| "learning_rate": 7.995642701525054e-05, | |
| "loss": 4.3851, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.6023965141612201, | |
| "grad_norm": 3.585679054260254, | |
| "learning_rate": 7.973856209150328e-05, | |
| "loss": 4.4014, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.6034858387799564, | |
| "grad_norm": 3.0385072231292725, | |
| "learning_rate": 7.952069716775599e-05, | |
| "loss": 4.753, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.6045751633986928, | |
| "grad_norm": 1.95540189743042, | |
| "learning_rate": 7.930283224400871e-05, | |
| "loss": 4.6713, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.6056644880174292, | |
| "grad_norm": 2.5057144165039062, | |
| "learning_rate": 7.908496732026144e-05, | |
| "loss": 4.5111, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.6067538126361656, | |
| "grad_norm": 2.4981019496917725, | |
| "learning_rate": 7.886710239651418e-05, | |
| "loss": 4.5696, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.6078431372549019, | |
| "grad_norm": 3.182044506072998, | |
| "learning_rate": 7.864923747276689e-05, | |
| "loss": 4.6675, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.6089324618736384, | |
| "grad_norm": 3.999967575073242, | |
| "learning_rate": 7.843137254901961e-05, | |
| "loss": 4.557, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.6100217864923747, | |
| "grad_norm": 2.430976152420044, | |
| "learning_rate": 7.821350762527233e-05, | |
| "loss": 4.6301, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.6111111111111112, | |
| "grad_norm": 4.943197250366211, | |
| "learning_rate": 7.799564270152506e-05, | |
| "loss": 5.0166, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.6122004357298475, | |
| "grad_norm": 2.988447427749634, | |
| "learning_rate": 7.777777777777778e-05, | |
| "loss": 4.5796, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.6132897603485838, | |
| "grad_norm": 2.7854208946228027, | |
| "learning_rate": 7.75599128540305e-05, | |
| "loss": 4.814, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.6143790849673203, | |
| "grad_norm": 3.3578221797943115, | |
| "learning_rate": 7.734204793028323e-05, | |
| "loss": 4.5014, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.6154684095860566, | |
| "grad_norm": 2.4927005767822266, | |
| "learning_rate": 7.712418300653595e-05, | |
| "loss": 4.3526, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.616557734204793, | |
| "grad_norm": 3.176056385040283, | |
| "learning_rate": 7.690631808278868e-05, | |
| "loss": 4.5428, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.6176470588235294, | |
| "grad_norm": 2.8541767597198486, | |
| "learning_rate": 7.66884531590414e-05, | |
| "loss": 4.6105, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.6187363834422658, | |
| "grad_norm": 4.8937153816223145, | |
| "learning_rate": 7.647058823529411e-05, | |
| "loss": 3.912, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.6198257080610022, | |
| "grad_norm": 4.21392297744751, | |
| "learning_rate": 7.625272331154685e-05, | |
| "loss": 4.1306, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.6209150326797386, | |
| "grad_norm": 2.5804617404937744, | |
| "learning_rate": 7.603485838779957e-05, | |
| "loss": 4.7057, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6220043572984749, | |
| "grad_norm": 2.3549726009368896, | |
| "learning_rate": 7.581699346405228e-05, | |
| "loss": 4.6464, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.6230936819172114, | |
| "grad_norm": 3.9271023273468018, | |
| "learning_rate": 7.559912854030502e-05, | |
| "loss": 4.1135, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.6241830065359477, | |
| "grad_norm": 3.2713518142700195, | |
| "learning_rate": 7.538126361655774e-05, | |
| "loss": 4.7463, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.6252723311546841, | |
| "grad_norm": 2.842008113861084, | |
| "learning_rate": 7.516339869281045e-05, | |
| "loss": 4.6107, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.6263616557734205, | |
| "grad_norm": 2.7107882499694824, | |
| "learning_rate": 7.494553376906318e-05, | |
| "loss": 4.7955, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.6274509803921569, | |
| "grad_norm": 3.076648235321045, | |
| "learning_rate": 7.472766884531592e-05, | |
| "loss": 4.8382, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.6285403050108932, | |
| "grad_norm": 2.7458837032318115, | |
| "learning_rate": 7.450980392156864e-05, | |
| "loss": 4.4344, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.6296296296296297, | |
| "grad_norm": 2.543877124786377, | |
| "learning_rate": 7.429193899782135e-05, | |
| "loss": 4.3536, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.630718954248366, | |
| "grad_norm": 3.883885145187378, | |
| "learning_rate": 7.407407407407407e-05, | |
| "loss": 4.8026, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.6318082788671024, | |
| "grad_norm": 2.9665493965148926, | |
| "learning_rate": 7.385620915032681e-05, | |
| "loss": 4.779, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.6328976034858388, | |
| "grad_norm": 3.1739346981048584, | |
| "learning_rate": 7.363834422657952e-05, | |
| "loss": 4.7859, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.6339869281045751, | |
| "grad_norm": 3.4699697494506836, | |
| "learning_rate": 7.342047930283224e-05, | |
| "loss": 4.2899, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.6350762527233116, | |
| "grad_norm": 4.120363235473633, | |
| "learning_rate": 7.320261437908497e-05, | |
| "loss": 4.3786, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.6361655773420479, | |
| "grad_norm": 2.930485248565674, | |
| "learning_rate": 7.298474945533769e-05, | |
| "loss": 4.4614, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.6372549019607843, | |
| "grad_norm": 2.5551388263702393, | |
| "learning_rate": 7.276688453159042e-05, | |
| "loss": 4.5241, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.6383442265795207, | |
| "grad_norm": 2.2362239360809326, | |
| "learning_rate": 7.254901960784314e-05, | |
| "loss": 4.763, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.6394335511982571, | |
| "grad_norm": 2.7569496631622314, | |
| "learning_rate": 7.233115468409586e-05, | |
| "loss": 4.3938, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.6405228758169934, | |
| "grad_norm": 2.8717620372772217, | |
| "learning_rate": 7.211328976034859e-05, | |
| "loss": 4.4601, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.6416122004357299, | |
| "grad_norm": 4.488802433013916, | |
| "learning_rate": 7.189542483660131e-05, | |
| "loss": 4.3694, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.6427015250544662, | |
| "grad_norm": 3.735525131225586, | |
| "learning_rate": 7.167755991285404e-05, | |
| "loss": 5.0611, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.6437908496732027, | |
| "grad_norm": 3.061038017272949, | |
| "learning_rate": 7.145969498910676e-05, | |
| "loss": 4.5695, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.644880174291939, | |
| "grad_norm": 4.214951992034912, | |
| "learning_rate": 7.124183006535948e-05, | |
| "loss": 4.5174, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.6459694989106753, | |
| "grad_norm": 3.226206064224243, | |
| "learning_rate": 7.10239651416122e-05, | |
| "loss": 4.9365, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.6470588235294118, | |
| "grad_norm": 2.9899861812591553, | |
| "learning_rate": 7.080610021786492e-05, | |
| "loss": 4.2227, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.6481481481481481, | |
| "grad_norm": 2.587507963180542, | |
| "learning_rate": 7.058823529411765e-05, | |
| "loss": 4.8616, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.6492374727668845, | |
| "grad_norm": 3.3234870433807373, | |
| "learning_rate": 7.037037037037038e-05, | |
| "loss": 5.0433, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.6503267973856209, | |
| "grad_norm": 2.378889560699463, | |
| "learning_rate": 7.01525054466231e-05, | |
| "loss": 4.658, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.6514161220043573, | |
| "grad_norm": 3.2097010612487793, | |
| "learning_rate": 6.993464052287581e-05, | |
| "loss": 4.1424, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.6525054466230937, | |
| "grad_norm": 2.9745841026306152, | |
| "learning_rate": 6.971677559912855e-05, | |
| "loss": 4.3658, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.6535947712418301, | |
| "grad_norm": 3.336416482925415, | |
| "learning_rate": 6.949891067538127e-05, | |
| "loss": 4.5247, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6546840958605664, | |
| "grad_norm": 3.381309986114502, | |
| "learning_rate": 6.928104575163398e-05, | |
| "loss": 4.9413, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.6557734204793029, | |
| "grad_norm": 2.6095030307769775, | |
| "learning_rate": 6.906318082788671e-05, | |
| "loss": 4.5653, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.6568627450980392, | |
| "grad_norm": 2.8048579692840576, | |
| "learning_rate": 6.884531590413945e-05, | |
| "loss": 4.5627, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.6579520697167756, | |
| "grad_norm": 3.750295400619507, | |
| "learning_rate": 6.862745098039216e-05, | |
| "loss": 4.6294, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.659041394335512, | |
| "grad_norm": 2.7482926845550537, | |
| "learning_rate": 6.840958605664488e-05, | |
| "loss": 4.821, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.6601307189542484, | |
| "grad_norm": 2.6860432624816895, | |
| "learning_rate": 6.81917211328976e-05, | |
| "loss": 4.5562, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.6612200435729847, | |
| "grad_norm": 3.9789106845855713, | |
| "learning_rate": 6.797385620915033e-05, | |
| "loss": 4.7817, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.6623093681917211, | |
| "grad_norm": 2.963273048400879, | |
| "learning_rate": 6.775599128540305e-05, | |
| "loss": 4.8488, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.6633986928104575, | |
| "grad_norm": 3.09425687789917, | |
| "learning_rate": 6.753812636165577e-05, | |
| "loss": 4.5239, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.664488017429194, | |
| "grad_norm": 2.897761106491089, | |
| "learning_rate": 6.73202614379085e-05, | |
| "loss": 4.6252, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6655773420479303, | |
| "grad_norm": 2.7389283180236816, | |
| "learning_rate": 6.710239651416122e-05, | |
| "loss": 4.4114, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 3.7740859985351562, | |
| "learning_rate": 6.688453159041395e-05, | |
| "loss": 5.1252, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.6677559912854031, | |
| "grad_norm": 2.69930362701416, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 4.8494, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.6688453159041394, | |
| "grad_norm": 3.3595452308654785, | |
| "learning_rate": 6.64488017429194e-05, | |
| "loss": 4.5464, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.6699346405228758, | |
| "grad_norm": 3.3550498485565186, | |
| "learning_rate": 6.623093681917212e-05, | |
| "loss": 4.0987, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.6710239651416122, | |
| "grad_norm": 2.9367904663085938, | |
| "learning_rate": 6.601307189542484e-05, | |
| "loss": 4.5163, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.6721132897603486, | |
| "grad_norm": 3.16866135597229, | |
| "learning_rate": 6.579520697167757e-05, | |
| "loss": 4.7276, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.673202614379085, | |
| "grad_norm": 2.5311694145202637, | |
| "learning_rate": 6.557734204793029e-05, | |
| "loss": 4.5561, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.6742919389978214, | |
| "grad_norm": 3.9656856060028076, | |
| "learning_rate": 6.535947712418301e-05, | |
| "loss": 4.7085, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.6753812636165577, | |
| "grad_norm": 3.392267942428589, | |
| "learning_rate": 6.514161220043574e-05, | |
| "loss": 4.4498, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.6764705882352942, | |
| "grad_norm": 3.2795276641845703, | |
| "learning_rate": 6.492374727668845e-05, | |
| "loss": 5.0139, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.6775599128540305, | |
| "grad_norm": 3.897383451461792, | |
| "learning_rate": 6.470588235294118e-05, | |
| "loss": 4.4428, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.6786492374727668, | |
| "grad_norm": 3.054150104522705, | |
| "learning_rate": 6.448801742919391e-05, | |
| "loss": 4.574, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.6797385620915033, | |
| "grad_norm": 3.345979690551758, | |
| "learning_rate": 6.427015250544662e-05, | |
| "loss": 4.7765, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.6808278867102396, | |
| "grad_norm": 2.7274982929229736, | |
| "learning_rate": 6.405228758169934e-05, | |
| "loss": 4.435, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.681917211328976, | |
| "grad_norm": 4.5814032554626465, | |
| "learning_rate": 6.383442265795208e-05, | |
| "loss": 4.1487, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.6830065359477124, | |
| "grad_norm": 4.058572292327881, | |
| "learning_rate": 6.361655773420479e-05, | |
| "loss": 4.386, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.6840958605664488, | |
| "grad_norm": 3.2232651710510254, | |
| "learning_rate": 6.339869281045751e-05, | |
| "loss": 4.8899, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.6851851851851852, | |
| "grad_norm": 3.23541259765625, | |
| "learning_rate": 6.318082788671025e-05, | |
| "loss": 4.6912, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.6862745098039216, | |
| "grad_norm": 4.35893440246582, | |
| "learning_rate": 6.296296296296296e-05, | |
| "loss": 4.6744, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.6873638344226579, | |
| "grad_norm": 2.809992790222168, | |
| "learning_rate": 6.274509803921569e-05, | |
| "loss": 4.4697, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.6884531590413944, | |
| "grad_norm": 3.3621973991394043, | |
| "learning_rate": 6.252723311546841e-05, | |
| "loss": 4.5625, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.6895424836601307, | |
| "grad_norm": 3.5378973484039307, | |
| "learning_rate": 6.230936819172115e-05, | |
| "loss": 4.4222, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.690631808278867, | |
| "grad_norm": 4.445308208465576, | |
| "learning_rate": 6.209150326797386e-05, | |
| "loss": 4.4816, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.6917211328976035, | |
| "grad_norm": 4.621172904968262, | |
| "learning_rate": 6.187363834422658e-05, | |
| "loss": 4.8582, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.6928104575163399, | |
| "grad_norm": 2.6876468658447266, | |
| "learning_rate": 6.16557734204793e-05, | |
| "loss": 5.1406, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.6938997821350763, | |
| "grad_norm": 2.912463665008545, | |
| "learning_rate": 6.143790849673203e-05, | |
| "loss": 4.3075, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.6949891067538126, | |
| "grad_norm": 3.2914223670959473, | |
| "learning_rate": 6.122004357298475e-05, | |
| "loss": 4.5181, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.696078431372549, | |
| "grad_norm": 3.1938445568084717, | |
| "learning_rate": 6.1002178649237476e-05, | |
| "loss": 4.2892, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.6971677559912854, | |
| "grad_norm": 3.2888052463531494, | |
| "learning_rate": 6.078431372549019e-05, | |
| "loss": 4.3035, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6982570806100218, | |
| "grad_norm": 3.7282330989837646, | |
| "learning_rate": 6.0566448801742924e-05, | |
| "loss": 4.5092, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.6993464052287581, | |
| "grad_norm": 3.121951103210449, | |
| "learning_rate": 6.034858387799565e-05, | |
| "loss": 4.41, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.7004357298474946, | |
| "grad_norm": 3.6562163829803467, | |
| "learning_rate": 6.0130718954248365e-05, | |
| "loss": 4.5502, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.7015250544662309, | |
| "grad_norm": 4.061103343963623, | |
| "learning_rate": 5.9912854030501095e-05, | |
| "loss": 4.4169, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.7026143790849673, | |
| "grad_norm": 2.988943338394165, | |
| "learning_rate": 5.969498910675382e-05, | |
| "loss": 4.4949, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.7037037037037037, | |
| "grad_norm": 3.66359281539917, | |
| "learning_rate": 5.9477124183006536e-05, | |
| "loss": 4.3983, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.7047930283224401, | |
| "grad_norm": 2.7517478466033936, | |
| "learning_rate": 5.925925925925926e-05, | |
| "loss": 4.5619, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.7058823529411765, | |
| "grad_norm": 3.325645923614502, | |
| "learning_rate": 5.904139433551199e-05, | |
| "loss": 4.3944, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.7069716775599129, | |
| "grad_norm": 2.393507242202759, | |
| "learning_rate": 5.882352941176471e-05, | |
| "loss": 4.3706, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.7080610021786492, | |
| "grad_norm": 2.5588436126708984, | |
| "learning_rate": 5.860566448801743e-05, | |
| "loss": 4.5016, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.7091503267973857, | |
| "grad_norm": 2.5094637870788574, | |
| "learning_rate": 5.838779956427015e-05, | |
| "loss": 4.3259, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.710239651416122, | |
| "grad_norm": 2.8639650344848633, | |
| "learning_rate": 5.8169934640522886e-05, | |
| "loss": 4.3741, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.7113289760348583, | |
| "grad_norm": 2.851107358932495, | |
| "learning_rate": 5.79520697167756e-05, | |
| "loss": 4.5406, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.7124183006535948, | |
| "grad_norm": 2.6943624019622803, | |
| "learning_rate": 5.773420479302833e-05, | |
| "loss": 4.7667, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.7135076252723311, | |
| "grad_norm": 3.8529598712921143, | |
| "learning_rate": 5.7516339869281044e-05, | |
| "loss": 4.1106, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.7145969498910676, | |
| "grad_norm": 3.688046932220459, | |
| "learning_rate": 5.7298474945533774e-05, | |
| "loss": 4.872, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.7156862745098039, | |
| "grad_norm": 6.292950630187988, | |
| "learning_rate": 5.70806100217865e-05, | |
| "loss": 4.9386, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.7167755991285403, | |
| "grad_norm": 2.9463226795196533, | |
| "learning_rate": 5.6862745098039215e-05, | |
| "loss": 4.3529, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.7178649237472767, | |
| "grad_norm": 3.75313138961792, | |
| "learning_rate": 5.664488017429194e-05, | |
| "loss": 4.3738, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.7189542483660131, | |
| "grad_norm": 3.0259487628936768, | |
| "learning_rate": 5.642701525054467e-05, | |
| "loss": 4.517, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.7200435729847494, | |
| "grad_norm": 3.362027168273926, | |
| "learning_rate": 5.620915032679739e-05, | |
| "loss": 4.5903, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.7211328976034859, | |
| "grad_norm": 3.0693953037261963, | |
| "learning_rate": 5.599128540305011e-05, | |
| "loss": 4.7722, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.7222222222222222, | |
| "grad_norm": 4.313173770904541, | |
| "learning_rate": 5.577342047930284e-05, | |
| "loss": 4.705, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.7233115468409586, | |
| "grad_norm": 3.1795902252197266, | |
| "learning_rate": 5.555555555555556e-05, | |
| "loss": 4.6037, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.724400871459695, | |
| "grad_norm": 2.650792360305786, | |
| "learning_rate": 5.533769063180828e-05, | |
| "loss": 4.5835, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.7254901960784313, | |
| "grad_norm": 3.9786386489868164, | |
| "learning_rate": 5.5119825708061e-05, | |
| "loss": 5.0628, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.7265795206971678, | |
| "grad_norm": 2.3720548152923584, | |
| "learning_rate": 5.490196078431373e-05, | |
| "loss": 4.4845, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.7276688453159041, | |
| "grad_norm": 4.014529705047607, | |
| "learning_rate": 5.4684095860566454e-05, | |
| "loss": 4.8008, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.7287581699346405, | |
| "grad_norm": 3.989478826522827, | |
| "learning_rate": 5.446623093681917e-05, | |
| "loss": 4.353, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.7298474945533769, | |
| "grad_norm": 2.8843913078308105, | |
| "learning_rate": 5.4248366013071894e-05, | |
| "loss": 4.5807, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.7309368191721133, | |
| "grad_norm": 4.555778980255127, | |
| "learning_rate": 5.4030501089324625e-05, | |
| "loss": 4.3632, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.7320261437908496, | |
| "grad_norm": 2.7697739601135254, | |
| "learning_rate": 5.381263616557735e-05, | |
| "loss": 4.7757, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.7331154684095861, | |
| "grad_norm": 6.817968845367432, | |
| "learning_rate": 5.3594771241830066e-05, | |
| "loss": 4.3715, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.7342047930283224, | |
| "grad_norm": 3.6644465923309326, | |
| "learning_rate": 5.337690631808279e-05, | |
| "loss": 4.3353, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.7352941176470589, | |
| "grad_norm": 2.378873586654663, | |
| "learning_rate": 5.315904139433552e-05, | |
| "loss": 4.3944, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.7363834422657952, | |
| "grad_norm": 1.8505460023880005, | |
| "learning_rate": 5.294117647058824e-05, | |
| "loss": 4.8842, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.7374727668845316, | |
| "grad_norm": 2.434199333190918, | |
| "learning_rate": 5.272331154684096e-05, | |
| "loss": 4.6757, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.738562091503268, | |
| "grad_norm": 3.9741463661193848, | |
| "learning_rate": 5.250544662309368e-05, | |
| "loss": 4.5491, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.7396514161220044, | |
| "grad_norm": 2.569467306137085, | |
| "learning_rate": 5.228758169934641e-05, | |
| "loss": 4.7716, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.7407407407407407, | |
| "grad_norm": 3.0343027114868164, | |
| "learning_rate": 5.206971677559913e-05, | |
| "loss": 4.8048, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.7418300653594772, | |
| "grad_norm": 1.8422495126724243, | |
| "learning_rate": 5.185185185185185e-05, | |
| "loss": 4.5785, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.7429193899782135, | |
| "grad_norm": 3.4610087871551514, | |
| "learning_rate": 5.163398692810458e-05, | |
| "loss": 4.5243, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.7440087145969498, | |
| "grad_norm": 3.4240050315856934, | |
| "learning_rate": 5.1416122004357304e-05, | |
| "loss": 4.4241, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.7450980392156863, | |
| "grad_norm": 3.1401493549346924, | |
| "learning_rate": 5.119825708061002e-05, | |
| "loss": 4.5411, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.7461873638344226, | |
| "grad_norm": 2.829155921936035, | |
| "learning_rate": 5.0980392156862745e-05, | |
| "loss": 4.9659, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.7472766884531591, | |
| "grad_norm": 4.25803279876709, | |
| "learning_rate": 5.0762527233115476e-05, | |
| "loss": 4.3894, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.7483660130718954, | |
| "grad_norm": 3.9126133918762207, | |
| "learning_rate": 5.054466230936819e-05, | |
| "loss": 3.9024, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.7494553376906318, | |
| "grad_norm": 3.5430266857147217, | |
| "learning_rate": 5.032679738562092e-05, | |
| "loss": 4.282, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.7505446623093682, | |
| "grad_norm": 3.804924249649048, | |
| "learning_rate": 5.0108932461873634e-05, | |
| "loss": 4.1057, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.7516339869281046, | |
| "grad_norm": 3.732877492904663, | |
| "learning_rate": 4.9891067538126364e-05, | |
| "loss": 4.4652, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.7527233115468409, | |
| "grad_norm": 2.9085025787353516, | |
| "learning_rate": 4.967320261437909e-05, | |
| "loss": 4.5023, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.7538126361655774, | |
| "grad_norm": 3.6516687870025635, | |
| "learning_rate": 4.945533769063181e-05, | |
| "loss": 4.8358, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.7549019607843137, | |
| "grad_norm": 4.306848526000977, | |
| "learning_rate": 4.9237472766884536e-05, | |
| "loss": 4.2137, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.7559912854030502, | |
| "grad_norm": 3.881239652633667, | |
| "learning_rate": 4.901960784313725e-05, | |
| "loss": 4.2181, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.7570806100217865, | |
| "grad_norm": 2.368997097015381, | |
| "learning_rate": 4.8801742919389983e-05, | |
| "loss": 4.6, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.7581699346405228, | |
| "grad_norm": 3.4072535037994385, | |
| "learning_rate": 4.85838779956427e-05, | |
| "loss": 4.4402, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.7592592592592593, | |
| "grad_norm": 2.909001588821411, | |
| "learning_rate": 4.8366013071895424e-05, | |
| "loss": 4.2182, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.7603485838779956, | |
| "grad_norm": 3.1234962940216064, | |
| "learning_rate": 4.814814814814815e-05, | |
| "loss": 4.2472, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.761437908496732, | |
| "grad_norm": 2.94049334526062, | |
| "learning_rate": 4.793028322440087e-05, | |
| "loss": 4.6319, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.7625272331154684, | |
| "grad_norm": 2.517805576324463, | |
| "learning_rate": 4.77124183006536e-05, | |
| "loss": 4.5978, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7636165577342048, | |
| "grad_norm": 3.0102155208587646, | |
| "learning_rate": 4.749455337690632e-05, | |
| "loss": 4.4812, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.7647058823529411, | |
| "grad_norm": 3.5447139739990234, | |
| "learning_rate": 4.7276688453159044e-05, | |
| "loss": 4.8397, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.7657952069716776, | |
| "grad_norm": 2.5676867961883545, | |
| "learning_rate": 4.705882352941177e-05, | |
| "loss": 4.3917, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.7668845315904139, | |
| "grad_norm": 5.074711322784424, | |
| "learning_rate": 4.684095860566449e-05, | |
| "loss": 4.8466, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.7679738562091504, | |
| "grad_norm": 3.212968587875366, | |
| "learning_rate": 4.6623093681917215e-05, | |
| "loss": 4.6356, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.7690631808278867, | |
| "grad_norm": 2.5076026916503906, | |
| "learning_rate": 4.640522875816994e-05, | |
| "loss": 4.3863, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.7701525054466231, | |
| "grad_norm": 3.6537904739379883, | |
| "learning_rate": 4.6187363834422656e-05, | |
| "loss": 4.1982, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.7712418300653595, | |
| "grad_norm": 3.170794725418091, | |
| "learning_rate": 4.5969498910675387e-05, | |
| "loss": 4.4426, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.7723311546840959, | |
| "grad_norm": 3.2717745304107666, | |
| "learning_rate": 4.5751633986928104e-05, | |
| "loss": 4.923, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.7734204793028322, | |
| "grad_norm": 3.9620261192321777, | |
| "learning_rate": 4.5533769063180834e-05, | |
| "loss": 4.2521, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.7745098039215687, | |
| "grad_norm": 4.35859489440918, | |
| "learning_rate": 4.531590413943355e-05, | |
| "loss": 3.9256, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.775599128540305, | |
| "grad_norm": 3.554739236831665, | |
| "learning_rate": 4.5098039215686275e-05, | |
| "loss": 4.2547, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.7766884531590414, | |
| "grad_norm": 2.5886950492858887, | |
| "learning_rate": 4.4880174291939e-05, | |
| "loss": 4.3773, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.7777777777777778, | |
| "grad_norm": 2.9419705867767334, | |
| "learning_rate": 4.466230936819172e-05, | |
| "loss": 4.4662, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.7788671023965141, | |
| "grad_norm": 3.4473655223846436, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 3.8291, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.7799564270152506, | |
| "grad_norm": 2.6257734298706055, | |
| "learning_rate": 4.422657952069717e-05, | |
| "loss": 4.5269, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.7810457516339869, | |
| "grad_norm": 4.320521831512451, | |
| "learning_rate": 4.400871459694989e-05, | |
| "loss": 4.5898, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.7821350762527233, | |
| "grad_norm": 3.2959165573120117, | |
| "learning_rate": 4.379084967320262e-05, | |
| "loss": 4.4443, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.7832244008714597, | |
| "grad_norm": 3.6437246799468994, | |
| "learning_rate": 4.357298474945534e-05, | |
| "loss": 4.5023, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.7843137254901961, | |
| "grad_norm": 2.935994863510132, | |
| "learning_rate": 4.3355119825708066e-05, | |
| "loss": 4.4349, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.7854030501089324, | |
| "grad_norm": 3.684237241744995, | |
| "learning_rate": 4.313725490196079e-05, | |
| "loss": 4.0764, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.7864923747276689, | |
| "grad_norm": 2.6446266174316406, | |
| "learning_rate": 4.291938997821351e-05, | |
| "loss": 4.483, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.7875816993464052, | |
| "grad_norm": 3.3432328701019287, | |
| "learning_rate": 4.270152505446624e-05, | |
| "loss": 4.1032, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.7886710239651417, | |
| "grad_norm": 2.900771379470825, | |
| "learning_rate": 4.2483660130718954e-05, | |
| "loss": 4.128, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.789760348583878, | |
| "grad_norm": 3.470869302749634, | |
| "learning_rate": 4.226579520697168e-05, | |
| "loss": 4.1844, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.7908496732026143, | |
| "grad_norm": 3.150548219680786, | |
| "learning_rate": 4.20479302832244e-05, | |
| "loss": 4.4777, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.7919389978213508, | |
| "grad_norm": 3.979973077774048, | |
| "learning_rate": 4.1830065359477126e-05, | |
| "loss": 4.5166, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.7930283224400871, | |
| "grad_norm": 3.882768392562866, | |
| "learning_rate": 4.161220043572985e-05, | |
| "loss": 4.3409, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.7941176470588235, | |
| "grad_norm": 3.7101409435272217, | |
| "learning_rate": 4.1394335511982573e-05, | |
| "loss": 4.6513, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.7952069716775599, | |
| "grad_norm": 3.294309616088867, | |
| "learning_rate": 4.11764705882353e-05, | |
| "loss": 4.6719, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.7962962962962963, | |
| "grad_norm": 3.4256606101989746, | |
| "learning_rate": 4.095860566448802e-05, | |
| "loss": 4.277, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.7973856209150327, | |
| "grad_norm": 3.583495616912842, | |
| "learning_rate": 4.074074074074074e-05, | |
| "loss": 4.2184, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.7984749455337691, | |
| "grad_norm": 2.8277370929718018, | |
| "learning_rate": 4.052287581699347e-05, | |
| "loss": 4.4167, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.7995642701525054, | |
| "grad_norm": 4.443241596221924, | |
| "learning_rate": 4.0305010893246186e-05, | |
| "loss": 4.1597, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.8006535947712419, | |
| "grad_norm": 3.9684319496154785, | |
| "learning_rate": 4.008714596949891e-05, | |
| "loss": 4.5253, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.8017429193899782, | |
| "grad_norm": 3.9552969932556152, | |
| "learning_rate": 3.986928104575164e-05, | |
| "loss": 4.4641, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.8028322440087146, | |
| "grad_norm": 2.4811275005340576, | |
| "learning_rate": 3.965141612200436e-05, | |
| "loss": 4.8492, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.803921568627451, | |
| "grad_norm": 2.727994918823242, | |
| "learning_rate": 3.943355119825709e-05, | |
| "loss": 4.7617, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.8050108932461874, | |
| "grad_norm": 3.1672701835632324, | |
| "learning_rate": 3.9215686274509805e-05, | |
| "loss": 4.341, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.8061002178649237, | |
| "grad_norm": 5.139643669128418, | |
| "learning_rate": 3.899782135076253e-05, | |
| "loss": 4.3594, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.8071895424836601, | |
| "grad_norm": 2.6922364234924316, | |
| "learning_rate": 3.877995642701525e-05, | |
| "loss": 4.786, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.8082788671023965, | |
| "grad_norm": 4.037826061248779, | |
| "learning_rate": 3.8562091503267977e-05, | |
| "loss": 4.7951, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.809368191721133, | |
| "grad_norm": 4.6529927253723145, | |
| "learning_rate": 3.83442265795207e-05, | |
| "loss": 4.7118, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.8104575163398693, | |
| "grad_norm": 4.143136978149414, | |
| "learning_rate": 3.8126361655773424e-05, | |
| "loss": 4.0313, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.8115468409586056, | |
| "grad_norm": 3.8727409839630127, | |
| "learning_rate": 3.790849673202614e-05, | |
| "loss": 4.9551, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.8126361655773421, | |
| "grad_norm": 3.256863594055176, | |
| "learning_rate": 3.769063180827887e-05, | |
| "loss": 4.4688, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.8137254901960784, | |
| "grad_norm": 3.701784610748291, | |
| "learning_rate": 3.747276688453159e-05, | |
| "loss": 4.3752, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.8148148148148148, | |
| "grad_norm": 3.1670970916748047, | |
| "learning_rate": 3.725490196078432e-05, | |
| "loss": 4.5925, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.8159041394335512, | |
| "grad_norm": 2.430621385574341, | |
| "learning_rate": 3.7037037037037037e-05, | |
| "loss": 4.497, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.8169934640522876, | |
| "grad_norm": 5.46519660949707, | |
| "learning_rate": 3.681917211328976e-05, | |
| "loss": 4.4218, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.818082788671024, | |
| "grad_norm": 2.524573564529419, | |
| "learning_rate": 3.6601307189542484e-05, | |
| "loss": 4.5478, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.8191721132897604, | |
| "grad_norm": 3.3287127017974854, | |
| "learning_rate": 3.638344226579521e-05, | |
| "loss": 4.3598, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.8202614379084967, | |
| "grad_norm": 3.003361940383911, | |
| "learning_rate": 3.616557734204793e-05, | |
| "loss": 4.2639, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.8213507625272332, | |
| "grad_norm": 3.4090774059295654, | |
| "learning_rate": 3.5947712418300656e-05, | |
| "loss": 4.3133, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.8224400871459695, | |
| "grad_norm": 2.570692300796509, | |
| "learning_rate": 3.572984749455338e-05, | |
| "loss": 4.4859, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.8235294117647058, | |
| "grad_norm": 3.2437076568603516, | |
| "learning_rate": 3.55119825708061e-05, | |
| "loss": 4.9471, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.8246187363834423, | |
| "grad_norm": 2.33784556388855, | |
| "learning_rate": 3.529411764705883e-05, | |
| "loss": 4.3232, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.8257080610021786, | |
| "grad_norm": 3.095179319381714, | |
| "learning_rate": 3.507625272331155e-05, | |
| "loss": 4.2713, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.826797385620915, | |
| "grad_norm": 2.924217462539673, | |
| "learning_rate": 3.4858387799564275e-05, | |
| "loss": 4.3772, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.8278867102396514, | |
| "grad_norm": 4.275852203369141, | |
| "learning_rate": 3.464052287581699e-05, | |
| "loss": 4.6822, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.8289760348583878, | |
| "grad_norm": 3.064005136489868, | |
| "learning_rate": 3.442265795206972e-05, | |
| "loss": 4.8159, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.8300653594771242, | |
| "grad_norm": 2.3928287029266357, | |
| "learning_rate": 3.420479302832244e-05, | |
| "loss": 4.4421, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.8311546840958606, | |
| "grad_norm": 3.7825465202331543, | |
| "learning_rate": 3.3986928104575163e-05, | |
| "loss": 4.3476, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.8322440087145969, | |
| "grad_norm": 3.0275168418884277, | |
| "learning_rate": 3.376906318082789e-05, | |
| "loss": 4.6799, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 2.415347099304199, | |
| "learning_rate": 3.355119825708061e-05, | |
| "loss": 4.5567, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.8344226579520697, | |
| "grad_norm": 4.181748390197754, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 4.8625, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.835511982570806, | |
| "grad_norm": 2.8122055530548096, | |
| "learning_rate": 3.311546840958606e-05, | |
| "loss": 4.4339, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.8366013071895425, | |
| "grad_norm": 5.145498752593994, | |
| "learning_rate": 3.289760348583878e-05, | |
| "loss": 4.4479, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.8376906318082789, | |
| "grad_norm": 2.8015735149383545, | |
| "learning_rate": 3.2679738562091506e-05, | |
| "loss": 4.4831, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.8387799564270153, | |
| "grad_norm": 2.6297879219055176, | |
| "learning_rate": 3.2461873638344223e-05, | |
| "loss": 4.3064, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.8398692810457516, | |
| "grad_norm": 3.4533751010894775, | |
| "learning_rate": 3.2244008714596954e-05, | |
| "loss": 4.3691, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.840958605664488, | |
| "grad_norm": 3.411375045776367, | |
| "learning_rate": 3.202614379084967e-05, | |
| "loss": 4.8371, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.8420479302832244, | |
| "grad_norm": 3.217210531234741, | |
| "learning_rate": 3.1808278867102395e-05, | |
| "loss": 4.521, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.8431372549019608, | |
| "grad_norm": 3.2574751377105713, | |
| "learning_rate": 3.1590413943355126e-05, | |
| "loss": 4.3908, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.8442265795206971, | |
| "grad_norm": 3.3532497882843018, | |
| "learning_rate": 3.137254901960784e-05, | |
| "loss": 4.4482, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.8453159041394336, | |
| "grad_norm": 3.3096394538879395, | |
| "learning_rate": 3.115468409586057e-05, | |
| "loss": 4.6897, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.8464052287581699, | |
| "grad_norm": 2.804602861404419, | |
| "learning_rate": 3.093681917211329e-05, | |
| "loss": 4.4341, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.8474945533769063, | |
| "grad_norm": 2.5430572032928467, | |
| "learning_rate": 3.0718954248366014e-05, | |
| "loss": 4.4146, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.8485838779956427, | |
| "grad_norm": 3.859699249267578, | |
| "learning_rate": 3.0501089324618738e-05, | |
| "loss": 4.3238, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.8496732026143791, | |
| "grad_norm": 3.4429585933685303, | |
| "learning_rate": 3.0283224400871462e-05, | |
| "loss": 4.1805, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.8507625272331155, | |
| "grad_norm": 3.5958237648010254, | |
| "learning_rate": 3.0065359477124182e-05, | |
| "loss": 4.6116, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.8518518518518519, | |
| "grad_norm": 4.12628173828125, | |
| "learning_rate": 2.984749455337691e-05, | |
| "loss": 4.0494, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.8529411764705882, | |
| "grad_norm": 3.068495988845825, | |
| "learning_rate": 2.962962962962963e-05, | |
| "loss": 4.236, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.8540305010893247, | |
| "grad_norm": 4.414676189422607, | |
| "learning_rate": 2.9411764705882354e-05, | |
| "loss": 4.2029, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.855119825708061, | |
| "grad_norm": 2.343167543411255, | |
| "learning_rate": 2.9193899782135074e-05, | |
| "loss": 4.6678, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.8562091503267973, | |
| "grad_norm": 2.415961980819702, | |
| "learning_rate": 2.89760348583878e-05, | |
| "loss": 4.4856, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.8572984749455338, | |
| "grad_norm": 2.7041404247283936, | |
| "learning_rate": 2.8758169934640522e-05, | |
| "loss": 4.2077, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.8583877995642701, | |
| "grad_norm": 3.7065887451171875, | |
| "learning_rate": 2.854030501089325e-05, | |
| "loss": 4.24, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.8594771241830066, | |
| "grad_norm": 2.7964565753936768, | |
| "learning_rate": 2.832244008714597e-05, | |
| "loss": 4.4061, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.8605664488017429, | |
| "grad_norm": 2.7362513542175293, | |
| "learning_rate": 2.8104575163398693e-05, | |
| "loss": 4.6079, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.8616557734204793, | |
| "grad_norm": 3.7887816429138184, | |
| "learning_rate": 2.788671023965142e-05, | |
| "loss": 4.8322, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.8627450980392157, | |
| "grad_norm": 2.9420385360717773, | |
| "learning_rate": 2.766884531590414e-05, | |
| "loss": 4.4393, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.8638344226579521, | |
| "grad_norm": 2.545464515686035, | |
| "learning_rate": 2.7450980392156865e-05, | |
| "loss": 4.1903, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.8649237472766884, | |
| "grad_norm": 2.869817018508911, | |
| "learning_rate": 2.7233115468409585e-05, | |
| "loss": 4.5073, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.8660130718954249, | |
| "grad_norm": 4.065328121185303, | |
| "learning_rate": 2.7015250544662313e-05, | |
| "loss": 4.131, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.8671023965141612, | |
| "grad_norm": 4.19705867767334, | |
| "learning_rate": 2.6797385620915033e-05, | |
| "loss": 3.9346, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.8681917211328976, | |
| "grad_norm": 2.0968496799468994, | |
| "learning_rate": 2.657952069716776e-05, | |
| "loss": 4.6387, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.869281045751634, | |
| "grad_norm": 3.3844027519226074, | |
| "learning_rate": 2.636165577342048e-05, | |
| "loss": 4.1972, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.8703703703703703, | |
| "grad_norm": 4.317149639129639, | |
| "learning_rate": 2.6143790849673204e-05, | |
| "loss": 3.8641, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.8714596949891068, | |
| "grad_norm": 2.7906928062438965, | |
| "learning_rate": 2.5925925925925925e-05, | |
| "loss": 4.895, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.8725490196078431, | |
| "grad_norm": 5.625737190246582, | |
| "learning_rate": 2.5708061002178652e-05, | |
| "loss": 4.1917, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.8736383442265795, | |
| "grad_norm": 3.180407762527466, | |
| "learning_rate": 2.5490196078431373e-05, | |
| "loss": 4.4799, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.8747276688453159, | |
| "grad_norm": 2.444420576095581, | |
| "learning_rate": 2.5272331154684096e-05, | |
| "loss": 4.4138, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.8758169934640523, | |
| "grad_norm": 3.7271342277526855, | |
| "learning_rate": 2.5054466230936817e-05, | |
| "loss": 4.5968, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.8769063180827886, | |
| "grad_norm": 2.372236967086792, | |
| "learning_rate": 2.4836601307189544e-05, | |
| "loss": 4.6474, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.8779956427015251, | |
| "grad_norm": 3.3021676540374756, | |
| "learning_rate": 2.4618736383442268e-05, | |
| "loss": 4.3627, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.8790849673202614, | |
| "grad_norm": 5.0191779136657715, | |
| "learning_rate": 2.4400871459694992e-05, | |
| "loss": 4.4277, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.8801742919389978, | |
| "grad_norm": 2.840336322784424, | |
| "learning_rate": 2.4183006535947712e-05, | |
| "loss": 4.821, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.8812636165577342, | |
| "grad_norm": 2.553668260574341, | |
| "learning_rate": 2.3965141612200436e-05, | |
| "loss": 4.2192, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.8823529411764706, | |
| "grad_norm": 3.1289873123168945, | |
| "learning_rate": 2.374727668845316e-05, | |
| "loss": 4.6784, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.883442265795207, | |
| "grad_norm": 2.4871625900268555, | |
| "learning_rate": 2.3529411764705884e-05, | |
| "loss": 4.2971, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.8845315904139434, | |
| "grad_norm": 3.015580654144287, | |
| "learning_rate": 2.3311546840958608e-05, | |
| "loss": 4.1916, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.8856209150326797, | |
| "grad_norm": 3.12770676612854, | |
| "learning_rate": 2.3093681917211328e-05, | |
| "loss": 4.8488, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.8867102396514162, | |
| "grad_norm": 4.185814380645752, | |
| "learning_rate": 2.2875816993464052e-05, | |
| "loss": 4.1931, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.8877995642701525, | |
| "grad_norm": 3.2363178730010986, | |
| "learning_rate": 2.2657952069716776e-05, | |
| "loss": 3.9761, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 3.264817953109741, | |
| "learning_rate": 2.24400871459695e-05, | |
| "loss": 3.9483, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.8899782135076253, | |
| "grad_norm": 2.8902573585510254, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 4.3474, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.8910675381263616, | |
| "grad_norm": 4.015153408050537, | |
| "learning_rate": 2.2004357298474944e-05, | |
| "loss": 5.0101, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.8921568627450981, | |
| "grad_norm": 2.7316837310791016, | |
| "learning_rate": 2.178649237472767e-05, | |
| "loss": 4.6326, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.8932461873638344, | |
| "grad_norm": 3.2079977989196777, | |
| "learning_rate": 2.1568627450980395e-05, | |
| "loss": 4.6429, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.8943355119825708, | |
| "grad_norm": 3.246037006378174, | |
| "learning_rate": 2.135076252723312e-05, | |
| "loss": 4.6208, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.8954248366013072, | |
| "grad_norm": 3.2515430450439453, | |
| "learning_rate": 2.113289760348584e-05, | |
| "loss": 4.5181, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.8965141612200436, | |
| "grad_norm": 3.3833346366882324, | |
| "learning_rate": 2.0915032679738563e-05, | |
| "loss": 4.4344, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.8976034858387799, | |
| "grad_norm": 3.3735289573669434, | |
| "learning_rate": 2.0697167755991287e-05, | |
| "loss": 4.184, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.8986928104575164, | |
| "grad_norm": 3.0778541564941406, | |
| "learning_rate": 2.047930283224401e-05, | |
| "loss": 4.3511, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.8997821350762527, | |
| "grad_norm": 3.2135345935821533, | |
| "learning_rate": 2.0261437908496734e-05, | |
| "loss": 4.2923, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.900871459694989, | |
| "grad_norm": 3.828021287918091, | |
| "learning_rate": 2.0043572984749455e-05, | |
| "loss": 4.0934, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.9019607843137255, | |
| "grad_norm": 2.8927841186523438, | |
| "learning_rate": 1.982570806100218e-05, | |
| "loss": 4.4145, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.9030501089324618, | |
| "grad_norm": 2.8813278675079346, | |
| "learning_rate": 1.9607843137254903e-05, | |
| "loss": 4.2101, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.9041394335511983, | |
| "grad_norm": 3.247687816619873, | |
| "learning_rate": 1.9389978213507626e-05, | |
| "loss": 4.449, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.9052287581699346, | |
| "grad_norm": 2.423374652862549, | |
| "learning_rate": 1.917211328976035e-05, | |
| "loss": 4.2909, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.906318082788671, | |
| "grad_norm": 2.915576934814453, | |
| "learning_rate": 1.895424836601307e-05, | |
| "loss": 4.2515, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.9074074074074074, | |
| "grad_norm": 3.332075595855713, | |
| "learning_rate": 1.8736383442265794e-05, | |
| "loss": 4.1769, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.9084967320261438, | |
| "grad_norm": 3.268340587615967, | |
| "learning_rate": 1.8518518518518518e-05, | |
| "loss": 4.0638, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.9095860566448801, | |
| "grad_norm": 4.253492832183838, | |
| "learning_rate": 1.8300653594771242e-05, | |
| "loss": 4.6748, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.9106753812636166, | |
| "grad_norm": 3.4260220527648926, | |
| "learning_rate": 1.8082788671023966e-05, | |
| "loss": 4.1754, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.9117647058823529, | |
| "grad_norm": 4.5677266120910645, | |
| "learning_rate": 1.786492374727669e-05, | |
| "loss": 4.4002, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.9128540305010894, | |
| "grad_norm": 2.8352746963500977, | |
| "learning_rate": 1.7647058823529414e-05, | |
| "loss": 4.1695, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.9139433551198257, | |
| "grad_norm": 3.011715888977051, | |
| "learning_rate": 1.7429193899782137e-05, | |
| "loss": 4.2904, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.9150326797385621, | |
| "grad_norm": 3.083839178085327, | |
| "learning_rate": 1.721132897603486e-05, | |
| "loss": 4.4036, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.9161220043572985, | |
| "grad_norm": 4.099680423736572, | |
| "learning_rate": 1.6993464052287582e-05, | |
| "loss": 5.0838, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.9172113289760349, | |
| "grad_norm": 2.89890193939209, | |
| "learning_rate": 1.6775599128540306e-05, | |
| "loss": 4.235, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.9183006535947712, | |
| "grad_norm": 3.054748296737671, | |
| "learning_rate": 1.655773420479303e-05, | |
| "loss": 4.7427, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.9193899782135077, | |
| "grad_norm": 2.4290452003479004, | |
| "learning_rate": 1.6339869281045753e-05, | |
| "loss": 4.5623, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.920479302832244, | |
| "grad_norm": 3.0590555667877197, | |
| "learning_rate": 1.6122004357298477e-05, | |
| "loss": 4.617, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.9215686274509803, | |
| "grad_norm": 3.4882876873016357, | |
| "learning_rate": 1.5904139433551197e-05, | |
| "loss": 4.4919, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.9226579520697168, | |
| "grad_norm": 2.5416266918182373, | |
| "learning_rate": 1.568627450980392e-05, | |
| "loss": 4.2478, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.9237472766884531, | |
| "grad_norm": 3.034259796142578, | |
| "learning_rate": 1.5468409586056645e-05, | |
| "loss": 4.334, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.9248366013071896, | |
| "grad_norm": 3.5231168270111084, | |
| "learning_rate": 1.5250544662309369e-05, | |
| "loss": 4.4518, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.9259259259259259, | |
| "grad_norm": 3.0768980979919434, | |
| "learning_rate": 1.5032679738562091e-05, | |
| "loss": 4.2201, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.9270152505446623, | |
| "grad_norm": 2.9910175800323486, | |
| "learning_rate": 1.4814814814814815e-05, | |
| "loss": 4.5128, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.9281045751633987, | |
| "grad_norm": 4.108496189117432, | |
| "learning_rate": 1.4596949891067537e-05, | |
| "loss": 4.472, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.9291938997821351, | |
| "grad_norm": 2.6037635803222656, | |
| "learning_rate": 1.4379084967320261e-05, | |
| "loss": 4.4553, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.9302832244008714, | |
| "grad_norm": 3.0736937522888184, | |
| "learning_rate": 1.4161220043572985e-05, | |
| "loss": 4.226, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.9313725490196079, | |
| "grad_norm": 2.5338780879974365, | |
| "learning_rate": 1.394335511982571e-05, | |
| "loss": 4.4091, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.9324618736383442, | |
| "grad_norm": 3.7258126735687256, | |
| "learning_rate": 1.3725490196078432e-05, | |
| "loss": 4.6917, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.9335511982570807, | |
| "grad_norm": 4.166601181030273, | |
| "learning_rate": 1.3507625272331156e-05, | |
| "loss": 4.143, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.934640522875817, | |
| "grad_norm": 2.4743645191192627, | |
| "learning_rate": 1.328976034858388e-05, | |
| "loss": 4.1835, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.9357298474945533, | |
| "grad_norm": 2.661558151245117, | |
| "learning_rate": 1.3071895424836602e-05, | |
| "loss": 4.5051, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.9368191721132898, | |
| "grad_norm": 2.705817699432373, | |
| "learning_rate": 1.2854030501089326e-05, | |
| "loss": 4.4325, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.9379084967320261, | |
| "grad_norm": 2.885896921157837, | |
| "learning_rate": 1.2636165577342048e-05, | |
| "loss": 4.5767, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.9389978213507625, | |
| "grad_norm": 3.1418848037719727, | |
| "learning_rate": 1.2418300653594772e-05, | |
| "loss": 4.2073, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.9400871459694989, | |
| "grad_norm": 2.982358694076538, | |
| "learning_rate": 1.2200435729847496e-05, | |
| "loss": 4.4629, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": 2.5018253326416016, | |
| "learning_rate": 1.1982570806100218e-05, | |
| "loss": 4.3306, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.9422657952069716, | |
| "grad_norm": 2.8917834758758545, | |
| "learning_rate": 1.1764705882352942e-05, | |
| "loss": 4.096, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.9433551198257081, | |
| "grad_norm": 4.460819721221924, | |
| "learning_rate": 1.1546840958605664e-05, | |
| "loss": 4.4547, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.9444444444444444, | |
| "grad_norm": 3.612231492996216, | |
| "learning_rate": 1.1328976034858388e-05, | |
| "loss": 4.4797, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.9455337690631809, | |
| "grad_norm": 3.5711090564727783, | |
| "learning_rate": 1.1111111111111112e-05, | |
| "loss": 4.5295, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.9466230936819172, | |
| "grad_norm": 3.783479928970337, | |
| "learning_rate": 1.0893246187363835e-05, | |
| "loss": 3.9625, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.9477124183006536, | |
| "grad_norm": 2.980947256088257, | |
| "learning_rate": 1.067538126361656e-05, | |
| "loss": 4.487, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.94880174291939, | |
| "grad_norm": 3.743603229522705, | |
| "learning_rate": 1.0457516339869281e-05, | |
| "loss": 4.4444, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.9498910675381264, | |
| "grad_norm": 2.9100987911224365, | |
| "learning_rate": 1.0239651416122005e-05, | |
| "loss": 4.4357, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.9509803921568627, | |
| "grad_norm": 3.2935433387756348, | |
| "learning_rate": 1.0021786492374727e-05, | |
| "loss": 4.6886, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.9520697167755992, | |
| "grad_norm": 3.8862123489379883, | |
| "learning_rate": 9.803921568627451e-06, | |
| "loss": 4.1871, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.9531590413943355, | |
| "grad_norm": 3.0392003059387207, | |
| "learning_rate": 9.586056644880175e-06, | |
| "loss": 4.7113, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.954248366013072, | |
| "grad_norm": 2.377678871154785, | |
| "learning_rate": 9.368191721132897e-06, | |
| "loss": 4.7133, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.9553376906318083, | |
| "grad_norm": 4.4908223152160645, | |
| "learning_rate": 9.150326797385621e-06, | |
| "loss": 4.5903, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.9564270152505446, | |
| "grad_norm": 4.116386413574219, | |
| "learning_rate": 8.932461873638345e-06, | |
| "loss": 4.3146, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.9575163398692811, | |
| "grad_norm": 3.787520408630371, | |
| "learning_rate": 8.714596949891069e-06, | |
| "loss": 4.7772, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.9586056644880174, | |
| "grad_norm": 4.713115692138672, | |
| "learning_rate": 8.496732026143791e-06, | |
| "loss": 4.077, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.9596949891067538, | |
| "grad_norm": 3.4946200847625732, | |
| "learning_rate": 8.278867102396515e-06, | |
| "loss": 4.4494, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.9607843137254902, | |
| "grad_norm": 7.297133922576904, | |
| "learning_rate": 8.061002178649239e-06, | |
| "loss": 3.8255, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.9618736383442266, | |
| "grad_norm": 3.2210330963134766, | |
| "learning_rate": 7.84313725490196e-06, | |
| "loss": 4.3539, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.9629629629629629, | |
| "grad_norm": 3.9630517959594727, | |
| "learning_rate": 7.6252723311546845e-06, | |
| "loss": 4.9081, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.9640522875816994, | |
| "grad_norm": 3.165914297103882, | |
| "learning_rate": 7.4074074074074075e-06, | |
| "loss": 4.2638, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.9651416122004357, | |
| "grad_norm": 2.8624045848846436, | |
| "learning_rate": 7.1895424836601305e-06, | |
| "loss": 4.3948, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.9662309368191722, | |
| "grad_norm": 3.314714193344116, | |
| "learning_rate": 6.971677559912855e-06, | |
| "loss": 4.7292, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.9673202614379085, | |
| "grad_norm": 2.2992618083953857, | |
| "learning_rate": 6.753812636165578e-06, | |
| "loss": 4.4612, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.9684095860566448, | |
| "grad_norm": 4.193181991577148, | |
| "learning_rate": 6.535947712418301e-06, | |
| "loss": 4.3218, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.9694989106753813, | |
| "grad_norm": 2.4635162353515625, | |
| "learning_rate": 6.318082788671024e-06, | |
| "loss": 4.5963, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.9705882352941176, | |
| "grad_norm": 2.4746878147125244, | |
| "learning_rate": 6.100217864923748e-06, | |
| "loss": 4.3439, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.971677559912854, | |
| "grad_norm": 2.770826578140259, | |
| "learning_rate": 5.882352941176471e-06, | |
| "loss": 4.1497, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.9727668845315904, | |
| "grad_norm": 2.483689546585083, | |
| "learning_rate": 5.664488017429194e-06, | |
| "loss": 4.3472, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.9738562091503268, | |
| "grad_norm": 3.953700542449951, | |
| "learning_rate": 5.446623093681918e-06, | |
| "loss": 4.1945, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.9749455337690632, | |
| "grad_norm": 3.3949203491210938, | |
| "learning_rate": 5.228758169934641e-06, | |
| "loss": 4.2244, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.9760348583877996, | |
| "grad_norm": 3.2721428871154785, | |
| "learning_rate": 5.010893246187364e-06, | |
| "loss": 5.1877, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.9771241830065359, | |
| "grad_norm": 3.0004539489746094, | |
| "learning_rate": 4.7930283224400875e-06, | |
| "loss": 4.4266, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.9782135076252724, | |
| "grad_norm": 2.427356243133545, | |
| "learning_rate": 4.5751633986928105e-06, | |
| "loss": 4.3152, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.9793028322440087, | |
| "grad_norm": 2.8808891773223877, | |
| "learning_rate": 4.357298474945534e-06, | |
| "loss": 4.5186, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.9803921568627451, | |
| "grad_norm": 2.924720287322998, | |
| "learning_rate": 4.139433551198257e-06, | |
| "loss": 4.4141, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.9814814814814815, | |
| "grad_norm": 2.356206178665161, | |
| "learning_rate": 3.92156862745098e-06, | |
| "loss": 4.6122, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.9825708061002179, | |
| "grad_norm": 2.7468602657318115, | |
| "learning_rate": 3.7037037037037037e-06, | |
| "loss": 4.3972, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.9836601307189542, | |
| "grad_norm": 3.2754995822906494, | |
| "learning_rate": 3.4858387799564276e-06, | |
| "loss": 4.2037, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.9847494553376906, | |
| "grad_norm": 3.5675883293151855, | |
| "learning_rate": 3.2679738562091506e-06, | |
| "loss": 4.1336, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.985838779956427, | |
| "grad_norm": 4.010306358337402, | |
| "learning_rate": 3.050108932461874e-06, | |
| "loss": 4.7152, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.9869281045751634, | |
| "grad_norm": 3.2009243965148926, | |
| "learning_rate": 2.832244008714597e-06, | |
| "loss": 4.3139, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.9880174291938998, | |
| "grad_norm": 3.407482624053955, | |
| "learning_rate": 2.6143790849673204e-06, | |
| "loss": 4.4083, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.9891067538126361, | |
| "grad_norm": 2.407134532928467, | |
| "learning_rate": 2.3965141612200438e-06, | |
| "loss": 4.3022, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.9901960784313726, | |
| "grad_norm": 2.962719202041626, | |
| "learning_rate": 2.178649237472767e-06, | |
| "loss": 4.9264, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.9912854030501089, | |
| "grad_norm": 2.8014609813690186, | |
| "learning_rate": 1.96078431372549e-06, | |
| "loss": 4.5361, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.9923747276688453, | |
| "grad_norm": 3.681654453277588, | |
| "learning_rate": 1.7429193899782138e-06, | |
| "loss": 4.358, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.9934640522875817, | |
| "grad_norm": 2.6658272743225098, | |
| "learning_rate": 1.525054466230937e-06, | |
| "loss": 4.4824, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.9945533769063181, | |
| "grad_norm": 3.949403762817383, | |
| "learning_rate": 1.3071895424836602e-06, | |
| "loss": 4.1765, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.9956427015250545, | |
| "grad_norm": 2.6979477405548096, | |
| "learning_rate": 1.0893246187363836e-06, | |
| "loss": 4.34, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.9967320261437909, | |
| "grad_norm": 3.2136738300323486, | |
| "learning_rate": 8.714596949891069e-07, | |
| "loss": 4.4514, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.9978213507625272, | |
| "grad_norm": 3.675879955291748, | |
| "learning_rate": 6.535947712418301e-07, | |
| "loss": 4.4091, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.9989106753812637, | |
| "grad_norm": 4.200655460357666, | |
| "learning_rate": 4.3572984749455345e-07, | |
| "loss": 4.7659, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 5.2691826820373535, | |
| "learning_rate": 2.1786492374727672e-07, | |
| "loss": 4.521, | |
| "step": 918 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 918, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 5000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 921405048453120.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |