{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9997280391623606, "eval_steps": 500, "global_step": 7352, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00013598041881968996, "grad_norm": 1.6484375, "learning_rate": 0.0, "loss": 0.973, "memory/device_mem_reserved(gib)": 47.64, "memory/max_mem_active(gib)": 37.56, "memory/max_mem_allocated(gib)": 37.56, "step": 1 }, { "epoch": 0.0002719608376393799, "grad_norm": 1.9296875, "learning_rate": 2.7210884353741497e-08, "loss": 1.109, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2 }, { "epoch": 0.0004079412564590699, "grad_norm": 0.9765625, "learning_rate": 5.4421768707482993e-08, "loss": 0.582, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3 }, { "epoch": 0.0005439216752787598, "grad_norm": 1.625, "learning_rate": 8.163265306122451e-08, "loss": 1.0596, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4 }, { "epoch": 0.0006799020940984499, "grad_norm": 0.93359375, "learning_rate": 1.0884353741496599e-07, "loss": 0.5951, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5 }, { "epoch": 0.0008158825129181398, "grad_norm": 1.4453125, "learning_rate": 1.360544217687075e-07, "loss": 0.9098, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6 }, { "epoch": 0.0009518629317378297, "grad_norm": 1.6796875, "learning_rate": 1.6326530612244901e-07, "loss": 1.0146, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7 }, { "epoch": 0.0010878433505575197, "grad_norm": 1.1875, "learning_rate": 1.904761904761905e-07, "loss": 0.6969, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 8 }, { "epoch": 0.0012238237693772097, "grad_norm": 1.1953125, "learning_rate": 2.1768707482993197e-07, "loss": 0.8058, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 9 }, { "epoch": 0.0013598041881968997, "grad_norm": 1.2734375, "learning_rate": 2.4489795918367347e-07, "loss": 0.8242, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 10 }, { "epoch": 0.0014957846070165896, "grad_norm": 0.84375, "learning_rate": 2.72108843537415e-07, "loss": 0.499, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 11 }, { "epoch": 0.0016317650258362796, "grad_norm": 1.578125, "learning_rate": 2.9931972789115645e-07, "loss": 0.9258, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 12 }, { "epoch": 0.0017677454446559696, "grad_norm": 1.1796875, "learning_rate": 3.2653061224489803e-07, "loss": 0.8031, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 13 }, { "epoch": 0.0019037258634756595, "grad_norm": 2.265625, "learning_rate": 3.537414965986395e-07, "loss": 1.1639, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 14 }, { "epoch": 0.0020397062822953495, "grad_norm": 1.5546875, "learning_rate": 3.80952380952381e-07, "loss": 0.959, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 15 }, { "epoch": 0.0021756867011150393, "grad_norm": 1.078125, "learning_rate": 4.0816326530612243e-07, "loss": 0.7271, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 16 }, { "epoch": 0.0023116671199347296, "grad_norm": 1.2265625, "learning_rate": 4.3537414965986395e-07, "loss": 0.8768, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 17 }, { "epoch": 0.0024476475387544194, "grad_norm": 1.5390625, "learning_rate": 4.6258503401360547e-07, "loss": 1.0246, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 18 }, { "epoch": 0.002583627957574109, "grad_norm": 1.21875, "learning_rate": 4.897959183673469e-07, "loss": 0.7928, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 19 }, { "epoch": 0.0027196083763937995, "grad_norm": 8.4375, "learning_rate": 5.170068027210885e-07, "loss": 1.9658, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 20 }, { "epoch": 0.0028555887952134893, "grad_norm": 1.203125, "learning_rate": 5.4421768707483e-07, "loss": 0.7484, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 21 }, { "epoch": 0.002991569214033179, "grad_norm": 1.078125, "learning_rate": 5.714285714285715e-07, "loss": 0.6932, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 22 }, { "epoch": 0.0031275496328528694, "grad_norm": 0.875, "learning_rate": 5.986394557823129e-07, "loss": 0.5314, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 23 }, { "epoch": 0.003263530051672559, "grad_norm": 1.2890625, "learning_rate": 6.258503401360545e-07, "loss": 0.8307, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 24 }, { "epoch": 0.003399510470492249, "grad_norm": 0.8203125, "learning_rate": 6.530612244897961e-07, "loss": 0.5645, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 25 }, { "epoch": 0.0035354908893119393, "grad_norm": 1.75, "learning_rate": 6.802721088435376e-07, "loss": 1.1427, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 26 }, { "epoch": 0.003671471308131629, "grad_norm": 1.546875, "learning_rate": 7.07482993197279e-07, "loss": 0.9694, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 27 }, { "epoch": 0.003807451726951319, "grad_norm": 1.328125, "learning_rate": 7.346938775510205e-07, "loss": 0.8584, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 28 }, { "epoch": 0.003943432145771009, "grad_norm": 1.5, "learning_rate": 7.61904761904762e-07, "loss": 0.9222, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 29 }, { "epoch": 0.004079412564590699, "grad_norm": 1.4296875, "learning_rate": 7.891156462585034e-07, "loss": 0.8164, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 30 }, { "epoch": 0.004215392983410389, "grad_norm": 1.7265625, "learning_rate": 8.163265306122449e-07, "loss": 0.9699, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 31 }, { "epoch": 0.004351373402230079, "grad_norm": 1.21875, "learning_rate": 8.435374149659864e-07, "loss": 0.7445, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 32 }, { "epoch": 0.0044873538210497685, "grad_norm": 1.390625, "learning_rate": 8.707482993197279e-07, "loss": 0.925, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 33 }, { "epoch": 0.004623334239869459, "grad_norm": 1.5390625, "learning_rate": 8.979591836734694e-07, "loss": 0.9312, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 34 }, { "epoch": 0.004759314658689149, "grad_norm": 1.953125, "learning_rate": 9.251700680272109e-07, "loss": 1.0957, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 35 }, { "epoch": 0.004895295077508839, "grad_norm": 1.3359375, "learning_rate": 9.523809523809525e-07, "loss": 0.9211, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 36 }, { "epoch": 0.005031275496328529, "grad_norm": 1.515625, "learning_rate": 9.795918367346939e-07, "loss": 0.9463, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 37 }, { "epoch": 0.005167255915148218, "grad_norm": 1.3203125, "learning_rate": 1.0068027210884354e-06, "loss": 0.8509, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 38 }, { "epoch": 0.005303236333967908, "grad_norm": 1.09375, "learning_rate": 1.034013605442177e-06, "loss": 0.7588, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 39 }, { "epoch": 0.005439216752787599, "grad_norm": 1.5390625, "learning_rate": 1.0612244897959184e-06, "loss": 0.9951, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 40 }, { "epoch": 0.005575197171607289, "grad_norm": 1.625, "learning_rate": 1.08843537414966e-06, "loss": 1.0251, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 41 }, { "epoch": 0.005711177590426979, "grad_norm": 1.15625, "learning_rate": 1.1156462585034015e-06, "loss": 0.7757, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 42 }, { "epoch": 0.005847158009246668, "grad_norm": 1.265625, "learning_rate": 1.142857142857143e-06, "loss": 0.7581, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 43 }, { "epoch": 0.005983138428066358, "grad_norm": 1.3046875, "learning_rate": 1.1700680272108845e-06, "loss": 0.8813, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 44 }, { "epoch": 0.006119118846886048, "grad_norm": 1.484375, "learning_rate": 1.1972789115646258e-06, "loss": 0.9515, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 45 }, { "epoch": 0.006255099265705739, "grad_norm": 1.1796875, "learning_rate": 1.2244897959183673e-06, "loss": 0.8459, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 46 }, { "epoch": 0.0063910796845254286, "grad_norm": 0.890625, "learning_rate": 1.251700680272109e-06, "loss": 0.6001, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 47 }, { "epoch": 0.006527060103345118, "grad_norm": 1.09375, "learning_rate": 1.2789115646258504e-06, "loss": 0.7272, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 48 }, { "epoch": 0.006663040522164808, "grad_norm": 1.578125, "learning_rate": 1.3061224489795921e-06, "loss": 1.0622, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 49 }, { "epoch": 0.006799020940984498, "grad_norm": 1.0703125, "learning_rate": 1.3333333333333334e-06, "loss": 0.6992, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 50 }, { "epoch": 0.006935001359804188, "grad_norm": 1.265625, "learning_rate": 1.3605442176870751e-06, "loss": 0.7513, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 51 }, { "epoch": 0.0070709817786238785, "grad_norm": 1.1953125, "learning_rate": 1.3877551020408165e-06, "loss": 0.8815, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 52 }, { "epoch": 0.007206962197443568, "grad_norm": 1.15625, "learning_rate": 1.414965986394558e-06, "loss": 0.7988, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 53 }, { "epoch": 0.007342942616263258, "grad_norm": 0.7890625, "learning_rate": 1.4421768707482995e-06, "loss": 0.5962, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 54 }, { "epoch": 0.007478923035082948, "grad_norm": 1.46875, "learning_rate": 1.469387755102041e-06, "loss": 1.0454, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 55 }, { "epoch": 0.007614903453902638, "grad_norm": 1.5390625, "learning_rate": 1.4965986394557825e-06, "loss": 1.0671, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 56 }, { "epoch": 0.007750883872722328, "grad_norm": 0.71484375, "learning_rate": 1.523809523809524e-06, "loss": 0.5267, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 57 }, { "epoch": 0.007886864291542018, "grad_norm": 1.0546875, "learning_rate": 1.5510204081632654e-06, "loss": 0.8149, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 58 }, { "epoch": 0.008022844710361708, "grad_norm": 1.515625, "learning_rate": 1.5782312925170069e-06, "loss": 1.0713, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 59 }, { "epoch": 0.008158825129181398, "grad_norm": 1.2734375, "learning_rate": 1.6054421768707484e-06, "loss": 0.9425, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 60 }, { "epoch": 0.008294805548001088, "grad_norm": 1.40625, "learning_rate": 1.6326530612244897e-06, "loss": 1.0483, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 61 }, { "epoch": 0.008430785966820778, "grad_norm": 1.078125, "learning_rate": 1.6598639455782314e-06, "loss": 0.8156, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 62 }, { "epoch": 0.008566766385640467, "grad_norm": 1.328125, "learning_rate": 1.6870748299319727e-06, "loss": 0.8403, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 63 }, { "epoch": 0.008702746804460157, "grad_norm": 1.3984375, "learning_rate": 1.7142857142857145e-06, "loss": 1.0612, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 64 }, { "epoch": 0.008838727223279847, "grad_norm": 1.2109375, "learning_rate": 1.7414965986394558e-06, "loss": 0.8657, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 65 }, { "epoch": 0.008974707642099537, "grad_norm": 1.078125, "learning_rate": 1.7687074829931975e-06, "loss": 0.7617, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 66 }, { "epoch": 0.009110688060919228, "grad_norm": 1.4765625, "learning_rate": 1.7959183673469388e-06, "loss": 1.0361, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 67 }, { "epoch": 0.009246668479738918, "grad_norm": 0.98046875, "learning_rate": 1.8231292517006803e-06, "loss": 0.7256, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 68 }, { "epoch": 0.009382648898558608, "grad_norm": 0.86328125, "learning_rate": 1.8503401360544219e-06, "loss": 0.62, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 69 }, { "epoch": 0.009518629317378298, "grad_norm": 0.96484375, "learning_rate": 1.8775510204081634e-06, "loss": 0.6696, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 70 }, { "epoch": 0.009654609736197988, "grad_norm": 1.25, "learning_rate": 1.904761904761905e-06, "loss": 0.8675, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 71 }, { "epoch": 0.009790590155017678, "grad_norm": 4.0, "learning_rate": 1.9319727891156464e-06, "loss": 1.1577, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 72 }, { "epoch": 0.009926570573837367, "grad_norm": 1.0625, "learning_rate": 1.9591836734693877e-06, "loss": 0.7135, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 73 }, { "epoch": 0.010062550992657057, "grad_norm": 1.0078125, "learning_rate": 1.9863945578231295e-06, "loss": 0.7959, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 74 }, { "epoch": 0.010198531411476747, "grad_norm": 0.88671875, "learning_rate": 2.0136054421768708e-06, "loss": 0.6812, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 75 }, { "epoch": 0.010334511830296437, "grad_norm": 1.1953125, "learning_rate": 2.0408163265306125e-06, "loss": 0.9491, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 76 }, { "epoch": 0.010470492249116127, "grad_norm": 1.0859375, "learning_rate": 2.068027210884354e-06, "loss": 0.8219, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 77 }, { "epoch": 0.010606472667935817, "grad_norm": 1.0859375, "learning_rate": 2.0952380952380955e-06, "loss": 0.887, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 78 }, { "epoch": 0.010742453086755508, "grad_norm": 1.1640625, "learning_rate": 2.122448979591837e-06, "loss": 0.9248, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 79 }, { "epoch": 0.010878433505575198, "grad_norm": 1.4296875, "learning_rate": 2.1496598639455786e-06, "loss": 1.009, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 80 }, { "epoch": 0.011014413924394888, "grad_norm": 0.94140625, "learning_rate": 2.17687074829932e-06, "loss": 0.7104, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 81 }, { "epoch": 0.011150394343214578, "grad_norm": 1.15625, "learning_rate": 2.2040816326530616e-06, "loss": 0.9108, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 82 }, { "epoch": 0.011286374762034267, "grad_norm": 0.79296875, "learning_rate": 2.231292517006803e-06, "loss": 0.6615, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 83 }, { "epoch": 0.011422355180853957, "grad_norm": 0.96875, "learning_rate": 2.2585034013605447e-06, "loss": 0.7593, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 84 }, { "epoch": 0.011558335599673647, "grad_norm": 1.15625, "learning_rate": 2.285714285714286e-06, "loss": 1.0078, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 85 }, { "epoch": 0.011694316018493337, "grad_norm": 0.94921875, "learning_rate": 2.3129251700680273e-06, "loss": 0.7004, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 86 }, { "epoch": 0.011830296437313027, "grad_norm": 0.5078125, "learning_rate": 2.340136054421769e-06, "loss": 0.4699, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 87 }, { "epoch": 0.011966276856132716, "grad_norm": 0.828125, "learning_rate": 2.3673469387755103e-06, "loss": 0.8008, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 88 }, { "epoch": 0.012102257274952406, "grad_norm": 1.0703125, "learning_rate": 2.3945578231292516e-06, "loss": 0.8563, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 89 }, { "epoch": 0.012238237693772096, "grad_norm": 0.72265625, "learning_rate": 2.4217687074829934e-06, "loss": 0.6979, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 90 }, { "epoch": 0.012374218112591786, "grad_norm": 2.09375, "learning_rate": 2.4489795918367347e-06, "loss": 0.7786, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 91 }, { "epoch": 0.012510198531411477, "grad_norm": 0.6328125, "learning_rate": 2.4761904761904764e-06, "loss": 0.6012, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 92 }, { "epoch": 0.012646178950231167, "grad_norm": 0.72265625, "learning_rate": 2.503401360544218e-06, "loss": 0.6615, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 93 }, { "epoch": 0.012782159369050857, "grad_norm": 0.96484375, "learning_rate": 2.530612244897959e-06, "loss": 0.9347, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 94 }, { "epoch": 0.012918139787870547, "grad_norm": 0.95703125, "learning_rate": 2.5578231292517007e-06, "loss": 0.8934, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 95 }, { "epoch": 0.013054120206690237, "grad_norm": 1.0625, "learning_rate": 2.5850340136054425e-06, "loss": 0.7243, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 96 }, { "epoch": 0.013190100625509927, "grad_norm": 0.9375, "learning_rate": 2.6122448979591842e-06, "loss": 0.9403, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 97 }, { "epoch": 0.013326081044329616, "grad_norm": 0.6953125, "learning_rate": 2.639455782312925e-06, "loss": 0.6966, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 98 }, { "epoch": 0.013462061463149306, "grad_norm": 0.640625, "learning_rate": 2.666666666666667e-06, "loss": 0.7039, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 99 }, { "epoch": 0.013598041881968996, "grad_norm": 0.69140625, "learning_rate": 2.6938775510204086e-06, "loss": 0.6383, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 100 }, { "epoch": 0.013734022300788686, "grad_norm": 0.94140625, "learning_rate": 2.7210884353741503e-06, "loss": 0.8105, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 101 }, { "epoch": 0.013870002719608376, "grad_norm": 0.78125, "learning_rate": 2.748299319727891e-06, "loss": 0.6768, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 102 }, { "epoch": 0.014005983138428066, "grad_norm": 0.75, "learning_rate": 2.775510204081633e-06, "loss": 0.7967, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 103 }, { "epoch": 0.014141963557247757, "grad_norm": 0.57421875, "learning_rate": 2.8027210884353746e-06, "loss": 0.6797, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 104 }, { "epoch": 0.014277943976067447, "grad_norm": 0.94140625, "learning_rate": 2.829931972789116e-06, "loss": 1.04, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 105 }, { "epoch": 0.014413924394887137, "grad_norm": 0.80078125, "learning_rate": 2.8571428571428573e-06, "loss": 0.8249, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 106 }, { "epoch": 0.014549904813706827, "grad_norm": 0.7421875, "learning_rate": 2.884353741496599e-06, "loss": 0.9142, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 107 }, { "epoch": 0.014685885232526516, "grad_norm": 0.7421875, "learning_rate": 2.9115646258503403e-06, "loss": 0.9105, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 108 }, { "epoch": 0.014821865651346206, "grad_norm": 2.375, "learning_rate": 2.938775510204082e-06, "loss": 0.8752, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 109 }, { "epoch": 0.014957846070165896, "grad_norm": 0.734375, "learning_rate": 2.9659863945578233e-06, "loss": 0.7622, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 110 }, { "epoch": 0.015093826488985586, "grad_norm": 0.66015625, "learning_rate": 2.993197278911565e-06, "loss": 0.6536, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 111 }, { "epoch": 0.015229806907805276, "grad_norm": 0.66015625, "learning_rate": 3.0204081632653064e-06, "loss": 0.7599, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 112 }, { "epoch": 0.015365787326624965, "grad_norm": 0.81640625, "learning_rate": 3.047619047619048e-06, "loss": 0.7565, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 113 }, { "epoch": 0.015501767745444655, "grad_norm": 0.56640625, "learning_rate": 3.0748299319727894e-06, "loss": 0.7336, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 114 }, { "epoch": 0.015637748164264345, "grad_norm": 0.86328125, "learning_rate": 3.1020408163265307e-06, "loss": 0.9966, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 115 }, { "epoch": 0.015773728583084037, "grad_norm": 0.60546875, "learning_rate": 3.1292517006802725e-06, "loss": 0.9222, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 116 }, { "epoch": 0.015909709001903725, "grad_norm": 0.53515625, "learning_rate": 3.1564625850340138e-06, "loss": 0.7316, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 117 }, { "epoch": 0.016045689420723416, "grad_norm": 1.78125, "learning_rate": 3.183673469387755e-06, "loss": 0.7899, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 118 }, { "epoch": 0.016181669839543104, "grad_norm": 0.453125, "learning_rate": 3.210884353741497e-06, "loss": 0.5801, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 119 }, { "epoch": 0.016317650258362796, "grad_norm": 0.71875, "learning_rate": 3.2380952380952385e-06, "loss": 0.9632, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 120 }, { "epoch": 0.016453630677182484, "grad_norm": 0.73046875, "learning_rate": 3.2653061224489794e-06, "loss": 0.5618, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 121 }, { "epoch": 0.016589611096002176, "grad_norm": 0.89453125, "learning_rate": 3.292517006802721e-06, "loss": 1.0436, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 122 }, { "epoch": 0.016725591514821867, "grad_norm": 0.7734375, "learning_rate": 3.319727891156463e-06, "loss": 0.9748, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 123 }, { "epoch": 0.016861571933641555, "grad_norm": 0.2431640625, "learning_rate": 3.3469387755102046e-06, "loss": 0.3447, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 124 }, { "epoch": 0.016997552352461247, "grad_norm": 0.62109375, "learning_rate": 3.3741496598639455e-06, "loss": 0.8794, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 125 }, { "epoch": 0.017133532771280935, "grad_norm": 0.5390625, "learning_rate": 3.4013605442176872e-06, "loss": 0.7708, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 126 }, { "epoch": 0.017269513190100626, "grad_norm": 0.6875, "learning_rate": 3.428571428571429e-06, "loss": 0.9043, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 127 }, { "epoch": 0.017405493608920315, "grad_norm": 0.3671875, "learning_rate": 3.4557823129251707e-06, "loss": 0.5068, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 128 }, { "epoch": 0.017541474027740006, "grad_norm": 0.59765625, "learning_rate": 3.4829931972789116e-06, "loss": 0.7539, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 129 }, { "epoch": 0.017677454446559694, "grad_norm": 0.546875, "learning_rate": 3.5102040816326533e-06, "loss": 0.923, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 130 }, { "epoch": 0.017813434865379386, "grad_norm": 0.46484375, "learning_rate": 3.537414965986395e-06, "loss": 0.6766, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 131 }, { "epoch": 0.017949415284199074, "grad_norm": 0.7265625, "learning_rate": 3.5646258503401363e-06, "loss": 0.8231, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 132 }, { "epoch": 0.018085395703018765, "grad_norm": 0.49609375, "learning_rate": 3.5918367346938777e-06, "loss": 0.7767, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 133 }, { "epoch": 0.018221376121838457, "grad_norm": 0.9296875, "learning_rate": 3.6190476190476194e-06, "loss": 0.7992, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 134 }, { "epoch": 0.018357356540658145, "grad_norm": 0.419921875, "learning_rate": 3.6462585034013607e-06, "loss": 0.6965, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 135 }, { "epoch": 0.018493336959477837, "grad_norm": 0.5703125, "learning_rate": 3.6734693877551024e-06, "loss": 0.6017, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 136 }, { "epoch": 0.018629317378297525, "grad_norm": 0.70703125, "learning_rate": 3.7006802721088437e-06, "loss": 0.6126, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 137 }, { "epoch": 0.018765297797117216, "grad_norm": 0.4765625, "learning_rate": 3.7278911564625855e-06, "loss": 0.6598, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 138 }, { "epoch": 0.018901278215936904, "grad_norm": 0.75390625, "learning_rate": 3.7551020408163268e-06, "loss": 0.8512, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 139 }, { "epoch": 0.019037258634756596, "grad_norm": 0.7109375, "learning_rate": 3.7823129251700685e-06, "loss": 0.7428, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 140 }, { "epoch": 0.019173239053576284, "grad_norm": 0.419921875, "learning_rate": 3.80952380952381e-06, "loss": 0.7266, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 141 }, { "epoch": 0.019309219472395975, "grad_norm": 0.51953125, "learning_rate": 3.836734693877551e-06, "loss": 0.6245, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 142 }, { "epoch": 0.019445199891215664, "grad_norm": 0.63671875, "learning_rate": 3.863945578231293e-06, "loss": 0.8674, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 143 }, { "epoch": 0.019581180310035355, "grad_norm": 0.390625, "learning_rate": 3.891156462585035e-06, "loss": 0.6572, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 144 }, { "epoch": 0.019717160728855043, "grad_norm": 0.439453125, "learning_rate": 3.9183673469387755e-06, "loss": 0.8815, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 145 }, { "epoch": 0.019853141147674735, "grad_norm": 0.546875, "learning_rate": 3.945578231292517e-06, "loss": 0.8299, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 146 }, { "epoch": 0.019989121566494426, "grad_norm": 0.5703125, "learning_rate": 3.972789115646259e-06, "loss": 0.9198, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 147 }, { "epoch": 0.020125101985314114, "grad_norm": 0.53125, "learning_rate": 4.000000000000001e-06, "loss": 0.8075, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 148 }, { "epoch": 0.020261082404133806, "grad_norm": 1.0234375, "learning_rate": 4.0272108843537416e-06, "loss": 0.7716, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 149 }, { "epoch": 0.020397062822953494, "grad_norm": 0.52734375, "learning_rate": 4.054421768707483e-06, "loss": 0.862, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 150 }, { "epoch": 0.020533043241773186, "grad_norm": 0.78515625, "learning_rate": 4.081632653061225e-06, "loss": 0.7142, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 151 }, { "epoch": 0.020669023660592874, "grad_norm": 0.412109375, "learning_rate": 4.108843537414967e-06, "loss": 0.6968, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 152 }, { "epoch": 0.020805004079412565, "grad_norm": 0.52734375, "learning_rate": 4.136054421768708e-06, "loss": 0.8211, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 153 }, { "epoch": 0.020940984498232253, "grad_norm": 0.380859375, "learning_rate": 4.163265306122449e-06, "loss": 0.7043, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 154 }, { "epoch": 0.021076964917051945, "grad_norm": 0.4375, "learning_rate": 4.190476190476191e-06, "loss": 0.6123, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 155 }, { "epoch": 0.021212945335871633, "grad_norm": 0.427734375, "learning_rate": 4.217687074829933e-06, "loss": 0.757, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 156 }, { "epoch": 0.021348925754691325, "grad_norm": 0.42578125, "learning_rate": 4.244897959183674e-06, "loss": 0.7404, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 157 }, { "epoch": 0.021484906173511016, "grad_norm": 0.3203125, "learning_rate": 4.2721088435374154e-06, "loss": 0.6502, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 158 }, { "epoch": 0.021620886592330704, "grad_norm": 0.39453125, "learning_rate": 4.299319727891157e-06, "loss": 0.7308, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 159 }, { "epoch": 0.021756867011150396, "grad_norm": 0.39453125, "learning_rate": 4.326530612244899e-06, "loss": 0.7619, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 160 }, { "epoch": 0.021892847429970084, "grad_norm": 0.48046875, "learning_rate": 4.35374149659864e-06, "loss": 0.9688, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 161 }, { "epoch": 0.022028827848789775, "grad_norm": 0.404296875, "learning_rate": 4.3809523809523815e-06, "loss": 0.6945, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 162 }, { "epoch": 0.022164808267609463, "grad_norm": 0.3359375, "learning_rate": 4.408163265306123e-06, "loss": 0.6489, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 163 }, { "epoch": 0.022300788686429155, "grad_norm": 0.7890625, "learning_rate": 4.435374149659865e-06, "loss": 0.8934, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 164 }, { "epoch": 0.022436769105248843, "grad_norm": 1.25, "learning_rate": 4.462585034013606e-06, "loss": 0.7466, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 165 }, { "epoch": 0.022572749524068535, "grad_norm": 0.373046875, "learning_rate": 4.489795918367348e-06, "loss": 0.8389, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 166 }, { "epoch": 0.022708729942888223, "grad_norm": 0.47265625, "learning_rate": 4.517006802721089e-06, "loss": 0.6725, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 167 }, { "epoch": 0.022844710361707914, "grad_norm": 0.43359375, "learning_rate": 4.54421768707483e-06, "loss": 0.7062, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 168 }, { "epoch": 0.022980690780527602, "grad_norm": 0.51953125, "learning_rate": 4.571428571428572e-06, "loss": 0.5658, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 169 }, { "epoch": 0.023116671199347294, "grad_norm": 0.447265625, "learning_rate": 4.598639455782314e-06, "loss": 0.8704, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 170 }, { "epoch": 0.023252651618166986, "grad_norm": 0.396484375, "learning_rate": 4.6258503401360546e-06, "loss": 0.6842, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 171 }, { "epoch": 0.023388632036986674, "grad_norm": 0.478515625, "learning_rate": 4.653061224489796e-06, "loss": 0.7699, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 172 }, { "epoch": 0.023524612455806365, "grad_norm": 0.4375, "learning_rate": 4.680272108843538e-06, "loss": 0.6392, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 173 }, { "epoch": 0.023660592874626053, "grad_norm": 0.53515625, "learning_rate": 4.707482993197279e-06, "loss": 0.7949, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 174 }, { "epoch": 0.023796573293445745, "grad_norm": 0.28125, "learning_rate": 4.734693877551021e-06, "loss": 0.5171, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 175 }, { "epoch": 0.023932553712265433, "grad_norm": 0.6328125, "learning_rate": 4.761904761904762e-06, "loss": 0.9404, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 176 }, { "epoch": 0.024068534131085124, "grad_norm": 0.3203125, "learning_rate": 4.789115646258503e-06, "loss": 0.6393, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 177 }, { "epoch": 0.024204514549904813, "grad_norm": 0.40234375, "learning_rate": 4.816326530612245e-06, "loss": 0.8898, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 178 }, { "epoch": 0.024340494968724504, "grad_norm": 0.30859375, "learning_rate": 4.843537414965987e-06, "loss": 0.4834, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 179 }, { "epoch": 0.024476475387544192, "grad_norm": 0.82421875, "learning_rate": 4.8707482993197285e-06, "loss": 0.7681, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 180 }, { "epoch": 0.024612455806363884, "grad_norm": 0.55078125, "learning_rate": 4.897959183673469e-06, "loss": 0.7754, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 181 }, { "epoch": 0.024748436225183572, "grad_norm": 0.341796875, "learning_rate": 4.925170068027211e-06, "loss": 0.6714, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 182 }, { "epoch": 0.024884416644003263, "grad_norm": 0.369140625, "learning_rate": 4.952380952380953e-06, "loss": 0.7147, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 183 }, { "epoch": 0.025020397062822955, "grad_norm": 0.43359375, "learning_rate": 4.979591836734694e-06, "loss": 0.6795, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 184 }, { "epoch": 0.025156377481642643, "grad_norm": 0.42578125, "learning_rate": 5.006802721088436e-06, "loss": 0.8494, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 185 }, { "epoch": 0.025292357900462335, "grad_norm": 0.408203125, "learning_rate": 5.034013605442177e-06, "loss": 0.5566, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 186 }, { "epoch": 0.025428338319282023, "grad_norm": 0.65234375, "learning_rate": 5.061224489795918e-06, "loss": 0.6413, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 187 }, { "epoch": 0.025564318738101714, "grad_norm": 0.439453125, "learning_rate": 5.088435374149661e-06, "loss": 0.8281, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 188 }, { "epoch": 0.025700299156921402, "grad_norm": 0.400390625, "learning_rate": 5.1156462585034015e-06, "loss": 0.8175, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 189 }, { "epoch": 0.025836279575741094, "grad_norm": 0.51953125, "learning_rate": 5.142857142857142e-06, "loss": 0.8748, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 190 }, { "epoch": 0.025972259994560782, "grad_norm": 0.703125, "learning_rate": 5.170068027210885e-06, "loss": 0.6808, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 191 }, { "epoch": 0.026108240413380474, "grad_norm": 0.21484375, "learning_rate": 5.197278911564626e-06, "loss": 0.3787, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 192 }, { "epoch": 0.02624422083220016, "grad_norm": 0.53125, "learning_rate": 5.2244897959183684e-06, "loss": 0.637, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 193 }, { "epoch": 0.026380201251019853, "grad_norm": 0.369140625, "learning_rate": 5.251700680272109e-06, "loss": 0.7282, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 194 }, { "epoch": 0.026516181669839545, "grad_norm": 0.484375, "learning_rate": 5.27891156462585e-06, "loss": 0.9263, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 195 }, { "epoch": 0.026652162088659233, "grad_norm": 0.455078125, "learning_rate": 5.306122448979593e-06, "loss": 0.8765, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 196 }, { "epoch": 0.026788142507478924, "grad_norm": 0.341796875, "learning_rate": 5.333333333333334e-06, "loss": 0.6606, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 197 }, { "epoch": 0.026924122926298612, "grad_norm": 0.390625, "learning_rate": 5.3605442176870745e-06, "loss": 0.6154, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 198 }, { "epoch": 0.027060103345118304, "grad_norm": 0.66015625, "learning_rate": 5.387755102040817e-06, "loss": 0.8929, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 199 }, { "epoch": 0.027196083763937992, "grad_norm": 0.458984375, "learning_rate": 5.414965986394558e-06, "loss": 0.693, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 200 }, { "epoch": 0.027332064182757684, "grad_norm": 0.56640625, "learning_rate": 5.442176870748301e-06, "loss": 0.9438, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 201 }, { "epoch": 0.027468044601577372, "grad_norm": 0.57421875, "learning_rate": 5.4693877551020415e-06, "loss": 0.8498, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 202 }, { "epoch": 0.027604025020397063, "grad_norm": 0.408203125, "learning_rate": 5.496598639455782e-06, "loss": 0.6166, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 203 }, { "epoch": 0.02774000543921675, "grad_norm": 0.37890625, "learning_rate": 5.523809523809525e-06, "loss": 0.6255, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 204 }, { "epoch": 0.027875985858036443, "grad_norm": 0.59765625, "learning_rate": 5.551020408163266e-06, "loss": 0.9277, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 205 }, { "epoch": 0.02801196627685613, "grad_norm": 0.42578125, "learning_rate": 5.578231292517007e-06, "loss": 0.7293, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 206 }, { "epoch": 0.028147946695675823, "grad_norm": 0.3671875, "learning_rate": 5.605442176870749e-06, "loss": 0.6662, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 207 }, { "epoch": 0.028283927114495514, "grad_norm": 0.796875, "learning_rate": 5.63265306122449e-06, "loss": 0.8867, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 208 }, { "epoch": 0.028419907533315202, "grad_norm": 0.65234375, "learning_rate": 5.659863945578232e-06, "loss": 0.7655, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 209 }, { "epoch": 0.028555887952134894, "grad_norm": 0.52734375, "learning_rate": 5.687074829931974e-06, "loss": 0.8082, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 210 }, { "epoch": 0.028691868370954582, "grad_norm": 0.341796875, "learning_rate": 5.7142857142857145e-06, "loss": 0.5872, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 211 }, { "epoch": 0.028827848789774273, "grad_norm": 0.376953125, "learning_rate": 5.741496598639456e-06, "loss": 0.7412, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 212 }, { "epoch": 0.02896382920859396, "grad_norm": 0.396484375, "learning_rate": 5.768707482993198e-06, "loss": 0.6835, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 213 }, { "epoch": 0.029099809627413653, "grad_norm": 0.47265625, "learning_rate": 5.795918367346939e-06, "loss": 0.5636, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 214 }, { "epoch": 0.02923579004623334, "grad_norm": 0.60546875, "learning_rate": 5.823129251700681e-06, "loss": 0.741, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 215 }, { "epoch": 0.029371770465053033, "grad_norm": 0.57421875, "learning_rate": 5.850340136054422e-06, "loss": 0.6878, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 216 }, { "epoch": 0.02950775088387272, "grad_norm": 0.56640625, "learning_rate": 5.877551020408164e-06, "loss": 0.7103, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 217 }, { "epoch": 0.029643731302692412, "grad_norm": 0.298828125, "learning_rate": 5.904761904761905e-06, "loss": 0.4969, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 218 }, { "epoch": 0.029779711721512104, "grad_norm": 0.67578125, "learning_rate": 5.931972789115647e-06, "loss": 0.6834, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 219 }, { "epoch": 0.029915692140331792, "grad_norm": 0.357421875, "learning_rate": 5.959183673469388e-06, "loss": 0.639, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 220 }, { "epoch": 0.030051672559151484, "grad_norm": 0.462890625, "learning_rate": 5.98639455782313e-06, "loss": 0.7853, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 221 }, { "epoch": 0.03018765297797117, "grad_norm": 0.453125, "learning_rate": 6.013605442176871e-06, "loss": 0.8154, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 222 }, { "epoch": 0.030323633396790863, "grad_norm": 0.443359375, "learning_rate": 6.040816326530613e-06, "loss": 0.734, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 223 }, { "epoch": 0.03045961381561055, "grad_norm": 0.515625, "learning_rate": 6.0680272108843545e-06, "loss": 0.5636, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 224 }, { "epoch": 0.030595594234430243, "grad_norm": 0.326171875, "learning_rate": 6.095238095238096e-06, "loss": 0.5664, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 225 }, { "epoch": 0.03073157465324993, "grad_norm": 0.44921875, "learning_rate": 6.122448979591837e-06, "loss": 0.8218, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 226 }, { "epoch": 0.030867555072069622, "grad_norm": 0.337890625, "learning_rate": 6.149659863945579e-06, "loss": 0.7049, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 227 }, { "epoch": 0.03100353549088931, "grad_norm": 0.33203125, "learning_rate": 6.1768707482993206e-06, "loss": 0.6589, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 228 }, { "epoch": 0.031139515909709002, "grad_norm": 0.435546875, "learning_rate": 6.2040816326530614e-06, "loss": 0.694, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 229 }, { "epoch": 0.03127549632852869, "grad_norm": 0.51953125, "learning_rate": 6.231292517006803e-06, "loss": 0.5724, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 230 }, { "epoch": 0.031411476747348385, "grad_norm": 0.45703125, "learning_rate": 6.258503401360545e-06, "loss": 0.5073, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 231 }, { "epoch": 0.03154745716616807, "grad_norm": 0.341796875, "learning_rate": 6.285714285714286e-06, "loss": 0.6092, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 232 }, { "epoch": 0.03168343758498776, "grad_norm": 0.52734375, "learning_rate": 6.3129251700680275e-06, "loss": 0.7922, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 233 }, { "epoch": 0.03181941800380745, "grad_norm": 0.40625, "learning_rate": 6.340136054421769e-06, "loss": 0.889, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 234 }, { "epoch": 0.031955398422627145, "grad_norm": 0.45703125, "learning_rate": 6.36734693877551e-06, "loss": 0.7852, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 235 }, { "epoch": 0.03209137884144683, "grad_norm": 0.26171875, "learning_rate": 6.394557823129253e-06, "loss": 0.4699, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 236 }, { "epoch": 0.03222735926026652, "grad_norm": 0.7578125, "learning_rate": 6.421768707482994e-06, "loss": 0.61, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 237 }, { "epoch": 0.03236333967908621, "grad_norm": 0.478515625, "learning_rate": 6.4489795918367345e-06, "loss": 0.7663, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 238 }, { "epoch": 0.032499320097905904, "grad_norm": 0.341796875, "learning_rate": 6.476190476190477e-06, "loss": 0.508, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 239 }, { "epoch": 0.03263530051672559, "grad_norm": 0.59765625, "learning_rate": 6.503401360544218e-06, "loss": 0.6351, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 240 }, { "epoch": 0.03277128093554528, "grad_norm": 0.357421875, "learning_rate": 6.530612244897959e-06, "loss": 0.6646, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 241 }, { "epoch": 0.03290726135436497, "grad_norm": 0.5234375, "learning_rate": 6.557823129251701e-06, "loss": 0.9951, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 242 }, { "epoch": 0.03304324177318466, "grad_norm": 0.4140625, "learning_rate": 6.585034013605442e-06, "loss": 0.7405, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 243 }, { "epoch": 0.03317922219200435, "grad_norm": 0.439453125, "learning_rate": 6.612244897959185e-06, "loss": 0.883, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 244 }, { "epoch": 0.03331520261082404, "grad_norm": 0.40625, "learning_rate": 6.639455782312926e-06, "loss": 0.8263, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 245 }, { "epoch": 0.033451183029643734, "grad_norm": 0.56640625, "learning_rate": 6.666666666666667e-06, "loss": 0.6847, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 246 }, { "epoch": 0.03358716344846342, "grad_norm": 0.494140625, "learning_rate": 6.693877551020409e-06, "loss": 0.7989, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 247 }, { "epoch": 0.03372314386728311, "grad_norm": 0.3203125, "learning_rate": 6.72108843537415e-06, "loss": 0.4845, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 248 }, { "epoch": 0.0338591242861028, "grad_norm": 0.337890625, "learning_rate": 6.748299319727891e-06, "loss": 0.6258, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 249 }, { "epoch": 0.033995104704922494, "grad_norm": 0.330078125, "learning_rate": 6.7755102040816336e-06, "loss": 0.6061, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 250 }, { "epoch": 0.03413108512374218, "grad_norm": 0.4921875, "learning_rate": 6.8027210884353745e-06, "loss": 0.5195, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 251 }, { "epoch": 0.03426706554256187, "grad_norm": 0.32421875, "learning_rate": 6.829931972789117e-06, "loss": 0.6489, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 252 }, { "epoch": 0.03440304596138156, "grad_norm": 0.68359375, "learning_rate": 6.857142857142858e-06, "loss": 0.84, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 253 }, { "epoch": 0.03453902638020125, "grad_norm": 0.34375, "learning_rate": 6.884353741496599e-06, "loss": 0.6265, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 254 }, { "epoch": 0.03467500679902094, "grad_norm": 0.703125, "learning_rate": 6.911564625850341e-06, "loss": 0.9307, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 255 }, { "epoch": 0.03481098721784063, "grad_norm": 0.34765625, "learning_rate": 6.938775510204082e-06, "loss": 0.5264, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 256 }, { "epoch": 0.034946967636660324, "grad_norm": 0.4296875, "learning_rate": 6.965986394557823e-06, "loss": 0.7669, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 257 }, { "epoch": 0.03508294805548001, "grad_norm": 0.318359375, "learning_rate": 6.993197278911566e-06, "loss": 0.6453, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 258 }, { "epoch": 0.0352189284742997, "grad_norm": 0.279296875, "learning_rate": 7.020408163265307e-06, "loss": 0.4497, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 259 }, { "epoch": 0.03535490889311939, "grad_norm": 0.408203125, "learning_rate": 7.047619047619048e-06, "loss": 0.6089, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 260 }, { "epoch": 0.03549088931193908, "grad_norm": 0.478515625, "learning_rate": 7.07482993197279e-06, "loss": 0.7101, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 261 }, { "epoch": 0.03562686973075877, "grad_norm": 0.361328125, "learning_rate": 7.102040816326531e-06, "loss": 0.6751, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 262 }, { "epoch": 0.03576285014957846, "grad_norm": 0.359375, "learning_rate": 7.129251700680273e-06, "loss": 0.68, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 263 }, { "epoch": 0.03589883056839815, "grad_norm": 0.34765625, "learning_rate": 7.1564625850340144e-06, "loss": 0.7606, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 264 }, { "epoch": 0.03603481098721784, "grad_norm": 0.5625, "learning_rate": 7.183673469387755e-06, "loss": 0.7028, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 265 }, { "epoch": 0.03617079140603753, "grad_norm": 0.486328125, "learning_rate": 7.210884353741497e-06, "loss": 0.9393, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 266 }, { "epoch": 0.03630677182485722, "grad_norm": 0.3203125, "learning_rate": 7.238095238095239e-06, "loss": 0.6763, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 267 }, { "epoch": 0.036442752243676914, "grad_norm": 0.4609375, "learning_rate": 7.2653061224489805e-06, "loss": 0.7218, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 268 }, { "epoch": 0.0365787326624966, "grad_norm": 0.494140625, "learning_rate": 7.292517006802721e-06, "loss": 0.7088, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 269 }, { "epoch": 0.03671471308131629, "grad_norm": 0.72265625, "learning_rate": 7.319727891156463e-06, "loss": 0.8929, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 270 }, { "epoch": 0.03685069350013598, "grad_norm": 0.3515625, "learning_rate": 7.346938775510205e-06, "loss": 0.6882, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 271 }, { "epoch": 0.03698667391895567, "grad_norm": 0.353515625, "learning_rate": 7.374149659863946e-06, "loss": 0.5739, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 272 }, { "epoch": 0.03712265433777536, "grad_norm": 0.31640625, "learning_rate": 7.4013605442176875e-06, "loss": 0.6309, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 273 }, { "epoch": 0.03725863475659505, "grad_norm": 0.43359375, "learning_rate": 7.428571428571429e-06, "loss": 0.7231, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 274 }, { "epoch": 0.03739461517541474, "grad_norm": 0.333984375, "learning_rate": 7.455782312925171e-06, "loss": 0.6932, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 275 }, { "epoch": 0.03753059559423443, "grad_norm": 0.4375, "learning_rate": 7.482993197278913e-06, "loss": 0.7718, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 276 }, { "epoch": 0.03766657601305412, "grad_norm": 0.58984375, "learning_rate": 7.5102040816326536e-06, "loss": 0.874, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 277 }, { "epoch": 0.03780255643187381, "grad_norm": 0.470703125, "learning_rate": 7.537414965986395e-06, "loss": 0.6112, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 278 }, { "epoch": 0.0379385368506935, "grad_norm": 0.32421875, "learning_rate": 7.564625850340137e-06, "loss": 0.7163, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 279 }, { "epoch": 0.03807451726951319, "grad_norm": 0.470703125, "learning_rate": 7.591836734693878e-06, "loss": 0.7707, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 280 }, { "epoch": 0.03821049768833288, "grad_norm": 0.455078125, "learning_rate": 7.61904761904762e-06, "loss": 0.6704, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 281 }, { "epoch": 0.03834647810715257, "grad_norm": 0.345703125, "learning_rate": 7.646258503401362e-06, "loss": 0.5549, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 282 }, { "epoch": 0.03848245852597226, "grad_norm": 0.3203125, "learning_rate": 7.673469387755102e-06, "loss": 0.6187, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 283 }, { "epoch": 0.03861843894479195, "grad_norm": 0.8125, "learning_rate": 7.700680272108844e-06, "loss": 0.8107, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 284 }, { "epoch": 0.03875441936361164, "grad_norm": 0.408203125, "learning_rate": 7.727891156462586e-06, "loss": 0.8127, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 285 }, { "epoch": 0.03889039978243133, "grad_norm": 0.427734375, "learning_rate": 7.755102040816327e-06, "loss": 0.6056, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 286 }, { "epoch": 0.03902638020125102, "grad_norm": 0.369140625, "learning_rate": 7.78231292517007e-06, "loss": 0.7949, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 287 }, { "epoch": 0.03916236062007071, "grad_norm": 0.392578125, "learning_rate": 7.809523809523811e-06, "loss": 0.8029, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 288 }, { "epoch": 0.0392983410388904, "grad_norm": 0.546875, "learning_rate": 7.836734693877551e-06, "loss": 0.6792, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 289 }, { "epoch": 0.039434321457710086, "grad_norm": 0.5703125, "learning_rate": 7.863945578231293e-06, "loss": 0.6167, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 290 }, { "epoch": 0.03957030187652978, "grad_norm": 0.55859375, "learning_rate": 7.891156462585034e-06, "loss": 0.6478, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 291 }, { "epoch": 0.03970628229534947, "grad_norm": 0.40234375, "learning_rate": 7.918367346938776e-06, "loss": 0.6825, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 292 }, { "epoch": 0.03984226271416916, "grad_norm": 0.427734375, "learning_rate": 7.945578231292518e-06, "loss": 0.6628, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 293 }, { "epoch": 0.03997824313298885, "grad_norm": 2.09375, "learning_rate": 7.97278911564626e-06, "loss": 0.9548, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 294 }, { "epoch": 0.04011422355180854, "grad_norm": 0.474609375, "learning_rate": 8.000000000000001e-06, "loss": 0.6634, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 295 }, { "epoch": 0.04025020397062823, "grad_norm": 0.357421875, "learning_rate": 8.027210884353741e-06, "loss": 0.7087, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 296 }, { "epoch": 0.04038618438944792, "grad_norm": 0.478515625, "learning_rate": 8.054421768707483e-06, "loss": 0.7059, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 297 }, { "epoch": 0.04052216480826761, "grad_norm": 0.40234375, "learning_rate": 8.081632653061225e-06, "loss": 0.5822, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 298 }, { "epoch": 0.0406581452270873, "grad_norm": 0.2734375, "learning_rate": 8.108843537414967e-06, "loss": 0.5086, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 299 }, { "epoch": 0.04079412564590699, "grad_norm": 0.294921875, "learning_rate": 8.136054421768708e-06, "loss": 0.5651, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 300 }, { "epoch": 0.040930106064726676, "grad_norm": 0.40234375, "learning_rate": 8.16326530612245e-06, "loss": 0.7184, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 301 }, { "epoch": 0.04106608648354637, "grad_norm": 0.39453125, "learning_rate": 8.190476190476192e-06, "loss": 0.6715, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 302 }, { "epoch": 0.04120206690236606, "grad_norm": 0.4609375, "learning_rate": 8.217687074829933e-06, "loss": 0.7961, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 303 }, { "epoch": 0.04133804732118575, "grad_norm": 0.5, "learning_rate": 8.244897959183674e-06, "loss": 0.7453, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 304 }, { "epoch": 0.04147402774000544, "grad_norm": 0.78125, "learning_rate": 8.272108843537415e-06, "loss": 0.9214, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 305 }, { "epoch": 0.04161000815882513, "grad_norm": 0.474609375, "learning_rate": 8.299319727891157e-06, "loss": 0.7075, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 306 }, { "epoch": 0.04174598857764482, "grad_norm": 0.498046875, "learning_rate": 8.326530612244899e-06, "loss": 0.7676, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 307 }, { "epoch": 0.04188196899646451, "grad_norm": 0.314453125, "learning_rate": 8.35374149659864e-06, "loss": 0.4648, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 308 }, { "epoch": 0.0420179494152842, "grad_norm": 0.447265625, "learning_rate": 8.380952380952382e-06, "loss": 0.6919, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 309 }, { "epoch": 0.04215392983410389, "grad_norm": 0.349609375, "learning_rate": 8.408163265306122e-06, "loss": 0.5938, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 310 }, { "epoch": 0.04228991025292358, "grad_norm": 0.34765625, "learning_rate": 8.435374149659866e-06, "loss": 0.7843, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 311 }, { "epoch": 0.042425890671743266, "grad_norm": 0.40234375, "learning_rate": 8.462585034013606e-06, "loss": 0.672, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 312 }, { "epoch": 0.04256187109056296, "grad_norm": 0.275390625, "learning_rate": 8.489795918367347e-06, "loss": 0.487, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 313 }, { "epoch": 0.04269785150938265, "grad_norm": 0.380859375, "learning_rate": 8.517006802721089e-06, "loss": 0.8018, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 314 }, { "epoch": 0.04283383192820234, "grad_norm": 0.431640625, "learning_rate": 8.544217687074831e-06, "loss": 0.6273, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 315 }, { "epoch": 0.04296981234702203, "grad_norm": 0.345703125, "learning_rate": 8.571428571428571e-06, "loss": 0.6175, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 316 }, { "epoch": 0.04310579276584172, "grad_norm": 1.453125, "learning_rate": 8.598639455782314e-06, "loss": 0.8319, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 317 }, { "epoch": 0.04324177318466141, "grad_norm": 0.322265625, "learning_rate": 8.625850340136054e-06, "loss": 0.52, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 318 }, { "epoch": 0.043377753603481096, "grad_norm": 1.125, "learning_rate": 8.653061224489798e-06, "loss": 0.5791, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 319 }, { "epoch": 0.04351373402230079, "grad_norm": 0.470703125, "learning_rate": 8.680272108843538e-06, "loss": 0.7443, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 320 }, { "epoch": 0.04364971444112048, "grad_norm": 0.400390625, "learning_rate": 8.70748299319728e-06, "loss": 0.8138, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 321 }, { "epoch": 0.04378569485994017, "grad_norm": 0.404296875, "learning_rate": 8.734693877551021e-06, "loss": 0.7811, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 322 }, { "epoch": 0.043921675278759856, "grad_norm": 0.3828125, "learning_rate": 8.761904761904763e-06, "loss": 0.7497, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 323 }, { "epoch": 0.04405765569757955, "grad_norm": 0.314453125, "learning_rate": 8.789115646258503e-06, "loss": 0.5591, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 324 }, { "epoch": 0.04419363611639924, "grad_norm": 0.75390625, "learning_rate": 8.816326530612247e-06, "loss": 0.8439, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 325 }, { "epoch": 0.04432961653521893, "grad_norm": 0.3515625, "learning_rate": 8.843537414965987e-06, "loss": 0.6481, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 326 }, { "epoch": 0.044465596954038615, "grad_norm": 0.5625, "learning_rate": 8.87074829931973e-06, "loss": 0.745, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 327 }, { "epoch": 0.04460157737285831, "grad_norm": 0.40625, "learning_rate": 8.89795918367347e-06, "loss": 0.4888, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 328 }, { "epoch": 0.044737557791678, "grad_norm": 0.55078125, "learning_rate": 8.925170068027212e-06, "loss": 0.971, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 329 }, { "epoch": 0.044873538210497686, "grad_norm": 0.408203125, "learning_rate": 8.952380952380953e-06, "loss": 0.8376, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 330 }, { "epoch": 0.04500951862931738, "grad_norm": 0.326171875, "learning_rate": 8.979591836734695e-06, "loss": 0.5605, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 331 }, { "epoch": 0.04514549904813707, "grad_norm": 0.341796875, "learning_rate": 9.006802721088435e-06, "loss": 0.5184, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 332 }, { "epoch": 0.04528147946695676, "grad_norm": 0.3515625, "learning_rate": 9.034013605442179e-06, "loss": 0.6974, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 333 }, { "epoch": 0.045417459885776446, "grad_norm": 0.333984375, "learning_rate": 9.061224489795919e-06, "loss": 0.6138, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 334 }, { "epoch": 0.04555344030459614, "grad_norm": 0.365234375, "learning_rate": 9.08843537414966e-06, "loss": 0.6425, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 335 }, { "epoch": 0.04568942072341583, "grad_norm": 0.306640625, "learning_rate": 9.115646258503402e-06, "loss": 0.5353, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 336 }, { "epoch": 0.04582540114223552, "grad_norm": 0.49609375, "learning_rate": 9.142857142857144e-06, "loss": 0.7184, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 337 }, { "epoch": 0.045961381561055205, "grad_norm": 0.62890625, "learning_rate": 9.170068027210886e-06, "loss": 0.8455, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 338 }, { "epoch": 0.0460973619798749, "grad_norm": 0.380859375, "learning_rate": 9.197278911564627e-06, "loss": 0.7378, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 339 }, { "epoch": 0.04623334239869459, "grad_norm": 0.671875, "learning_rate": 9.224489795918367e-06, "loss": 0.7215, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 340 }, { "epoch": 0.046369322817514276, "grad_norm": 0.369140625, "learning_rate": 9.251700680272109e-06, "loss": 0.7313, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 341 }, { "epoch": 0.04650530323633397, "grad_norm": 0.34765625, "learning_rate": 9.278911564625851e-06, "loss": 0.6239, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 342 }, { "epoch": 0.04664128365515366, "grad_norm": 0.34375, "learning_rate": 9.306122448979593e-06, "loss": 0.5005, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 343 }, { "epoch": 0.04677726407397335, "grad_norm": 0.427734375, "learning_rate": 9.333333333333334e-06, "loss": 0.8174, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 344 }, { "epoch": 0.046913244492793035, "grad_norm": 0.4453125, "learning_rate": 9.360544217687076e-06, "loss": 0.7788, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 345 }, { "epoch": 0.04704922491161273, "grad_norm": 0.310546875, "learning_rate": 9.387755102040818e-06, "loss": 0.6099, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 346 }, { "epoch": 0.04718520533043242, "grad_norm": 0.482421875, "learning_rate": 9.414965986394558e-06, "loss": 0.6987, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 347 }, { "epoch": 0.047321185749252107, "grad_norm": 0.4296875, "learning_rate": 9.4421768707483e-06, "loss": 0.7588, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 348 }, { "epoch": 0.047457166168071795, "grad_norm": 0.6953125, "learning_rate": 9.469387755102041e-06, "loss": 0.856, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 349 }, { "epoch": 0.04759314658689149, "grad_norm": 0.67578125, "learning_rate": 9.496598639455783e-06, "loss": 0.9992, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 350 }, { "epoch": 0.04772912700571118, "grad_norm": 0.43359375, "learning_rate": 9.523809523809525e-06, "loss": 0.7578, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 351 }, { "epoch": 0.047865107424530866, "grad_norm": 0.2119140625, "learning_rate": 9.551020408163266e-06, "loss": 0.3485, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 352 }, { "epoch": 0.04800108784335056, "grad_norm": 0.443359375, "learning_rate": 9.578231292517007e-06, "loss": 0.658, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 353 }, { "epoch": 0.04813706826217025, "grad_norm": 0.416015625, "learning_rate": 9.60544217687075e-06, "loss": 0.8817, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 354 }, { "epoch": 0.04827304868098994, "grad_norm": 0.423828125, "learning_rate": 9.63265306122449e-06, "loss": 0.6756, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 355 }, { "epoch": 0.048409029099809625, "grad_norm": 0.3515625, "learning_rate": 9.659863945578232e-06, "loss": 0.7754, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 356 }, { "epoch": 0.04854500951862932, "grad_norm": 0.4921875, "learning_rate": 9.687074829931973e-06, "loss": 0.8369, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 357 }, { "epoch": 0.04868098993744901, "grad_norm": 0.353515625, "learning_rate": 9.714285714285715e-06, "loss": 0.5322, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 358 }, { "epoch": 0.048816970356268696, "grad_norm": 0.49609375, "learning_rate": 9.741496598639457e-06, "loss": 0.9388, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 359 }, { "epoch": 0.048952950775088384, "grad_norm": 0.3515625, "learning_rate": 9.768707482993199e-06, "loss": 0.6512, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 360 }, { "epoch": 0.04908893119390808, "grad_norm": 0.25390625, "learning_rate": 9.795918367346939e-06, "loss": 0.4749, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 361 }, { "epoch": 0.04922491161272777, "grad_norm": 0.578125, "learning_rate": 9.823129251700682e-06, "loss": 0.7264, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 362 }, { "epoch": 0.049360892031547456, "grad_norm": 0.5234375, "learning_rate": 9.850340136054422e-06, "loss": 0.5392, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 363 }, { "epoch": 0.049496872450367144, "grad_norm": 0.44921875, "learning_rate": 9.877551020408164e-06, "loss": 0.6844, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 364 }, { "epoch": 0.04963285286918684, "grad_norm": 0.2412109375, "learning_rate": 9.904761904761906e-06, "loss": 0.4134, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 365 }, { "epoch": 0.04976883328800653, "grad_norm": 0.345703125, "learning_rate": 9.931972789115647e-06, "loss": 0.6761, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 366 }, { "epoch": 0.049904813706826215, "grad_norm": 0.474609375, "learning_rate": 9.959183673469387e-06, "loss": 0.6971, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 367 }, { "epoch": 0.05004079412564591, "grad_norm": 0.275390625, "learning_rate": 9.98639455782313e-06, "loss": 0.4684, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 368 }, { "epoch": 0.0501767745444656, "grad_norm": 0.37109375, "learning_rate": 1.0013605442176873e-05, "loss": 0.6971, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 369 }, { "epoch": 0.050312754963285286, "grad_norm": 0.375, "learning_rate": 1.0040816326530614e-05, "loss": 0.5433, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 370 }, { "epoch": 0.050448735382104974, "grad_norm": 0.5390625, "learning_rate": 1.0068027210884354e-05, "loss": 0.8608, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 371 }, { "epoch": 0.05058471580092467, "grad_norm": 0.3125, "learning_rate": 1.0095238095238096e-05, "loss": 0.6313, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 372 }, { "epoch": 0.05072069621974436, "grad_norm": 0.35546875, "learning_rate": 1.0122448979591836e-05, "loss": 0.5334, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 373 }, { "epoch": 0.050856676638564045, "grad_norm": 0.474609375, "learning_rate": 1.014965986394558e-05, "loss": 0.9106, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 374 }, { "epoch": 0.05099265705738373, "grad_norm": 0.50390625, "learning_rate": 1.0176870748299321e-05, "loss": 0.802, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 375 }, { "epoch": 0.05112863747620343, "grad_norm": 0.400390625, "learning_rate": 1.0204081632653063e-05, "loss": 0.5729, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 376 }, { "epoch": 0.05126461789502312, "grad_norm": 0.328125, "learning_rate": 1.0231292517006803e-05, "loss": 0.5594, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 377 }, { "epoch": 0.051400598313842805, "grad_norm": 0.40234375, "learning_rate": 1.0258503401360545e-05, "loss": 0.7295, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 378 }, { "epoch": 0.0515365787326625, "grad_norm": 0.498046875, "learning_rate": 1.0285714285714285e-05, "loss": 0.8379, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 379 }, { "epoch": 0.05167255915148219, "grad_norm": 0.60546875, "learning_rate": 1.0312925170068028e-05, "loss": 0.7363, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 380 }, { "epoch": 0.051808539570301876, "grad_norm": 0.84765625, "learning_rate": 1.034013605442177e-05, "loss": 0.7749, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 381 }, { "epoch": 0.051944519989121564, "grad_norm": 0.283203125, "learning_rate": 1.0367346938775512e-05, "loss": 0.4989, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 382 }, { "epoch": 0.05208050040794126, "grad_norm": 0.330078125, "learning_rate": 1.0394557823129252e-05, "loss": 0.5752, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 383 }, { "epoch": 0.05221648082676095, "grad_norm": 0.263671875, "learning_rate": 1.0421768707482993e-05, "loss": 0.4671, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 384 }, { "epoch": 0.052352461245580635, "grad_norm": 0.466796875, "learning_rate": 1.0448979591836737e-05, "loss": 0.7692, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 385 }, { "epoch": 0.05248844166440032, "grad_norm": 0.390625, "learning_rate": 1.0476190476190477e-05, "loss": 0.495, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 386 }, { "epoch": 0.05262442208322002, "grad_norm": 0.4609375, "learning_rate": 1.0503401360544219e-05, "loss": 0.6807, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 387 }, { "epoch": 0.052760402502039706, "grad_norm": 0.412109375, "learning_rate": 1.053061224489796e-05, "loss": 0.6576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 388 }, { "epoch": 0.052896382920859394, "grad_norm": 0.333984375, "learning_rate": 1.05578231292517e-05, "loss": 0.5825, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 389 }, { "epoch": 0.05303236333967909, "grad_norm": 0.478515625, "learning_rate": 1.0585034013605444e-05, "loss": 0.8063, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 390 }, { "epoch": 0.05316834375849878, "grad_norm": 0.263671875, "learning_rate": 1.0612244897959186e-05, "loss": 0.5661, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 391 }, { "epoch": 0.053304324177318466, "grad_norm": 0.5234375, "learning_rate": 1.0639455782312926e-05, "loss": 0.9085, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 392 }, { "epoch": 0.053440304596138154, "grad_norm": 0.65625, "learning_rate": 1.0666666666666667e-05, "loss": 0.8589, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 393 }, { "epoch": 0.05357628501495785, "grad_norm": 0.4453125, "learning_rate": 1.0693877551020409e-05, "loss": 0.7983, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 394 }, { "epoch": 0.05371226543377754, "grad_norm": 0.5859375, "learning_rate": 1.0721088435374149e-05, "loss": 0.9154, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 395 }, { "epoch": 0.053848245852597225, "grad_norm": 0.54296875, "learning_rate": 1.0748299319727893e-05, "loss": 0.7284, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 396 }, { "epoch": 0.05398422627141691, "grad_norm": 0.287109375, "learning_rate": 1.0775510204081634e-05, "loss": 0.5695, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 397 }, { "epoch": 0.05412020669023661, "grad_norm": 0.53125, "learning_rate": 1.0802721088435374e-05, "loss": 0.6257, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 398 }, { "epoch": 0.054256187109056296, "grad_norm": 0.5390625, "learning_rate": 1.0829931972789116e-05, "loss": 0.8249, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 399 }, { "epoch": 0.054392167527875984, "grad_norm": 0.380859375, "learning_rate": 1.0857142857142858e-05, "loss": 0.6406, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 400 }, { "epoch": 0.05452814794669568, "grad_norm": 0.353515625, "learning_rate": 1.0884353741496601e-05, "loss": 0.6986, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 401 }, { "epoch": 0.05466412836551537, "grad_norm": 0.451171875, "learning_rate": 1.0911564625850341e-05, "loss": 0.7218, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 402 }, { "epoch": 0.054800108784335055, "grad_norm": 0.25390625, "learning_rate": 1.0938775510204083e-05, "loss": 0.4718, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 403 }, { "epoch": 0.054936089203154743, "grad_norm": 1.015625, "learning_rate": 1.0965986394557823e-05, "loss": 0.9548, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 404 }, { "epoch": 0.05507206962197444, "grad_norm": 0.453125, "learning_rate": 1.0993197278911565e-05, "loss": 0.5225, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 405 }, { "epoch": 0.05520805004079413, "grad_norm": 0.8125, "learning_rate": 1.1020408163265306e-05, "loss": 0.6984, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 406 }, { "epoch": 0.055344030459613815, "grad_norm": 0.337890625, "learning_rate": 1.104761904761905e-05, "loss": 0.6056, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 407 }, { "epoch": 0.0554800108784335, "grad_norm": 0.3203125, "learning_rate": 1.107482993197279e-05, "loss": 0.5592, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 408 }, { "epoch": 0.0556159912972532, "grad_norm": 0.50390625, "learning_rate": 1.1102040816326532e-05, "loss": 0.7472, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 409 }, { "epoch": 0.055751971716072886, "grad_norm": 0.51953125, "learning_rate": 1.1129251700680272e-05, "loss": 0.8509, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 410 }, { "epoch": 0.055887952134892574, "grad_norm": 0.416015625, "learning_rate": 1.1156462585034013e-05, "loss": 0.6698, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 411 }, { "epoch": 0.05602393255371226, "grad_norm": 0.453125, "learning_rate": 1.1183673469387757e-05, "loss": 0.5566, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 412 }, { "epoch": 0.05615991297253196, "grad_norm": 0.37890625, "learning_rate": 1.1210884353741499e-05, "loss": 0.7792, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 413 }, { "epoch": 0.056295893391351645, "grad_norm": 0.609375, "learning_rate": 1.1238095238095239e-05, "loss": 0.5747, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 414 }, { "epoch": 0.05643187381017133, "grad_norm": 0.43359375, "learning_rate": 1.126530612244898e-05, "loss": 0.8993, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 415 }, { "epoch": 0.05656785422899103, "grad_norm": 0.5703125, "learning_rate": 1.1292517006802722e-05, "loss": 0.7666, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 416 }, { "epoch": 0.056703834647810716, "grad_norm": 0.72265625, "learning_rate": 1.1319727891156464e-05, "loss": 0.5433, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 417 }, { "epoch": 0.056839815066630404, "grad_norm": 0.388671875, "learning_rate": 1.1346938775510206e-05, "loss": 0.7432, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 418 }, { "epoch": 0.05697579548545009, "grad_norm": 0.34765625, "learning_rate": 1.1374149659863947e-05, "loss": 0.7215, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 419 }, { "epoch": 0.05711177590426979, "grad_norm": 0.484375, "learning_rate": 1.1401360544217687e-05, "loss": 0.9536, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 420 }, { "epoch": 0.057247756323089476, "grad_norm": 0.62890625, "learning_rate": 1.1428571428571429e-05, "loss": 0.9642, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 421 }, { "epoch": 0.057383736741909164, "grad_norm": 0.546875, "learning_rate": 1.145578231292517e-05, "loss": 0.7106, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 422 }, { "epoch": 0.05751971716072885, "grad_norm": 0.953125, "learning_rate": 1.1482993197278912e-05, "loss": 1.1024, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 423 }, { "epoch": 0.05765569757954855, "grad_norm": 0.40625, "learning_rate": 1.1510204081632654e-05, "loss": 0.5381, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 424 }, { "epoch": 0.057791677998368235, "grad_norm": 0.361328125, "learning_rate": 1.1537414965986396e-05, "loss": 0.6531, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 425 }, { "epoch": 0.05792765841718792, "grad_norm": 0.435546875, "learning_rate": 1.1564625850340136e-05, "loss": 0.5202, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 426 }, { "epoch": 0.05806363883600762, "grad_norm": 0.82421875, "learning_rate": 1.1591836734693878e-05, "loss": 0.6372, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 427 }, { "epoch": 0.058199619254827306, "grad_norm": 0.404296875, "learning_rate": 1.1619047619047621e-05, "loss": 0.5677, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 428 }, { "epoch": 0.058335599673646994, "grad_norm": 0.419921875, "learning_rate": 1.1646258503401361e-05, "loss": 0.7122, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 429 }, { "epoch": 0.05847158009246668, "grad_norm": 0.408203125, "learning_rate": 1.1673469387755103e-05, "loss": 0.7005, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 430 }, { "epoch": 0.05860756051128638, "grad_norm": 0.435546875, "learning_rate": 1.1700680272108845e-05, "loss": 0.665, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 431 }, { "epoch": 0.058743540930106065, "grad_norm": 0.345703125, "learning_rate": 1.1727891156462585e-05, "loss": 0.5583, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 432 }, { "epoch": 0.058879521348925754, "grad_norm": 0.69140625, "learning_rate": 1.1755102040816328e-05, "loss": 0.8818, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 433 }, { "epoch": 0.05901550176774544, "grad_norm": 0.67578125, "learning_rate": 1.178231292517007e-05, "loss": 0.8172, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 434 }, { "epoch": 0.05915148218656514, "grad_norm": 0.26171875, "learning_rate": 1.180952380952381e-05, "loss": 0.5228, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 435 }, { "epoch": 0.059287462605384825, "grad_norm": 0.34765625, "learning_rate": 1.1836734693877552e-05, "loss": 0.6009, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 436 }, { "epoch": 0.05942344302420451, "grad_norm": 0.58984375, "learning_rate": 1.1863945578231293e-05, "loss": 0.6424, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 437 }, { "epoch": 0.05955942344302421, "grad_norm": 0.51171875, "learning_rate": 1.1891156462585033e-05, "loss": 0.9295, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 438 }, { "epoch": 0.059695403861843896, "grad_norm": 0.37890625, "learning_rate": 1.1918367346938777e-05, "loss": 0.7046, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 439 }, { "epoch": 0.059831384280663584, "grad_norm": 0.400390625, "learning_rate": 1.1945578231292519e-05, "loss": 0.6343, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 440 }, { "epoch": 0.05996736469948327, "grad_norm": 0.53125, "learning_rate": 1.197278911564626e-05, "loss": 0.8281, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 441 }, { "epoch": 0.06010334511830297, "grad_norm": 0.462890625, "learning_rate": 1.2e-05, "loss": 0.7962, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 442 }, { "epoch": 0.060239325537122655, "grad_norm": 0.34375, "learning_rate": 1.2027210884353742e-05, "loss": 0.5674, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 443 }, { "epoch": 0.06037530595594234, "grad_norm": 0.32421875, "learning_rate": 1.2054421768707485e-05, "loss": 0.5415, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 444 }, { "epoch": 0.06051128637476203, "grad_norm": 0.306640625, "learning_rate": 1.2081632653061225e-05, "loss": 0.505, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 445 }, { "epoch": 0.060647266793581726, "grad_norm": 0.55078125, "learning_rate": 1.2108843537414967e-05, "loss": 0.783, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 446 }, { "epoch": 0.060783247212401414, "grad_norm": 0.44140625, "learning_rate": 1.2136054421768709e-05, "loss": 0.613, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 447 }, { "epoch": 0.0609192276312211, "grad_norm": 3.375, "learning_rate": 1.2163265306122449e-05, "loss": 0.8394, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 448 }, { "epoch": 0.06105520805004079, "grad_norm": 0.6796875, "learning_rate": 1.2190476190476192e-05, "loss": 0.7384, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 449 }, { "epoch": 0.061191188468860486, "grad_norm": 0.291015625, "learning_rate": 1.2217687074829934e-05, "loss": 0.5768, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 450 }, { "epoch": 0.061327168887680174, "grad_norm": 0.4296875, "learning_rate": 1.2244897959183674e-05, "loss": 0.7186, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 451 }, { "epoch": 0.06146314930649986, "grad_norm": 0.5703125, "learning_rate": 1.2272108843537416e-05, "loss": 0.9295, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 452 }, { "epoch": 0.06159912972531956, "grad_norm": 0.443359375, "learning_rate": 1.2299319727891158e-05, "loss": 0.6816, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 453 }, { "epoch": 0.061735110144139245, "grad_norm": 0.59765625, "learning_rate": 1.2326530612244898e-05, "loss": 0.826, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 454 }, { "epoch": 0.06187109056295893, "grad_norm": 0.6328125, "learning_rate": 1.2353741496598641e-05, "loss": 0.6152, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 455 }, { "epoch": 0.06200707098177862, "grad_norm": 0.60546875, "learning_rate": 1.2380952380952383e-05, "loss": 0.5968, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 456 }, { "epoch": 0.062143051400598316, "grad_norm": 0.4609375, "learning_rate": 1.2408163265306123e-05, "loss": 0.6663, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 457 }, { "epoch": 0.062279031819418004, "grad_norm": 0.3359375, "learning_rate": 1.2435374149659865e-05, "loss": 0.5983, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 458 }, { "epoch": 0.06241501223823769, "grad_norm": 0.515625, "learning_rate": 1.2462585034013606e-05, "loss": 0.8436, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 459 }, { "epoch": 0.06255099265705738, "grad_norm": 0.6640625, "learning_rate": 1.248979591836735e-05, "loss": 0.6678, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 460 }, { "epoch": 0.06268697307587708, "grad_norm": 1.2109375, "learning_rate": 1.251700680272109e-05, "loss": 1.0929, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 461 }, { "epoch": 0.06282295349469677, "grad_norm": 0.482421875, "learning_rate": 1.2544217687074832e-05, "loss": 0.7142, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 462 }, { "epoch": 0.06295893391351645, "grad_norm": 0.466796875, "learning_rate": 1.2571428571428572e-05, "loss": 0.6984, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 463 }, { "epoch": 0.06309491433233615, "grad_norm": 0.2353515625, "learning_rate": 1.2598639455782313e-05, "loss": 0.4098, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 464 }, { "epoch": 0.06323089475115583, "grad_norm": 0.365234375, "learning_rate": 1.2625850340136055e-05, "loss": 0.5765, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 465 }, { "epoch": 0.06336687516997552, "grad_norm": 0.63671875, "learning_rate": 1.2653061224489798e-05, "loss": 0.7435, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 466 }, { "epoch": 0.06350285558879522, "grad_norm": 0.376953125, "learning_rate": 1.2680272108843539e-05, "loss": 0.6115, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 467 }, { "epoch": 0.0636388360076149, "grad_norm": 0.58984375, "learning_rate": 1.270748299319728e-05, "loss": 0.8625, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 468 }, { "epoch": 0.0637748164264346, "grad_norm": 0.443359375, "learning_rate": 1.273469387755102e-05, "loss": 0.8615, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 469 }, { "epoch": 0.06391079684525429, "grad_norm": 0.345703125, "learning_rate": 1.2761904761904762e-05, "loss": 0.6159, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 470 }, { "epoch": 0.06404677726407397, "grad_norm": 0.4375, "learning_rate": 1.2789115646258505e-05, "loss": 0.7295, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 471 }, { "epoch": 0.06418275768289367, "grad_norm": 0.984375, "learning_rate": 1.2816326530612247e-05, "loss": 0.9731, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 472 }, { "epoch": 0.06431873810171335, "grad_norm": 0.384765625, "learning_rate": 1.2843537414965987e-05, "loss": 0.6507, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 473 }, { "epoch": 0.06445471852053304, "grad_norm": 0.279296875, "learning_rate": 1.2870748299319729e-05, "loss": 0.4395, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 474 }, { "epoch": 0.06459069893935274, "grad_norm": 0.42578125, "learning_rate": 1.2897959183673469e-05, "loss": 0.7064, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 475 }, { "epoch": 0.06472667935817242, "grad_norm": 0.6796875, "learning_rate": 1.2925170068027212e-05, "loss": 0.7272, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 476 }, { "epoch": 0.06486265977699211, "grad_norm": 0.5390625, "learning_rate": 1.2952380952380954e-05, "loss": 0.4587, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 477 }, { "epoch": 0.06499864019581181, "grad_norm": 0.2734375, "learning_rate": 1.2979591836734696e-05, "loss": 0.564, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 478 }, { "epoch": 0.06513462061463149, "grad_norm": 0.361328125, "learning_rate": 1.3006802721088436e-05, "loss": 0.5409, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 479 }, { "epoch": 0.06527060103345118, "grad_norm": 0.58984375, "learning_rate": 1.3034013605442178e-05, "loss": 0.6755, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 480 }, { "epoch": 0.06540658145227088, "grad_norm": 0.462890625, "learning_rate": 1.3061224489795918e-05, "loss": 0.7492, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 481 }, { "epoch": 0.06554256187109056, "grad_norm": 0.486328125, "learning_rate": 1.3088435374149661e-05, "loss": 0.8991, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 482 }, { "epoch": 0.06567854228991026, "grad_norm": 0.310546875, "learning_rate": 1.3115646258503403e-05, "loss": 0.5011, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 483 }, { "epoch": 0.06581452270872994, "grad_norm": 0.232421875, "learning_rate": 1.3142857142857145e-05, "loss": 0.3862, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 484 }, { "epoch": 0.06595050312754963, "grad_norm": 0.34375, "learning_rate": 1.3170068027210885e-05, "loss": 0.6554, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 485 }, { "epoch": 0.06608648354636933, "grad_norm": 0.46875, "learning_rate": 1.3197278911564626e-05, "loss": 0.6403, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 486 }, { "epoch": 0.06622246396518901, "grad_norm": 0.375, "learning_rate": 1.322448979591837e-05, "loss": 0.7493, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 487 }, { "epoch": 0.0663584443840087, "grad_norm": 0.54296875, "learning_rate": 1.325170068027211e-05, "loss": 0.6587, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 488 }, { "epoch": 0.0664944248028284, "grad_norm": 0.66796875, "learning_rate": 1.3278911564625852e-05, "loss": 0.3784, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 489 }, { "epoch": 0.06663040522164808, "grad_norm": 0.6796875, "learning_rate": 1.3306122448979593e-05, "loss": 0.8104, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 490 }, { "epoch": 0.06676638564046777, "grad_norm": 0.373046875, "learning_rate": 1.3333333333333333e-05, "loss": 0.6315, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 491 }, { "epoch": 0.06690236605928747, "grad_norm": 0.421875, "learning_rate": 1.3360544217687077e-05, "loss": 0.5985, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 492 }, { "epoch": 0.06703834647810715, "grad_norm": 0.431640625, "learning_rate": 1.3387755102040818e-05, "loss": 0.5402, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 493 }, { "epoch": 0.06717432689692684, "grad_norm": 0.361328125, "learning_rate": 1.3414965986394558e-05, "loss": 0.6382, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 494 }, { "epoch": 0.06731030731574653, "grad_norm": 0.474609375, "learning_rate": 1.34421768707483e-05, "loss": 0.8337, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 495 }, { "epoch": 0.06744628773456622, "grad_norm": 0.41796875, "learning_rate": 1.3469387755102042e-05, "loss": 0.7397, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 496 }, { "epoch": 0.06758226815338592, "grad_norm": 0.34765625, "learning_rate": 1.3496598639455782e-05, "loss": 0.5174, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 497 }, { "epoch": 0.0677182485722056, "grad_norm": 0.5234375, "learning_rate": 1.3523809523809525e-05, "loss": 1.0026, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 498 }, { "epoch": 0.06785422899102529, "grad_norm": 0.59375, "learning_rate": 1.3551020408163267e-05, "loss": 0.7067, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 499 }, { "epoch": 0.06799020940984499, "grad_norm": 0.314453125, "learning_rate": 1.3578231292517007e-05, "loss": 0.5436, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 500 }, { "epoch": 0.06812618982866467, "grad_norm": 0.31640625, "learning_rate": 1.3605442176870749e-05, "loss": 0.5347, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 501 }, { "epoch": 0.06826217024748436, "grad_norm": 0.50390625, "learning_rate": 1.363265306122449e-05, "loss": 0.7526, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 502 }, { "epoch": 0.06839815066630406, "grad_norm": 0.341796875, "learning_rate": 1.3659863945578234e-05, "loss": 0.6105, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 503 }, { "epoch": 0.06853413108512374, "grad_norm": 0.3515625, "learning_rate": 1.3687074829931974e-05, "loss": 0.6859, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 504 }, { "epoch": 0.06867011150394343, "grad_norm": 0.33203125, "learning_rate": 1.3714285714285716e-05, "loss": 0.7122, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 505 }, { "epoch": 0.06880609192276312, "grad_norm": 0.470703125, "learning_rate": 1.3741496598639456e-05, "loss": 0.8667, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 506 }, { "epoch": 0.06894207234158281, "grad_norm": 0.73046875, "learning_rate": 1.3768707482993198e-05, "loss": 0.7036, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 507 }, { "epoch": 0.0690780527604025, "grad_norm": 0.455078125, "learning_rate": 1.3795918367346941e-05, "loss": 0.7152, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 508 }, { "epoch": 0.06921403317922219, "grad_norm": 0.734375, "learning_rate": 1.3823129251700683e-05, "loss": 0.8081, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 509 }, { "epoch": 0.06935001359804188, "grad_norm": 0.330078125, "learning_rate": 1.3850340136054423e-05, "loss": 0.5981, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 510 }, { "epoch": 0.06948599401686158, "grad_norm": 3.0625, "learning_rate": 1.3877551020408165e-05, "loss": 0.7912, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 511 }, { "epoch": 0.06962197443568126, "grad_norm": 0.498046875, "learning_rate": 1.3904761904761905e-05, "loss": 0.8778, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 512 }, { "epoch": 0.06975795485450095, "grad_norm": 0.38671875, "learning_rate": 1.3931972789115646e-05, "loss": 0.5428, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 513 }, { "epoch": 0.06989393527332065, "grad_norm": 0.357421875, "learning_rate": 1.395918367346939e-05, "loss": 0.7606, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 514 }, { "epoch": 0.07002991569214033, "grad_norm": 0.443359375, "learning_rate": 1.3986394557823131e-05, "loss": 0.5864, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 515 }, { "epoch": 0.07016589611096002, "grad_norm": 0.369140625, "learning_rate": 1.4013605442176872e-05, "loss": 0.6125, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 516 }, { "epoch": 0.0703018765297797, "grad_norm": 0.416015625, "learning_rate": 1.4040816326530613e-05, "loss": 0.5894, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 517 }, { "epoch": 0.0704378569485994, "grad_norm": 0.78125, "learning_rate": 1.4068027210884353e-05, "loss": 0.8887, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 518 }, { "epoch": 0.0705738373674191, "grad_norm": 0.423828125, "learning_rate": 1.4095238095238097e-05, "loss": 0.6815, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 519 }, { "epoch": 0.07070981778623878, "grad_norm": 0.41796875, "learning_rate": 1.4122448979591838e-05, "loss": 0.8229, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 520 }, { "epoch": 0.07084579820505847, "grad_norm": 0.68359375, "learning_rate": 1.414965986394558e-05, "loss": 0.7354, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 521 }, { "epoch": 0.07098177862387817, "grad_norm": 0.515625, "learning_rate": 1.417687074829932e-05, "loss": 0.5526, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 522 }, { "epoch": 0.07111775904269785, "grad_norm": 0.423828125, "learning_rate": 1.4204081632653062e-05, "loss": 0.6772, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 523 }, { "epoch": 0.07125373946151754, "grad_norm": 0.5234375, "learning_rate": 1.4231292517006802e-05, "loss": 0.617, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 524 }, { "epoch": 0.07138971988033724, "grad_norm": 0.48046875, "learning_rate": 1.4258503401360545e-05, "loss": 0.5855, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 525 }, { "epoch": 0.07152570029915692, "grad_norm": 0.40625, "learning_rate": 1.4285714285714287e-05, "loss": 0.7098, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 526 }, { "epoch": 0.07166168071797661, "grad_norm": 0.484375, "learning_rate": 1.4312925170068029e-05, "loss": 0.7419, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 527 }, { "epoch": 0.0717976611367963, "grad_norm": 0.7890625, "learning_rate": 1.4340136054421769e-05, "loss": 0.7279, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 528 }, { "epoch": 0.07193364155561599, "grad_norm": 0.6953125, "learning_rate": 1.436734693877551e-05, "loss": 0.5246, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 529 }, { "epoch": 0.07206962197443569, "grad_norm": 0.32421875, "learning_rate": 1.4394557823129254e-05, "loss": 0.4725, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 530 }, { "epoch": 0.07220560239325537, "grad_norm": 0.5078125, "learning_rate": 1.4421768707482994e-05, "loss": 0.7705, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 531 }, { "epoch": 0.07234158281207506, "grad_norm": 0.609375, "learning_rate": 1.4448979591836736e-05, "loss": 0.6609, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 532 }, { "epoch": 0.07247756323089476, "grad_norm": 0.490234375, "learning_rate": 1.4476190476190478e-05, "loss": 0.7612, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 533 }, { "epoch": 0.07261354364971444, "grad_norm": 0.84765625, "learning_rate": 1.4503401360544218e-05, "loss": 0.472, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 534 }, { "epoch": 0.07274952406853413, "grad_norm": 0.52734375, "learning_rate": 1.4530612244897961e-05, "loss": 0.9489, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 535 }, { "epoch": 0.07288550448735383, "grad_norm": 0.388671875, "learning_rate": 1.4557823129251703e-05, "loss": 0.7168, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 536 }, { "epoch": 0.07302148490617351, "grad_norm": 0.2578125, "learning_rate": 1.4585034013605443e-05, "loss": 0.3506, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 537 }, { "epoch": 0.0731574653249932, "grad_norm": 0.435546875, "learning_rate": 1.4612244897959185e-05, "loss": 0.6997, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 538 }, { "epoch": 0.07329344574381289, "grad_norm": 0.45703125, "learning_rate": 1.4639455782312926e-05, "loss": 0.7103, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 539 }, { "epoch": 0.07342942616263258, "grad_norm": 0.498046875, "learning_rate": 1.4666666666666666e-05, "loss": 0.6815, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 540 }, { "epoch": 0.07356540658145228, "grad_norm": 0.4140625, "learning_rate": 1.469387755102041e-05, "loss": 0.4292, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 541 }, { "epoch": 0.07370138700027196, "grad_norm": 0.51171875, "learning_rate": 1.4721088435374151e-05, "loss": 0.8656, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 542 }, { "epoch": 0.07383736741909165, "grad_norm": 0.3828125, "learning_rate": 1.4748299319727891e-05, "loss": 0.6108, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 543 }, { "epoch": 0.07397334783791135, "grad_norm": 0.349609375, "learning_rate": 1.4775510204081633e-05, "loss": 0.5869, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 544 }, { "epoch": 0.07410932825673103, "grad_norm": 0.57421875, "learning_rate": 1.4802721088435375e-05, "loss": 0.7852, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 545 }, { "epoch": 0.07424530867555072, "grad_norm": 0.1982421875, "learning_rate": 1.4829931972789118e-05, "loss": 0.3281, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 546 }, { "epoch": 0.07438128909437042, "grad_norm": 0.44140625, "learning_rate": 1.4857142857142858e-05, "loss": 0.7008, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 547 }, { "epoch": 0.0745172695131901, "grad_norm": 0.52734375, "learning_rate": 1.48843537414966e-05, "loss": 0.7467, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 548 }, { "epoch": 0.0746532499320098, "grad_norm": 1.34375, "learning_rate": 1.4911564625850342e-05, "loss": 0.9422, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 549 }, { "epoch": 0.07478923035082947, "grad_norm": 0.416015625, "learning_rate": 1.4938775510204082e-05, "loss": 0.6206, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 550 }, { "epoch": 0.07492521076964917, "grad_norm": 0.6015625, "learning_rate": 1.4965986394557825e-05, "loss": 0.3831, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 551 }, { "epoch": 0.07506119118846886, "grad_norm": 0.61328125, "learning_rate": 1.4993197278911567e-05, "loss": 0.5942, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 552 }, { "epoch": 0.07519717160728855, "grad_norm": 0.6953125, "learning_rate": 1.5020408163265307e-05, "loss": 0.8011, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 553 }, { "epoch": 0.07533315202610824, "grad_norm": 0.5703125, "learning_rate": 1.5047619047619049e-05, "loss": 0.5545, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 554 }, { "epoch": 0.07546913244492794, "grad_norm": 0.388671875, "learning_rate": 1.507482993197279e-05, "loss": 0.4624, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 555 }, { "epoch": 0.07560511286374762, "grad_norm": 0.48828125, "learning_rate": 1.510204081632653e-05, "loss": 0.7686, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 556 }, { "epoch": 0.07574109328256731, "grad_norm": 0.58984375, "learning_rate": 1.5129251700680274e-05, "loss": 0.7417, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 557 }, { "epoch": 0.075877073701387, "grad_norm": 0.455078125, "learning_rate": 1.5156462585034016e-05, "loss": 0.9266, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 558 }, { "epoch": 0.07601305412020669, "grad_norm": 0.484375, "learning_rate": 1.5183673469387756e-05, "loss": 0.7153, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 559 }, { "epoch": 0.07614903453902638, "grad_norm": 0.5234375, "learning_rate": 1.5210884353741498e-05, "loss": 0.8066, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 560 }, { "epoch": 0.07628501495784606, "grad_norm": 0.60546875, "learning_rate": 1.523809523809524e-05, "loss": 0.7179, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 561 }, { "epoch": 0.07642099537666576, "grad_norm": 1.5546875, "learning_rate": 1.526530612244898e-05, "loss": 0.7503, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 562 }, { "epoch": 0.07655697579548545, "grad_norm": 0.498046875, "learning_rate": 1.5292517006802724e-05, "loss": 0.799, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 563 }, { "epoch": 0.07669295621430514, "grad_norm": 0.451171875, "learning_rate": 1.5319727891156464e-05, "loss": 0.7425, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 564 }, { "epoch": 0.07682893663312483, "grad_norm": 0.462890625, "learning_rate": 1.5346938775510204e-05, "loss": 0.4948, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 565 }, { "epoch": 0.07696491705194453, "grad_norm": 0.326171875, "learning_rate": 1.5374149659863945e-05, "loss": 0.4196, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 566 }, { "epoch": 0.07710089747076421, "grad_norm": 0.36328125, "learning_rate": 1.5401360544217688e-05, "loss": 0.6999, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 567 }, { "epoch": 0.0772368778895839, "grad_norm": 0.53125, "learning_rate": 1.542857142857143e-05, "loss": 0.6751, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 568 }, { "epoch": 0.07737285830840358, "grad_norm": 0.404296875, "learning_rate": 1.545578231292517e-05, "loss": 0.617, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 569 }, { "epoch": 0.07750883872722328, "grad_norm": 0.359375, "learning_rate": 1.548299319727891e-05, "loss": 0.7288, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 570 }, { "epoch": 0.07764481914604297, "grad_norm": 0.5859375, "learning_rate": 1.5510204081632655e-05, "loss": 0.9116, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 571 }, { "epoch": 0.07778079956486265, "grad_norm": 0.67578125, "learning_rate": 1.5537414965986395e-05, "loss": 0.5809, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 572 }, { "epoch": 0.07791677998368235, "grad_norm": 0.38671875, "learning_rate": 1.556462585034014e-05, "loss": 0.7199, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 573 }, { "epoch": 0.07805276040250204, "grad_norm": 0.490234375, "learning_rate": 1.559183673469388e-05, "loss": 0.8745, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 574 }, { "epoch": 0.07818874082132173, "grad_norm": 0.4921875, "learning_rate": 1.5619047619047622e-05, "loss": 0.7891, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 575 }, { "epoch": 0.07832472124014142, "grad_norm": 0.4609375, "learning_rate": 1.5646258503401362e-05, "loss": 0.7611, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 576 }, { "epoch": 0.07846070165896112, "grad_norm": 0.412109375, "learning_rate": 1.5673469387755102e-05, "loss": 0.6354, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 577 }, { "epoch": 0.0785966820777808, "grad_norm": 0.98046875, "learning_rate": 1.5700680272108845e-05, "loss": 0.7998, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 578 }, { "epoch": 0.07873266249660049, "grad_norm": 0.33984375, "learning_rate": 1.5727891156462585e-05, "loss": 0.5814, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 579 }, { "epoch": 0.07886864291542017, "grad_norm": 0.40625, "learning_rate": 1.575510204081633e-05, "loss": 0.8164, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 580 }, { "epoch": 0.07900462333423987, "grad_norm": 0.369140625, "learning_rate": 1.578231292517007e-05, "loss": 0.7254, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 581 }, { "epoch": 0.07914060375305956, "grad_norm": 0.314453125, "learning_rate": 1.580952380952381e-05, "loss": 0.5124, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 582 }, { "epoch": 0.07927658417187924, "grad_norm": 0.333984375, "learning_rate": 1.5836734693877552e-05, "loss": 0.7453, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 583 }, { "epoch": 0.07941256459069894, "grad_norm": 0.45703125, "learning_rate": 1.5863945578231296e-05, "loss": 0.8633, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 584 }, { "epoch": 0.07954854500951863, "grad_norm": 0.52734375, "learning_rate": 1.5891156462585036e-05, "loss": 0.5838, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 585 }, { "epoch": 0.07968452542833832, "grad_norm": 0.408203125, "learning_rate": 1.5918367346938776e-05, "loss": 0.7746, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 586 }, { "epoch": 0.07982050584715801, "grad_norm": 0.625, "learning_rate": 1.594557823129252e-05, "loss": 0.7969, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 587 }, { "epoch": 0.0799564862659777, "grad_norm": 0.41796875, "learning_rate": 1.597278911564626e-05, "loss": 0.5858, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 588 }, { "epoch": 0.08009246668479739, "grad_norm": 0.416015625, "learning_rate": 1.6000000000000003e-05, "loss": 0.7933, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 589 }, { "epoch": 0.08022844710361708, "grad_norm": 0.50390625, "learning_rate": 1.6027210884353743e-05, "loss": 0.9272, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 590 }, { "epoch": 0.08036442752243676, "grad_norm": 0.5703125, "learning_rate": 1.6054421768707483e-05, "loss": 0.8588, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 591 }, { "epoch": 0.08050040794125646, "grad_norm": 0.5234375, "learning_rate": 1.6081632653061226e-05, "loss": 0.5306, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 592 }, { "epoch": 0.08063638836007615, "grad_norm": 0.37109375, "learning_rate": 1.6108843537414966e-05, "loss": 0.6553, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 593 }, { "epoch": 0.08077236877889583, "grad_norm": 0.43359375, "learning_rate": 1.613605442176871e-05, "loss": 0.5894, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 594 }, { "epoch": 0.08090834919771553, "grad_norm": 0.283203125, "learning_rate": 1.616326530612245e-05, "loss": 0.4754, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 595 }, { "epoch": 0.08104432961653522, "grad_norm": 0.33984375, "learning_rate": 1.6190476190476193e-05, "loss": 0.7834, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 596 }, { "epoch": 0.0811803100353549, "grad_norm": 0.5625, "learning_rate": 1.6217687074829933e-05, "loss": 0.5929, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 597 }, { "epoch": 0.0813162904541746, "grad_norm": 0.375, "learning_rate": 1.6244897959183673e-05, "loss": 0.6294, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 598 }, { "epoch": 0.0814522708729943, "grad_norm": 0.419921875, "learning_rate": 1.6272108843537417e-05, "loss": 0.6735, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 599 }, { "epoch": 0.08158825129181398, "grad_norm": 0.36328125, "learning_rate": 1.629931972789116e-05, "loss": 0.6891, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 600 }, { "epoch": 0.08172423171063367, "grad_norm": 0.39453125, "learning_rate": 1.63265306122449e-05, "loss": 0.721, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 601 }, { "epoch": 0.08186021212945335, "grad_norm": 0.283203125, "learning_rate": 1.635374149659864e-05, "loss": 0.6235, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 602 }, { "epoch": 0.08199619254827305, "grad_norm": 0.388671875, "learning_rate": 1.6380952380952384e-05, "loss": 0.7349, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 603 }, { "epoch": 0.08213217296709274, "grad_norm": 0.34375, "learning_rate": 1.6408163265306124e-05, "loss": 0.5184, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 604 }, { "epoch": 0.08226815338591242, "grad_norm": 0.5546875, "learning_rate": 1.6435374149659867e-05, "loss": 0.6493, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 605 }, { "epoch": 0.08240413380473212, "grad_norm": 0.4140625, "learning_rate": 1.6462585034013607e-05, "loss": 0.5878, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 606 }, { "epoch": 0.08254011422355181, "grad_norm": 0.515625, "learning_rate": 1.6489795918367347e-05, "loss": 0.7202, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 607 }, { "epoch": 0.0826760946423715, "grad_norm": 0.36328125, "learning_rate": 1.651700680272109e-05, "loss": 0.4784, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 608 }, { "epoch": 0.08281207506119119, "grad_norm": 0.37109375, "learning_rate": 1.654421768707483e-05, "loss": 0.6064, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 609 }, { "epoch": 0.08294805548001088, "grad_norm": 0.462890625, "learning_rate": 1.6571428571428574e-05, "loss": 0.7153, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 610 }, { "epoch": 0.08308403589883057, "grad_norm": 0.35546875, "learning_rate": 1.6598639455782314e-05, "loss": 0.6732, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 611 }, { "epoch": 0.08322001631765026, "grad_norm": 0.462890625, "learning_rate": 1.6625850340136057e-05, "loss": 0.6934, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 612 }, { "epoch": 0.08335599673646994, "grad_norm": 0.6328125, "learning_rate": 1.6653061224489797e-05, "loss": 0.8293, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 613 }, { "epoch": 0.08349197715528964, "grad_norm": 0.4375, "learning_rate": 1.6680272108843537e-05, "loss": 0.6772, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 614 }, { "epoch": 0.08362795757410933, "grad_norm": 0.447265625, "learning_rate": 1.670748299319728e-05, "loss": 0.6994, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 615 }, { "epoch": 0.08376393799292901, "grad_norm": 0.5, "learning_rate": 1.673469387755102e-05, "loss": 0.6186, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 616 }, { "epoch": 0.08389991841174871, "grad_norm": 0.6875, "learning_rate": 1.6761904761904764e-05, "loss": 0.5752, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 617 }, { "epoch": 0.0840358988305684, "grad_norm": 0.451171875, "learning_rate": 1.6789115646258504e-05, "loss": 0.6973, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 618 }, { "epoch": 0.08417187924938808, "grad_norm": 0.271484375, "learning_rate": 1.6816326530612244e-05, "loss": 0.5794, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 619 }, { "epoch": 0.08430785966820778, "grad_norm": 0.349609375, "learning_rate": 1.6843537414965988e-05, "loss": 0.5775, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 620 }, { "epoch": 0.08444384008702747, "grad_norm": 0.431640625, "learning_rate": 1.687074829931973e-05, "loss": 0.6559, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 621 }, { "epoch": 0.08457982050584716, "grad_norm": 1.046875, "learning_rate": 1.689795918367347e-05, "loss": 0.4826, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 622 }, { "epoch": 0.08471580092466685, "grad_norm": 0.4140625, "learning_rate": 1.692517006802721e-05, "loss": 0.6742, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 623 }, { "epoch": 0.08485178134348653, "grad_norm": 0.21484375, "learning_rate": 1.6952380952380955e-05, "loss": 0.3547, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 624 }, { "epoch": 0.08498776176230623, "grad_norm": 0.73828125, "learning_rate": 1.6979591836734695e-05, "loss": 0.4362, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 625 }, { "epoch": 0.08512374218112592, "grad_norm": 0.70703125, "learning_rate": 1.7006802721088435e-05, "loss": 0.8407, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 626 }, { "epoch": 0.0852597225999456, "grad_norm": 0.408203125, "learning_rate": 1.7034013605442178e-05, "loss": 0.6693, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 627 }, { "epoch": 0.0853957030187653, "grad_norm": 0.396484375, "learning_rate": 1.7061224489795922e-05, "loss": 0.6473, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 628 }, { "epoch": 0.085531683437585, "grad_norm": 0.4921875, "learning_rate": 1.7088435374149662e-05, "loss": 0.7824, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 629 }, { "epoch": 0.08566766385640467, "grad_norm": 0.23828125, "learning_rate": 1.7115646258503402e-05, "loss": 0.5239, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 630 }, { "epoch": 0.08580364427522437, "grad_norm": 0.365234375, "learning_rate": 1.7142857142857142e-05, "loss": 0.5028, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 631 }, { "epoch": 0.08593962469404406, "grad_norm": 0.5234375, "learning_rate": 1.7170068027210885e-05, "loss": 0.8488, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 632 }, { "epoch": 0.08607560511286375, "grad_norm": 0.48046875, "learning_rate": 1.719727891156463e-05, "loss": 0.5894, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 633 }, { "epoch": 0.08621158553168344, "grad_norm": 0.55859375, "learning_rate": 1.722448979591837e-05, "loss": 0.7474, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 634 }, { "epoch": 0.08634756595050312, "grad_norm": 0.49609375, "learning_rate": 1.725170068027211e-05, "loss": 0.6912, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 635 }, { "epoch": 0.08648354636932282, "grad_norm": 0.466796875, "learning_rate": 1.7278911564625852e-05, "loss": 0.6086, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 636 }, { "epoch": 0.08661952678814251, "grad_norm": 0.400390625, "learning_rate": 1.7306122448979596e-05, "loss": 0.6445, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 637 }, { "epoch": 0.08675550720696219, "grad_norm": 0.71875, "learning_rate": 1.7333333333333336e-05, "loss": 0.7062, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 638 }, { "epoch": 0.08689148762578189, "grad_norm": 0.453125, "learning_rate": 1.7360544217687076e-05, "loss": 0.7661, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 639 }, { "epoch": 0.08702746804460158, "grad_norm": 0.3984375, "learning_rate": 1.738775510204082e-05, "loss": 0.5462, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 640 }, { "epoch": 0.08716344846342126, "grad_norm": 0.625, "learning_rate": 1.741496598639456e-05, "loss": 0.6162, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 641 }, { "epoch": 0.08729942888224096, "grad_norm": 0.53125, "learning_rate": 1.74421768707483e-05, "loss": 0.5688, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 642 }, { "epoch": 0.08743540930106064, "grad_norm": 0.306640625, "learning_rate": 1.7469387755102043e-05, "loss": 0.4521, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 643 }, { "epoch": 0.08757138971988034, "grad_norm": 0.96875, "learning_rate": 1.7496598639455783e-05, "loss": 0.8927, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 644 }, { "epoch": 0.08770737013870003, "grad_norm": 0.3828125, "learning_rate": 1.7523809523809526e-05, "loss": 0.6654, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 645 }, { "epoch": 0.08784335055751971, "grad_norm": 0.458984375, "learning_rate": 1.7551020408163266e-05, "loss": 0.6686, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 646 }, { "epoch": 0.0879793309763394, "grad_norm": 0.51953125, "learning_rate": 1.7578231292517006e-05, "loss": 0.6924, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 647 }, { "epoch": 0.0881153113951591, "grad_norm": 0.515625, "learning_rate": 1.760544217687075e-05, "loss": 0.8356, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 648 }, { "epoch": 0.08825129181397878, "grad_norm": 0.875, "learning_rate": 1.7632653061224493e-05, "loss": 0.6258, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 649 }, { "epoch": 0.08838727223279848, "grad_norm": 0.484375, "learning_rate": 1.7659863945578233e-05, "loss": 0.9227, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 650 }, { "epoch": 0.08852325265161817, "grad_norm": 0.390625, "learning_rate": 1.7687074829931973e-05, "loss": 0.6056, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 651 }, { "epoch": 0.08865923307043785, "grad_norm": 0.5546875, "learning_rate": 1.7714285714285717e-05, "loss": 0.9338, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 652 }, { "epoch": 0.08879521348925755, "grad_norm": 0.4296875, "learning_rate": 1.774149659863946e-05, "loss": 0.6589, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 653 }, { "epoch": 0.08893119390807723, "grad_norm": 0.609375, "learning_rate": 1.77687074829932e-05, "loss": 0.8381, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 654 }, { "epoch": 0.08906717432689693, "grad_norm": 0.30859375, "learning_rate": 1.779591836734694e-05, "loss": 0.6152, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 655 }, { "epoch": 0.08920315474571662, "grad_norm": 0.51171875, "learning_rate": 1.782312925170068e-05, "loss": 0.8177, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 656 }, { "epoch": 0.0893391351645363, "grad_norm": 0.953125, "learning_rate": 1.7850340136054423e-05, "loss": 0.7708, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 657 }, { "epoch": 0.089475115583356, "grad_norm": 0.73046875, "learning_rate": 1.7877551020408164e-05, "loss": 0.7433, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 658 }, { "epoch": 0.08961109600217569, "grad_norm": 1.1640625, "learning_rate": 1.7904761904761907e-05, "loss": 0.7938, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 659 }, { "epoch": 0.08974707642099537, "grad_norm": 0.416015625, "learning_rate": 1.7931972789115647e-05, "loss": 0.6092, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 660 }, { "epoch": 0.08988305683981507, "grad_norm": 0.390625, "learning_rate": 1.795918367346939e-05, "loss": 0.5627, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 661 }, { "epoch": 0.09001903725863476, "grad_norm": 0.38671875, "learning_rate": 1.798639455782313e-05, "loss": 0.6328, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 662 }, { "epoch": 0.09015501767745444, "grad_norm": 0.400390625, "learning_rate": 1.801360544217687e-05, "loss": 0.5605, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 663 }, { "epoch": 0.09029099809627414, "grad_norm": 0.302734375, "learning_rate": 1.8040816326530614e-05, "loss": 0.5127, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 664 }, { "epoch": 0.09042697851509382, "grad_norm": 0.43359375, "learning_rate": 1.8068027210884357e-05, "loss": 0.6668, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 665 }, { "epoch": 0.09056295893391351, "grad_norm": 0.671875, "learning_rate": 1.8095238095238097e-05, "loss": 0.8872, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 666 }, { "epoch": 0.09069893935273321, "grad_norm": 0.48828125, "learning_rate": 1.8122448979591837e-05, "loss": 0.6076, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 667 }, { "epoch": 0.09083491977155289, "grad_norm": 0.515625, "learning_rate": 1.8149659863945577e-05, "loss": 0.6606, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 668 }, { "epoch": 0.09097090019037259, "grad_norm": 0.462890625, "learning_rate": 1.817687074829932e-05, "loss": 0.7957, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 669 }, { "epoch": 0.09110688060919228, "grad_norm": 0.4296875, "learning_rate": 1.8204081632653064e-05, "loss": 0.6429, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 670 }, { "epoch": 0.09124286102801196, "grad_norm": 0.435546875, "learning_rate": 1.8231292517006804e-05, "loss": 0.512, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 671 }, { "epoch": 0.09137884144683166, "grad_norm": 0.400390625, "learning_rate": 1.8258503401360544e-05, "loss": 0.8014, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 672 }, { "epoch": 0.09151482186565135, "grad_norm": 0.5078125, "learning_rate": 1.8285714285714288e-05, "loss": 0.4766, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 673 }, { "epoch": 0.09165080228447103, "grad_norm": 0.396484375, "learning_rate": 1.8312925170068028e-05, "loss": 0.6714, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 674 }, { "epoch": 0.09178678270329073, "grad_norm": 0.52734375, "learning_rate": 1.834013605442177e-05, "loss": 0.5931, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 675 }, { "epoch": 0.09192276312211041, "grad_norm": 0.5625, "learning_rate": 1.836734693877551e-05, "loss": 0.6564, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 676 }, { "epoch": 0.0920587435409301, "grad_norm": 0.4921875, "learning_rate": 1.8394557823129255e-05, "loss": 0.7808, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 677 }, { "epoch": 0.0921947239597498, "grad_norm": 0.361328125, "learning_rate": 1.8421768707482995e-05, "loss": 0.6341, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 678 }, { "epoch": 0.09233070437856948, "grad_norm": 0.58203125, "learning_rate": 1.8448979591836735e-05, "loss": 0.998, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 679 }, { "epoch": 0.09246668479738918, "grad_norm": 0.3515625, "learning_rate": 1.8476190476190478e-05, "loss": 0.6343, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 680 }, { "epoch": 0.09260266521620887, "grad_norm": 0.470703125, "learning_rate": 1.8503401360544218e-05, "loss": 0.8618, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 681 }, { "epoch": 0.09273864563502855, "grad_norm": 0.28125, "learning_rate": 1.853061224489796e-05, "loss": 0.5597, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 682 }, { "epoch": 0.09287462605384825, "grad_norm": 0.435546875, "learning_rate": 1.8557823129251702e-05, "loss": 0.7568, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 683 }, { "epoch": 0.09301060647266794, "grad_norm": 1.4296875, "learning_rate": 1.8585034013605442e-05, "loss": 0.7145, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 684 }, { "epoch": 0.09314658689148762, "grad_norm": 0.7421875, "learning_rate": 1.8612244897959185e-05, "loss": 0.6692, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 685 }, { "epoch": 0.09328256731030732, "grad_norm": 0.53125, "learning_rate": 1.863945578231293e-05, "loss": 0.6429, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 686 }, { "epoch": 0.093418547729127, "grad_norm": 0.53125, "learning_rate": 1.866666666666667e-05, "loss": 0.8006, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 687 }, { "epoch": 0.0935545281479467, "grad_norm": 0.396484375, "learning_rate": 1.869387755102041e-05, "loss": 0.7323, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 688 }, { "epoch": 0.09369050856676639, "grad_norm": 0.37890625, "learning_rate": 1.8721088435374152e-05, "loss": 0.6424, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 689 }, { "epoch": 0.09382648898558607, "grad_norm": 0.318359375, "learning_rate": 1.8748299319727892e-05, "loss": 0.5952, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 690 }, { "epoch": 0.09396246940440577, "grad_norm": 0.515625, "learning_rate": 1.8775510204081636e-05, "loss": 0.7869, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 691 }, { "epoch": 0.09409844982322546, "grad_norm": 0.81640625, "learning_rate": 1.8802721088435376e-05, "loss": 0.5594, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 692 }, { "epoch": 0.09423443024204514, "grad_norm": 0.40625, "learning_rate": 1.8829931972789116e-05, "loss": 0.6222, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 693 }, { "epoch": 0.09437041066086484, "grad_norm": 0.283203125, "learning_rate": 1.885714285714286e-05, "loss": 0.5317, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 694 }, { "epoch": 0.09450639107968453, "grad_norm": 0.28515625, "learning_rate": 1.88843537414966e-05, "loss": 0.4914, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 695 }, { "epoch": 0.09464237149850421, "grad_norm": 0.353515625, "learning_rate": 1.8911564625850343e-05, "loss": 0.5547, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 696 }, { "epoch": 0.09477835191732391, "grad_norm": 0.4765625, "learning_rate": 1.8938775510204083e-05, "loss": 0.7684, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 697 }, { "epoch": 0.09491433233614359, "grad_norm": 0.5234375, "learning_rate": 1.8965986394557826e-05, "loss": 0.6665, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 698 }, { "epoch": 0.09505031275496328, "grad_norm": 0.5078125, "learning_rate": 1.8993197278911566e-05, "loss": 0.9193, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 699 }, { "epoch": 0.09518629317378298, "grad_norm": 0.4140625, "learning_rate": 1.9020408163265306e-05, "loss": 0.5843, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 700 }, { "epoch": 0.09532227359260266, "grad_norm": 0.390625, "learning_rate": 1.904761904761905e-05, "loss": 0.5347, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 701 }, { "epoch": 0.09545825401142236, "grad_norm": 0.4609375, "learning_rate": 1.9074829931972793e-05, "loss": 0.7246, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 702 }, { "epoch": 0.09559423443024205, "grad_norm": 0.63671875, "learning_rate": 1.9102040816326533e-05, "loss": 0.6709, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 703 }, { "epoch": 0.09573021484906173, "grad_norm": 0.2734375, "learning_rate": 1.9129251700680273e-05, "loss": 0.5265, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 704 }, { "epoch": 0.09586619526788143, "grad_norm": 0.494140625, "learning_rate": 1.9156462585034013e-05, "loss": 0.7196, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 705 }, { "epoch": 0.09600217568670112, "grad_norm": 1.40625, "learning_rate": 1.9183673469387756e-05, "loss": 0.734, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 706 }, { "epoch": 0.0961381561055208, "grad_norm": 0.6640625, "learning_rate": 1.92108843537415e-05, "loss": 0.7389, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 707 }, { "epoch": 0.0962741365243405, "grad_norm": 0.3125, "learning_rate": 1.923809523809524e-05, "loss": 0.5981, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 708 }, { "epoch": 0.09641011694316018, "grad_norm": 0.5234375, "learning_rate": 1.926530612244898e-05, "loss": 0.6906, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 709 }, { "epoch": 0.09654609736197987, "grad_norm": 0.341796875, "learning_rate": 1.9292517006802723e-05, "loss": 0.6302, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 710 }, { "epoch": 0.09668207778079957, "grad_norm": 0.33203125, "learning_rate": 1.9319727891156463e-05, "loss": 0.6865, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 711 }, { "epoch": 0.09681805819961925, "grad_norm": 0.330078125, "learning_rate": 1.9346938775510207e-05, "loss": 0.5322, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 712 }, { "epoch": 0.09695403861843895, "grad_norm": 0.39453125, "learning_rate": 1.9374149659863947e-05, "loss": 0.7124, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 713 }, { "epoch": 0.09709001903725864, "grad_norm": 0.921875, "learning_rate": 1.940136054421769e-05, "loss": 0.8219, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 714 }, { "epoch": 0.09722599945607832, "grad_norm": 0.27734375, "learning_rate": 1.942857142857143e-05, "loss": 0.45, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 715 }, { "epoch": 0.09736197987489802, "grad_norm": 0.4375, "learning_rate": 1.945578231292517e-05, "loss": 0.6948, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 716 }, { "epoch": 0.09749796029371771, "grad_norm": 0.3671875, "learning_rate": 1.9482993197278914e-05, "loss": 0.6908, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 717 }, { "epoch": 0.09763394071253739, "grad_norm": 0.8359375, "learning_rate": 1.9510204081632654e-05, "loss": 0.638, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 718 }, { "epoch": 0.09776992113135709, "grad_norm": 0.33203125, "learning_rate": 1.9537414965986397e-05, "loss": 0.5465, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 719 }, { "epoch": 0.09790590155017677, "grad_norm": 0.3125, "learning_rate": 1.9564625850340137e-05, "loss": 0.5534, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 720 }, { "epoch": 0.09804188196899646, "grad_norm": 0.421875, "learning_rate": 1.9591836734693877e-05, "loss": 0.7804, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 721 }, { "epoch": 0.09817786238781616, "grad_norm": 0.6953125, "learning_rate": 1.961904761904762e-05, "loss": 0.6732, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 722 }, { "epoch": 0.09831384280663584, "grad_norm": 0.427734375, "learning_rate": 1.9646258503401364e-05, "loss": 0.7542, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 723 }, { "epoch": 0.09844982322545553, "grad_norm": 0.306640625, "learning_rate": 1.9673469387755104e-05, "loss": 0.4567, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 724 }, { "epoch": 0.09858580364427523, "grad_norm": 0.392578125, "learning_rate": 1.9700680272108844e-05, "loss": 0.5392, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 725 }, { "epoch": 0.09872178406309491, "grad_norm": 0.359375, "learning_rate": 1.9727891156462588e-05, "loss": 0.597, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 726 }, { "epoch": 0.0988577644819146, "grad_norm": 0.40234375, "learning_rate": 1.9755102040816328e-05, "loss": 0.5068, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 727 }, { "epoch": 0.09899374490073429, "grad_norm": 0.376953125, "learning_rate": 1.978231292517007e-05, "loss": 0.4753, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 728 }, { "epoch": 0.09912972531955398, "grad_norm": 0.333984375, "learning_rate": 1.980952380952381e-05, "loss": 0.6844, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 729 }, { "epoch": 0.09926570573837368, "grad_norm": 0.4296875, "learning_rate": 1.983673469387755e-05, "loss": 0.7507, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 730 }, { "epoch": 0.09940168615719336, "grad_norm": 0.44921875, "learning_rate": 1.9863945578231295e-05, "loss": 0.813, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 731 }, { "epoch": 0.09953766657601305, "grad_norm": 0.265625, "learning_rate": 1.9891156462585035e-05, "loss": 0.4027, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 732 }, { "epoch": 0.09967364699483275, "grad_norm": 0.275390625, "learning_rate": 1.9918367346938775e-05, "loss": 0.5864, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 733 }, { "epoch": 0.09980962741365243, "grad_norm": 0.5078125, "learning_rate": 1.9945578231292518e-05, "loss": 0.9105, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 734 }, { "epoch": 0.09994560783247212, "grad_norm": 0.498046875, "learning_rate": 1.997278911564626e-05, "loss": 0.7962, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 735 }, { "epoch": 0.10008158825129182, "grad_norm": 0.427734375, "learning_rate": 2e-05, "loss": 0.6499, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 736 }, { "epoch": 0.1002175686701115, "grad_norm": 0.62109375, "learning_rate": 1.9999998872938873e-05, "loss": 0.6597, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 737 }, { "epoch": 0.1003535490889312, "grad_norm": 0.34765625, "learning_rate": 1.9999995491755736e-05, "loss": 0.5028, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 738 }, { "epoch": 0.10048952950775088, "grad_norm": 0.396484375, "learning_rate": 1.9999989856451352e-05, "loss": 0.8711, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 739 }, { "epoch": 0.10062550992657057, "grad_norm": 0.33984375, "learning_rate": 1.9999981967027e-05, "loss": 0.592, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 740 }, { "epoch": 0.10076149034539027, "grad_norm": 0.341796875, "learning_rate": 1.9999971823484444e-05, "loss": 0.5791, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 741 }, { "epoch": 0.10089747076420995, "grad_norm": 0.3203125, "learning_rate": 1.9999959425825983e-05, "loss": 0.6021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 742 }, { "epoch": 0.10103345118302964, "grad_norm": 0.5859375, "learning_rate": 1.9999944774054406e-05, "loss": 0.9653, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 743 }, { "epoch": 0.10116943160184934, "grad_norm": 1.53125, "learning_rate": 1.9999927868173018e-05, "loss": 0.8663, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 744 }, { "epoch": 0.10130541202066902, "grad_norm": 0.267578125, "learning_rate": 1.9999908708185626e-05, "loss": 0.4526, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 745 }, { "epoch": 0.10144139243948871, "grad_norm": 0.6171875, "learning_rate": 1.9999887294096557e-05, "loss": 0.569, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 746 }, { "epoch": 0.10157737285830841, "grad_norm": 0.283203125, "learning_rate": 1.999986362591063e-05, "loss": 0.4289, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 747 }, { "epoch": 0.10171335327712809, "grad_norm": 0.412109375, "learning_rate": 1.999983770363318e-05, "loss": 0.6408, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 748 }, { "epoch": 0.10184933369594779, "grad_norm": 0.3203125, "learning_rate": 1.9999809527270053e-05, "loss": 0.5498, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 749 }, { "epoch": 0.10198531411476747, "grad_norm": 0.458984375, "learning_rate": 1.9999779096827603e-05, "loss": 0.7254, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 750 }, { "epoch": 0.10212129453358716, "grad_norm": 0.515625, "learning_rate": 1.9999746412312684e-05, "loss": 0.8105, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 751 }, { "epoch": 0.10225727495240686, "grad_norm": 0.578125, "learning_rate": 1.9999711473732667e-05, "loss": 0.7323, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 752 }, { "epoch": 0.10239325537122654, "grad_norm": 0.6640625, "learning_rate": 1.9999674281095425e-05, "loss": 0.8099, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 753 }, { "epoch": 0.10252923579004623, "grad_norm": 0.318359375, "learning_rate": 1.9999634834409348e-05, "loss": 0.5698, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 754 }, { "epoch": 0.10266521620886593, "grad_norm": 0.515625, "learning_rate": 1.999959313368332e-05, "loss": 0.5747, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 755 }, { "epoch": 0.10280119662768561, "grad_norm": 0.59375, "learning_rate": 1.9999549178926743e-05, "loss": 0.7821, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 756 }, { "epoch": 0.1029371770465053, "grad_norm": 0.44140625, "learning_rate": 1.9999502970149524e-05, "loss": 0.5861, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 757 }, { "epoch": 0.103073157465325, "grad_norm": 0.408203125, "learning_rate": 1.9999454507362085e-05, "loss": 0.7007, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 758 }, { "epoch": 0.10320913788414468, "grad_norm": 0.34765625, "learning_rate": 1.9999403790575344e-05, "loss": 0.5633, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 759 }, { "epoch": 0.10334511830296438, "grad_norm": 0.45703125, "learning_rate": 1.9999350819800733e-05, "loss": 0.7147, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 760 }, { "epoch": 0.10348109872178406, "grad_norm": 0.34765625, "learning_rate": 1.99992955950502e-05, "loss": 0.4688, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 761 }, { "epoch": 0.10361707914060375, "grad_norm": 0.96484375, "learning_rate": 1.999923811633618e-05, "loss": 0.8335, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 762 }, { "epoch": 0.10375305955942345, "grad_norm": 0.515625, "learning_rate": 1.9999178383671644e-05, "loss": 0.582, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 763 }, { "epoch": 0.10388903997824313, "grad_norm": 0.41015625, "learning_rate": 1.9999116397070045e-05, "loss": 0.5915, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 764 }, { "epoch": 0.10402502039706282, "grad_norm": 0.76953125, "learning_rate": 1.9999052156545365e-05, "loss": 0.8001, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 765 }, { "epoch": 0.10416100081588252, "grad_norm": 0.39453125, "learning_rate": 1.9998985662112077e-05, "loss": 0.6354, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 766 }, { "epoch": 0.1042969812347022, "grad_norm": 0.392578125, "learning_rate": 1.999891691378517e-05, "loss": 0.6974, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 767 }, { "epoch": 0.1044329616535219, "grad_norm": 0.328125, "learning_rate": 1.9998845911580143e-05, "loss": 0.4718, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 768 }, { "epoch": 0.10456894207234159, "grad_norm": 0.55078125, "learning_rate": 1.9998772655513007e-05, "loss": 0.8677, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 769 }, { "epoch": 0.10470492249116127, "grad_norm": 0.65625, "learning_rate": 1.999869714560026e-05, "loss": 0.8179, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 770 }, { "epoch": 0.10484090290998097, "grad_norm": 0.77734375, "learning_rate": 1.999861938185894e-05, "loss": 0.5334, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 771 }, { "epoch": 0.10497688332880065, "grad_norm": 0.3359375, "learning_rate": 1.999853936430656e-05, "loss": 0.6322, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 772 }, { "epoch": 0.10511286374762034, "grad_norm": 0.56640625, "learning_rate": 1.9998457092961172e-05, "loss": 0.7773, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 773 }, { "epoch": 0.10524884416644004, "grad_norm": 0.484375, "learning_rate": 1.9998372567841308e-05, "loss": 0.772, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 774 }, { "epoch": 0.10538482458525972, "grad_norm": 0.400390625, "learning_rate": 1.9998285788966027e-05, "loss": 0.6842, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 775 }, { "epoch": 0.10552080500407941, "grad_norm": 0.326171875, "learning_rate": 1.9998196756354892e-05, "loss": 0.6032, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 776 }, { "epoch": 0.10565678542289911, "grad_norm": 0.359375, "learning_rate": 1.9998105470027965e-05, "loss": 0.6649, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 777 }, { "epoch": 0.10579276584171879, "grad_norm": 0.2255859375, "learning_rate": 1.999801193000583e-05, "loss": 0.3735, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 778 }, { "epoch": 0.10592874626053848, "grad_norm": 0.51953125, "learning_rate": 1.999791613630957e-05, "loss": 0.6261, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 779 }, { "epoch": 0.10606472667935818, "grad_norm": 0.53515625, "learning_rate": 1.9997818088960777e-05, "loss": 0.7533, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 780 }, { "epoch": 0.10620070709817786, "grad_norm": 0.609375, "learning_rate": 1.999771778798155e-05, "loss": 0.6027, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 781 }, { "epoch": 0.10633668751699756, "grad_norm": 0.3046875, "learning_rate": 1.9997615233394505e-05, "loss": 0.4487, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 782 }, { "epoch": 0.10647266793581724, "grad_norm": 0.35546875, "learning_rate": 1.9997510425222754e-05, "loss": 0.5986, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 783 }, { "epoch": 0.10660864835463693, "grad_norm": 0.78515625, "learning_rate": 1.999740336348992e-05, "loss": 0.6808, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 784 }, { "epoch": 0.10674462877345663, "grad_norm": 2.78125, "learning_rate": 1.9997294048220142e-05, "loss": 0.8021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 785 }, { "epoch": 0.10688060919227631, "grad_norm": 0.466796875, "learning_rate": 1.9997182479438056e-05, "loss": 0.7575, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 786 }, { "epoch": 0.107016589611096, "grad_norm": 0.5, "learning_rate": 1.9997068657168812e-05, "loss": 0.6589, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 787 }, { "epoch": 0.1071525700299157, "grad_norm": 0.474609375, "learning_rate": 1.999695258143807e-05, "loss": 0.7007, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 788 }, { "epoch": 0.10728855044873538, "grad_norm": 0.46484375, "learning_rate": 1.999683425227199e-05, "loss": 0.8709, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 789 }, { "epoch": 0.10742453086755507, "grad_norm": 0.369140625, "learning_rate": 1.999671366969725e-05, "loss": 0.8291, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 790 }, { "epoch": 0.10756051128637477, "grad_norm": 0.302734375, "learning_rate": 1.999659083374103e-05, "loss": 0.6457, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 791 }, { "epoch": 0.10769649170519445, "grad_norm": 0.361328125, "learning_rate": 1.999646574443101e-05, "loss": 0.6382, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 792 }, { "epoch": 0.10783247212401414, "grad_norm": 0.66796875, "learning_rate": 1.9996338401795395e-05, "loss": 0.8293, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 793 }, { "epoch": 0.10796845254283383, "grad_norm": 0.447265625, "learning_rate": 1.9996208805862893e-05, "loss": 0.7599, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 794 }, { "epoch": 0.10810443296165352, "grad_norm": 0.44140625, "learning_rate": 1.999607695666271e-05, "loss": 0.8091, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 795 }, { "epoch": 0.10824041338047322, "grad_norm": 0.39453125, "learning_rate": 1.9995942854224566e-05, "loss": 0.4757, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 796 }, { "epoch": 0.1083763937992929, "grad_norm": 0.365234375, "learning_rate": 1.9995806498578693e-05, "loss": 0.5868, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 797 }, { "epoch": 0.10851237421811259, "grad_norm": 0.279296875, "learning_rate": 1.9995667889755828e-05, "loss": 0.5632, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 798 }, { "epoch": 0.10864835463693229, "grad_norm": 0.49609375, "learning_rate": 1.9995527027787213e-05, "loss": 0.7951, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 799 }, { "epoch": 0.10878433505575197, "grad_norm": 0.337890625, "learning_rate": 1.9995383912704596e-05, "loss": 0.6177, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 800 }, { "epoch": 0.10892031547457166, "grad_norm": 0.5390625, "learning_rate": 1.999523854454024e-05, "loss": 0.8128, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 801 }, { "epoch": 0.10905629589339136, "grad_norm": 0.41015625, "learning_rate": 1.9995090923326916e-05, "loss": 0.6994, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 802 }, { "epoch": 0.10919227631221104, "grad_norm": 0.3984375, "learning_rate": 1.9994941049097898e-05, "loss": 0.6922, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 803 }, { "epoch": 0.10932825673103073, "grad_norm": 0.57421875, "learning_rate": 1.9994788921886968e-05, "loss": 0.5726, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 804 }, { "epoch": 0.10946423714985042, "grad_norm": 0.53515625, "learning_rate": 1.999463454172842e-05, "loss": 0.6491, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 805 }, { "epoch": 0.10960021756867011, "grad_norm": 0.3125, "learning_rate": 1.9994477908657044e-05, "loss": 0.6577, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 806 }, { "epoch": 0.1097361979874898, "grad_norm": 0.419921875, "learning_rate": 1.999431902270816e-05, "loss": 0.7557, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 807 }, { "epoch": 0.10987217840630949, "grad_norm": 0.4765625, "learning_rate": 1.9994157883917574e-05, "loss": 0.5723, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 808 }, { "epoch": 0.11000815882512918, "grad_norm": 0.451171875, "learning_rate": 1.9993994492321613e-05, "loss": 0.6719, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 809 }, { "epoch": 0.11014413924394888, "grad_norm": 0.54296875, "learning_rate": 1.9993828847957106e-05, "loss": 0.7189, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 810 }, { "epoch": 0.11028011966276856, "grad_norm": 0.451171875, "learning_rate": 1.999366095086139e-05, "loss": 0.5809, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 811 }, { "epoch": 0.11041610008158825, "grad_norm": 0.65234375, "learning_rate": 1.9993490801072316e-05, "loss": 0.6644, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 812 }, { "epoch": 0.11055208050040793, "grad_norm": 0.53515625, "learning_rate": 1.9993318398628233e-05, "loss": 0.673, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 813 }, { "epoch": 0.11068806091922763, "grad_norm": 0.44921875, "learning_rate": 1.9993143743568e-05, "loss": 0.7445, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 814 }, { "epoch": 0.11082404133804732, "grad_norm": 0.494140625, "learning_rate": 1.9992966835930995e-05, "loss": 0.8888, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 815 }, { "epoch": 0.110960021756867, "grad_norm": 0.349609375, "learning_rate": 1.999278767575709e-05, "loss": 0.5911, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 816 }, { "epoch": 0.1110960021756867, "grad_norm": 0.52734375, "learning_rate": 1.9992606263086664e-05, "loss": 0.5924, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 817 }, { "epoch": 0.1112319825945064, "grad_norm": 0.66796875, "learning_rate": 1.9992422597960624e-05, "loss": 0.6971, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 818 }, { "epoch": 0.11136796301332608, "grad_norm": 0.66796875, "learning_rate": 1.9992236680420357e-05, "loss": 0.7976, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 819 }, { "epoch": 0.11150394343214577, "grad_norm": 0.7109375, "learning_rate": 1.9992048510507776e-05, "loss": 0.8423, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 820 }, { "epoch": 0.11163992385096547, "grad_norm": 0.58984375, "learning_rate": 1.99918580882653e-05, "loss": 0.9198, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 821 }, { "epoch": 0.11177590426978515, "grad_norm": 0.47265625, "learning_rate": 1.999166541373585e-05, "loss": 0.7736, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 822 }, { "epoch": 0.11191188468860484, "grad_norm": 0.65625, "learning_rate": 1.9991470486962858e-05, "loss": 0.8368, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 823 }, { "epoch": 0.11204786510742452, "grad_norm": 0.28515625, "learning_rate": 1.999127330799026e-05, "loss": 0.4556, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 824 }, { "epoch": 0.11218384552624422, "grad_norm": 0.46484375, "learning_rate": 1.9991073876862502e-05, "loss": 0.6619, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 825 }, { "epoch": 0.11231982594506391, "grad_norm": 0.42578125, "learning_rate": 1.9990872193624544e-05, "loss": 0.5531, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 826 }, { "epoch": 0.1124558063638836, "grad_norm": 0.39453125, "learning_rate": 1.999066825832184e-05, "loss": 0.5579, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 827 }, { "epoch": 0.11259178678270329, "grad_norm": 0.6953125, "learning_rate": 1.9990462071000366e-05, "loss": 0.5505, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 828 }, { "epoch": 0.11272776720152299, "grad_norm": 0.59375, "learning_rate": 1.99902536317066e-05, "loss": 0.884, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 829 }, { "epoch": 0.11286374762034267, "grad_norm": 0.365234375, "learning_rate": 1.999004294048752e-05, "loss": 0.679, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 830 }, { "epoch": 0.11299972803916236, "grad_norm": 0.384765625, "learning_rate": 1.9989829997390625e-05, "loss": 0.7008, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 831 }, { "epoch": 0.11313570845798206, "grad_norm": 0.431640625, "learning_rate": 1.998961480246391e-05, "loss": 0.5817, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 832 }, { "epoch": 0.11327168887680174, "grad_norm": 0.54296875, "learning_rate": 1.9989397355755885e-05, "loss": 0.8073, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 833 }, { "epoch": 0.11340766929562143, "grad_norm": 0.578125, "learning_rate": 1.9989177657315567e-05, "loss": 0.6942, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 834 }, { "epoch": 0.11354364971444111, "grad_norm": 0.435546875, "learning_rate": 1.998895570719247e-05, "loss": 0.8542, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 835 }, { "epoch": 0.11367963013326081, "grad_norm": 0.578125, "learning_rate": 1.998873150543664e-05, "loss": 0.461, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 836 }, { "epoch": 0.1138156105520805, "grad_norm": 0.275390625, "learning_rate": 1.9988505052098598e-05, "loss": 0.4388, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 837 }, { "epoch": 0.11395159097090019, "grad_norm": 0.404296875, "learning_rate": 1.9988276347229404e-05, "loss": 0.6938, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 838 }, { "epoch": 0.11408757138971988, "grad_norm": 0.50390625, "learning_rate": 1.9988045390880598e-05, "loss": 0.63, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 839 }, { "epoch": 0.11422355180853958, "grad_norm": 0.3125, "learning_rate": 1.998781218310425e-05, "loss": 0.5101, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 840 }, { "epoch": 0.11435953222735926, "grad_norm": 0.5859375, "learning_rate": 1.9987576723952923e-05, "loss": 0.6717, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 841 }, { "epoch": 0.11449551264617895, "grad_norm": 0.65234375, "learning_rate": 1.9987339013479693e-05, "loss": 0.5623, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 842 }, { "epoch": 0.11463149306499865, "grad_norm": 0.427734375, "learning_rate": 1.9987099051738142e-05, "loss": 0.7295, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 843 }, { "epoch": 0.11476747348381833, "grad_norm": 0.65234375, "learning_rate": 1.998685683878236e-05, "loss": 0.5285, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 844 }, { "epoch": 0.11490345390263802, "grad_norm": 0.3984375, "learning_rate": 1.998661237466695e-05, "loss": 0.6227, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 845 }, { "epoch": 0.1150394343214577, "grad_norm": 0.439453125, "learning_rate": 1.9986365659447015e-05, "loss": 0.714, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 846 }, { "epoch": 0.1151754147402774, "grad_norm": 0.5390625, "learning_rate": 1.9986116693178163e-05, "loss": 0.6069, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 847 }, { "epoch": 0.1153113951590971, "grad_norm": 0.3671875, "learning_rate": 1.9985865475916522e-05, "loss": 0.7363, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 848 }, { "epoch": 0.11544737557791677, "grad_norm": 2.609375, "learning_rate": 1.998561200771871e-05, "loss": 0.9087, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 849 }, { "epoch": 0.11558335599673647, "grad_norm": 0.4765625, "learning_rate": 1.9985356288641865e-05, "loss": 0.7489, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 850 }, { "epoch": 0.11571933641555616, "grad_norm": 0.62109375, "learning_rate": 1.9985098318743635e-05, "loss": 0.9269, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 851 }, { "epoch": 0.11585531683437585, "grad_norm": 0.318359375, "learning_rate": 1.9984838098082166e-05, "loss": 0.491, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 852 }, { "epoch": 0.11599129725319554, "grad_norm": 0.431640625, "learning_rate": 1.9984575626716113e-05, "loss": 0.6169, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 853 }, { "epoch": 0.11612727767201524, "grad_norm": 0.36328125, "learning_rate": 1.998431090470464e-05, "loss": 0.6247, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 854 }, { "epoch": 0.11626325809083492, "grad_norm": 0.384765625, "learning_rate": 1.9984043932107423e-05, "loss": 0.6287, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 855 }, { "epoch": 0.11639923850965461, "grad_norm": 0.515625, "learning_rate": 1.9983774708984634e-05, "loss": 0.7202, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 856 }, { "epoch": 0.1165352189284743, "grad_norm": 0.37890625, "learning_rate": 1.9983503235396962e-05, "loss": 0.5926, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 857 }, { "epoch": 0.11667119934729399, "grad_norm": 0.44921875, "learning_rate": 1.9983229511405606e-05, "loss": 0.6304, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 858 }, { "epoch": 0.11680717976611368, "grad_norm": 0.35546875, "learning_rate": 1.998295353707226e-05, "loss": 0.647, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 859 }, { "epoch": 0.11694316018493336, "grad_norm": 0.8828125, "learning_rate": 1.998267531245914e-05, "loss": 0.7101, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 860 }, { "epoch": 0.11707914060375306, "grad_norm": 0.60546875, "learning_rate": 1.998239483762895e-05, "loss": 0.4635, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 861 }, { "epoch": 0.11721512102257275, "grad_norm": 0.41015625, "learning_rate": 1.998211211264492e-05, "loss": 0.6068, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 862 }, { "epoch": 0.11735110144139244, "grad_norm": 0.80859375, "learning_rate": 1.9981827137570774e-05, "loss": 0.6992, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 863 }, { "epoch": 0.11748708186021213, "grad_norm": 0.30859375, "learning_rate": 1.9981539912470754e-05, "loss": 0.5244, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 864 }, { "epoch": 0.11762306227903183, "grad_norm": 0.357421875, "learning_rate": 1.9981250437409607e-05, "loss": 0.526, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 865 }, { "epoch": 0.11775904269785151, "grad_norm": 0.328125, "learning_rate": 1.9980958712452577e-05, "loss": 0.584, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 866 }, { "epoch": 0.1178950231166712, "grad_norm": 0.337890625, "learning_rate": 1.9980664737665423e-05, "loss": 0.6514, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 867 }, { "epoch": 0.11803100353549088, "grad_norm": 0.291015625, "learning_rate": 1.9980368513114418e-05, "loss": 0.5661, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 868 }, { "epoch": 0.11816698395431058, "grad_norm": 0.6015625, "learning_rate": 1.9980070038866324e-05, "loss": 0.8156, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 869 }, { "epoch": 0.11830296437313027, "grad_norm": 0.54296875, "learning_rate": 1.9979769314988432e-05, "loss": 0.8319, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 870 }, { "epoch": 0.11843894479194995, "grad_norm": 0.412109375, "learning_rate": 1.9979466341548517e-05, "loss": 0.528, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 871 }, { "epoch": 0.11857492521076965, "grad_norm": 0.400390625, "learning_rate": 1.997916111861488e-05, "loss": 0.8024, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 872 }, { "epoch": 0.11871090562958934, "grad_norm": 0.427734375, "learning_rate": 1.997885364625632e-05, "loss": 0.7461, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 873 }, { "epoch": 0.11884688604840903, "grad_norm": 0.404296875, "learning_rate": 1.997854392454215e-05, "loss": 0.6426, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 874 }, { "epoch": 0.11898286646722872, "grad_norm": 0.390625, "learning_rate": 1.997823195354218e-05, "loss": 0.7282, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 875 }, { "epoch": 0.11911884688604842, "grad_norm": 1.4921875, "learning_rate": 1.997791773332673e-05, "loss": 0.8179, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 876 }, { "epoch": 0.1192548273048681, "grad_norm": 0.35546875, "learning_rate": 1.9977601263966637e-05, "loss": 0.7039, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 877 }, { "epoch": 0.11939080772368779, "grad_norm": 0.6171875, "learning_rate": 1.9977282545533227e-05, "loss": 0.8731, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 878 }, { "epoch": 0.11952678814250747, "grad_norm": 0.3125, "learning_rate": 1.9976961578098352e-05, "loss": 0.5633, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 879 }, { "epoch": 0.11966276856132717, "grad_norm": 0.6328125, "learning_rate": 1.9976638361734356e-05, "loss": 0.7718, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 880 }, { "epoch": 0.11979874898014686, "grad_norm": 0.7578125, "learning_rate": 1.9976312896514098e-05, "loss": 0.8467, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 881 }, { "epoch": 0.11993472939896654, "grad_norm": 0.6640625, "learning_rate": 1.9975985182510943e-05, "loss": 0.7064, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 882 }, { "epoch": 0.12007070981778624, "grad_norm": 0.375, "learning_rate": 1.9975655219798757e-05, "loss": 0.5734, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 883 }, { "epoch": 0.12020669023660593, "grad_norm": 0.275390625, "learning_rate": 1.9975323008451926e-05, "loss": 0.4199, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 884 }, { "epoch": 0.12034267065542562, "grad_norm": 0.69921875, "learning_rate": 1.9974988548545328e-05, "loss": 0.6841, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 885 }, { "epoch": 0.12047865107424531, "grad_norm": 0.4609375, "learning_rate": 1.9974651840154355e-05, "loss": 0.7264, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 886 }, { "epoch": 0.120614631493065, "grad_norm": 0.546875, "learning_rate": 1.9974312883354908e-05, "loss": 0.8101, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 887 }, { "epoch": 0.12075061191188469, "grad_norm": 0.337890625, "learning_rate": 1.997397167822339e-05, "loss": 0.5518, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 888 }, { "epoch": 0.12088659233070438, "grad_norm": 0.443359375, "learning_rate": 1.9973628224836715e-05, "loss": 0.6507, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 889 }, { "epoch": 0.12102257274952406, "grad_norm": 0.474609375, "learning_rate": 1.9973282523272297e-05, "loss": 0.6854, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 890 }, { "epoch": 0.12115855316834376, "grad_norm": 0.48828125, "learning_rate": 1.997293457360807e-05, "loss": 0.8841, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 891 }, { "epoch": 0.12129453358716345, "grad_norm": 0.5078125, "learning_rate": 1.9972584375922453e-05, "loss": 0.8254, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 892 }, { "epoch": 0.12143051400598313, "grad_norm": 0.447265625, "learning_rate": 1.9972231930294392e-05, "loss": 0.7121, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 893 }, { "epoch": 0.12156649442480283, "grad_norm": 0.50390625, "learning_rate": 1.9971877236803336e-05, "loss": 0.8402, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 894 }, { "epoch": 0.12170247484362252, "grad_norm": 0.421875, "learning_rate": 1.997152029552923e-05, "loss": 0.6509, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 895 }, { "epoch": 0.1218384552624422, "grad_norm": 0.5625, "learning_rate": 1.9971161106552543e-05, "loss": 0.8314, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 896 }, { "epoch": 0.1219744356812619, "grad_norm": 0.416015625, "learning_rate": 1.9970799669954227e-05, "loss": 0.7559, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 897 }, { "epoch": 0.12211041610008158, "grad_norm": 0.45703125, "learning_rate": 1.997043598581577e-05, "loss": 0.6233, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 898 }, { "epoch": 0.12224639651890128, "grad_norm": 0.400390625, "learning_rate": 1.9970070054219136e-05, "loss": 0.7186, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 899 }, { "epoch": 0.12238237693772097, "grad_norm": 0.46484375, "learning_rate": 1.9969701875246817e-05, "loss": 0.5802, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 900 }, { "epoch": 0.12251835735654065, "grad_norm": 0.36328125, "learning_rate": 1.996933144898181e-05, "loss": 0.6935, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 901 }, { "epoch": 0.12265433777536035, "grad_norm": 0.828125, "learning_rate": 1.9968958775507605e-05, "loss": 0.862, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 902 }, { "epoch": 0.12279031819418004, "grad_norm": 0.3671875, "learning_rate": 1.996858385490821e-05, "loss": 0.5695, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 903 }, { "epoch": 0.12292629861299972, "grad_norm": 0.388671875, "learning_rate": 1.996820668726814e-05, "loss": 0.6877, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 904 }, { "epoch": 0.12306227903181942, "grad_norm": 0.318359375, "learning_rate": 1.9967827272672407e-05, "loss": 0.6325, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 905 }, { "epoch": 0.12319825945063911, "grad_norm": 0.61328125, "learning_rate": 1.996744561120654e-05, "loss": 0.6074, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 906 }, { "epoch": 0.1233342398694588, "grad_norm": 0.38671875, "learning_rate": 1.9967061702956574e-05, "loss": 0.5391, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 907 }, { "epoch": 0.12347022028827849, "grad_norm": 0.458984375, "learning_rate": 1.9966675548009037e-05, "loss": 0.5195, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 908 }, { "epoch": 0.12360620070709817, "grad_norm": 0.400390625, "learning_rate": 1.9966287146450984e-05, "loss": 0.6043, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 909 }, { "epoch": 0.12374218112591787, "grad_norm": 0.65625, "learning_rate": 1.996589649836995e-05, "loss": 0.9186, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 910 }, { "epoch": 0.12387816154473756, "grad_norm": 0.3125, "learning_rate": 1.9965503603854012e-05, "loss": 0.5485, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 911 }, { "epoch": 0.12401414196355724, "grad_norm": 0.6328125, "learning_rate": 1.9965108462991717e-05, "loss": 0.576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 912 }, { "epoch": 0.12415012238237694, "grad_norm": 0.51171875, "learning_rate": 1.996471107587214e-05, "loss": 0.7178, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 913 }, { "epoch": 0.12428610280119663, "grad_norm": 0.62890625, "learning_rate": 1.9964311442584857e-05, "loss": 0.853, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 914 }, { "epoch": 0.12442208322001631, "grad_norm": 0.296875, "learning_rate": 1.996390956321995e-05, "loss": 0.533, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 915 }, { "epoch": 0.12455806363883601, "grad_norm": 0.61328125, "learning_rate": 1.996350543786801e-05, "loss": 0.9316, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 916 }, { "epoch": 0.1246940440576557, "grad_norm": 0.46875, "learning_rate": 1.996309906662013e-05, "loss": 0.8706, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 917 }, { "epoch": 0.12483002447647538, "grad_norm": 1.0390625, "learning_rate": 1.996269044956791e-05, "loss": 0.6429, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 918 }, { "epoch": 0.12496600489529508, "grad_norm": 1.375, "learning_rate": 1.9962279586803456e-05, "loss": 0.5833, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 919 }, { "epoch": 0.12510198531411476, "grad_norm": 0.7578125, "learning_rate": 1.9961866478419387e-05, "loss": 0.4321, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 920 }, { "epoch": 0.12523796573293447, "grad_norm": 0.439453125, "learning_rate": 1.996145112450882e-05, "loss": 0.5547, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 921 }, { "epoch": 0.12537394615175415, "grad_norm": 0.33984375, "learning_rate": 1.9961033525165375e-05, "loss": 0.5241, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 922 }, { "epoch": 0.12550992657057383, "grad_norm": 0.49609375, "learning_rate": 1.9960613680483192e-05, "loss": 0.7559, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 923 }, { "epoch": 0.12564590698939354, "grad_norm": 0.41015625, "learning_rate": 1.9960191590556905e-05, "loss": 0.6034, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 924 }, { "epoch": 0.12578188740821322, "grad_norm": 1.421875, "learning_rate": 1.9959767255481662e-05, "loss": 0.7069, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 925 }, { "epoch": 0.1259178678270329, "grad_norm": 0.8125, "learning_rate": 1.995934067535311e-05, "loss": 0.6419, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 926 }, { "epoch": 0.12605384824585258, "grad_norm": 0.365234375, "learning_rate": 1.9958911850267403e-05, "loss": 0.5957, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 927 }, { "epoch": 0.1261898286646723, "grad_norm": 0.296875, "learning_rate": 1.995848078032121e-05, "loss": 0.4338, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 928 }, { "epoch": 0.12632580908349197, "grad_norm": 0.4296875, "learning_rate": 1.9958047465611693e-05, "loss": 0.6878, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 929 }, { "epoch": 0.12646178950231166, "grad_norm": 0.53125, "learning_rate": 1.995761190623653e-05, "loss": 0.6214, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 930 }, { "epoch": 0.12659776992113136, "grad_norm": 0.51953125, "learning_rate": 1.99571741022939e-05, "loss": 0.7498, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 931 }, { "epoch": 0.12673375033995105, "grad_norm": 0.384765625, "learning_rate": 1.9956734053882494e-05, "loss": 0.6637, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 932 }, { "epoch": 0.12686973075877073, "grad_norm": 0.60546875, "learning_rate": 1.9956291761101498e-05, "loss": 0.7935, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 933 }, { "epoch": 0.12700571117759044, "grad_norm": 0.3125, "learning_rate": 1.9955847224050614e-05, "loss": 0.5706, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 934 }, { "epoch": 0.12714169159641012, "grad_norm": 0.416015625, "learning_rate": 1.9955400442830043e-05, "loss": 0.5934, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 935 }, { "epoch": 0.1272776720152298, "grad_norm": 0.63671875, "learning_rate": 1.9954951417540494e-05, "loss": 0.5667, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 936 }, { "epoch": 0.1274136524340495, "grad_norm": 0.703125, "learning_rate": 1.995450014828319e-05, "loss": 0.6117, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 937 }, { "epoch": 0.1275496328528692, "grad_norm": 0.45703125, "learning_rate": 1.9954046635159848e-05, "loss": 0.6524, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 938 }, { "epoch": 0.12768561327168887, "grad_norm": 0.48046875, "learning_rate": 1.9953590878272692e-05, "loss": 0.755, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 939 }, { "epoch": 0.12782159369050858, "grad_norm": 0.38671875, "learning_rate": 1.9953132877724462e-05, "loss": 0.6589, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 940 }, { "epoch": 0.12795757410932826, "grad_norm": 0.90234375, "learning_rate": 1.995267263361839e-05, "loss": 0.8628, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 941 }, { "epoch": 0.12809355452814794, "grad_norm": 0.40625, "learning_rate": 1.9952210146058227e-05, "loss": 0.721, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 942 }, { "epoch": 0.12822953494696765, "grad_norm": 0.46875, "learning_rate": 1.995174541514822e-05, "loss": 0.7957, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 943 }, { "epoch": 0.12836551536578733, "grad_norm": 0.37890625, "learning_rate": 1.9951278440993128e-05, "loss": 0.6125, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 944 }, { "epoch": 0.128501495784607, "grad_norm": 0.369140625, "learning_rate": 1.995080922369821e-05, "loss": 0.5758, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 945 }, { "epoch": 0.1286374762034267, "grad_norm": 0.326171875, "learning_rate": 1.995033776336923e-05, "loss": 0.5921, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 946 }, { "epoch": 0.1287734566222464, "grad_norm": 0.40234375, "learning_rate": 1.9949864060112466e-05, "loss": 0.6081, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 947 }, { "epoch": 0.12890943704106608, "grad_norm": 0.361328125, "learning_rate": 1.9949388114034693e-05, "loss": 0.4583, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 948 }, { "epoch": 0.12904541745988576, "grad_norm": 0.33203125, "learning_rate": 1.9948909925243202e-05, "loss": 0.4806, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 949 }, { "epoch": 0.12918139787870547, "grad_norm": 1.2890625, "learning_rate": 1.9948429493845775e-05, "loss": 0.6196, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 950 }, { "epoch": 0.12931737829752515, "grad_norm": 0.4375, "learning_rate": 1.9947946819950707e-05, "loss": 0.7553, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 951 }, { "epoch": 0.12945335871634484, "grad_norm": 0.51953125, "learning_rate": 1.9947461903666805e-05, "loss": 0.818, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 952 }, { "epoch": 0.12958933913516454, "grad_norm": 0.431640625, "learning_rate": 1.994697474510337e-05, "loss": 0.918, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 953 }, { "epoch": 0.12972531955398423, "grad_norm": 0.3828125, "learning_rate": 1.994648534437022e-05, "loss": 0.6901, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 954 }, { "epoch": 0.1298612999728039, "grad_norm": 0.48828125, "learning_rate": 1.994599370157766e-05, "loss": 0.5968, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 955 }, { "epoch": 0.12999728039162362, "grad_norm": 0.50390625, "learning_rate": 1.994549981683652e-05, "loss": 0.7632, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 956 }, { "epoch": 0.1301332608104433, "grad_norm": 0.72265625, "learning_rate": 1.9945003690258127e-05, "loss": 0.6034, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 957 }, { "epoch": 0.13026924122926298, "grad_norm": 0.4296875, "learning_rate": 1.9944505321954315e-05, "loss": 0.743, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 958 }, { "epoch": 0.1304052216480827, "grad_norm": 0.515625, "learning_rate": 1.9944004712037422e-05, "loss": 0.6491, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 959 }, { "epoch": 0.13054120206690237, "grad_norm": 0.388671875, "learning_rate": 1.994350186062029e-05, "loss": 0.6207, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 960 }, { "epoch": 0.13067718248572205, "grad_norm": 0.408203125, "learning_rate": 1.994299676781627e-05, "loss": 0.6641, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 961 }, { "epoch": 0.13081316290454176, "grad_norm": 0.65234375, "learning_rate": 1.994248943373921e-05, "loss": 0.8486, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 962 }, { "epoch": 0.13094914332336144, "grad_norm": 0.55859375, "learning_rate": 1.994197985850348e-05, "loss": 0.8011, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 963 }, { "epoch": 0.13108512374218112, "grad_norm": 0.65625, "learning_rate": 1.9941468042223936e-05, "loss": 0.763, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 964 }, { "epoch": 0.13122110416100083, "grad_norm": 0.451171875, "learning_rate": 1.994095398501595e-05, "loss": 0.6084, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 965 }, { "epoch": 0.1313570845798205, "grad_norm": 0.734375, "learning_rate": 1.9940437686995394e-05, "loss": 0.8003, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 966 }, { "epoch": 0.1314930649986402, "grad_norm": 0.345703125, "learning_rate": 1.9939919148278654e-05, "loss": 0.4635, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 967 }, { "epoch": 0.13162904541745987, "grad_norm": 0.4453125, "learning_rate": 1.993939836898261e-05, "loss": 0.6427, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 968 }, { "epoch": 0.13176502583627958, "grad_norm": 0.392578125, "learning_rate": 1.9938875349224655e-05, "loss": 0.6338, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 969 }, { "epoch": 0.13190100625509926, "grad_norm": 0.4609375, "learning_rate": 1.993835008912268e-05, "loss": 0.6769, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 970 }, { "epoch": 0.13203698667391894, "grad_norm": 0.42578125, "learning_rate": 1.9937822588795096e-05, "loss": 0.8145, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 971 }, { "epoch": 0.13217296709273865, "grad_norm": 1.109375, "learning_rate": 1.993729284836079e-05, "loss": 0.8276, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 972 }, { "epoch": 0.13230894751155833, "grad_norm": 0.439453125, "learning_rate": 1.993676086793919e-05, "loss": 0.7707, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 973 }, { "epoch": 0.13244492793037801, "grad_norm": 0.4453125, "learning_rate": 1.99362266476502e-05, "loss": 0.601, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 974 }, { "epoch": 0.13258090834919772, "grad_norm": 0.322265625, "learning_rate": 1.993569018761424e-05, "loss": 0.5931, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 975 }, { "epoch": 0.1327168887680174, "grad_norm": 0.6015625, "learning_rate": 1.993515148795224e-05, "loss": 0.513, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 976 }, { "epoch": 0.13285286918683709, "grad_norm": 0.43359375, "learning_rate": 1.993461054878562e-05, "loss": 0.5488, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 977 }, { "epoch": 0.1329888496056568, "grad_norm": 0.5234375, "learning_rate": 1.993406737023633e-05, "loss": 0.3774, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 978 }, { "epoch": 0.13312483002447648, "grad_norm": 0.33203125, "learning_rate": 1.9933521952426796e-05, "loss": 0.6198, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 979 }, { "epoch": 0.13326081044329616, "grad_norm": 0.44140625, "learning_rate": 1.9932974295479965e-05, "loss": 0.8618, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 980 }, { "epoch": 0.13339679086211587, "grad_norm": 0.54296875, "learning_rate": 1.9932424399519286e-05, "loss": 0.9338, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 981 }, { "epoch": 0.13353277128093555, "grad_norm": 0.31640625, "learning_rate": 1.9931872264668717e-05, "loss": 0.3941, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 982 }, { "epoch": 0.13366875169975523, "grad_norm": 0.38671875, "learning_rate": 1.9931317891052707e-05, "loss": 0.639, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 983 }, { "epoch": 0.13380473211857494, "grad_norm": 0.43359375, "learning_rate": 1.9930761278796228e-05, "loss": 0.6043, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 984 }, { "epoch": 0.13394071253739462, "grad_norm": 0.4921875, "learning_rate": 1.993020242802474e-05, "loss": 0.508, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 985 }, { "epoch": 0.1340766929562143, "grad_norm": 0.396484375, "learning_rate": 1.9929641338864213e-05, "loss": 0.7293, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 986 }, { "epoch": 0.134212673375034, "grad_norm": 0.3984375, "learning_rate": 1.9929078011441133e-05, "loss": 0.6965, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 987 }, { "epoch": 0.1343486537938537, "grad_norm": 0.427734375, "learning_rate": 1.9928512445882474e-05, "loss": 0.6818, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 988 }, { "epoch": 0.13448463421267337, "grad_norm": 0.52734375, "learning_rate": 1.9927944642315726e-05, "loss": 0.6385, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 989 }, { "epoch": 0.13462061463149305, "grad_norm": 0.6328125, "learning_rate": 1.9927374600868873e-05, "loss": 0.7975, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 990 }, { "epoch": 0.13475659505031276, "grad_norm": 0.5, "learning_rate": 1.992680232167041e-05, "loss": 0.6315, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 991 }, { "epoch": 0.13489257546913244, "grad_norm": 0.380859375, "learning_rate": 1.992622780484934e-05, "loss": 0.6209, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 992 }, { "epoch": 0.13502855588795212, "grad_norm": 0.52734375, "learning_rate": 1.9925651050535165e-05, "loss": 0.6364, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 993 }, { "epoch": 0.13516453630677183, "grad_norm": 0.384765625, "learning_rate": 1.9925072058857887e-05, "loss": 0.8385, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 994 }, { "epoch": 0.1353005167255915, "grad_norm": 0.97265625, "learning_rate": 1.992449082994803e-05, "loss": 0.688, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 995 }, { "epoch": 0.1354364971444112, "grad_norm": 0.271484375, "learning_rate": 1.9923907363936593e-05, "loss": 0.4967, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 996 }, { "epoch": 0.1355724775632309, "grad_norm": 1.046875, "learning_rate": 1.9923321660955113e-05, "loss": 0.9671, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 997 }, { "epoch": 0.13570845798205058, "grad_norm": 0.330078125, "learning_rate": 1.9922733721135606e-05, "loss": 0.5848, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 998 }, { "epoch": 0.13584443840087027, "grad_norm": 0.328125, "learning_rate": 1.99221435446106e-05, "loss": 0.5133, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 999 }, { "epoch": 0.13598041881968997, "grad_norm": 0.451171875, "learning_rate": 1.992155113151313e-05, "loss": 0.7087, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1000 }, { "epoch": 0.13611639923850966, "grad_norm": 4.15625, "learning_rate": 1.9920956481976734e-05, "loss": 0.6852, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1001 }, { "epoch": 0.13625237965732934, "grad_norm": 0.65234375, "learning_rate": 1.9920359596135454e-05, "loss": 0.5228, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1002 }, { "epoch": 0.13638836007614905, "grad_norm": 0.41015625, "learning_rate": 1.9919760474123832e-05, "loss": 0.8451, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1003 }, { "epoch": 0.13652434049496873, "grad_norm": 0.41796875, "learning_rate": 1.991915911607692e-05, "loss": 0.5782, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1004 }, { "epoch": 0.1366603209137884, "grad_norm": 0.53515625, "learning_rate": 1.991855552213027e-05, "loss": 0.7952, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1005 }, { "epoch": 0.13679630133260812, "grad_norm": 0.44921875, "learning_rate": 1.9917949692419943e-05, "loss": 0.5933, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1006 }, { "epoch": 0.1369322817514278, "grad_norm": 0.384765625, "learning_rate": 1.9917341627082494e-05, "loss": 0.4313, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1007 }, { "epoch": 0.13706826217024748, "grad_norm": 0.546875, "learning_rate": 1.9916731326254997e-05, "loss": 0.7194, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1008 }, { "epoch": 0.1372042425890672, "grad_norm": 0.4296875, "learning_rate": 1.991611879007501e-05, "loss": 0.7493, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1009 }, { "epoch": 0.13734022300788687, "grad_norm": 0.322265625, "learning_rate": 1.9915504018680617e-05, "loss": 0.554, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1010 }, { "epoch": 0.13747620342670655, "grad_norm": 0.58203125, "learning_rate": 1.991488701221039e-05, "loss": 0.6828, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1011 }, { "epoch": 0.13761218384552623, "grad_norm": 0.35546875, "learning_rate": 1.9914267770803407e-05, "loss": 0.5615, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1012 }, { "epoch": 0.13774816426434594, "grad_norm": 0.55859375, "learning_rate": 1.991364629459926e-05, "loss": 0.6545, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1013 }, { "epoch": 0.13788414468316562, "grad_norm": 0.279296875, "learning_rate": 1.9913022583738032e-05, "loss": 0.3615, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1014 }, { "epoch": 0.1380201251019853, "grad_norm": 0.70703125, "learning_rate": 1.9912396638360314e-05, "loss": 0.6746, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1015 }, { "epoch": 0.138156105520805, "grad_norm": 0.51171875, "learning_rate": 1.9911768458607206e-05, "loss": 0.5874, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1016 }, { "epoch": 0.1382920859396247, "grad_norm": 0.337890625, "learning_rate": 1.9911138044620304e-05, "loss": 0.541, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1017 }, { "epoch": 0.13842806635844437, "grad_norm": 0.298828125, "learning_rate": 1.9910505396541712e-05, "loss": 0.5576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1018 }, { "epoch": 0.13856404677726408, "grad_norm": 0.2890625, "learning_rate": 1.990987051451404e-05, "loss": 0.4857, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1019 }, { "epoch": 0.13870002719608376, "grad_norm": 0.302734375, "learning_rate": 1.9909233398680393e-05, "loss": 0.6004, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1020 }, { "epoch": 0.13883600761490344, "grad_norm": 0.375, "learning_rate": 1.990859404918439e-05, "loss": 0.6766, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1021 }, { "epoch": 0.13897198803372315, "grad_norm": 0.3125, "learning_rate": 1.990795246617014e-05, "loss": 0.5635, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1022 }, { "epoch": 0.13910796845254284, "grad_norm": 0.296875, "learning_rate": 1.990730864978227e-05, "loss": 0.5182, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1023 }, { "epoch": 0.13924394887136252, "grad_norm": 0.38671875, "learning_rate": 1.9906662600165906e-05, "loss": 0.7386, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1024 }, { "epoch": 0.13937992929018223, "grad_norm": 0.59765625, "learning_rate": 1.990601431746667e-05, "loss": 0.6359, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1025 }, { "epoch": 0.1395159097090019, "grad_norm": 0.462890625, "learning_rate": 1.9905363801830695e-05, "loss": 0.8268, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1026 }, { "epoch": 0.1396518901278216, "grad_norm": 0.3984375, "learning_rate": 1.9904711053404614e-05, "loss": 0.6481, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1027 }, { "epoch": 0.1397878705466413, "grad_norm": 0.3359375, "learning_rate": 1.9904056072335568e-05, "loss": 0.6668, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1028 }, { "epoch": 0.13992385096546098, "grad_norm": 0.51171875, "learning_rate": 1.9903398858771195e-05, "loss": 0.7178, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1029 }, { "epoch": 0.14005983138428066, "grad_norm": 0.5546875, "learning_rate": 1.990273941285964e-05, "loss": 0.7139, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1030 }, { "epoch": 0.14019581180310034, "grad_norm": 0.6484375, "learning_rate": 1.990207773474955e-05, "loss": 0.6335, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1031 }, { "epoch": 0.14033179222192005, "grad_norm": 0.44140625, "learning_rate": 1.9901413824590072e-05, "loss": 0.6893, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1032 }, { "epoch": 0.14046777264073973, "grad_norm": 0.5625, "learning_rate": 1.9900747682530864e-05, "loss": 0.731, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1033 }, { "epoch": 0.1406037530595594, "grad_norm": 0.478515625, "learning_rate": 1.990007930872208e-05, "loss": 0.7915, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1034 }, { "epoch": 0.14073973347837912, "grad_norm": 0.54296875, "learning_rate": 1.9899408703314383e-05, "loss": 0.7218, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1035 }, { "epoch": 0.1408757138971988, "grad_norm": 0.4375, "learning_rate": 1.9898735866458935e-05, "loss": 0.5785, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1036 }, { "epoch": 0.14101169431601848, "grad_norm": 0.3359375, "learning_rate": 1.9898060798307396e-05, "loss": 0.6484, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1037 }, { "epoch": 0.1411476747348382, "grad_norm": 0.34765625, "learning_rate": 1.989738349901194e-05, "loss": 0.6992, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1038 }, { "epoch": 0.14128365515365787, "grad_norm": 0.423828125, "learning_rate": 1.9896703968725236e-05, "loss": 0.6302, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1039 }, { "epoch": 0.14141963557247755, "grad_norm": 0.515625, "learning_rate": 1.989602220760046e-05, "loss": 0.5436, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1040 }, { "epoch": 0.14155561599129726, "grad_norm": 0.328125, "learning_rate": 1.9895338215791292e-05, "loss": 0.5828, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1041 }, { "epoch": 0.14169159641011694, "grad_norm": 0.67578125, "learning_rate": 1.9894651993451907e-05, "loss": 0.6473, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1042 }, { "epoch": 0.14182757682893662, "grad_norm": 0.42578125, "learning_rate": 1.989396354073699e-05, "loss": 0.7012, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1043 }, { "epoch": 0.14196355724775633, "grad_norm": 0.2734375, "learning_rate": 1.9893272857801726e-05, "loss": 0.4551, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1044 }, { "epoch": 0.14209953766657601, "grad_norm": 0.416015625, "learning_rate": 1.9892579944801806e-05, "loss": 0.668, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1045 }, { "epoch": 0.1422355180853957, "grad_norm": 0.3125, "learning_rate": 1.9891884801893414e-05, "loss": 0.6064, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1046 }, { "epoch": 0.1423714985042154, "grad_norm": 0.267578125, "learning_rate": 1.9891187429233253e-05, "loss": 0.5319, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1047 }, { "epoch": 0.14250747892303509, "grad_norm": 0.2890625, "learning_rate": 1.989048782697851e-05, "loss": 0.3743, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1048 }, { "epoch": 0.14264345934185477, "grad_norm": 0.6796875, "learning_rate": 1.9889785995286894e-05, "loss": 0.6494, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1049 }, { "epoch": 0.14277943976067448, "grad_norm": 1.03125, "learning_rate": 1.98890819343166e-05, "loss": 0.8675, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1050 }, { "epoch": 0.14291542017949416, "grad_norm": 0.400390625, "learning_rate": 1.9888375644226333e-05, "loss": 0.5254, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1051 }, { "epoch": 0.14305140059831384, "grad_norm": 0.345703125, "learning_rate": 1.98876671251753e-05, "loss": 0.541, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1052 }, { "epoch": 0.14318738101713352, "grad_norm": 0.7734375, "learning_rate": 1.9886956377323207e-05, "loss": 0.7489, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1053 }, { "epoch": 0.14332336143595323, "grad_norm": 0.388671875, "learning_rate": 1.9886243400830272e-05, "loss": 0.4702, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1054 }, { "epoch": 0.1434593418547729, "grad_norm": 0.55078125, "learning_rate": 1.9885528195857204e-05, "loss": 0.7684, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1055 }, { "epoch": 0.1435953222735926, "grad_norm": 0.50390625, "learning_rate": 1.9884810762565216e-05, "loss": 0.6664, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1056 }, { "epoch": 0.1437313026924123, "grad_norm": 1.1015625, "learning_rate": 1.9884091101116032e-05, "loss": 0.9056, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1057 }, { "epoch": 0.14386728311123198, "grad_norm": 0.58984375, "learning_rate": 1.988336921167187e-05, "loss": 0.6343, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1058 }, { "epoch": 0.14400326353005166, "grad_norm": 0.51953125, "learning_rate": 1.988264509439545e-05, "loss": 0.6582, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1059 }, { "epoch": 0.14413924394887137, "grad_norm": 0.375, "learning_rate": 1.9881918749450004e-05, "loss": 0.5641, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1060 }, { "epoch": 0.14427522436769105, "grad_norm": 0.51171875, "learning_rate": 1.9881190176999255e-05, "loss": 0.7757, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1061 }, { "epoch": 0.14441120478651073, "grad_norm": 0.34765625, "learning_rate": 1.988045937720743e-05, "loss": 0.5729, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1062 }, { "epoch": 0.14454718520533044, "grad_norm": 0.400390625, "learning_rate": 1.987972635023926e-05, "loss": 0.7183, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1063 }, { "epoch": 0.14468316562415012, "grad_norm": 0.33203125, "learning_rate": 1.9878991096259985e-05, "loss": 0.466, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1064 }, { "epoch": 0.1448191460429698, "grad_norm": 0.65234375, "learning_rate": 1.987825361543533e-05, "loss": 0.6888, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1065 }, { "epoch": 0.1449551264617895, "grad_norm": 0.40625, "learning_rate": 1.9877513907931543e-05, "loss": 0.7483, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1066 }, { "epoch": 0.1450911068806092, "grad_norm": 0.50390625, "learning_rate": 1.9876771973915355e-05, "loss": 0.8032, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1067 }, { "epoch": 0.14522708729942888, "grad_norm": 0.3203125, "learning_rate": 1.987602781355401e-05, "loss": 0.5272, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1068 }, { "epoch": 0.14536306771824858, "grad_norm": 0.263671875, "learning_rate": 1.9875281427015252e-05, "loss": 0.4329, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1069 }, { "epoch": 0.14549904813706827, "grad_norm": 0.5546875, "learning_rate": 1.987453281446732e-05, "loss": 0.5986, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1070 }, { "epoch": 0.14563502855588795, "grad_norm": 0.380859375, "learning_rate": 1.987378197607897e-05, "loss": 0.6388, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1071 }, { "epoch": 0.14577100897470766, "grad_norm": 0.32421875, "learning_rate": 1.9873028912019444e-05, "loss": 0.6108, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1072 }, { "epoch": 0.14590698939352734, "grad_norm": 0.251953125, "learning_rate": 1.9872273622458494e-05, "loss": 0.4535, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1073 }, { "epoch": 0.14604296981234702, "grad_norm": 0.3828125, "learning_rate": 1.9871516107566366e-05, "loss": 0.5742, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1074 }, { "epoch": 0.1461789502311667, "grad_norm": 0.32421875, "learning_rate": 1.987075636751382e-05, "loss": 0.5293, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1075 }, { "epoch": 0.1463149306499864, "grad_norm": 0.27734375, "learning_rate": 1.9869994402472108e-05, "loss": 0.5568, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1076 }, { "epoch": 0.1464509110688061, "grad_norm": 0.408203125, "learning_rate": 1.9869230212612986e-05, "loss": 0.5788, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1077 }, { "epoch": 0.14658689148762577, "grad_norm": 0.44140625, "learning_rate": 1.9868463798108714e-05, "loss": 0.54, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1078 }, { "epoch": 0.14672287190644548, "grad_norm": 0.7890625, "learning_rate": 1.9867695159132045e-05, "loss": 0.6463, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1079 }, { "epoch": 0.14685885232526516, "grad_norm": 0.41015625, "learning_rate": 1.9866924295856245e-05, "loss": 0.6922, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1080 }, { "epoch": 0.14699483274408484, "grad_norm": 0.296875, "learning_rate": 1.986615120845508e-05, "loss": 0.5762, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1081 }, { "epoch": 0.14713081316290455, "grad_norm": 0.361328125, "learning_rate": 1.9865375897102806e-05, "loss": 0.6546, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1082 }, { "epoch": 0.14726679358172423, "grad_norm": 0.380859375, "learning_rate": 1.9864598361974184e-05, "loss": 0.6694, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1083 }, { "epoch": 0.1474027740005439, "grad_norm": 0.64453125, "learning_rate": 1.9863818603244492e-05, "loss": 0.6701, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1084 }, { "epoch": 0.14753875441936362, "grad_norm": 0.6640625, "learning_rate": 1.986303662108949e-05, "loss": 0.6712, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1085 }, { "epoch": 0.1476747348381833, "grad_norm": 0.6171875, "learning_rate": 1.9862252415685447e-05, "loss": 0.8314, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1086 }, { "epoch": 0.14781071525700298, "grad_norm": 0.32421875, "learning_rate": 1.986146598720913e-05, "loss": 0.507, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1087 }, { "epoch": 0.1479466956758227, "grad_norm": 0.392578125, "learning_rate": 1.9860677335837818e-05, "loss": 0.5885, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1088 }, { "epoch": 0.14808267609464237, "grad_norm": 0.44140625, "learning_rate": 1.9859886461749275e-05, "loss": 0.7633, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1089 }, { "epoch": 0.14821865651346205, "grad_norm": 0.41015625, "learning_rate": 1.9859093365121773e-05, "loss": 0.6082, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1090 }, { "epoch": 0.14835463693228176, "grad_norm": 0.625, "learning_rate": 1.9858298046134094e-05, "loss": 0.631, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1091 }, { "epoch": 0.14849061735110144, "grad_norm": 0.515625, "learning_rate": 1.9857500504965503e-05, "loss": 0.8205, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1092 }, { "epoch": 0.14862659776992113, "grad_norm": 0.37890625, "learning_rate": 1.9856700741795777e-05, "loss": 0.6898, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1093 }, { "epoch": 0.14876257818874083, "grad_norm": 0.4453125, "learning_rate": 1.9855898756805197e-05, "loss": 0.5993, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1094 }, { "epoch": 0.14889855860756052, "grad_norm": 0.734375, "learning_rate": 1.985509455017454e-05, "loss": 0.8652, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1095 }, { "epoch": 0.1490345390263802, "grad_norm": 0.27734375, "learning_rate": 1.9854288122085082e-05, "loss": 0.4761, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1096 }, { "epoch": 0.14917051944519988, "grad_norm": 0.462890625, "learning_rate": 1.9853479472718603e-05, "loss": 0.8153, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1097 }, { "epoch": 0.1493064998640196, "grad_norm": 0.400390625, "learning_rate": 1.985266860225738e-05, "loss": 0.59, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1098 }, { "epoch": 0.14944248028283927, "grad_norm": 0.59375, "learning_rate": 1.9851855510884194e-05, "loss": 0.4793, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1099 }, { "epoch": 0.14957846070165895, "grad_norm": 0.375, "learning_rate": 1.985104019878233e-05, "loss": 0.5822, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1100 }, { "epoch": 0.14971444112047866, "grad_norm": 0.6875, "learning_rate": 1.9850222666135558e-05, "loss": 0.8162, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1101 }, { "epoch": 0.14985042153929834, "grad_norm": 0.33203125, "learning_rate": 1.984940291312817e-05, "loss": 0.5589, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1102 }, { "epoch": 0.14998640195811802, "grad_norm": 0.81640625, "learning_rate": 1.984858093994495e-05, "loss": 0.7357, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1103 }, { "epoch": 0.15012238237693773, "grad_norm": 0.6796875, "learning_rate": 1.9847756746771175e-05, "loss": 0.8205, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1104 }, { "epoch": 0.1502583627957574, "grad_norm": 0.431640625, "learning_rate": 1.984693033379263e-05, "loss": 0.709, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1105 }, { "epoch": 0.1503943432145771, "grad_norm": 0.328125, "learning_rate": 1.9846101701195594e-05, "loss": 0.6652, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1106 }, { "epoch": 0.1505303236333968, "grad_norm": 0.3671875, "learning_rate": 1.9845270849166856e-05, "loss": 0.5422, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1107 }, { "epoch": 0.15066630405221648, "grad_norm": 0.408203125, "learning_rate": 1.9844437777893707e-05, "loss": 0.6642, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1108 }, { "epoch": 0.15080228447103616, "grad_norm": 0.6640625, "learning_rate": 1.9843602487563918e-05, "loss": 0.5658, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1109 }, { "epoch": 0.15093826488985587, "grad_norm": 0.3828125, "learning_rate": 1.984276497836578e-05, "loss": 0.6483, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1110 }, { "epoch": 0.15107424530867555, "grad_norm": 0.353515625, "learning_rate": 1.984192525048808e-05, "loss": 0.6112, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1111 }, { "epoch": 0.15121022572749523, "grad_norm": 0.265625, "learning_rate": 1.9841083304120097e-05, "loss": 0.4655, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1112 }, { "epoch": 0.15134620614631494, "grad_norm": 0.361328125, "learning_rate": 1.9840239139451622e-05, "loss": 0.6156, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1113 }, { "epoch": 0.15148218656513462, "grad_norm": 1.0078125, "learning_rate": 1.9839392756672936e-05, "loss": 0.8356, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1114 }, { "epoch": 0.1516181669839543, "grad_norm": 0.31640625, "learning_rate": 1.9838544155974822e-05, "loss": 0.4772, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1115 }, { "epoch": 0.151754147402774, "grad_norm": 0.423828125, "learning_rate": 1.9837693337548575e-05, "loss": 0.6364, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1116 }, { "epoch": 0.1518901278215937, "grad_norm": 0.82421875, "learning_rate": 1.983684030158597e-05, "loss": 0.7549, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1117 }, { "epoch": 0.15202610824041338, "grad_norm": 0.60546875, "learning_rate": 1.9835985048279296e-05, "loss": 0.6429, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1118 }, { "epoch": 0.15216208865923306, "grad_norm": 0.392578125, "learning_rate": 1.983512757782134e-05, "loss": 0.6309, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1119 }, { "epoch": 0.15229806907805277, "grad_norm": 0.431640625, "learning_rate": 1.9834267890405375e-05, "loss": 0.7669, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1120 }, { "epoch": 0.15243404949687245, "grad_norm": 0.3515625, "learning_rate": 1.98334059862252e-05, "loss": 0.5456, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1121 }, { "epoch": 0.15257002991569213, "grad_norm": 0.609375, "learning_rate": 1.9832541865475086e-05, "loss": 0.8426, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1122 }, { "epoch": 0.15270601033451184, "grad_norm": 0.318359375, "learning_rate": 1.9831675528349825e-05, "loss": 0.5942, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1123 }, { "epoch": 0.15284199075333152, "grad_norm": 0.71875, "learning_rate": 1.9830806975044697e-05, "loss": 0.8075, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1124 }, { "epoch": 0.1529779711721512, "grad_norm": 0.36328125, "learning_rate": 1.9829936205755484e-05, "loss": 0.548, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1125 }, { "epoch": 0.1531139515909709, "grad_norm": 0.30859375, "learning_rate": 1.982906322067847e-05, "loss": 0.542, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1126 }, { "epoch": 0.1532499320097906, "grad_norm": 0.416015625, "learning_rate": 1.9828188020010433e-05, "loss": 0.7406, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1127 }, { "epoch": 0.15338591242861027, "grad_norm": 0.69140625, "learning_rate": 1.982731060394866e-05, "loss": 0.661, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1128 }, { "epoch": 0.15352189284742998, "grad_norm": 0.359375, "learning_rate": 1.9826430972690924e-05, "loss": 0.5698, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1129 }, { "epoch": 0.15365787326624966, "grad_norm": 0.7109375, "learning_rate": 1.982554912643551e-05, "loss": 0.767, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1130 }, { "epoch": 0.15379385368506934, "grad_norm": 0.396484375, "learning_rate": 1.9824665065381194e-05, "loss": 0.6672, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1131 }, { "epoch": 0.15392983410388905, "grad_norm": 0.38671875, "learning_rate": 1.9823778789727253e-05, "loss": 0.5986, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1132 }, { "epoch": 0.15406581452270873, "grad_norm": 0.609375, "learning_rate": 1.9822890299673466e-05, "loss": 0.6289, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1133 }, { "epoch": 0.15420179494152841, "grad_norm": 0.400390625, "learning_rate": 1.9821999595420114e-05, "loss": 0.7399, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1134 }, { "epoch": 0.15433777536034812, "grad_norm": 0.44921875, "learning_rate": 1.9821106677167966e-05, "loss": 0.668, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1135 }, { "epoch": 0.1544737557791678, "grad_norm": 0.466796875, "learning_rate": 1.9820211545118303e-05, "loss": 0.8298, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1136 }, { "epoch": 0.15460973619798749, "grad_norm": 0.76171875, "learning_rate": 1.981931419947289e-05, "loss": 0.9217, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1137 }, { "epoch": 0.15474571661680717, "grad_norm": 0.408203125, "learning_rate": 1.981841464043401e-05, "loss": 0.5116, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1138 }, { "epoch": 0.15488169703562688, "grad_norm": 0.3984375, "learning_rate": 1.9817512868204425e-05, "loss": 0.7804, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1139 }, { "epoch": 0.15501767745444656, "grad_norm": 0.48046875, "learning_rate": 1.981660888298741e-05, "loss": 0.6471, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1140 }, { "epoch": 0.15515365787326624, "grad_norm": 0.486328125, "learning_rate": 1.9815702684986738e-05, "loss": 0.5968, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1141 }, { "epoch": 0.15528963829208595, "grad_norm": 0.48046875, "learning_rate": 1.981479427440667e-05, "loss": 0.5347, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1142 }, { "epoch": 0.15542561871090563, "grad_norm": 0.263671875, "learning_rate": 1.9813883651451976e-05, "loss": 0.4196, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1143 }, { "epoch": 0.1555615991297253, "grad_norm": 0.396484375, "learning_rate": 1.981297081632792e-05, "loss": 0.7403, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1144 }, { "epoch": 0.15569757954854502, "grad_norm": 0.48046875, "learning_rate": 1.981205576924027e-05, "loss": 0.7716, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1145 }, { "epoch": 0.1558335599673647, "grad_norm": 0.388671875, "learning_rate": 1.981113851039529e-05, "loss": 0.7126, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1146 }, { "epoch": 0.15596954038618438, "grad_norm": 0.3828125, "learning_rate": 1.9810219039999732e-05, "loss": 0.6157, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1147 }, { "epoch": 0.1561055208050041, "grad_norm": 0.337890625, "learning_rate": 1.9809297358260868e-05, "loss": 0.5856, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1148 }, { "epoch": 0.15624150122382377, "grad_norm": 0.58984375, "learning_rate": 1.9808373465386445e-05, "loss": 0.7186, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1149 }, { "epoch": 0.15637748164264345, "grad_norm": 0.6953125, "learning_rate": 1.9807447361584726e-05, "loss": 0.7599, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1150 }, { "epoch": 0.15651346206146316, "grad_norm": 0.359375, "learning_rate": 1.9806519047064467e-05, "loss": 0.6315, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1151 }, { "epoch": 0.15664944248028284, "grad_norm": 0.9296875, "learning_rate": 1.980558852203492e-05, "loss": 0.8918, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1152 }, { "epoch": 0.15678542289910252, "grad_norm": 0.81640625, "learning_rate": 1.980465578670583e-05, "loss": 0.8765, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1153 }, { "epoch": 0.15692140331792223, "grad_norm": 0.287109375, "learning_rate": 1.9803720841287458e-05, "loss": 0.4538, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1154 }, { "epoch": 0.1570573837367419, "grad_norm": 0.462890625, "learning_rate": 1.980278368599055e-05, "loss": 0.737, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1155 }, { "epoch": 0.1571933641555616, "grad_norm": 0.314453125, "learning_rate": 1.9801844321026345e-05, "loss": 0.5021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1156 }, { "epoch": 0.1573293445743813, "grad_norm": 0.421875, "learning_rate": 1.9800902746606594e-05, "loss": 0.6737, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1157 }, { "epoch": 0.15746532499320098, "grad_norm": 0.49609375, "learning_rate": 1.9799958962943536e-05, "loss": 0.751, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1158 }, { "epoch": 0.15760130541202066, "grad_norm": 0.703125, "learning_rate": 1.9799012970249913e-05, "loss": 0.8314, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1159 }, { "epoch": 0.15773728583084035, "grad_norm": 0.474609375, "learning_rate": 1.9798064768738965e-05, "loss": 0.7041, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1160 }, { "epoch": 0.15787326624966005, "grad_norm": 0.578125, "learning_rate": 1.9797114358624425e-05, "loss": 0.5095, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1161 }, { "epoch": 0.15800924666847974, "grad_norm": 0.58984375, "learning_rate": 1.979616174012053e-05, "loss": 0.6177, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1162 }, { "epoch": 0.15814522708729942, "grad_norm": 0.41015625, "learning_rate": 1.9795206913442007e-05, "loss": 0.6412, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1163 }, { "epoch": 0.15828120750611913, "grad_norm": 0.48828125, "learning_rate": 1.979424987880409e-05, "loss": 0.653, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1164 }, { "epoch": 0.1584171879249388, "grad_norm": 0.4375, "learning_rate": 1.9793290636422503e-05, "loss": 0.7507, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1165 }, { "epoch": 0.1585531683437585, "grad_norm": 0.6015625, "learning_rate": 1.9792329186513476e-05, "loss": 0.8773, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1166 }, { "epoch": 0.1586891487625782, "grad_norm": 0.8046875, "learning_rate": 1.9791365529293727e-05, "loss": 0.6901, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1167 }, { "epoch": 0.15882512918139788, "grad_norm": 0.423828125, "learning_rate": 1.9790399664980478e-05, "loss": 0.6663, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1168 }, { "epoch": 0.15896110960021756, "grad_norm": 0.458984375, "learning_rate": 1.9789431593791447e-05, "loss": 0.57, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1169 }, { "epoch": 0.15909709001903727, "grad_norm": 0.84375, "learning_rate": 1.9788461315944847e-05, "loss": 0.7396, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1170 }, { "epoch": 0.15923307043785695, "grad_norm": 0.38671875, "learning_rate": 1.9787488831659395e-05, "loss": 0.6574, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1171 }, { "epoch": 0.15936905085667663, "grad_norm": 0.2353515625, "learning_rate": 1.9786514141154293e-05, "loss": 0.4658, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1172 }, { "epoch": 0.15950503127549634, "grad_norm": 0.396484375, "learning_rate": 1.9785537244649258e-05, "loss": 0.5817, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1173 }, { "epoch": 0.15964101169431602, "grad_norm": 0.4453125, "learning_rate": 1.9784558142364485e-05, "loss": 0.8245, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1174 }, { "epoch": 0.1597769921131357, "grad_norm": 0.314453125, "learning_rate": 1.978357683452068e-05, "loss": 0.625, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1175 }, { "epoch": 0.1599129725319554, "grad_norm": 0.62890625, "learning_rate": 1.9782593321339045e-05, "loss": 0.6803, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1176 }, { "epoch": 0.1600489529507751, "grad_norm": 0.3046875, "learning_rate": 1.978160760304127e-05, "loss": 0.5366, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1177 }, { "epoch": 0.16018493336959477, "grad_norm": 0.31640625, "learning_rate": 1.9780619679849552e-05, "loss": 0.619, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1178 }, { "epoch": 0.16032091378841448, "grad_norm": 0.6015625, "learning_rate": 1.977962955198658e-05, "loss": 0.7303, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1179 }, { "epoch": 0.16045689420723416, "grad_norm": 0.322265625, "learning_rate": 1.9778637219675537e-05, "loss": 0.5314, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1180 }, { "epoch": 0.16059287462605384, "grad_norm": 0.396484375, "learning_rate": 1.9777642683140112e-05, "loss": 0.5911, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1181 }, { "epoch": 0.16072885504487353, "grad_norm": 0.349609375, "learning_rate": 1.9776645942604487e-05, "loss": 0.5889, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1182 }, { "epoch": 0.16086483546369323, "grad_norm": 0.4453125, "learning_rate": 1.9775646998293333e-05, "loss": 0.8309, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1183 }, { "epoch": 0.16100081588251292, "grad_norm": 0.75390625, "learning_rate": 1.9774645850431828e-05, "loss": 0.7244, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1184 }, { "epoch": 0.1611367963013326, "grad_norm": 0.78515625, "learning_rate": 1.9773642499245645e-05, "loss": 0.6966, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1185 }, { "epoch": 0.1612727767201523, "grad_norm": 0.50390625, "learning_rate": 1.9772636944960947e-05, "loss": 0.8962, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1186 }, { "epoch": 0.161408757138972, "grad_norm": 0.40625, "learning_rate": 1.97716291878044e-05, "loss": 0.6491, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1187 }, { "epoch": 0.16154473755779167, "grad_norm": 0.44140625, "learning_rate": 1.977061922800317e-05, "loss": 0.6693, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1188 }, { "epoch": 0.16168071797661138, "grad_norm": 0.314453125, "learning_rate": 1.9769607065784906e-05, "loss": 0.4582, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1189 }, { "epoch": 0.16181669839543106, "grad_norm": 0.5390625, "learning_rate": 1.9768592701377765e-05, "loss": 0.416, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1190 }, { "epoch": 0.16195267881425074, "grad_norm": 0.4921875, "learning_rate": 1.97675761350104e-05, "loss": 0.6838, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1191 }, { "epoch": 0.16208865923307045, "grad_norm": 0.45703125, "learning_rate": 1.9766557366911952e-05, "loss": 0.6548, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1192 }, { "epoch": 0.16222463965189013, "grad_norm": 0.357421875, "learning_rate": 1.9765536397312067e-05, "loss": 0.6471, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1193 }, { "epoch": 0.1623606200707098, "grad_norm": 0.55859375, "learning_rate": 1.9764513226440888e-05, "loss": 0.5936, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1194 }, { "epoch": 0.16249660048952952, "grad_norm": 0.41015625, "learning_rate": 1.9763487854529042e-05, "loss": 0.7578, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1195 }, { "epoch": 0.1626325809083492, "grad_norm": 0.80078125, "learning_rate": 1.9762460281807665e-05, "loss": 0.759, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1196 }, { "epoch": 0.16276856132716888, "grad_norm": 0.3671875, "learning_rate": 1.9761430508508382e-05, "loss": 0.631, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1197 }, { "epoch": 0.1629045417459886, "grad_norm": 0.49609375, "learning_rate": 1.9760398534863324e-05, "loss": 0.7474, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1198 }, { "epoch": 0.16304052216480827, "grad_norm": 0.4609375, "learning_rate": 1.97593643611051e-05, "loss": 0.8389, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1199 }, { "epoch": 0.16317650258362795, "grad_norm": 0.400390625, "learning_rate": 1.9758327987466833e-05, "loss": 0.589, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1200 }, { "epoch": 0.16331248300244763, "grad_norm": 0.44140625, "learning_rate": 1.975728941418213e-05, "loss": 0.7363, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1201 }, { "epoch": 0.16344846342126734, "grad_norm": 0.361328125, "learning_rate": 1.9756248641485102e-05, "loss": 0.5498, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1202 }, { "epoch": 0.16358444384008702, "grad_norm": 0.6953125, "learning_rate": 1.9755205669610347e-05, "loss": 0.8901, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1203 }, { "epoch": 0.1637204242589067, "grad_norm": 0.55859375, "learning_rate": 1.9754160498792964e-05, "loss": 0.9139, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1204 }, { "epoch": 0.1638564046777264, "grad_norm": 0.609375, "learning_rate": 1.9753113129268552e-05, "loss": 0.8415, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1205 }, { "epoch": 0.1639923850965461, "grad_norm": 0.5625, "learning_rate": 1.9752063561273198e-05, "loss": 0.7881, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1206 }, { "epoch": 0.16412836551536578, "grad_norm": 0.7421875, "learning_rate": 1.975101179504349e-05, "loss": 0.8063, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1207 }, { "epoch": 0.16426434593418549, "grad_norm": 0.4140625, "learning_rate": 1.9749957830816503e-05, "loss": 0.7756, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1208 }, { "epoch": 0.16440032635300517, "grad_norm": 0.3046875, "learning_rate": 1.9748901668829822e-05, "loss": 0.5106, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1209 }, { "epoch": 0.16453630677182485, "grad_norm": 0.275390625, "learning_rate": 1.974784330932151e-05, "loss": 0.5335, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1210 }, { "epoch": 0.16467228719064456, "grad_norm": 0.490234375, "learning_rate": 1.9746782752530138e-05, "loss": 0.8044, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1211 }, { "epoch": 0.16480826760946424, "grad_norm": 0.96484375, "learning_rate": 1.974571999869477e-05, "loss": 0.75, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1212 }, { "epoch": 0.16494424802828392, "grad_norm": 0.443359375, "learning_rate": 1.9744655048054962e-05, "loss": 0.6067, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1213 }, { "epoch": 0.16508022844710363, "grad_norm": 0.65234375, "learning_rate": 1.9743587900850767e-05, "loss": 0.7181, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1214 }, { "epoch": 0.1652162088659233, "grad_norm": 0.392578125, "learning_rate": 1.9742518557322735e-05, "loss": 0.744, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1215 }, { "epoch": 0.165352189284743, "grad_norm": 0.5078125, "learning_rate": 1.9741447017711905e-05, "loss": 0.7939, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1216 }, { "epoch": 0.1654881697035627, "grad_norm": 0.5390625, "learning_rate": 1.974037328225982e-05, "loss": 0.8475, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1217 }, { "epoch": 0.16562415012238238, "grad_norm": 1.0546875, "learning_rate": 1.9739297351208508e-05, "loss": 0.645, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1218 }, { "epoch": 0.16576013054120206, "grad_norm": 0.40234375, "learning_rate": 1.9738219224800503e-05, "loss": 0.7446, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1219 }, { "epoch": 0.16589611096002177, "grad_norm": 0.67578125, "learning_rate": 1.973713890327882e-05, "loss": 0.7642, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1220 }, { "epoch": 0.16603209137884145, "grad_norm": 0.65234375, "learning_rate": 1.9736056386886986e-05, "loss": 0.8324, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1221 }, { "epoch": 0.16616807179766113, "grad_norm": 0.392578125, "learning_rate": 1.9734971675869006e-05, "loss": 0.5917, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1222 }, { "epoch": 0.1663040522164808, "grad_norm": 0.515625, "learning_rate": 1.973388477046939e-05, "loss": 0.6859, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1223 }, { "epoch": 0.16644003263530052, "grad_norm": 0.345703125, "learning_rate": 1.973279567093314e-05, "loss": 0.5308, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1224 }, { "epoch": 0.1665760130541202, "grad_norm": 0.6171875, "learning_rate": 1.9731704377505746e-05, "loss": 0.8151, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1225 }, { "epoch": 0.16671199347293988, "grad_norm": 0.3671875, "learning_rate": 1.9730610890433212e-05, "loss": 0.757, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1226 }, { "epoch": 0.1668479738917596, "grad_norm": 0.38671875, "learning_rate": 1.9729515209962014e-05, "loss": 0.5674, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1227 }, { "epoch": 0.16698395431057927, "grad_norm": 0.54296875, "learning_rate": 1.9728417336339133e-05, "loss": 0.7894, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1228 }, { "epoch": 0.16711993472939896, "grad_norm": 0.361328125, "learning_rate": 1.972731726981205e-05, "loss": 0.6911, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1229 }, { "epoch": 0.16725591514821866, "grad_norm": 0.40234375, "learning_rate": 1.9726215010628717e-05, "loss": 0.7285, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1230 }, { "epoch": 0.16739189556703835, "grad_norm": 0.34765625, "learning_rate": 1.972511055903761e-05, "loss": 0.5993, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1231 }, { "epoch": 0.16752787598585803, "grad_norm": 0.5703125, "learning_rate": 1.972400391528769e-05, "loss": 0.9159, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1232 }, { "epoch": 0.16766385640467774, "grad_norm": 0.5703125, "learning_rate": 1.9722895079628395e-05, "loss": 0.4661, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1233 }, { "epoch": 0.16779983682349742, "grad_norm": 0.41015625, "learning_rate": 1.9721784052309676e-05, "loss": 0.6217, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1234 }, { "epoch": 0.1679358172423171, "grad_norm": 0.388671875, "learning_rate": 1.9720670833581973e-05, "loss": 0.5798, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1235 }, { "epoch": 0.1680717976611368, "grad_norm": 0.310546875, "learning_rate": 1.971955542369622e-05, "loss": 0.5438, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1236 }, { "epoch": 0.1682077780799565, "grad_norm": 0.40234375, "learning_rate": 1.9718437822903844e-05, "loss": 0.6379, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1237 }, { "epoch": 0.16834375849877617, "grad_norm": 0.39453125, "learning_rate": 1.971731803145676e-05, "loss": 0.8447, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1238 }, { "epoch": 0.16847973891759588, "grad_norm": 0.5625, "learning_rate": 1.971619604960739e-05, "loss": 0.625, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1239 }, { "epoch": 0.16861571933641556, "grad_norm": 0.33984375, "learning_rate": 1.971507187760864e-05, "loss": 0.5498, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1240 }, { "epoch": 0.16875169975523524, "grad_norm": 0.359375, "learning_rate": 1.9713945515713907e-05, "loss": 0.6216, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1241 }, { "epoch": 0.16888768017405495, "grad_norm": 0.294921875, "learning_rate": 1.9712816964177098e-05, "loss": 0.4492, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1242 }, { "epoch": 0.16902366059287463, "grad_norm": 0.3515625, "learning_rate": 1.971168622325259e-05, "loss": 0.6309, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1243 }, { "epoch": 0.1691596410116943, "grad_norm": 0.349609375, "learning_rate": 1.9710553293195273e-05, "loss": 0.6245, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1244 }, { "epoch": 0.169295621430514, "grad_norm": 0.349609375, "learning_rate": 1.9709418174260523e-05, "loss": 0.61, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1245 }, { "epoch": 0.1694316018493337, "grad_norm": 0.361328125, "learning_rate": 1.9708280866704204e-05, "loss": 0.7505, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1246 }, { "epoch": 0.16956758226815338, "grad_norm": 0.427734375, "learning_rate": 1.970714137078269e-05, "loss": 0.6997, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1247 }, { "epoch": 0.16970356268697306, "grad_norm": 0.306640625, "learning_rate": 1.9705999686752827e-05, "loss": 0.5137, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1248 }, { "epoch": 0.16983954310579277, "grad_norm": 0.357421875, "learning_rate": 1.970485581487197e-05, "loss": 0.6011, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1249 }, { "epoch": 0.16997552352461245, "grad_norm": 0.453125, "learning_rate": 1.970370975539796e-05, "loss": 0.5189, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1250 }, { "epoch": 0.17011150394343214, "grad_norm": 0.361328125, "learning_rate": 1.970256150858913e-05, "loss": 0.5584, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1251 }, { "epoch": 0.17024748436225184, "grad_norm": 0.279296875, "learning_rate": 1.9701411074704312e-05, "loss": 0.5467, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1252 }, { "epoch": 0.17038346478107153, "grad_norm": 0.36328125, "learning_rate": 1.970025845400283e-05, "loss": 0.5731, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1253 }, { "epoch": 0.1705194451998912, "grad_norm": 0.31640625, "learning_rate": 1.9699103646744496e-05, "loss": 0.5856, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1254 }, { "epoch": 0.17065542561871092, "grad_norm": 0.400390625, "learning_rate": 1.9697946653189616e-05, "loss": 0.6045, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1255 }, { "epoch": 0.1707914060375306, "grad_norm": 0.333984375, "learning_rate": 1.9696787473598993e-05, "loss": 0.5635, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1256 }, { "epoch": 0.17092738645635028, "grad_norm": 0.36328125, "learning_rate": 1.969562610823392e-05, "loss": 0.6538, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1257 }, { "epoch": 0.17106336687517, "grad_norm": 0.40625, "learning_rate": 1.9694462557356185e-05, "loss": 0.5311, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1258 }, { "epoch": 0.17119934729398967, "grad_norm": 0.390625, "learning_rate": 1.969329682122806e-05, "loss": 0.6997, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1259 }, { "epoch": 0.17133532771280935, "grad_norm": 0.40234375, "learning_rate": 1.9692128900112325e-05, "loss": 0.7201, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1260 }, { "epoch": 0.17147130813162906, "grad_norm": 0.6171875, "learning_rate": 1.9690958794272237e-05, "loss": 0.7266, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1261 }, { "epoch": 0.17160728855044874, "grad_norm": 0.2265625, "learning_rate": 1.9689786503971553e-05, "loss": 0.4657, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1262 }, { "epoch": 0.17174326896926842, "grad_norm": 0.318359375, "learning_rate": 1.9688612029474527e-05, "loss": 0.6545, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1263 }, { "epoch": 0.17187924938808813, "grad_norm": 0.462890625, "learning_rate": 1.9687435371045893e-05, "loss": 0.6333, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1264 }, { "epoch": 0.1720152298069078, "grad_norm": 0.443359375, "learning_rate": 1.9686256528950887e-05, "loss": 0.6966, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1265 }, { "epoch": 0.1721512102257275, "grad_norm": 0.69140625, "learning_rate": 1.9685075503455232e-05, "loss": 0.6921, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1266 }, { "epoch": 0.17228719064454717, "grad_norm": 0.5625, "learning_rate": 1.968389229482515e-05, "loss": 0.5309, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1267 }, { "epoch": 0.17242317106336688, "grad_norm": 0.68359375, "learning_rate": 1.968270690332735e-05, "loss": 0.6935, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1268 }, { "epoch": 0.17255915148218656, "grad_norm": 0.328125, "learning_rate": 1.968151932922903e-05, "loss": 0.5221, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1269 }, { "epoch": 0.17269513190100624, "grad_norm": 0.91015625, "learning_rate": 1.968032957279789e-05, "loss": 0.6785, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1270 }, { "epoch": 0.17283111231982595, "grad_norm": 0.330078125, "learning_rate": 1.967913763430211e-05, "loss": 0.5513, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1271 }, { "epoch": 0.17296709273864563, "grad_norm": 0.388671875, "learning_rate": 1.9677943514010368e-05, "loss": 0.6324, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1272 }, { "epoch": 0.17310307315746531, "grad_norm": 0.33203125, "learning_rate": 1.967674721219184e-05, "loss": 0.4842, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1273 }, { "epoch": 0.17323905357628502, "grad_norm": 0.375, "learning_rate": 1.9675548729116176e-05, "loss": 0.7072, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1274 }, { "epoch": 0.1733750339951047, "grad_norm": 0.53515625, "learning_rate": 1.9674348065053533e-05, "loss": 0.755, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1275 }, { "epoch": 0.17351101441392439, "grad_norm": 0.369140625, "learning_rate": 1.967314522027456e-05, "loss": 0.5949, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1276 }, { "epoch": 0.1736469948327441, "grad_norm": 0.3359375, "learning_rate": 1.9671940195050393e-05, "loss": 0.6201, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1277 }, { "epoch": 0.17378297525156378, "grad_norm": 0.5078125, "learning_rate": 1.967073298965265e-05, "loss": 0.827, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1278 }, { "epoch": 0.17391895567038346, "grad_norm": 0.40234375, "learning_rate": 1.966952360435346e-05, "loss": 0.631, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1279 }, { "epoch": 0.17405493608920317, "grad_norm": 0.419921875, "learning_rate": 1.9668312039425425e-05, "loss": 0.7119, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1280 }, { "epoch": 0.17419091650802285, "grad_norm": 0.484375, "learning_rate": 1.9667098295141652e-05, "loss": 0.6484, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1281 }, { "epoch": 0.17432689692684253, "grad_norm": 0.50390625, "learning_rate": 1.9665882371775735e-05, "loss": 0.5741, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1282 }, { "epoch": 0.17446287734566224, "grad_norm": 0.50390625, "learning_rate": 1.9664664269601757e-05, "loss": 0.4888, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1283 }, { "epoch": 0.17459885776448192, "grad_norm": 0.625, "learning_rate": 1.966344398889429e-05, "loss": 0.5646, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1284 }, { "epoch": 0.1747348381833016, "grad_norm": 0.298828125, "learning_rate": 1.96622215299284e-05, "loss": 0.6134, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1285 }, { "epoch": 0.17487081860212128, "grad_norm": 0.69921875, "learning_rate": 1.9660996892979646e-05, "loss": 0.4948, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1286 }, { "epoch": 0.175006799020941, "grad_norm": 0.375, "learning_rate": 1.965977007832408e-05, "loss": 0.6069, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1287 }, { "epoch": 0.17514277943976067, "grad_norm": 0.51171875, "learning_rate": 1.965854108623823e-05, "loss": 0.6733, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1288 }, { "epoch": 0.17527875985858035, "grad_norm": 0.306640625, "learning_rate": 1.965730991699914e-05, "loss": 0.4141, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1289 }, { "epoch": 0.17541474027740006, "grad_norm": 0.625, "learning_rate": 1.965607657088432e-05, "loss": 0.6229, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1290 }, { "epoch": 0.17555072069621974, "grad_norm": 0.52734375, "learning_rate": 1.9654841048171786e-05, "loss": 0.7907, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1291 }, { "epoch": 0.17568670111503942, "grad_norm": 0.294921875, "learning_rate": 1.965360334914004e-05, "loss": 0.5654, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1292 }, { "epoch": 0.17582268153385913, "grad_norm": 0.29296875, "learning_rate": 1.9652363474068075e-05, "loss": 0.618, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1293 }, { "epoch": 0.1759586619526788, "grad_norm": 0.48828125, "learning_rate": 1.9651121423235367e-05, "loss": 0.9053, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1294 }, { "epoch": 0.1760946423714985, "grad_norm": 0.431640625, "learning_rate": 1.96498771969219e-05, "loss": 0.8005, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1295 }, { "epoch": 0.1762306227903182, "grad_norm": 0.6484375, "learning_rate": 1.964863079540813e-05, "loss": 0.6169, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1296 }, { "epoch": 0.17636660320913788, "grad_norm": 0.328125, "learning_rate": 1.9647382218975013e-05, "loss": 0.604, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1297 }, { "epoch": 0.17650258362795757, "grad_norm": 0.322265625, "learning_rate": 1.9646131467903995e-05, "loss": 0.5832, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1298 }, { "epoch": 0.17663856404677727, "grad_norm": 0.353515625, "learning_rate": 1.9644878542477012e-05, "loss": 0.743, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1299 }, { "epoch": 0.17677454446559696, "grad_norm": 0.6875, "learning_rate": 1.9643623442976482e-05, "loss": 0.7922, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1300 }, { "epoch": 0.17691052488441664, "grad_norm": 0.60546875, "learning_rate": 1.9642366169685328e-05, "loss": 0.7749, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1301 }, { "epoch": 0.17704650530323635, "grad_norm": 0.34765625, "learning_rate": 1.964110672288695e-05, "loss": 0.5609, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1302 }, { "epoch": 0.17718248572205603, "grad_norm": 0.72265625, "learning_rate": 1.9639845102865244e-05, "loss": 0.5804, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1303 }, { "epoch": 0.1773184661408757, "grad_norm": 0.314453125, "learning_rate": 1.963858130990459e-05, "loss": 0.5789, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1304 }, { "epoch": 0.17745444655969542, "grad_norm": 0.28515625, "learning_rate": 1.9637315344289874e-05, "loss": 0.4751, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1305 }, { "epoch": 0.1775904269785151, "grad_norm": 0.53125, "learning_rate": 1.9636047206306446e-05, "loss": 0.6872, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1306 }, { "epoch": 0.17772640739733478, "grad_norm": 0.490234375, "learning_rate": 1.9634776896240172e-05, "loss": 0.5967, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1307 }, { "epoch": 0.17786238781615446, "grad_norm": 0.412109375, "learning_rate": 1.963350441437739e-05, "loss": 0.5086, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1308 }, { "epoch": 0.17799836823497417, "grad_norm": 0.578125, "learning_rate": 1.963222976100493e-05, "loss": 0.8151, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1309 }, { "epoch": 0.17813434865379385, "grad_norm": 0.365234375, "learning_rate": 1.963095293641012e-05, "loss": 0.6279, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1310 }, { "epoch": 0.17827032907261353, "grad_norm": 0.259765625, "learning_rate": 1.9629673940880768e-05, "loss": 0.4538, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1311 }, { "epoch": 0.17840630949143324, "grad_norm": 0.33203125, "learning_rate": 1.9628392774705175e-05, "loss": 0.4754, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1312 }, { "epoch": 0.17854228991025292, "grad_norm": 0.48828125, "learning_rate": 1.9627109438172134e-05, "loss": 0.7018, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1313 }, { "epoch": 0.1786782703290726, "grad_norm": 0.38671875, "learning_rate": 1.9625823931570925e-05, "loss": 0.5936, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1314 }, { "epoch": 0.1788142507478923, "grad_norm": 0.357421875, "learning_rate": 1.9624536255191315e-05, "loss": 0.5033, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1315 }, { "epoch": 0.178950231166712, "grad_norm": 0.9375, "learning_rate": 1.962324640932356e-05, "loss": 0.9113, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1316 }, { "epoch": 0.17908621158553167, "grad_norm": 0.419921875, "learning_rate": 1.9621954394258414e-05, "loss": 0.562, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1317 }, { "epoch": 0.17922219200435138, "grad_norm": 0.458984375, "learning_rate": 1.9620660210287107e-05, "loss": 0.7033, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1318 }, { "epoch": 0.17935817242317106, "grad_norm": 0.396484375, "learning_rate": 1.9619363857701367e-05, "loss": 0.5534, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1319 }, { "epoch": 0.17949415284199075, "grad_norm": 0.2255859375, "learning_rate": 1.9618065336793407e-05, "loss": 0.3763, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1320 }, { "epoch": 0.17963013326081045, "grad_norm": 0.39453125, "learning_rate": 1.9616764647855924e-05, "loss": 0.5763, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1321 }, { "epoch": 0.17976611367963014, "grad_norm": 3.125, "learning_rate": 1.9615461791182118e-05, "loss": 0.9072, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1322 }, { "epoch": 0.17990209409844982, "grad_norm": 0.322265625, "learning_rate": 1.9614156767065667e-05, "loss": 0.6006, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1323 }, { "epoch": 0.18003807451726953, "grad_norm": 0.53515625, "learning_rate": 1.9612849575800733e-05, "loss": 0.918, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1324 }, { "epoch": 0.1801740549360892, "grad_norm": 0.34765625, "learning_rate": 1.9611540217681978e-05, "loss": 0.57, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1325 }, { "epoch": 0.1803100353549089, "grad_norm": 0.65625, "learning_rate": 1.9610228693004547e-05, "loss": 0.599, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1326 }, { "epoch": 0.1804460157737286, "grad_norm": 0.46484375, "learning_rate": 1.9608915002064073e-05, "loss": 0.721, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1327 }, { "epoch": 0.18058199619254828, "grad_norm": 0.201171875, "learning_rate": 1.9607599145156676e-05, "loss": 0.3973, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1328 }, { "epoch": 0.18071797661136796, "grad_norm": 0.412109375, "learning_rate": 1.9606281122578973e-05, "loss": 0.7218, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1329 }, { "epoch": 0.18085395703018764, "grad_norm": 0.4296875, "learning_rate": 1.9604960934628053e-05, "loss": 0.5866, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1330 }, { "epoch": 0.18098993744900735, "grad_norm": 0.29296875, "learning_rate": 1.9603638581601513e-05, "loss": 0.4051, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1331 }, { "epoch": 0.18112591786782703, "grad_norm": 0.330078125, "learning_rate": 1.960231406379742e-05, "loss": 0.5675, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1332 }, { "epoch": 0.1812618982866467, "grad_norm": 0.8203125, "learning_rate": 1.9600987381514334e-05, "loss": 0.7231, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1333 }, { "epoch": 0.18139787870546642, "grad_norm": 0.314453125, "learning_rate": 1.9599658535051312e-05, "loss": 0.4318, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1334 }, { "epoch": 0.1815338591242861, "grad_norm": 0.40625, "learning_rate": 1.959832752470789e-05, "loss": 0.77, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1335 }, { "epoch": 0.18166983954310578, "grad_norm": 0.388671875, "learning_rate": 1.9596994350784095e-05, "loss": 0.5801, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1336 }, { "epoch": 0.1818058199619255, "grad_norm": 0.55078125, "learning_rate": 1.9595659013580443e-05, "loss": 0.7863, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1337 }, { "epoch": 0.18194180038074517, "grad_norm": 0.349609375, "learning_rate": 1.959432151339793e-05, "loss": 0.5495, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1338 }, { "epoch": 0.18207778079956485, "grad_norm": 0.314453125, "learning_rate": 1.9592981850538046e-05, "loss": 0.6079, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1339 }, { "epoch": 0.18221376121838456, "grad_norm": 0.51171875, "learning_rate": 1.9591640025302765e-05, "loss": 0.5511, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1340 }, { "epoch": 0.18234974163720424, "grad_norm": 0.67578125, "learning_rate": 1.9590296037994557e-05, "loss": 0.6848, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1341 }, { "epoch": 0.18248572205602392, "grad_norm": 0.423828125, "learning_rate": 1.958894988891637e-05, "loss": 0.7144, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1342 }, { "epoch": 0.18262170247484363, "grad_norm": 0.345703125, "learning_rate": 1.958760157837164e-05, "loss": 0.5721, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1343 }, { "epoch": 0.18275768289366331, "grad_norm": 0.6953125, "learning_rate": 1.95862511066643e-05, "loss": 0.703, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1344 }, { "epoch": 0.182893663312483, "grad_norm": 0.27734375, "learning_rate": 1.9584898474098758e-05, "loss": 0.5343, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1345 }, { "epoch": 0.1830296437313027, "grad_norm": 0.80078125, "learning_rate": 1.9583543680979914e-05, "loss": 0.7145, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1346 }, { "epoch": 0.18316562415012239, "grad_norm": 0.291015625, "learning_rate": 1.9582186727613152e-05, "loss": 0.457, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1347 }, { "epoch": 0.18330160456894207, "grad_norm": 0.765625, "learning_rate": 1.9580827614304354e-05, "loss": 0.8818, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1348 }, { "epoch": 0.18343758498776178, "grad_norm": 0.296875, "learning_rate": 1.9579466341359873e-05, "loss": 0.5413, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1349 }, { "epoch": 0.18357356540658146, "grad_norm": 0.51171875, "learning_rate": 1.9578102909086558e-05, "loss": 0.813, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1350 }, { "epoch": 0.18370954582540114, "grad_norm": 0.796875, "learning_rate": 1.9576737317791745e-05, "loss": 0.7801, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1351 }, { "epoch": 0.18384552624422082, "grad_norm": 0.474609375, "learning_rate": 1.9575369567783258e-05, "loss": 0.7036, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1352 }, { "epoch": 0.18398150666304053, "grad_norm": 0.42578125, "learning_rate": 1.9573999659369396e-05, "loss": 0.611, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1353 }, { "epoch": 0.1841174870818602, "grad_norm": 0.357421875, "learning_rate": 1.9572627592858963e-05, "loss": 0.59, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1354 }, { "epoch": 0.1842534675006799, "grad_norm": 0.357421875, "learning_rate": 1.9571253368561232e-05, "loss": 0.6388, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1355 }, { "epoch": 0.1843894479194996, "grad_norm": 0.357421875, "learning_rate": 1.9569876986785975e-05, "loss": 0.6089, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1356 }, { "epoch": 0.18452542833831928, "grad_norm": 0.37109375, "learning_rate": 1.956849844784344e-05, "loss": 0.5965, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1357 }, { "epoch": 0.18466140875713896, "grad_norm": 0.34375, "learning_rate": 1.956711775204437e-05, "loss": 0.5506, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1358 }, { "epoch": 0.18479738917595867, "grad_norm": 0.333984375, "learning_rate": 1.9565734899699997e-05, "loss": 0.5034, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1359 }, { "epoch": 0.18493336959477835, "grad_norm": 0.251953125, "learning_rate": 1.9564349891122017e-05, "loss": 0.4676, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1360 }, { "epoch": 0.18506935001359803, "grad_norm": 0.5390625, "learning_rate": 1.956296272662264e-05, "loss": 0.549, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1361 }, { "epoch": 0.18520533043241774, "grad_norm": 0.39453125, "learning_rate": 1.956157340651455e-05, "loss": 0.7209, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1362 }, { "epoch": 0.18534131085123742, "grad_norm": 0.421875, "learning_rate": 1.9560181931110912e-05, "loss": 0.7223, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1363 }, { "epoch": 0.1854772912700571, "grad_norm": 0.8046875, "learning_rate": 1.955878830072538e-05, "loss": 0.7591, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1364 }, { "epoch": 0.1856132716888768, "grad_norm": 0.55078125, "learning_rate": 1.95573925156721e-05, "loss": 0.5838, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1365 }, { "epoch": 0.1857492521076965, "grad_norm": 0.322265625, "learning_rate": 1.9555994576265703e-05, "loss": 0.625, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1366 }, { "epoch": 0.18588523252651618, "grad_norm": 0.7265625, "learning_rate": 1.9554594482821293e-05, "loss": 1.0197, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1367 }, { "epoch": 0.18602121294533588, "grad_norm": 0.298828125, "learning_rate": 1.955319223565447e-05, "loss": 0.6595, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1368 }, { "epoch": 0.18615719336415557, "grad_norm": 0.48828125, "learning_rate": 1.9551787835081318e-05, "loss": 0.8224, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1369 }, { "epoch": 0.18629317378297525, "grad_norm": 0.26171875, "learning_rate": 1.955038128141841e-05, "loss": 0.488, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1370 }, { "epoch": 0.18642915420179493, "grad_norm": 0.408203125, "learning_rate": 1.9548972574982796e-05, "loss": 0.6195, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1371 }, { "epoch": 0.18656513462061464, "grad_norm": 0.578125, "learning_rate": 1.9547561716092013e-05, "loss": 0.9359, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1372 }, { "epoch": 0.18670111503943432, "grad_norm": 0.25, "learning_rate": 1.9546148705064097e-05, "loss": 0.4818, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1373 }, { "epoch": 0.186837095458254, "grad_norm": 0.40234375, "learning_rate": 1.9544733542217543e-05, "loss": 0.6382, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1374 }, { "epoch": 0.1869730758770737, "grad_norm": 0.421875, "learning_rate": 1.954331622787136e-05, "loss": 0.4618, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1375 }, { "epoch": 0.1871090562958934, "grad_norm": 0.400390625, "learning_rate": 1.9541896762345024e-05, "loss": 0.6795, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1376 }, { "epoch": 0.18724503671471307, "grad_norm": 0.33203125, "learning_rate": 1.9540475145958493e-05, "loss": 0.5553, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1377 }, { "epoch": 0.18738101713353278, "grad_norm": 0.6015625, "learning_rate": 1.953905137903222e-05, "loss": 0.6387, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1378 }, { "epoch": 0.18751699755235246, "grad_norm": 0.318359375, "learning_rate": 1.9537625461887147e-05, "loss": 0.4517, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1379 }, { "epoch": 0.18765297797117214, "grad_norm": 0.42578125, "learning_rate": 1.9536197394844682e-05, "loss": 0.7124, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1380 }, { "epoch": 0.18778895838999185, "grad_norm": 0.64453125, "learning_rate": 1.9534767178226738e-05, "loss": 0.9798, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1381 }, { "epoch": 0.18792493880881153, "grad_norm": 0.66796875, "learning_rate": 1.9533334812355694e-05, "loss": 0.5246, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1382 }, { "epoch": 0.1880609192276312, "grad_norm": 0.515625, "learning_rate": 1.9531900297554433e-05, "loss": 0.6313, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1383 }, { "epoch": 0.18819689964645092, "grad_norm": 0.5546875, "learning_rate": 1.9530463634146304e-05, "loss": 0.7884, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1384 }, { "epoch": 0.1883328800652706, "grad_norm": 0.58203125, "learning_rate": 1.9529024822455152e-05, "loss": 0.5721, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1385 }, { "epoch": 0.18846886048409028, "grad_norm": 0.373046875, "learning_rate": 1.9527583862805303e-05, "loss": 0.6118, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1386 }, { "epoch": 0.18860484090291, "grad_norm": 0.49609375, "learning_rate": 1.9526140755521566e-05, "loss": 0.9036, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1387 }, { "epoch": 0.18874082132172967, "grad_norm": 0.45703125, "learning_rate": 1.9524695500929236e-05, "loss": 0.6159, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1388 }, { "epoch": 0.18887680174054935, "grad_norm": 0.37890625, "learning_rate": 1.952324809935409e-05, "loss": 0.7119, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1389 }, { "epoch": 0.18901278215936906, "grad_norm": 0.408203125, "learning_rate": 1.952179855112239e-05, "loss": 0.7601, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1390 }, { "epoch": 0.18914876257818874, "grad_norm": 0.4921875, "learning_rate": 1.9520346856560885e-05, "loss": 0.7358, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1391 }, { "epoch": 0.18928474299700843, "grad_norm": 0.259765625, "learning_rate": 1.95188930159968e-05, "loss": 0.5049, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1392 }, { "epoch": 0.1894207234158281, "grad_norm": 0.6328125, "learning_rate": 1.9517437029757848e-05, "loss": 0.6234, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1393 }, { "epoch": 0.18955670383464782, "grad_norm": 0.390625, "learning_rate": 1.9515978898172233e-05, "loss": 0.596, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1394 }, { "epoch": 0.1896926842534675, "grad_norm": 0.5234375, "learning_rate": 1.951451862156863e-05, "loss": 0.6359, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1395 }, { "epoch": 0.18982866467228718, "grad_norm": 0.453125, "learning_rate": 1.95130562002762e-05, "loss": 0.7907, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1396 }, { "epoch": 0.1899646450911069, "grad_norm": 0.373046875, "learning_rate": 1.9511591634624603e-05, "loss": 0.6935, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1397 }, { "epoch": 0.19010062550992657, "grad_norm": 0.373046875, "learning_rate": 1.9510124924943956e-05, "loss": 0.5871, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1398 }, { "epoch": 0.19023660592874625, "grad_norm": 0.6484375, "learning_rate": 1.9508656071564883e-05, "loss": 0.7743, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1399 }, { "epoch": 0.19037258634756596, "grad_norm": 0.3828125, "learning_rate": 1.9507185074818475e-05, "loss": 0.7474, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1400 }, { "epoch": 0.19050856676638564, "grad_norm": 0.28515625, "learning_rate": 1.9505711935036315e-05, "loss": 0.4795, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1401 }, { "epoch": 0.19064454718520532, "grad_norm": 0.5234375, "learning_rate": 1.950423665255047e-05, "loss": 0.6595, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1402 }, { "epoch": 0.19078052760402503, "grad_norm": 0.470703125, "learning_rate": 1.950275922769348e-05, "loss": 0.6276, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1403 }, { "epoch": 0.1909165080228447, "grad_norm": 0.482421875, "learning_rate": 1.9501279660798383e-05, "loss": 0.7576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1404 }, { "epoch": 0.1910524884416644, "grad_norm": 0.61328125, "learning_rate": 1.9499797952198685e-05, "loss": 0.6211, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1405 }, { "epoch": 0.1911884688604841, "grad_norm": 0.435546875, "learning_rate": 1.9498314102228387e-05, "loss": 0.6748, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1406 }, { "epoch": 0.19132444927930378, "grad_norm": 0.40234375, "learning_rate": 1.9496828111221958e-05, "loss": 0.6948, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1407 }, { "epoch": 0.19146042969812346, "grad_norm": 0.34765625, "learning_rate": 1.9495339979514365e-05, "loss": 0.5109, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1408 }, { "epoch": 0.19159641011694317, "grad_norm": 0.515625, "learning_rate": 1.949384970744105e-05, "loss": 0.5425, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1409 }, { "epoch": 0.19173239053576285, "grad_norm": 0.330078125, "learning_rate": 1.949235729533794e-05, "loss": 0.4445, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1410 }, { "epoch": 0.19186837095458253, "grad_norm": 0.67578125, "learning_rate": 1.9490862743541438e-05, "loss": 1.0132, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1411 }, { "epoch": 0.19200435137340224, "grad_norm": 0.40625, "learning_rate": 1.9489366052388443e-05, "loss": 0.6735, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1412 }, { "epoch": 0.19214033179222192, "grad_norm": 0.322265625, "learning_rate": 1.9487867222216315e-05, "loss": 0.5882, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1413 }, { "epoch": 0.1922763122110416, "grad_norm": 0.8359375, "learning_rate": 1.9486366253362924e-05, "loss": 0.7396, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1414 }, { "epoch": 0.1924122926298613, "grad_norm": 0.33203125, "learning_rate": 1.9484863146166593e-05, "loss": 0.6698, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1415 }, { "epoch": 0.192548273048681, "grad_norm": 0.482421875, "learning_rate": 1.9483357900966147e-05, "loss": 0.7293, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1416 }, { "epoch": 0.19268425346750068, "grad_norm": 0.6796875, "learning_rate": 1.9481850518100886e-05, "loss": 0.6774, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1417 }, { "epoch": 0.19282023388632036, "grad_norm": 0.421875, "learning_rate": 1.9480340997910593e-05, "loss": 0.6748, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1418 }, { "epoch": 0.19295621430514007, "grad_norm": 0.470703125, "learning_rate": 1.9478829340735527e-05, "loss": 0.916, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1419 }, { "epoch": 0.19309219472395975, "grad_norm": 0.46484375, "learning_rate": 1.9477315546916443e-05, "loss": 0.745, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1420 }, { "epoch": 0.19322817514277943, "grad_norm": 0.46484375, "learning_rate": 1.9475799616794567e-05, "loss": 0.8737, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1421 }, { "epoch": 0.19336415556159914, "grad_norm": 0.4375, "learning_rate": 1.94742815507116e-05, "loss": 0.7606, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1422 }, { "epoch": 0.19350013598041882, "grad_norm": 0.373046875, "learning_rate": 1.9472761349009742e-05, "loss": 0.7422, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1423 }, { "epoch": 0.1936361163992385, "grad_norm": 0.380859375, "learning_rate": 1.947123901203166e-05, "loss": 0.5425, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1424 }, { "epoch": 0.1937720968180582, "grad_norm": 0.578125, "learning_rate": 1.946971454012051e-05, "loss": 0.8501, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1425 }, { "epoch": 0.1939080772368779, "grad_norm": 0.267578125, "learning_rate": 1.9468187933619923e-05, "loss": 0.4622, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1426 }, { "epoch": 0.19404405765569757, "grad_norm": 0.439453125, "learning_rate": 1.9466659192874016e-05, "loss": 0.6708, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1427 }, { "epoch": 0.19418003807451728, "grad_norm": 0.431640625, "learning_rate": 1.946512831822739e-05, "loss": 0.6343, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1428 }, { "epoch": 0.19431601849333696, "grad_norm": 0.423828125, "learning_rate": 1.9463595310025115e-05, "loss": 0.6794, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1429 }, { "epoch": 0.19445199891215664, "grad_norm": 0.462890625, "learning_rate": 1.946206016861276e-05, "loss": 0.8018, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1430 }, { "epoch": 0.19458797933097635, "grad_norm": 0.6796875, "learning_rate": 1.9460522894336354e-05, "loss": 0.7505, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1431 }, { "epoch": 0.19472395974979603, "grad_norm": 0.6171875, "learning_rate": 1.9458983487542425e-05, "loss": 0.7798, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1432 }, { "epoch": 0.19485994016861571, "grad_norm": 0.36328125, "learning_rate": 1.945744194857797e-05, "loss": 0.5347, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1433 }, { "epoch": 0.19499592058743542, "grad_norm": 0.640625, "learning_rate": 1.9455898277790478e-05, "loss": 0.7404, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1434 }, { "epoch": 0.1951319010062551, "grad_norm": 0.70703125, "learning_rate": 1.94543524755279e-05, "loss": 0.7957, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1435 }, { "epoch": 0.19526788142507479, "grad_norm": 0.5, "learning_rate": 1.9452804542138686e-05, "loss": 0.7381, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1436 }, { "epoch": 0.19540386184389447, "grad_norm": 0.455078125, "learning_rate": 1.9451254477971756e-05, "loss": 0.7557, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1437 }, { "epoch": 0.19553984226271418, "grad_norm": 0.4609375, "learning_rate": 1.9449702283376516e-05, "loss": 0.7677, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1438 }, { "epoch": 0.19567582268153386, "grad_norm": 0.6796875, "learning_rate": 1.944814795870285e-05, "loss": 0.6676, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1439 }, { "epoch": 0.19581180310035354, "grad_norm": 0.349609375, "learning_rate": 1.944659150430112e-05, "loss": 0.4697, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1440 }, { "epoch": 0.19594778351917325, "grad_norm": 0.6875, "learning_rate": 1.9445032920522168e-05, "loss": 0.8953, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1441 }, { "epoch": 0.19608376393799293, "grad_norm": 0.34765625, "learning_rate": 1.944347220771732e-05, "loss": 0.6493, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1442 }, { "epoch": 0.1962197443568126, "grad_norm": 0.365234375, "learning_rate": 1.9441909366238383e-05, "loss": 0.582, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1443 }, { "epoch": 0.19635572477563232, "grad_norm": 0.375, "learning_rate": 1.9440344396437635e-05, "loss": 0.6213, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1444 }, { "epoch": 0.196491705194452, "grad_norm": 0.376953125, "learning_rate": 1.943877729866784e-05, "loss": 0.6252, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1445 }, { "epoch": 0.19662768561327168, "grad_norm": 0.373046875, "learning_rate": 1.9437208073282247e-05, "loss": 0.6672, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1446 }, { "epoch": 0.1967636660320914, "grad_norm": 0.341796875, "learning_rate": 1.9435636720634573e-05, "loss": 0.6309, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1447 }, { "epoch": 0.19689964645091107, "grad_norm": 0.41015625, "learning_rate": 1.9434063241079017e-05, "loss": 0.6397, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1448 }, { "epoch": 0.19703562686973075, "grad_norm": 0.3671875, "learning_rate": 1.943248763497027e-05, "loss": 0.6021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1449 }, { "epoch": 0.19717160728855046, "grad_norm": 0.48828125, "learning_rate": 1.9430909902663485e-05, "loss": 0.7225, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1450 }, { "epoch": 0.19730758770737014, "grad_norm": 0.58203125, "learning_rate": 1.9429330044514305e-05, "loss": 0.6445, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1451 }, { "epoch": 0.19744356812618982, "grad_norm": 0.447265625, "learning_rate": 1.942774806087885e-05, "loss": 0.749, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1452 }, { "epoch": 0.19757954854500953, "grad_norm": 0.431640625, "learning_rate": 1.942616395211372e-05, "loss": 0.807, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1453 }, { "epoch": 0.1977155289638292, "grad_norm": 0.431640625, "learning_rate": 1.9424577718575987e-05, "loss": 0.6315, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1454 }, { "epoch": 0.1978515093826489, "grad_norm": 0.345703125, "learning_rate": 1.9422989360623207e-05, "loss": 0.5591, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1455 }, { "epoch": 0.19798748980146857, "grad_norm": 0.49609375, "learning_rate": 1.9421398878613423e-05, "loss": 0.7869, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1456 }, { "epoch": 0.19812347022028828, "grad_norm": 0.37890625, "learning_rate": 1.941980627290515e-05, "loss": 0.6336, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1457 }, { "epoch": 0.19825945063910796, "grad_norm": 0.369140625, "learning_rate": 1.941821154385737e-05, "loss": 0.6613, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1458 }, { "epoch": 0.19839543105792765, "grad_norm": 0.458984375, "learning_rate": 1.941661469182956e-05, "loss": 0.7394, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1459 }, { "epoch": 0.19853141147674735, "grad_norm": 0.294921875, "learning_rate": 1.9415015717181672e-05, "loss": 0.5921, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1460 }, { "epoch": 0.19866739189556704, "grad_norm": 0.400390625, "learning_rate": 1.941341462027413e-05, "loss": 0.6252, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1461 }, { "epoch": 0.19880337231438672, "grad_norm": 0.43359375, "learning_rate": 1.9411811401467846e-05, "loss": 0.6199, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1462 }, { "epoch": 0.19893935273320643, "grad_norm": 0.53515625, "learning_rate": 1.94102060611242e-05, "loss": 0.9323, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1463 }, { "epoch": 0.1990753331520261, "grad_norm": 0.375, "learning_rate": 1.9408598599605062e-05, "loss": 0.6133, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1464 }, { "epoch": 0.1992113135708458, "grad_norm": 0.60546875, "learning_rate": 1.940698901727277e-05, "loss": 0.728, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1465 }, { "epoch": 0.1993472939896655, "grad_norm": 0.384765625, "learning_rate": 1.9405377314490136e-05, "loss": 0.6769, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1466 }, { "epoch": 0.19948327440848518, "grad_norm": 0.7109375, "learning_rate": 1.9403763491620468e-05, "loss": 0.5602, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1467 }, { "epoch": 0.19961925482730486, "grad_norm": 0.423828125, "learning_rate": 1.940214754902754e-05, "loss": 0.7778, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1468 }, { "epoch": 0.19975523524612457, "grad_norm": 0.45703125, "learning_rate": 1.94005294870756e-05, "loss": 0.7255, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1469 }, { "epoch": 0.19989121566494425, "grad_norm": 0.84375, "learning_rate": 1.9398909306129386e-05, "loss": 0.7798, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1470 }, { "epoch": 0.20002719608376393, "grad_norm": 0.62109375, "learning_rate": 1.9397287006554096e-05, "loss": 0.8716, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1471 }, { "epoch": 0.20016317650258364, "grad_norm": 0.8515625, "learning_rate": 1.9395662588715428e-05, "loss": 0.793, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1472 }, { "epoch": 0.20029915692140332, "grad_norm": 0.5234375, "learning_rate": 1.939403605297954e-05, "loss": 0.7598, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1473 }, { "epoch": 0.200435137340223, "grad_norm": 0.53515625, "learning_rate": 1.9392407399713072e-05, "loss": 0.6774, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1474 }, { "epoch": 0.2005711177590427, "grad_norm": 0.61328125, "learning_rate": 1.9390776629283143e-05, "loss": 0.5526, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1475 }, { "epoch": 0.2007070981778624, "grad_norm": 0.2392578125, "learning_rate": 1.938914374205735e-05, "loss": 0.3314, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1476 }, { "epoch": 0.20084307859668207, "grad_norm": 0.416015625, "learning_rate": 1.938750873840377e-05, "loss": 0.7155, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1477 }, { "epoch": 0.20097905901550175, "grad_norm": 0.59375, "learning_rate": 1.938587161869094e-05, "loss": 0.5868, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1478 }, { "epoch": 0.20111503943432146, "grad_norm": 0.279296875, "learning_rate": 1.9384232383287898e-05, "loss": 0.5439, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1479 }, { "epoch": 0.20125101985314114, "grad_norm": 0.34375, "learning_rate": 1.9382591032564146e-05, "loss": 0.5, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1480 }, { "epoch": 0.20138700027196083, "grad_norm": 0.6328125, "learning_rate": 1.9380947566889663e-05, "loss": 0.6787, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1481 }, { "epoch": 0.20152298069078053, "grad_norm": 0.4921875, "learning_rate": 1.9379301986634906e-05, "loss": 0.7417, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1482 }, { "epoch": 0.20165896110960022, "grad_norm": 0.50390625, "learning_rate": 1.9377654292170808e-05, "loss": 0.791, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1483 }, { "epoch": 0.2017949415284199, "grad_norm": 0.462890625, "learning_rate": 1.9376004483868777e-05, "loss": 0.6834, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1484 }, { "epoch": 0.2019309219472396, "grad_norm": 0.318359375, "learning_rate": 1.937435256210071e-05, "loss": 0.5902, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1485 }, { "epoch": 0.2020669023660593, "grad_norm": 0.6484375, "learning_rate": 1.937269852723896e-05, "loss": 0.5557, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1486 }, { "epoch": 0.20220288278487897, "grad_norm": 0.2578125, "learning_rate": 1.9371042379656372e-05, "loss": 0.443, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1487 }, { "epoch": 0.20233886320369868, "grad_norm": 0.31640625, "learning_rate": 1.936938411972626e-05, "loss": 0.547, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1488 }, { "epoch": 0.20247484362251836, "grad_norm": 0.486328125, "learning_rate": 1.9367723747822418e-05, "loss": 0.7542, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1489 }, { "epoch": 0.20261082404133804, "grad_norm": 0.310546875, "learning_rate": 1.9366061264319112e-05, "loss": 0.5721, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1490 }, { "epoch": 0.20274680446015775, "grad_norm": 0.384765625, "learning_rate": 1.9364396669591084e-05, "loss": 0.7017, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1491 }, { "epoch": 0.20288278487897743, "grad_norm": 0.32421875, "learning_rate": 1.936272996401356e-05, "loss": 0.6418, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1492 }, { "epoch": 0.2030187652977971, "grad_norm": 0.57421875, "learning_rate": 1.9361061147962233e-05, "loss": 0.6605, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1493 }, { "epoch": 0.20315474571661682, "grad_norm": 0.34375, "learning_rate": 1.9359390221813272e-05, "loss": 0.6934, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1494 }, { "epoch": 0.2032907261354365, "grad_norm": 0.78125, "learning_rate": 1.9357717185943328e-05, "loss": 0.7106, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1495 }, { "epoch": 0.20342670655425618, "grad_norm": 0.49609375, "learning_rate": 1.9356042040729522e-05, "loss": 0.779, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1496 }, { "epoch": 0.2035626869730759, "grad_norm": 0.76953125, "learning_rate": 1.9354364786549452e-05, "loss": 0.8325, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1497 }, { "epoch": 0.20369866739189557, "grad_norm": 0.369140625, "learning_rate": 1.9352685423781193e-05, "loss": 0.6572, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1498 }, { "epoch": 0.20383464781071525, "grad_norm": 0.353515625, "learning_rate": 1.935100395280329e-05, "loss": 0.5475, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1499 }, { "epoch": 0.20397062822953493, "grad_norm": 0.435546875, "learning_rate": 1.934932037399477e-05, "loss": 0.641, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1500 }, { "epoch": 0.20410660864835464, "grad_norm": 0.66796875, "learning_rate": 1.9347634687735134e-05, "loss": 0.8372, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1501 }, { "epoch": 0.20424258906717432, "grad_norm": 0.4609375, "learning_rate": 1.9345946894404356e-05, "loss": 0.4676, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1502 }, { "epoch": 0.204378569485994, "grad_norm": 0.328125, "learning_rate": 1.934425699438288e-05, "loss": 0.6279, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1503 }, { "epoch": 0.2045145499048137, "grad_norm": 0.490234375, "learning_rate": 1.9342564988051636e-05, "loss": 0.624, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1504 }, { "epoch": 0.2046505303236334, "grad_norm": 0.6171875, "learning_rate": 1.934087087579202e-05, "loss": 0.7944, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1505 }, { "epoch": 0.20478651074245308, "grad_norm": 0.388671875, "learning_rate": 1.9339174657985905e-05, "loss": 0.6396, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1506 }, { "epoch": 0.20492249116127279, "grad_norm": 0.61328125, "learning_rate": 1.9337476335015646e-05, "loss": 0.8415, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1507 }, { "epoch": 0.20505847158009247, "grad_norm": 0.44921875, "learning_rate": 1.9335775907264054e-05, "loss": 0.6351, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1508 }, { "epoch": 0.20519445199891215, "grad_norm": 0.443359375, "learning_rate": 1.9334073375114437e-05, "loss": 0.4246, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1509 }, { "epoch": 0.20533043241773186, "grad_norm": 0.546875, "learning_rate": 1.933236873895056e-05, "loss": 0.6182, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1510 }, { "epoch": 0.20546641283655154, "grad_norm": 0.328125, "learning_rate": 1.933066199915667e-05, "loss": 0.4964, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1511 }, { "epoch": 0.20560239325537122, "grad_norm": 0.3671875, "learning_rate": 1.9328953156117492e-05, "loss": 0.5822, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1512 }, { "epoch": 0.20573837367419093, "grad_norm": 0.81640625, "learning_rate": 1.932724221021821e-05, "loss": 0.8841, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1513 }, { "epoch": 0.2058743540930106, "grad_norm": 0.341796875, "learning_rate": 1.9325529161844502e-05, "loss": 0.5324, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1514 }, { "epoch": 0.2060103345118303, "grad_norm": 0.50390625, "learning_rate": 1.9323814011382508e-05, "loss": 0.7077, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1515 }, { "epoch": 0.20614631493065, "grad_norm": 0.31640625, "learning_rate": 1.9322096759218838e-05, "loss": 0.5338, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1516 }, { "epoch": 0.20628229534946968, "grad_norm": 0.34765625, "learning_rate": 1.932037740574059e-05, "loss": 0.4665, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1517 }, { "epoch": 0.20641827576828936, "grad_norm": 0.357421875, "learning_rate": 1.931865595133532e-05, "loss": 0.6693, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1518 }, { "epoch": 0.20655425618710907, "grad_norm": 0.51953125, "learning_rate": 1.931693239639107e-05, "loss": 0.6102, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1519 }, { "epoch": 0.20669023660592875, "grad_norm": 0.498046875, "learning_rate": 1.9315206741296345e-05, "loss": 0.6219, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1520 }, { "epoch": 0.20682621702474843, "grad_norm": 0.361328125, "learning_rate": 1.9313478986440134e-05, "loss": 0.6854, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1521 }, { "epoch": 0.2069621974435681, "grad_norm": 0.32421875, "learning_rate": 1.9311749132211893e-05, "loss": 0.4556, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1522 }, { "epoch": 0.20709817786238782, "grad_norm": 0.59765625, "learning_rate": 1.931001717900155e-05, "loss": 1.0825, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1523 }, { "epoch": 0.2072341582812075, "grad_norm": 0.41796875, "learning_rate": 1.930828312719951e-05, "loss": 0.7287, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1524 }, { "epoch": 0.20737013870002718, "grad_norm": 0.298828125, "learning_rate": 1.930654697719665e-05, "loss": 0.4769, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1525 }, { "epoch": 0.2075061191188469, "grad_norm": 0.98828125, "learning_rate": 1.930480872938432e-05, "loss": 0.7689, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1526 }, { "epoch": 0.20764209953766657, "grad_norm": 0.3046875, "learning_rate": 1.9303068384154338e-05, "loss": 0.5952, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1527 }, { "epoch": 0.20777807995648626, "grad_norm": 0.609375, "learning_rate": 1.9301325941899004e-05, "loss": 0.9837, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1528 }, { "epoch": 0.20791406037530596, "grad_norm": 0.404296875, "learning_rate": 1.9299581403011082e-05, "loss": 0.681, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1529 }, { "epoch": 0.20805004079412565, "grad_norm": 0.37890625, "learning_rate": 1.929783476788382e-05, "loss": 0.6592, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1530 }, { "epoch": 0.20818602121294533, "grad_norm": 0.7265625, "learning_rate": 1.929608603691092e-05, "loss": 0.7318, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1531 }, { "epoch": 0.20832200163176504, "grad_norm": 0.6171875, "learning_rate": 1.929433521048657e-05, "loss": 0.5441, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1532 }, { "epoch": 0.20845798205058472, "grad_norm": 0.58984375, "learning_rate": 1.9292582289005436e-05, "loss": 0.6839, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1533 }, { "epoch": 0.2085939624694044, "grad_norm": 0.765625, "learning_rate": 1.929082727286264e-05, "loss": 0.7209, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1534 }, { "epoch": 0.2087299428882241, "grad_norm": 0.9296875, "learning_rate": 1.9289070162453787e-05, "loss": 0.8143, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1535 }, { "epoch": 0.2088659233070438, "grad_norm": 0.396484375, "learning_rate": 1.928731095817495e-05, "loss": 0.4707, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1536 }, { "epoch": 0.20900190372586347, "grad_norm": 0.482421875, "learning_rate": 1.928554966042268e-05, "loss": 0.5391, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1537 }, { "epoch": 0.20913788414468318, "grad_norm": 0.408203125, "learning_rate": 1.9283786269593984e-05, "loss": 0.6688, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1538 }, { "epoch": 0.20927386456350286, "grad_norm": 0.6640625, "learning_rate": 1.928202078608636e-05, "loss": 0.8105, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1539 }, { "epoch": 0.20940984498232254, "grad_norm": 0.34375, "learning_rate": 1.928025321029777e-05, "loss": 0.5859, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1540 }, { "epoch": 0.20954582540114222, "grad_norm": 0.353515625, "learning_rate": 1.9278483542626642e-05, "loss": 0.4808, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1541 }, { "epoch": 0.20968180581996193, "grad_norm": 0.5859375, "learning_rate": 1.9276711783471888e-05, "loss": 0.8424, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1542 }, { "epoch": 0.2098177862387816, "grad_norm": 0.384765625, "learning_rate": 1.9274937933232877e-05, "loss": 0.6576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1543 }, { "epoch": 0.2099537666576013, "grad_norm": 0.37109375, "learning_rate": 1.9273161992309462e-05, "loss": 0.6745, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1544 }, { "epoch": 0.210089747076421, "grad_norm": 0.49609375, "learning_rate": 1.927138396110196e-05, "loss": 0.6576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1545 }, { "epoch": 0.21022572749524068, "grad_norm": 0.66796875, "learning_rate": 1.9269603840011157e-05, "loss": 0.6141, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1546 }, { "epoch": 0.21036170791406036, "grad_norm": 0.44140625, "learning_rate": 1.926782162943832e-05, "loss": 0.5529, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1547 }, { "epoch": 0.21049768833288007, "grad_norm": 0.240234375, "learning_rate": 1.9266037329785183e-05, "loss": 0.4284, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1548 }, { "epoch": 0.21063366875169975, "grad_norm": 0.376953125, "learning_rate": 1.9264250941453937e-05, "loss": 0.6831, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1549 }, { "epoch": 0.21076964917051944, "grad_norm": 0.375, "learning_rate": 1.9262462464847263e-05, "loss": 0.6234, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1550 }, { "epoch": 0.21090562958933914, "grad_norm": 0.71484375, "learning_rate": 1.9260671900368312e-05, "loss": 0.7607, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1551 }, { "epoch": 0.21104161000815883, "grad_norm": 0.625, "learning_rate": 1.925887924842069e-05, "loss": 0.5199, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1552 }, { "epoch": 0.2111775904269785, "grad_norm": 0.3984375, "learning_rate": 1.9257084509408486e-05, "loss": 0.6774, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1553 }, { "epoch": 0.21131357084579822, "grad_norm": 0.41015625, "learning_rate": 1.9255287683736257e-05, "loss": 0.6619, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1554 }, { "epoch": 0.2114495512646179, "grad_norm": 0.5078125, "learning_rate": 1.9253488771809024e-05, "loss": 0.7539, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1555 }, { "epoch": 0.21158553168343758, "grad_norm": 0.27734375, "learning_rate": 1.9251687774032292e-05, "loss": 0.4176, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1556 }, { "epoch": 0.2117215121022573, "grad_norm": 0.546875, "learning_rate": 1.924988469081202e-05, "loss": 0.7358, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1557 }, { "epoch": 0.21185749252107697, "grad_norm": 0.478515625, "learning_rate": 1.924807952255465e-05, "loss": 0.8039, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1558 }, { "epoch": 0.21199347293989665, "grad_norm": 0.69140625, "learning_rate": 1.9246272269667093e-05, "loss": 0.7581, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1559 }, { "epoch": 0.21212945335871636, "grad_norm": 0.416015625, "learning_rate": 1.9244462932556717e-05, "loss": 0.7186, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1560 }, { "epoch": 0.21226543377753604, "grad_norm": 0.3984375, "learning_rate": 1.924265151163137e-05, "loss": 0.7222, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1561 }, { "epoch": 0.21240141419635572, "grad_norm": 1.796875, "learning_rate": 1.9240838007299375e-05, "loss": 0.8831, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1562 }, { "epoch": 0.2125373946151754, "grad_norm": 0.330078125, "learning_rate": 1.923902241996951e-05, "loss": 0.5946, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1563 }, { "epoch": 0.2126733750339951, "grad_norm": 0.51953125, "learning_rate": 1.9237204750051036e-05, "loss": 0.7863, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1564 }, { "epoch": 0.2128093554528148, "grad_norm": 0.353515625, "learning_rate": 1.923538499795368e-05, "loss": 0.6263, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1565 }, { "epoch": 0.21294533587163447, "grad_norm": 0.48828125, "learning_rate": 1.923356316408763e-05, "loss": 0.6442, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1566 }, { "epoch": 0.21308131629045418, "grad_norm": 0.451171875, "learning_rate": 1.9231739248863553e-05, "loss": 0.5471, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1567 }, { "epoch": 0.21321729670927386, "grad_norm": 0.41796875, "learning_rate": 1.922991325269258e-05, "loss": 0.7946, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1568 }, { "epoch": 0.21335327712809354, "grad_norm": 0.8203125, "learning_rate": 1.9228085175986316e-05, "loss": 0.7528, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1569 }, { "epoch": 0.21348925754691325, "grad_norm": 0.349609375, "learning_rate": 1.922625501915683e-05, "loss": 0.6296, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1570 }, { "epoch": 0.21362523796573293, "grad_norm": 0.369140625, "learning_rate": 1.922442278261666e-05, "loss": 0.6392, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1571 }, { "epoch": 0.21376121838455261, "grad_norm": 0.66015625, "learning_rate": 1.9222588466778815e-05, "loss": 0.6735, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1572 }, { "epoch": 0.21389719880337232, "grad_norm": 0.287109375, "learning_rate": 1.9220752072056777e-05, "loss": 0.4683, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1573 }, { "epoch": 0.214033179222192, "grad_norm": 0.41796875, "learning_rate": 1.921891359886449e-05, "loss": 0.6978, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1574 }, { "epoch": 0.21416915964101169, "grad_norm": 0.3515625, "learning_rate": 1.9217073047616363e-05, "loss": 0.4738, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1575 }, { "epoch": 0.2143051400598314, "grad_norm": 0.57421875, "learning_rate": 1.9215230418727282e-05, "loss": 0.832, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1576 }, { "epoch": 0.21444112047865108, "grad_norm": 0.34765625, "learning_rate": 1.9213385712612597e-05, "loss": 0.598, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1577 }, { "epoch": 0.21457710089747076, "grad_norm": 0.30078125, "learning_rate": 1.921153892968813e-05, "loss": 0.6291, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1578 }, { "epoch": 0.21471308131629047, "grad_norm": 0.71484375, "learning_rate": 1.9209690070370172e-05, "loss": 0.644, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1579 }, { "epoch": 0.21484906173511015, "grad_norm": 0.326171875, "learning_rate": 1.920783913507547e-05, "loss": 0.6525, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1580 }, { "epoch": 0.21498504215392983, "grad_norm": 0.337890625, "learning_rate": 1.920598612422125e-05, "loss": 0.5793, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1581 }, { "epoch": 0.21512102257274954, "grad_norm": 0.48046875, "learning_rate": 1.9204131038225207e-05, "loss": 0.6004, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1582 }, { "epoch": 0.21525700299156922, "grad_norm": 0.578125, "learning_rate": 1.9202273877505496e-05, "loss": 0.8721, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1583 }, { "epoch": 0.2153929834103889, "grad_norm": 0.294921875, "learning_rate": 1.9200414642480747e-05, "loss": 0.6379, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1584 }, { "epoch": 0.21552896382920858, "grad_norm": 0.458984375, "learning_rate": 1.9198553333570053e-05, "loss": 0.7025, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1585 }, { "epoch": 0.2156649442480283, "grad_norm": 0.59375, "learning_rate": 1.919668995119297e-05, "loss": 0.4209, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1586 }, { "epoch": 0.21580092466684797, "grad_norm": 0.36328125, "learning_rate": 1.9194824495769536e-05, "loss": 0.6421, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1587 }, { "epoch": 0.21593690508566765, "grad_norm": 0.29296875, "learning_rate": 1.9192956967720245e-05, "loss": 0.578, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1588 }, { "epoch": 0.21607288550448736, "grad_norm": 0.412109375, "learning_rate": 1.919108736746606e-05, "loss": 0.8073, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1589 }, { "epoch": 0.21620886592330704, "grad_norm": 0.44921875, "learning_rate": 1.9189215695428412e-05, "loss": 0.7202, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1590 }, { "epoch": 0.21634484634212672, "grad_norm": 0.2021484375, "learning_rate": 1.9187341952029194e-05, "loss": 0.2918, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1591 }, { "epoch": 0.21648082676094643, "grad_norm": 0.41796875, "learning_rate": 1.9185466137690777e-05, "loss": 0.6514, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1592 }, { "epoch": 0.2166168071797661, "grad_norm": 0.34375, "learning_rate": 1.918358825283599e-05, "loss": 0.5601, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1593 }, { "epoch": 0.2167527875985858, "grad_norm": 1.2578125, "learning_rate": 1.9181708297888133e-05, "loss": 0.7471, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1594 }, { "epoch": 0.2168887680174055, "grad_norm": 0.3828125, "learning_rate": 1.917982627327097e-05, "loss": 0.6268, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1595 }, { "epoch": 0.21702474843622518, "grad_norm": 0.4453125, "learning_rate": 1.9177942179408727e-05, "loss": 0.7808, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1596 }, { "epoch": 0.21716072885504487, "grad_norm": 0.443359375, "learning_rate": 1.9176056016726113e-05, "loss": 0.7178, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1597 }, { "epoch": 0.21729670927386457, "grad_norm": 0.443359375, "learning_rate": 1.917416778564828e-05, "loss": 0.7728, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1598 }, { "epoch": 0.21743268969268426, "grad_norm": 0.439453125, "learning_rate": 1.917227748660087e-05, "loss": 0.6914, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1599 }, { "epoch": 0.21756867011150394, "grad_norm": 0.6328125, "learning_rate": 1.917038512000997e-05, "loss": 0.5109, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1600 }, { "epoch": 0.21770465053032365, "grad_norm": 0.365234375, "learning_rate": 1.9168490686302144e-05, "loss": 0.6484, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1601 }, { "epoch": 0.21784063094914333, "grad_norm": 0.26953125, "learning_rate": 1.9166594185904427e-05, "loss": 0.5177, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1602 }, { "epoch": 0.217976611367963, "grad_norm": 0.404296875, "learning_rate": 1.9164695619244303e-05, "loss": 0.6105, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1603 }, { "epoch": 0.21811259178678272, "grad_norm": 0.3984375, "learning_rate": 1.9162794986749744e-05, "loss": 0.6982, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1604 }, { "epoch": 0.2182485722056024, "grad_norm": 0.671875, "learning_rate": 1.9160892288849168e-05, "loss": 0.4574, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1605 }, { "epoch": 0.21838455262442208, "grad_norm": 0.3671875, "learning_rate": 1.915898752597147e-05, "loss": 0.6748, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1606 }, { "epoch": 0.21852053304324176, "grad_norm": 0.390625, "learning_rate": 1.9157080698546e-05, "loss": 0.5054, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1607 }, { "epoch": 0.21865651346206147, "grad_norm": 0.33203125, "learning_rate": 1.9155171807002587e-05, "loss": 0.63, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1608 }, { "epoch": 0.21879249388088115, "grad_norm": 0.275390625, "learning_rate": 1.915326085177152e-05, "loss": 0.502, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1609 }, { "epoch": 0.21892847429970083, "grad_norm": 0.328125, "learning_rate": 1.9151347833283545e-05, "loss": 0.5355, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1610 }, { "epoch": 0.21906445471852054, "grad_norm": 0.5625, "learning_rate": 1.9149432751969885e-05, "loss": 0.7925, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1611 }, { "epoch": 0.21920043513734022, "grad_norm": 0.388671875, "learning_rate": 1.9147515608262224e-05, "loss": 0.6997, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1612 }, { "epoch": 0.2193364155561599, "grad_norm": 0.375, "learning_rate": 1.9145596402592704e-05, "loss": 0.6453, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1613 }, { "epoch": 0.2194723959749796, "grad_norm": 0.396484375, "learning_rate": 1.9143675135393937e-05, "loss": 0.6707, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1614 }, { "epoch": 0.2196083763937993, "grad_norm": 0.3359375, "learning_rate": 1.9141751807099006e-05, "loss": 0.6713, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1615 }, { "epoch": 0.21974435681261897, "grad_norm": 0.353515625, "learning_rate": 1.913982641814145e-05, "loss": 0.5851, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1616 }, { "epoch": 0.21988033723143868, "grad_norm": 0.3515625, "learning_rate": 1.9137898968955276e-05, "loss": 0.4886, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1617 }, { "epoch": 0.22001631765025836, "grad_norm": 0.57421875, "learning_rate": 1.9135969459974953e-05, "loss": 0.569, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1618 }, { "epoch": 0.22015229806907805, "grad_norm": 0.34375, "learning_rate": 1.9134037891635417e-05, "loss": 0.647, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1619 }, { "epoch": 0.22028827848789775, "grad_norm": 0.53515625, "learning_rate": 1.9132104264372065e-05, "loss": 0.3506, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1620 }, { "epoch": 0.22042425890671744, "grad_norm": 0.37109375, "learning_rate": 1.9130168578620764e-05, "loss": 0.5728, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1621 }, { "epoch": 0.22056023932553712, "grad_norm": 0.451171875, "learning_rate": 1.9128230834817837e-05, "loss": 0.6652, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1622 }, { "epoch": 0.22069621974435683, "grad_norm": 0.283203125, "learning_rate": 1.912629103340008e-05, "loss": 0.6055, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1623 }, { "epoch": 0.2208322001631765, "grad_norm": 0.65625, "learning_rate": 1.912434917480474e-05, "loss": 0.726, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1624 }, { "epoch": 0.2209681805819962, "grad_norm": 0.294921875, "learning_rate": 1.9122405259469548e-05, "loss": 0.5804, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1625 }, { "epoch": 0.22110416100081587, "grad_norm": 0.400390625, "learning_rate": 1.9120459287832677e-05, "loss": 0.5794, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1626 }, { "epoch": 0.22124014141963558, "grad_norm": 0.337890625, "learning_rate": 1.911851126033277e-05, "loss": 0.5417, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1627 }, { "epoch": 0.22137612183845526, "grad_norm": 0.49609375, "learning_rate": 1.911656117740895e-05, "loss": 0.777, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1628 }, { "epoch": 0.22151210225727494, "grad_norm": 0.75390625, "learning_rate": 1.9114609039500775e-05, "loss": 0.8013, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1629 }, { "epoch": 0.22164808267609465, "grad_norm": 0.482421875, "learning_rate": 1.9112654847048287e-05, "loss": 0.6296, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1630 }, { "epoch": 0.22178406309491433, "grad_norm": 0.400390625, "learning_rate": 1.9110698600491985e-05, "loss": 0.7471, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1631 }, { "epoch": 0.221920043513734, "grad_norm": 0.57421875, "learning_rate": 1.9108740300272832e-05, "loss": 0.4813, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1632 }, { "epoch": 0.22205602393255372, "grad_norm": 0.78125, "learning_rate": 1.910677994683225e-05, "loss": 0.937, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1633 }, { "epoch": 0.2221920043513734, "grad_norm": 0.53125, "learning_rate": 1.9104817540612125e-05, "loss": 0.7738, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1634 }, { "epoch": 0.22232798477019308, "grad_norm": 0.369140625, "learning_rate": 1.9102853082054817e-05, "loss": 0.7316, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1635 }, { "epoch": 0.2224639651890128, "grad_norm": 0.318359375, "learning_rate": 1.9100886571603126e-05, "loss": 0.6683, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1636 }, { "epoch": 0.22259994560783247, "grad_norm": 0.296875, "learning_rate": 1.9098918009700337e-05, "loss": 0.4448, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1637 }, { "epoch": 0.22273592602665215, "grad_norm": 1.6875, "learning_rate": 1.909694739679018e-05, "loss": 0.7692, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1638 }, { "epoch": 0.22287190644547186, "grad_norm": 0.24609375, "learning_rate": 1.9094974733316865e-05, "loss": 0.5068, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1639 }, { "epoch": 0.22300788686429154, "grad_norm": 0.62109375, "learning_rate": 1.9093000019725044e-05, "loss": 0.6294, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1640 }, { "epoch": 0.22314386728311122, "grad_norm": 0.3203125, "learning_rate": 1.9091023256459852e-05, "loss": 0.5765, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1641 }, { "epoch": 0.22327984770193093, "grad_norm": 0.310546875, "learning_rate": 1.9089044443966868e-05, "loss": 0.5675, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1642 }, { "epoch": 0.22341582812075061, "grad_norm": 0.314453125, "learning_rate": 1.9087063582692143e-05, "loss": 0.5291, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1643 }, { "epoch": 0.2235518085395703, "grad_norm": 0.310546875, "learning_rate": 1.9085080673082185e-05, "loss": 0.6312, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1644 }, { "epoch": 0.22368778895839, "grad_norm": 0.6015625, "learning_rate": 1.908309571558397e-05, "loss": 0.7936, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1645 }, { "epoch": 0.22382376937720969, "grad_norm": 0.36328125, "learning_rate": 1.9081108710644933e-05, "loss": 0.7062, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1646 }, { "epoch": 0.22395974979602937, "grad_norm": 3.640625, "learning_rate": 1.9079119658712963e-05, "loss": 0.9623, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1647 }, { "epoch": 0.22409573021484905, "grad_norm": 0.404296875, "learning_rate": 1.907712856023642e-05, "loss": 0.6563, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1648 }, { "epoch": 0.22423171063366876, "grad_norm": 0.46484375, "learning_rate": 1.9075135415664125e-05, "loss": 0.686, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1649 }, { "epoch": 0.22436769105248844, "grad_norm": 0.57421875, "learning_rate": 1.9073140225445352e-05, "loss": 0.6377, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1650 }, { "epoch": 0.22450367147130812, "grad_norm": 0.515625, "learning_rate": 1.907114299002984e-05, "loss": 0.8743, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1651 }, { "epoch": 0.22463965189012783, "grad_norm": 0.427734375, "learning_rate": 1.9069143709867797e-05, "loss": 0.8509, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1652 }, { "epoch": 0.2247756323089475, "grad_norm": 0.458984375, "learning_rate": 1.906714238540988e-05, "loss": 0.7132, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1653 }, { "epoch": 0.2249116127277672, "grad_norm": 0.875, "learning_rate": 1.9065139017107218e-05, "loss": 0.8034, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1654 }, { "epoch": 0.2250475931465869, "grad_norm": 0.8203125, "learning_rate": 1.906313360541139e-05, "loss": 0.63, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1655 }, { "epoch": 0.22518357356540658, "grad_norm": 0.423828125, "learning_rate": 1.9061126150774437e-05, "loss": 0.7083, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1656 }, { "epoch": 0.22531955398422626, "grad_norm": 0.890625, "learning_rate": 1.9059116653648867e-05, "loss": 0.8542, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1657 }, { "epoch": 0.22545553440304597, "grad_norm": 0.447265625, "learning_rate": 1.9057105114487648e-05, "loss": 0.7384, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1658 }, { "epoch": 0.22559151482186565, "grad_norm": 0.26171875, "learning_rate": 1.9055091533744204e-05, "loss": 0.4655, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1659 }, { "epoch": 0.22572749524068533, "grad_norm": 0.57421875, "learning_rate": 1.9053075911872415e-05, "loss": 0.799, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1660 }, { "epoch": 0.22586347565950504, "grad_norm": 0.431640625, "learning_rate": 1.9051058249326638e-05, "loss": 0.7148, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1661 }, { "epoch": 0.22599945607832472, "grad_norm": 0.56640625, "learning_rate": 1.9049038546561667e-05, "loss": 0.6167, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1662 }, { "epoch": 0.2261354364971444, "grad_norm": 0.640625, "learning_rate": 1.904701680403278e-05, "loss": 0.5216, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1663 }, { "epoch": 0.2262714169159641, "grad_norm": 0.5078125, "learning_rate": 1.904499302219569e-05, "loss": 0.5946, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1664 }, { "epoch": 0.2264073973347838, "grad_norm": 0.392578125, "learning_rate": 1.9042967201506594e-05, "loss": 0.6587, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1665 }, { "epoch": 0.22654337775360348, "grad_norm": 1.0078125, "learning_rate": 1.9040939342422128e-05, "loss": 0.6885, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1666 }, { "epoch": 0.22667935817242318, "grad_norm": 0.322265625, "learning_rate": 1.9038909445399397e-05, "loss": 0.5942, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1667 }, { "epoch": 0.22681533859124287, "grad_norm": 0.421875, "learning_rate": 1.9036877510895967e-05, "loss": 0.5425, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1668 }, { "epoch": 0.22695131901006255, "grad_norm": 0.310546875, "learning_rate": 1.903484353936986e-05, "loss": 0.6292, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1669 }, { "epoch": 0.22708729942888223, "grad_norm": 0.25390625, "learning_rate": 1.903280753127956e-05, "loss": 0.411, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1670 }, { "epoch": 0.22722327984770194, "grad_norm": 0.283203125, "learning_rate": 1.9030769487084005e-05, "loss": 0.5378, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1671 }, { "epoch": 0.22735926026652162, "grad_norm": 0.44921875, "learning_rate": 1.9028729407242598e-05, "loss": 0.8638, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1672 }, { "epoch": 0.2274952406853413, "grad_norm": 0.59765625, "learning_rate": 1.90266872922152e-05, "loss": 0.6331, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1673 }, { "epoch": 0.227631221104161, "grad_norm": 0.3671875, "learning_rate": 1.902464314246212e-05, "loss": 0.6816, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1674 }, { "epoch": 0.2277672015229807, "grad_norm": 0.322265625, "learning_rate": 1.902259695844414e-05, "loss": 0.5373, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1675 }, { "epoch": 0.22790318194180037, "grad_norm": 0.322265625, "learning_rate": 1.90205487406225e-05, "loss": 0.5334, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1676 }, { "epoch": 0.22803916236062008, "grad_norm": 0.306640625, "learning_rate": 1.9018498489458882e-05, "loss": 0.5456, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1677 }, { "epoch": 0.22817514277943976, "grad_norm": 0.412109375, "learning_rate": 1.9016446205415446e-05, "loss": 0.6628, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1678 }, { "epoch": 0.22831112319825944, "grad_norm": 0.6796875, "learning_rate": 1.90143918889548e-05, "loss": 0.6764, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1679 }, { "epoch": 0.22844710361707915, "grad_norm": 0.392578125, "learning_rate": 1.901233554054001e-05, "loss": 0.6965, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1680 }, { "epoch": 0.22858308403589883, "grad_norm": 0.298828125, "learning_rate": 1.9010277160634605e-05, "loss": 0.5182, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1681 }, { "epoch": 0.2287190644547185, "grad_norm": 0.33203125, "learning_rate": 1.900821674970257e-05, "loss": 0.6538, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1682 }, { "epoch": 0.22885504487353822, "grad_norm": 0.3828125, "learning_rate": 1.900615430820834e-05, "loss": 0.6374, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1683 }, { "epoch": 0.2289910252923579, "grad_norm": 0.4765625, "learning_rate": 1.900408983661682e-05, "loss": 0.6432, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1684 }, { "epoch": 0.22912700571117758, "grad_norm": 0.458984375, "learning_rate": 1.9002023335393366e-05, "loss": 0.9085, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1685 }, { "epoch": 0.2292629861299973, "grad_norm": 0.5390625, "learning_rate": 1.8999954805003794e-05, "loss": 0.7752, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1686 }, { "epoch": 0.22939896654881697, "grad_norm": 0.3828125, "learning_rate": 1.8997884245914372e-05, "loss": 0.6475, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1687 }, { "epoch": 0.22953494696763665, "grad_norm": 0.36328125, "learning_rate": 1.8995811658591836e-05, "loss": 0.5662, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1688 }, { "epoch": 0.22967092738645636, "grad_norm": 0.4765625, "learning_rate": 1.8993737043503367e-05, "loss": 0.7453, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1689 }, { "epoch": 0.22980690780527604, "grad_norm": 0.48828125, "learning_rate": 1.899166040111661e-05, "loss": 0.6637, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1690 }, { "epoch": 0.22994288822409573, "grad_norm": 0.42578125, "learning_rate": 1.8989581731899665e-05, "loss": 0.6637, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1691 }, { "epoch": 0.2300788686429154, "grad_norm": 0.46875, "learning_rate": 1.898750103632109e-05, "loss": 0.7559, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1692 }, { "epoch": 0.23021484906173512, "grad_norm": 0.419921875, "learning_rate": 1.89854183148499e-05, "loss": 0.6558, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1693 }, { "epoch": 0.2303508294805548, "grad_norm": 0.72265625, "learning_rate": 1.898333356795557e-05, "loss": 0.7474, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1694 }, { "epoch": 0.23048680989937448, "grad_norm": 0.435546875, "learning_rate": 1.8981246796108018e-05, "loss": 0.659, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1695 }, { "epoch": 0.2306227903181942, "grad_norm": 0.35546875, "learning_rate": 1.8979157999777635e-05, "loss": 0.6297, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1696 }, { "epoch": 0.23075877073701387, "grad_norm": 0.66015625, "learning_rate": 1.8977067179435254e-05, "loss": 0.6324, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1697 }, { "epoch": 0.23089475115583355, "grad_norm": 0.333984375, "learning_rate": 1.897497433555218e-05, "loss": 0.6437, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1698 }, { "epoch": 0.23103073157465326, "grad_norm": 0.369140625, "learning_rate": 1.8972879468600163e-05, "loss": 0.5862, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1699 }, { "epoch": 0.23116671199347294, "grad_norm": 0.51171875, "learning_rate": 1.897078257905141e-05, "loss": 0.8094, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1700 }, { "epoch": 0.23130269241229262, "grad_norm": 0.392578125, "learning_rate": 1.8968683667378588e-05, "loss": 0.7062, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1701 }, { "epoch": 0.23143867283111233, "grad_norm": 0.28515625, "learning_rate": 1.896658273405481e-05, "loss": 0.5568, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1702 }, { "epoch": 0.231574653249932, "grad_norm": 0.54296875, "learning_rate": 1.8964479779553665e-05, "loss": 0.6567, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1703 }, { "epoch": 0.2317106336687517, "grad_norm": 0.29296875, "learning_rate": 1.896237480434917e-05, "loss": 0.3986, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1704 }, { "epoch": 0.2318466140875714, "grad_norm": 0.384765625, "learning_rate": 1.8960267808915823e-05, "loss": 0.5697, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1705 }, { "epoch": 0.23198259450639108, "grad_norm": 0.4609375, "learning_rate": 1.8958158793728563e-05, "loss": 0.806, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1706 }, { "epoch": 0.23211857492521076, "grad_norm": 0.318359375, "learning_rate": 1.8956047759262788e-05, "loss": 0.5067, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1707 }, { "epoch": 0.23225455534403047, "grad_norm": 0.5390625, "learning_rate": 1.895393470599435e-05, "loss": 0.9036, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1708 }, { "epoch": 0.23239053576285015, "grad_norm": 0.3359375, "learning_rate": 1.895181963439956e-05, "loss": 0.6538, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1709 }, { "epoch": 0.23252651618166983, "grad_norm": 0.453125, "learning_rate": 1.8949702544955175e-05, "loss": 0.5877, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1710 }, { "epoch": 0.23266249660048952, "grad_norm": 0.203125, "learning_rate": 1.894758343813842e-05, "loss": 0.4023, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1711 }, { "epoch": 0.23279847701930922, "grad_norm": 0.6015625, "learning_rate": 1.8945462314426958e-05, "loss": 0.7729, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1712 }, { "epoch": 0.2329344574381289, "grad_norm": 0.34375, "learning_rate": 1.894333917429893e-05, "loss": 0.6763, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1713 }, { "epoch": 0.2330704378569486, "grad_norm": 0.40625, "learning_rate": 1.8941214018232905e-05, "loss": 0.8161, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1714 }, { "epoch": 0.2332064182757683, "grad_norm": 0.326171875, "learning_rate": 1.893908684670793e-05, "loss": 0.4992, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1715 }, { "epoch": 0.23334239869458798, "grad_norm": 0.47265625, "learning_rate": 1.8936957660203484e-05, "loss": 0.6032, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1716 }, { "epoch": 0.23347837911340766, "grad_norm": 0.90234375, "learning_rate": 1.8934826459199517e-05, "loss": 0.8337, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1717 }, { "epoch": 0.23361435953222737, "grad_norm": 0.486328125, "learning_rate": 1.893269324417643e-05, "loss": 0.7456, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1718 }, { "epoch": 0.23375033995104705, "grad_norm": 0.64453125, "learning_rate": 1.8930558015615076e-05, "loss": 0.4889, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1719 }, { "epoch": 0.23388632036986673, "grad_norm": 0.50390625, "learning_rate": 1.8928420773996757e-05, "loss": 0.6263, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1720 }, { "epoch": 0.23402230078868644, "grad_norm": 0.40234375, "learning_rate": 1.892628151980324e-05, "loss": 0.5957, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1721 }, { "epoch": 0.23415828120750612, "grad_norm": 0.33203125, "learning_rate": 1.892414025351673e-05, "loss": 0.557, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1722 }, { "epoch": 0.2342942616263258, "grad_norm": 0.46875, "learning_rate": 1.8921996975619903e-05, "loss": 0.8913, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1723 }, { "epoch": 0.2344302420451455, "grad_norm": 0.361328125, "learning_rate": 1.8919851686595875e-05, "loss": 0.653, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1724 }, { "epoch": 0.2345662224639652, "grad_norm": 0.361328125, "learning_rate": 1.8917704386928223e-05, "loss": 0.7302, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1725 }, { "epoch": 0.23470220288278487, "grad_norm": 0.28515625, "learning_rate": 1.891555507710097e-05, "loss": 0.4757, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1726 }, { "epoch": 0.23483818330160458, "grad_norm": 0.388671875, "learning_rate": 1.8913403757598603e-05, "loss": 0.5924, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1727 }, { "epoch": 0.23497416372042426, "grad_norm": 0.376953125, "learning_rate": 1.8911250428906056e-05, "loss": 0.6265, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1728 }, { "epoch": 0.23511014413924394, "grad_norm": 0.52734375, "learning_rate": 1.8909095091508708e-05, "loss": 0.7531, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1729 }, { "epoch": 0.23524612455806365, "grad_norm": 0.52734375, "learning_rate": 1.8906937745892407e-05, "loss": 0.7705, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1730 }, { "epoch": 0.23538210497688333, "grad_norm": 0.625, "learning_rate": 1.8904778392543435e-05, "loss": 0.8866, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1731 }, { "epoch": 0.23551808539570301, "grad_norm": 0.8125, "learning_rate": 1.8902617031948552e-05, "loss": 0.7661, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1732 }, { "epoch": 0.2356540658145227, "grad_norm": 0.546875, "learning_rate": 1.8900453664594937e-05, "loss": 0.5744, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1733 }, { "epoch": 0.2357900462333424, "grad_norm": 0.439453125, "learning_rate": 1.8898288290970256e-05, "loss": 0.7648, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1734 }, { "epoch": 0.23592602665216209, "grad_norm": 0.3046875, "learning_rate": 1.8896120911562595e-05, "loss": 0.4618, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1735 }, { "epoch": 0.23606200707098177, "grad_norm": 0.58984375, "learning_rate": 1.8893951526860522e-05, "loss": 0.6603, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1736 }, { "epoch": 0.23619798748980148, "grad_norm": 0.3828125, "learning_rate": 1.8891780137353036e-05, "loss": 0.5915, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1737 }, { "epoch": 0.23633396790862116, "grad_norm": 0.81640625, "learning_rate": 1.8889606743529593e-05, "loss": 0.6719, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1738 }, { "epoch": 0.23646994832744084, "grad_norm": 0.3828125, "learning_rate": 1.8887431345880107e-05, "loss": 0.5836, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1739 }, { "epoch": 0.23660592874626055, "grad_norm": 0.376953125, "learning_rate": 1.8885253944894938e-05, "loss": 0.6799, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1740 }, { "epoch": 0.23674190916508023, "grad_norm": 0.37109375, "learning_rate": 1.8883074541064895e-05, "loss": 0.4876, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1741 }, { "epoch": 0.2368778895838999, "grad_norm": 0.3515625, "learning_rate": 1.8880893134881247e-05, "loss": 0.7945, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1742 }, { "epoch": 0.23701387000271962, "grad_norm": 0.470703125, "learning_rate": 1.8878709726835707e-05, "loss": 0.7798, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1743 }, { "epoch": 0.2371498504215393, "grad_norm": 0.28515625, "learning_rate": 1.887652431742044e-05, "loss": 0.6087, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1744 }, { "epoch": 0.23728583084035898, "grad_norm": 0.302734375, "learning_rate": 1.8874336907128073e-05, "loss": 0.4906, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1745 }, { "epoch": 0.2374218112591787, "grad_norm": 0.421875, "learning_rate": 1.8872147496451666e-05, "loss": 0.7384, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1746 }, { "epoch": 0.23755779167799837, "grad_norm": 0.330078125, "learning_rate": 1.8869956085884743e-05, "loss": 0.7183, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1747 }, { "epoch": 0.23769377209681805, "grad_norm": 0.62890625, "learning_rate": 1.8867762675921272e-05, "loss": 0.6781, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1748 }, { "epoch": 0.23782975251563776, "grad_norm": 0.287109375, "learning_rate": 1.8865567267055676e-05, "loss": 0.576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1749 }, { "epoch": 0.23796573293445744, "grad_norm": 0.478515625, "learning_rate": 1.8863369859782824e-05, "loss": 0.9368, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1750 }, { "epoch": 0.23810171335327712, "grad_norm": 0.369140625, "learning_rate": 1.8861170454598046e-05, "loss": 0.4471, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1751 }, { "epoch": 0.23823769377209683, "grad_norm": 0.8203125, "learning_rate": 1.885896905199711e-05, "loss": 0.8625, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1752 }, { "epoch": 0.2383736741909165, "grad_norm": 0.337890625, "learning_rate": 1.8856765652476235e-05, "loss": 0.6043, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1753 }, { "epoch": 0.2385096546097362, "grad_norm": 0.388671875, "learning_rate": 1.8854560256532098e-05, "loss": 0.6655, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1754 }, { "epoch": 0.23864563502855587, "grad_norm": 0.64453125, "learning_rate": 1.885235286466183e-05, "loss": 0.7871, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1755 }, { "epoch": 0.23878161544737558, "grad_norm": 0.326171875, "learning_rate": 1.885014347736299e-05, "loss": 0.6213, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1756 }, { "epoch": 0.23891759586619526, "grad_norm": 0.302734375, "learning_rate": 1.884793209513361e-05, "loss": 0.4642, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1757 }, { "epoch": 0.23905357628501495, "grad_norm": 0.388671875, "learning_rate": 1.884571871847216e-05, "loss": 0.6995, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1758 }, { "epoch": 0.23918955670383465, "grad_norm": 0.283203125, "learning_rate": 1.8843503347877565e-05, "loss": 0.5578, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1759 }, { "epoch": 0.23932553712265434, "grad_norm": 0.5, "learning_rate": 1.884128598384919e-05, "loss": 0.7852, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1760 }, { "epoch": 0.23946151754147402, "grad_norm": 0.484375, "learning_rate": 1.883906662688686e-05, "loss": 0.7333, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1761 }, { "epoch": 0.23959749796029373, "grad_norm": 0.255859375, "learning_rate": 1.883684527749085e-05, "loss": 0.5519, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1762 }, { "epoch": 0.2397334783791134, "grad_norm": 0.71875, "learning_rate": 1.883462193616187e-05, "loss": 0.8322, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1763 }, { "epoch": 0.2398694587979331, "grad_norm": 0.546875, "learning_rate": 1.8832396603401093e-05, "loss": 0.6589, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1764 }, { "epoch": 0.2400054392167528, "grad_norm": 0.57421875, "learning_rate": 1.8830169279710137e-05, "loss": 0.6414, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1765 }, { "epoch": 0.24014141963557248, "grad_norm": 0.419921875, "learning_rate": 1.8827939965591068e-05, "loss": 0.651, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1766 }, { "epoch": 0.24027740005439216, "grad_norm": 0.333984375, "learning_rate": 1.8825708661546396e-05, "loss": 0.4673, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1767 }, { "epoch": 0.24041338047321187, "grad_norm": 0.48828125, "learning_rate": 1.882347536807909e-05, "loss": 0.7371, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1768 }, { "epoch": 0.24054936089203155, "grad_norm": 0.41796875, "learning_rate": 1.8821240085692563e-05, "loss": 0.7054, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1769 }, { "epoch": 0.24068534131085123, "grad_norm": 0.41015625, "learning_rate": 1.881900281489067e-05, "loss": 0.7259, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1770 }, { "epoch": 0.24082132172967094, "grad_norm": 0.259765625, "learning_rate": 1.8816763556177715e-05, "loss": 0.5513, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1771 }, { "epoch": 0.24095730214849062, "grad_norm": 0.416015625, "learning_rate": 1.8814522310058465e-05, "loss": 0.5228, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1772 }, { "epoch": 0.2410932825673103, "grad_norm": 0.5703125, "learning_rate": 1.881227907703812e-05, "loss": 0.5731, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1773 }, { "epoch": 0.24122926298613, "grad_norm": 0.36328125, "learning_rate": 1.8810033857622328e-05, "loss": 0.634, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1774 }, { "epoch": 0.2413652434049497, "grad_norm": 0.5546875, "learning_rate": 1.8807786652317194e-05, "loss": 0.5021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1775 }, { "epoch": 0.24150122382376937, "grad_norm": 0.55078125, "learning_rate": 1.8805537461629266e-05, "loss": 0.5768, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1776 }, { "epoch": 0.24163720424258905, "grad_norm": 0.7109375, "learning_rate": 1.8803286286065535e-05, "loss": 0.7269, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1777 }, { "epoch": 0.24177318466140876, "grad_norm": 1.9140625, "learning_rate": 1.8801033126133443e-05, "loss": 0.6976, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1778 }, { "epoch": 0.24190916508022844, "grad_norm": 0.451171875, "learning_rate": 1.8798777982340884e-05, "loss": 0.6878, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1779 }, { "epoch": 0.24204514549904813, "grad_norm": 0.322265625, "learning_rate": 1.8796520855196197e-05, "loss": 0.4855, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1780 }, { "epoch": 0.24218112591786783, "grad_norm": 2.1875, "learning_rate": 1.8794261745208156e-05, "loss": 0.8027, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1781 }, { "epoch": 0.24231710633668752, "grad_norm": 0.32421875, "learning_rate": 1.8792000652886005e-05, "loss": 0.5671, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1782 }, { "epoch": 0.2424530867555072, "grad_norm": 0.48046875, "learning_rate": 1.878973757873941e-05, "loss": 0.6986, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1783 }, { "epoch": 0.2425890671743269, "grad_norm": 0.52734375, "learning_rate": 1.87874725232785e-05, "loss": 0.5967, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1784 }, { "epoch": 0.2427250475931466, "grad_norm": 0.50390625, "learning_rate": 1.8785205487013853e-05, "loss": 0.8584, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1785 }, { "epoch": 0.24286102801196627, "grad_norm": 0.42578125, "learning_rate": 1.8782936470456476e-05, "loss": 0.7188, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1786 }, { "epoch": 0.24299700843078598, "grad_norm": 0.283203125, "learning_rate": 1.8780665474117838e-05, "loss": 0.5908, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1787 }, { "epoch": 0.24313298884960566, "grad_norm": 0.64453125, "learning_rate": 1.8778392498509848e-05, "loss": 0.755, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1788 }, { "epoch": 0.24326896926842534, "grad_norm": 1.1953125, "learning_rate": 1.8776117544144866e-05, "loss": 0.6968, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1789 }, { "epoch": 0.24340494968724505, "grad_norm": 1.0078125, "learning_rate": 1.8773840611535693e-05, "loss": 0.6297, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1790 }, { "epoch": 0.24354093010606473, "grad_norm": 0.5625, "learning_rate": 1.8771561701195573e-05, "loss": 0.7892, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1791 }, { "epoch": 0.2436769105248844, "grad_norm": 0.3828125, "learning_rate": 1.8769280813638208e-05, "loss": 0.6471, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1792 }, { "epoch": 0.24381289094370412, "grad_norm": 0.337890625, "learning_rate": 1.8766997949377726e-05, "loss": 0.5863, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1793 }, { "epoch": 0.2439488713625238, "grad_norm": 0.423828125, "learning_rate": 1.8764713108928727e-05, "loss": 0.8213, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1794 }, { "epoch": 0.24408485178134348, "grad_norm": 0.337890625, "learning_rate": 1.876242629280623e-05, "loss": 0.5778, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1795 }, { "epoch": 0.24422083220016316, "grad_norm": 0.31640625, "learning_rate": 1.8760137501525723e-05, "loss": 0.5397, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1796 }, { "epoch": 0.24435681261898287, "grad_norm": 0.640625, "learning_rate": 1.8757846735603118e-05, "loss": 0.6081, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1797 }, { "epoch": 0.24449279303780255, "grad_norm": 0.427734375, "learning_rate": 1.8755553995554786e-05, "loss": 0.7645, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1798 }, { "epoch": 0.24462877345662223, "grad_norm": 0.43359375, "learning_rate": 1.8753259281897532e-05, "loss": 0.6867, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1799 }, { "epoch": 0.24476475387544194, "grad_norm": 0.5859375, "learning_rate": 1.8750962595148622e-05, "loss": 0.9146, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1800 }, { "epoch": 0.24490073429426162, "grad_norm": 0.373046875, "learning_rate": 1.8748663935825755e-05, "loss": 0.6917, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1801 }, { "epoch": 0.2450367147130813, "grad_norm": 0.408203125, "learning_rate": 1.8746363304447073e-05, "loss": 0.7674, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1802 }, { "epoch": 0.24517269513190101, "grad_norm": 0.390625, "learning_rate": 1.874406070153117e-05, "loss": 0.5991, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1803 }, { "epoch": 0.2453086755507207, "grad_norm": 0.318359375, "learning_rate": 1.8741756127597074e-05, "loss": 0.6011, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1804 }, { "epoch": 0.24544465596954038, "grad_norm": 0.3671875, "learning_rate": 1.8739449583164274e-05, "loss": 0.5887, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1805 }, { "epoch": 0.24558063638836009, "grad_norm": 0.353515625, "learning_rate": 1.8737141068752688e-05, "loss": 0.5607, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1806 }, { "epoch": 0.24571661680717977, "grad_norm": 0.484375, "learning_rate": 1.8734830584882685e-05, "loss": 0.7088, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1807 }, { "epoch": 0.24585259722599945, "grad_norm": 0.59375, "learning_rate": 1.8732518132075072e-05, "loss": 0.7363, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1808 }, { "epoch": 0.24598857764481916, "grad_norm": 0.32421875, "learning_rate": 1.873020371085111e-05, "loss": 0.6258, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1809 }, { "epoch": 0.24612455806363884, "grad_norm": 0.458984375, "learning_rate": 1.8727887321732497e-05, "loss": 0.9084, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1810 }, { "epoch": 0.24626053848245852, "grad_norm": 0.5234375, "learning_rate": 1.872556896524137e-05, "loss": 0.6825, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1811 }, { "epoch": 0.24639651890127823, "grad_norm": 0.51953125, "learning_rate": 1.872324864190032e-05, "loss": 0.401, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1812 }, { "epoch": 0.2465324993200979, "grad_norm": 0.41796875, "learning_rate": 1.8720926352232376e-05, "loss": 0.5555, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1813 }, { "epoch": 0.2466684797389176, "grad_norm": 0.4296875, "learning_rate": 1.8718602096761008e-05, "loss": 0.6025, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1814 }, { "epoch": 0.2468044601577373, "grad_norm": 0.451171875, "learning_rate": 1.8716275876010135e-05, "loss": 0.6206, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1815 }, { "epoch": 0.24694044057655698, "grad_norm": 0.33203125, "learning_rate": 1.8713947690504114e-05, "loss": 0.6121, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1816 }, { "epoch": 0.24707642099537666, "grad_norm": 0.455078125, "learning_rate": 1.8711617540767743e-05, "loss": 0.6143, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1817 }, { "epoch": 0.24721240141419634, "grad_norm": 0.66796875, "learning_rate": 1.870928542732627e-05, "loss": 0.5825, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1818 }, { "epoch": 0.24734838183301605, "grad_norm": 0.5859375, "learning_rate": 1.8706951350705384e-05, "loss": 0.7082, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1819 }, { "epoch": 0.24748436225183573, "grad_norm": 0.6484375, "learning_rate": 1.8704615311431214e-05, "loss": 0.6488, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1820 }, { "epoch": 0.2476203426706554, "grad_norm": 0.3046875, "learning_rate": 1.8702277310030324e-05, "loss": 0.5785, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1821 }, { "epoch": 0.24775632308947512, "grad_norm": 0.51953125, "learning_rate": 1.8699937347029735e-05, "loss": 0.4901, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1822 }, { "epoch": 0.2478923035082948, "grad_norm": 0.376953125, "learning_rate": 1.86975954229569e-05, "loss": 0.6455, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1823 }, { "epoch": 0.24802828392711448, "grad_norm": 0.47265625, "learning_rate": 1.8695251538339724e-05, "loss": 0.7453, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1824 }, { "epoch": 0.2481642643459342, "grad_norm": 0.263671875, "learning_rate": 1.869290569370654e-05, "loss": 0.4228, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1825 }, { "epoch": 0.24830024476475387, "grad_norm": 0.34375, "learning_rate": 1.869055788958613e-05, "loss": 0.5335, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1826 }, { "epoch": 0.24843622518357356, "grad_norm": 0.51953125, "learning_rate": 1.8688208126507725e-05, "loss": 0.7948, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1827 }, { "epoch": 0.24857220560239326, "grad_norm": 0.76953125, "learning_rate": 1.8685856405000984e-05, "loss": 0.6579, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1828 }, { "epoch": 0.24870818602121295, "grad_norm": 0.294921875, "learning_rate": 1.8683502725596013e-05, "loss": 0.5711, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1829 }, { "epoch": 0.24884416644003263, "grad_norm": 0.5546875, "learning_rate": 1.8681147088823363e-05, "loss": 0.7184, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1830 }, { "epoch": 0.24898014685885234, "grad_norm": 0.396484375, "learning_rate": 1.8678789495214026e-05, "loss": 0.8141, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1831 }, { "epoch": 0.24911612727767202, "grad_norm": 0.3203125, "learning_rate": 1.8676429945299427e-05, "loss": 0.6325, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1832 }, { "epoch": 0.2492521076964917, "grad_norm": 0.63671875, "learning_rate": 1.867406843961144e-05, "loss": 0.7562, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1833 }, { "epoch": 0.2493880881153114, "grad_norm": 0.52734375, "learning_rate": 1.8671704978682376e-05, "loss": 0.6418, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1834 }, { "epoch": 0.2495240685341311, "grad_norm": 0.384765625, "learning_rate": 1.8669339563044987e-05, "loss": 0.5983, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1835 }, { "epoch": 0.24966004895295077, "grad_norm": 0.8125, "learning_rate": 1.866697219323247e-05, "loss": 0.939, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1836 }, { "epoch": 0.24979602937177048, "grad_norm": 0.271484375, "learning_rate": 1.866460286977846e-05, "loss": 0.41, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1837 }, { "epoch": 0.24993200979059016, "grad_norm": 0.263671875, "learning_rate": 1.8662231593217028e-05, "loss": 0.4564, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1838 }, { "epoch": 0.25006799020940984, "grad_norm": 0.4296875, "learning_rate": 1.8659858364082688e-05, "loss": 0.561, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1839 }, { "epoch": 0.2502039706282295, "grad_norm": 1.0625, "learning_rate": 1.8657483182910398e-05, "loss": 0.7266, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1840 }, { "epoch": 0.2503399510470492, "grad_norm": 0.37890625, "learning_rate": 1.865510605023555e-05, "loss": 0.717, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1841 }, { "epoch": 0.25047593146586894, "grad_norm": 0.361328125, "learning_rate": 1.8652726966593982e-05, "loss": 0.6733, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1842 }, { "epoch": 0.2506119118846886, "grad_norm": 0.345703125, "learning_rate": 1.8650345932521966e-05, "loss": 0.6401, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1843 }, { "epoch": 0.2507478923035083, "grad_norm": 0.47265625, "learning_rate": 1.8647962948556217e-05, "loss": 0.6836, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1844 }, { "epoch": 0.250883872722328, "grad_norm": 0.453125, "learning_rate": 1.864557801523389e-05, "loss": 0.7336, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1845 }, { "epoch": 0.25101985314114766, "grad_norm": 0.46875, "learning_rate": 1.8643191133092576e-05, "loss": 0.6235, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1846 }, { "epoch": 0.25115583355996735, "grad_norm": 0.73046875, "learning_rate": 1.8640802302670305e-05, "loss": 0.613, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1847 }, { "epoch": 0.2512918139787871, "grad_norm": 0.388671875, "learning_rate": 1.8638411524505557e-05, "loss": 0.7186, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1848 }, { "epoch": 0.25142779439760676, "grad_norm": 0.90234375, "learning_rate": 1.8636018799137234e-05, "loss": 0.7969, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1849 }, { "epoch": 0.25156377481642644, "grad_norm": 0.26171875, "learning_rate": 1.863362412710469e-05, "loss": 0.4289, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1850 }, { "epoch": 0.2516997552352461, "grad_norm": 0.392578125, "learning_rate": 1.8631227508947715e-05, "loss": 0.6152, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1851 }, { "epoch": 0.2518357356540658, "grad_norm": 0.412109375, "learning_rate": 1.8628828945206533e-05, "loss": 0.6766, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1852 }, { "epoch": 0.2519717160728855, "grad_norm": 0.34375, "learning_rate": 1.8626428436421805e-05, "loss": 0.5521, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1853 }, { "epoch": 0.25210769649170517, "grad_norm": 0.66796875, "learning_rate": 1.8624025983134643e-05, "loss": 0.6899, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1854 }, { "epoch": 0.2522436769105249, "grad_norm": 0.6015625, "learning_rate": 1.8621621585886586e-05, "loss": 0.8853, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1855 }, { "epoch": 0.2523796573293446, "grad_norm": 0.31640625, "learning_rate": 1.8619215245219617e-05, "loss": 0.5164, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1856 }, { "epoch": 0.25251563774816427, "grad_norm": 0.45703125, "learning_rate": 1.861680696167615e-05, "loss": 0.7013, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1857 }, { "epoch": 0.25265161816698395, "grad_norm": 0.291015625, "learning_rate": 1.8614396735799044e-05, "loss": 0.4188, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1858 }, { "epoch": 0.25278759858580363, "grad_norm": 0.984375, "learning_rate": 1.8611984568131595e-05, "loss": 0.6956, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1859 }, { "epoch": 0.2529235790046233, "grad_norm": 0.482421875, "learning_rate": 1.8609570459217533e-05, "loss": 0.5814, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1860 }, { "epoch": 0.25305955942344305, "grad_norm": 0.5625, "learning_rate": 1.8607154409601028e-05, "loss": 0.8088, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1861 }, { "epoch": 0.25319553984226273, "grad_norm": 0.3125, "learning_rate": 1.8604736419826686e-05, "loss": 0.4517, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1862 }, { "epoch": 0.2533315202610824, "grad_norm": 0.4375, "learning_rate": 1.8602316490439558e-05, "loss": 0.6543, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1863 }, { "epoch": 0.2534675006799021, "grad_norm": 0.4140625, "learning_rate": 1.8599894621985114e-05, "loss": 0.8101, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1864 }, { "epoch": 0.2536034810987218, "grad_norm": 0.45703125, "learning_rate": 1.8597470815009285e-05, "loss": 0.6048, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1865 }, { "epoch": 0.25373946151754145, "grad_norm": 0.28515625, "learning_rate": 1.8595045070058417e-05, "loss": 0.5247, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1866 }, { "epoch": 0.2538754419363612, "grad_norm": 0.3515625, "learning_rate": 1.8592617387679304e-05, "loss": 0.7062, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1867 }, { "epoch": 0.25401142235518087, "grad_norm": 0.345703125, "learning_rate": 1.8590187768419184e-05, "loss": 0.612, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1868 }, { "epoch": 0.25414740277400055, "grad_norm": 0.7265625, "learning_rate": 1.8587756212825713e-05, "loss": 0.7669, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1869 }, { "epoch": 0.25428338319282023, "grad_norm": 0.380859375, "learning_rate": 1.8585322721446997e-05, "loss": 0.5815, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1870 }, { "epoch": 0.2544193636116399, "grad_norm": 0.24609375, "learning_rate": 1.8582887294831578e-05, "loss": 0.41, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1871 }, { "epoch": 0.2545553440304596, "grad_norm": 0.73046875, "learning_rate": 1.8580449933528425e-05, "loss": 0.7793, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1872 }, { "epoch": 0.2546913244492793, "grad_norm": 0.53515625, "learning_rate": 1.8578010638086953e-05, "loss": 0.742, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1873 }, { "epoch": 0.254827304868099, "grad_norm": 0.34375, "learning_rate": 1.8575569409057005e-05, "loss": 0.627, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1874 }, { "epoch": 0.2549632852869187, "grad_norm": 0.453125, "learning_rate": 1.857312624698887e-05, "loss": 0.6976, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1875 }, { "epoch": 0.2550992657057384, "grad_norm": 0.3515625, "learning_rate": 1.8570681152433262e-05, "loss": 0.5562, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1876 }, { "epoch": 0.25523524612455806, "grad_norm": 0.384765625, "learning_rate": 1.8568234125941334e-05, "loss": 0.5863, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1877 }, { "epoch": 0.25537122654337774, "grad_norm": 0.8125, "learning_rate": 1.856578516806468e-05, "loss": 0.728, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1878 }, { "epoch": 0.2555072069621974, "grad_norm": 0.40234375, "learning_rate": 1.8563334279355324e-05, "loss": 0.7811, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1879 }, { "epoch": 0.25564318738101716, "grad_norm": 0.455078125, "learning_rate": 1.8560881460365726e-05, "loss": 0.6523, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1880 }, { "epoch": 0.25577916779983684, "grad_norm": 0.337890625, "learning_rate": 1.8558426711648777e-05, "loss": 0.5686, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1881 }, { "epoch": 0.2559151482186565, "grad_norm": 0.45703125, "learning_rate": 1.8555970033757815e-05, "loss": 0.7188, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1882 }, { "epoch": 0.2560511286374762, "grad_norm": 0.439453125, "learning_rate": 1.85535114272466e-05, "loss": 0.7406, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1883 }, { "epoch": 0.2561871090562959, "grad_norm": 0.326171875, "learning_rate": 1.8551050892669333e-05, "loss": 0.604, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1884 }, { "epoch": 0.25632308947511556, "grad_norm": 0.6328125, "learning_rate": 1.8548588430580648e-05, "loss": 0.5145, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1885 }, { "epoch": 0.2564590698939353, "grad_norm": 0.310546875, "learning_rate": 1.8546124041535617e-05, "loss": 0.4775, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1886 }, { "epoch": 0.256595050312755, "grad_norm": 0.3671875, "learning_rate": 1.854365772608974e-05, "loss": 0.6019, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1887 }, { "epoch": 0.25673103073157466, "grad_norm": 0.39453125, "learning_rate": 1.854118948479896e-05, "loss": 0.6945, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1888 }, { "epoch": 0.25686701115039434, "grad_norm": 0.35546875, "learning_rate": 1.853871931821964e-05, "loss": 0.6738, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1889 }, { "epoch": 0.257002991569214, "grad_norm": 0.376953125, "learning_rate": 1.8536247226908592e-05, "loss": 0.7785, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1890 }, { "epoch": 0.2571389719880337, "grad_norm": 0.359375, "learning_rate": 1.8533773211423054e-05, "loss": 0.6771, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1891 }, { "epoch": 0.2572749524068534, "grad_norm": 0.384765625, "learning_rate": 1.8531297272320705e-05, "loss": 0.4513, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1892 }, { "epoch": 0.2574109328256731, "grad_norm": 0.419921875, "learning_rate": 1.8528819410159638e-05, "loss": 0.6795, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1893 }, { "epoch": 0.2575469132444928, "grad_norm": 0.32421875, "learning_rate": 1.8526339625498408e-05, "loss": 0.596, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1894 }, { "epoch": 0.2576828936633125, "grad_norm": 0.423828125, "learning_rate": 1.852385791889598e-05, "loss": 0.7596, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1895 }, { "epoch": 0.25781887408213217, "grad_norm": 0.33203125, "learning_rate": 1.8521374290911762e-05, "loss": 0.5767, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1896 }, { "epoch": 0.25795485450095185, "grad_norm": 0.31640625, "learning_rate": 1.85188887421056e-05, "loss": 0.5285, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1897 }, { "epoch": 0.25809083491977153, "grad_norm": 0.412109375, "learning_rate": 1.8516401273037757e-05, "loss": 0.7995, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1898 }, { "epoch": 0.25822681533859126, "grad_norm": 0.404296875, "learning_rate": 1.8513911884268952e-05, "loss": 0.6769, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1899 }, { "epoch": 0.25836279575741095, "grad_norm": 0.69921875, "learning_rate": 1.851142057636031e-05, "loss": 0.689, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1900 }, { "epoch": 0.2584987761762306, "grad_norm": 0.3984375, "learning_rate": 1.8508927349873412e-05, "loss": 0.7212, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1901 }, { "epoch": 0.2586347565950503, "grad_norm": 0.431640625, "learning_rate": 1.850643220537026e-05, "loss": 0.5841, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1902 }, { "epoch": 0.25877073701387, "grad_norm": 0.5078125, "learning_rate": 1.8503935143413286e-05, "loss": 0.6763, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1903 }, { "epoch": 0.25890671743268967, "grad_norm": 0.4296875, "learning_rate": 1.8501436164565363e-05, "loss": 0.5988, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1904 }, { "epoch": 0.2590426978515094, "grad_norm": 0.5546875, "learning_rate": 1.849893526938979e-05, "loss": 0.6589, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1905 }, { "epoch": 0.2591786782703291, "grad_norm": 0.6015625, "learning_rate": 1.8496432458450297e-05, "loss": 0.6389, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1906 }, { "epoch": 0.25931465868914877, "grad_norm": 0.333984375, "learning_rate": 1.849392773231105e-05, "loss": 0.5003, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1907 }, { "epoch": 0.25945063910796845, "grad_norm": 0.400390625, "learning_rate": 1.8491421091536643e-05, "loss": 0.6304, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1908 }, { "epoch": 0.25958661952678813, "grad_norm": 0.5390625, "learning_rate": 1.8488912536692108e-05, "loss": 0.5954, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1909 }, { "epoch": 0.2597225999456078, "grad_norm": 0.6953125, "learning_rate": 1.84864020683429e-05, "loss": 0.6245, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1910 }, { "epoch": 0.25985858036442755, "grad_norm": 0.359375, "learning_rate": 1.848388968705491e-05, "loss": 0.5407, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1911 }, { "epoch": 0.25999456078324723, "grad_norm": 0.380859375, "learning_rate": 1.8481375393394458e-05, "loss": 0.4707, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1912 }, { "epoch": 0.2601305412020669, "grad_norm": 0.365234375, "learning_rate": 1.84788591879283e-05, "loss": 0.7189, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1913 }, { "epoch": 0.2602665216208866, "grad_norm": 0.48046875, "learning_rate": 1.847634107122362e-05, "loss": 0.8053, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1914 }, { "epoch": 0.2604025020397063, "grad_norm": 0.4375, "learning_rate": 1.847382104384803e-05, "loss": 0.5955, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1915 }, { "epoch": 0.26053848245852596, "grad_norm": 0.34375, "learning_rate": 1.847129910636957e-05, "loss": 0.6286, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1916 }, { "epoch": 0.26067446287734564, "grad_norm": 0.3984375, "learning_rate": 1.8468775259356725e-05, "loss": 0.8094, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1917 }, { "epoch": 0.2608104432961654, "grad_norm": 0.279296875, "learning_rate": 1.8466249503378396e-05, "loss": 0.491, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1918 }, { "epoch": 0.26094642371498505, "grad_norm": 0.353515625, "learning_rate": 1.8463721839003917e-05, "loss": 0.6986, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1919 }, { "epoch": 0.26108240413380474, "grad_norm": 0.31640625, "learning_rate": 1.846119226680306e-05, "loss": 0.4427, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1920 }, { "epoch": 0.2612183845526244, "grad_norm": 0.36328125, "learning_rate": 1.845866078734602e-05, "loss": 0.8133, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1921 }, { "epoch": 0.2613543649714441, "grad_norm": 0.46875, "learning_rate": 1.8456127401203422e-05, "loss": 0.5689, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1922 }, { "epoch": 0.2614903453902638, "grad_norm": 0.498046875, "learning_rate": 1.845359210894632e-05, "loss": 0.7304, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1923 }, { "epoch": 0.2616263258090835, "grad_norm": 0.5390625, "learning_rate": 1.8451054911146204e-05, "loss": 0.826, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1924 }, { "epoch": 0.2617623062279032, "grad_norm": 0.34765625, "learning_rate": 1.8448515808374986e-05, "loss": 0.6174, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1925 }, { "epoch": 0.2618982866467229, "grad_norm": 0.54296875, "learning_rate": 1.8445974801205017e-05, "loss": 0.8062, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1926 }, { "epoch": 0.26203426706554256, "grad_norm": 0.416015625, "learning_rate": 1.844343189020906e-05, "loss": 0.751, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1927 }, { "epoch": 0.26217024748436224, "grad_norm": 0.515625, "learning_rate": 1.844088707596033e-05, "loss": 0.8573, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1928 }, { "epoch": 0.2623062279031819, "grad_norm": 0.3671875, "learning_rate": 1.843834035903245e-05, "loss": 0.6711, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1929 }, { "epoch": 0.26244220832200166, "grad_norm": 0.40625, "learning_rate": 1.843579173999949e-05, "loss": 0.6681, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1930 }, { "epoch": 0.26257818874082134, "grad_norm": 0.33984375, "learning_rate": 1.8433241219435937e-05, "loss": 0.6084, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1931 }, { "epoch": 0.262714169159641, "grad_norm": 0.5234375, "learning_rate": 1.8430688797916702e-05, "loss": 0.7646, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1932 }, { "epoch": 0.2628501495784607, "grad_norm": 0.384765625, "learning_rate": 1.8428134476017138e-05, "loss": 0.6274, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1933 }, { "epoch": 0.2629861299972804, "grad_norm": 0.3671875, "learning_rate": 1.842557825431302e-05, "loss": 0.6092, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1934 }, { "epoch": 0.26312211041610006, "grad_norm": 0.32421875, "learning_rate": 1.8423020133380555e-05, "loss": 0.6562, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1935 }, { "epoch": 0.26325809083491974, "grad_norm": 0.3125, "learning_rate": 1.8420460113796367e-05, "loss": 0.5887, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1936 }, { "epoch": 0.2633940712537395, "grad_norm": 1.0, "learning_rate": 1.8417898196137526e-05, "loss": 0.5511, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1937 }, { "epoch": 0.26353005167255916, "grad_norm": 0.7734375, "learning_rate": 1.8415334380981507e-05, "loss": 0.7542, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1938 }, { "epoch": 0.26366603209137884, "grad_norm": 0.4296875, "learning_rate": 1.841276866890624e-05, "loss": 0.7256, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1939 }, { "epoch": 0.2638020125101985, "grad_norm": 0.318359375, "learning_rate": 1.8410201060490053e-05, "loss": 0.5348, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1940 }, { "epoch": 0.2639379929290182, "grad_norm": 0.64453125, "learning_rate": 1.8407631556311725e-05, "loss": 0.5254, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1941 }, { "epoch": 0.2640739733478379, "grad_norm": 0.27734375, "learning_rate": 1.8405060156950453e-05, "loss": 0.5404, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1942 }, { "epoch": 0.2642099537666576, "grad_norm": 0.322265625, "learning_rate": 1.840248686298586e-05, "loss": 0.675, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1943 }, { "epoch": 0.2643459341854773, "grad_norm": 0.2890625, "learning_rate": 1.8399911674997998e-05, "loss": 0.4515, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1944 }, { "epoch": 0.264481914604297, "grad_norm": 0.373046875, "learning_rate": 1.8397334593567347e-05, "loss": 0.7109, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1945 }, { "epoch": 0.26461789502311667, "grad_norm": 0.6875, "learning_rate": 1.8394755619274813e-05, "loss": 0.9688, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1946 }, { "epoch": 0.26475387544193635, "grad_norm": 0.609375, "learning_rate": 1.839217475270173e-05, "loss": 0.5249, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1947 }, { "epoch": 0.26488985586075603, "grad_norm": 0.4453125, "learning_rate": 1.8389591994429846e-05, "loss": 0.6341, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1948 }, { "epoch": 0.26502583627957577, "grad_norm": 0.359375, "learning_rate": 1.8387007345041362e-05, "loss": 0.6164, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1949 }, { "epoch": 0.26516181669839545, "grad_norm": 0.6640625, "learning_rate": 1.8384420805118878e-05, "loss": 0.7524, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1950 }, { "epoch": 0.26529779711721513, "grad_norm": 0.5, "learning_rate": 1.838183237524544e-05, "loss": 0.6087, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1951 }, { "epoch": 0.2654337775360348, "grad_norm": 0.41796875, "learning_rate": 1.8379242056004504e-05, "loss": 0.71, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1952 }, { "epoch": 0.2655697579548545, "grad_norm": 0.244140625, "learning_rate": 1.8376649847979964e-05, "loss": 0.4486, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1953 }, { "epoch": 0.26570573837367417, "grad_norm": 0.69921875, "learning_rate": 1.8374055751756138e-05, "loss": 0.701, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1954 }, { "epoch": 0.2658417187924939, "grad_norm": 0.4453125, "learning_rate": 1.8371459767917762e-05, "loss": 0.6719, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1955 }, { "epoch": 0.2659776992113136, "grad_norm": 0.80078125, "learning_rate": 1.8368861897050002e-05, "loss": 0.7104, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1956 }, { "epoch": 0.26611367963013327, "grad_norm": 1.140625, "learning_rate": 1.8366262139738454e-05, "loss": 0.7358, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1957 }, { "epoch": 0.26624966004895295, "grad_norm": 0.5546875, "learning_rate": 1.836366049656913e-05, "loss": 0.4793, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1958 }, { "epoch": 0.26638564046777263, "grad_norm": 0.37109375, "learning_rate": 1.836105696812848e-05, "loss": 0.5633, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1959 }, { "epoch": 0.2665216208865923, "grad_norm": 0.37890625, "learning_rate": 1.8358451555003363e-05, "loss": 0.6291, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1960 }, { "epoch": 0.266657601305412, "grad_norm": 0.359375, "learning_rate": 1.8355844257781076e-05, "loss": 0.6247, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1961 }, { "epoch": 0.26679358172423173, "grad_norm": 0.4296875, "learning_rate": 1.835323507704933e-05, "loss": 0.6948, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1962 }, { "epoch": 0.2669295621430514, "grad_norm": 0.60546875, "learning_rate": 1.8350624013396276e-05, "loss": 0.6621, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1963 }, { "epoch": 0.2670655425618711, "grad_norm": 0.431640625, "learning_rate": 1.8348011067410475e-05, "loss": 0.8191, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1964 }, { "epoch": 0.2672015229806908, "grad_norm": 0.47265625, "learning_rate": 1.834539623968091e-05, "loss": 0.6993, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1965 }, { "epoch": 0.26733750339951046, "grad_norm": 0.546875, "learning_rate": 1.8342779530797004e-05, "loss": 0.8358, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1966 }, { "epoch": 0.26747348381833014, "grad_norm": 0.39453125, "learning_rate": 1.8340160941348594e-05, "loss": 0.6735, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1967 }, { "epoch": 0.2676094642371499, "grad_norm": 0.451171875, "learning_rate": 1.833754047192594e-05, "loss": 0.7751, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1968 }, { "epoch": 0.26774544465596956, "grad_norm": 0.33984375, "learning_rate": 1.8334918123119724e-05, "loss": 0.714, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1969 }, { "epoch": 0.26788142507478924, "grad_norm": 0.4296875, "learning_rate": 1.8332293895521062e-05, "loss": 0.8368, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1970 }, { "epoch": 0.2680174054936089, "grad_norm": 0.29296875, "learning_rate": 1.8329667789721487e-05, "loss": 0.5493, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1971 }, { "epoch": 0.2681533859124286, "grad_norm": 0.29296875, "learning_rate": 1.832703980631295e-05, "loss": 0.458, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1972 }, { "epoch": 0.2682893663312483, "grad_norm": 0.267578125, "learning_rate": 1.8324409945887836e-05, "loss": 0.488, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1973 }, { "epoch": 0.268425346750068, "grad_norm": 0.62890625, "learning_rate": 1.8321778209038944e-05, "loss": 0.5677, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1974 }, { "epoch": 0.2685613271688877, "grad_norm": 0.447265625, "learning_rate": 1.83191445963595e-05, "loss": 0.6532, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1975 }, { "epoch": 0.2686973075877074, "grad_norm": 0.458984375, "learning_rate": 1.8316509108443156e-05, "loss": 0.7152, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1976 }, { "epoch": 0.26883328800652706, "grad_norm": 0.357421875, "learning_rate": 1.8313871745883974e-05, "loss": 0.6468, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1977 }, { "epoch": 0.26896926842534674, "grad_norm": 0.30078125, "learning_rate": 1.831123250927646e-05, "loss": 0.5311, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1978 }, { "epoch": 0.2691052488441664, "grad_norm": 0.609375, "learning_rate": 1.8308591399215523e-05, "loss": 0.5693, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1979 }, { "epoch": 0.2692412292629861, "grad_norm": 0.67578125, "learning_rate": 1.830594841629651e-05, "loss": 0.4572, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1980 }, { "epoch": 0.26937720968180584, "grad_norm": 0.423828125, "learning_rate": 1.8303303561115166e-05, "loss": 0.6375, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1981 }, { "epoch": 0.2695131901006255, "grad_norm": 0.71484375, "learning_rate": 1.8300656834267685e-05, "loss": 0.4932, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1982 }, { "epoch": 0.2696491705194452, "grad_norm": 0.5078125, "learning_rate": 1.829800823635067e-05, "loss": 0.7328, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1983 }, { "epoch": 0.2697851509382649, "grad_norm": 0.373046875, "learning_rate": 1.8295357767961144e-05, "loss": 0.4994, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1984 }, { "epoch": 0.26992113135708456, "grad_norm": 0.310546875, "learning_rate": 1.8292705429696564e-05, "loss": 0.5952, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1985 }, { "epoch": 0.27005711177590425, "grad_norm": 0.384765625, "learning_rate": 1.829005122215479e-05, "loss": 0.6763, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1986 }, { "epoch": 0.270193092194724, "grad_norm": 0.44921875, "learning_rate": 1.8287395145934114e-05, "loss": 0.8576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1987 }, { "epoch": 0.27032907261354366, "grad_norm": 0.2451171875, "learning_rate": 1.8284737201633248e-05, "loss": 0.3373, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1988 }, { "epoch": 0.27046505303236335, "grad_norm": 0.4375, "learning_rate": 1.8282077389851328e-05, "loss": 0.5597, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1989 }, { "epoch": 0.270601033451183, "grad_norm": 0.390625, "learning_rate": 1.8279415711187908e-05, "loss": 0.7062, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1990 }, { "epoch": 0.2707370138700027, "grad_norm": 0.73828125, "learning_rate": 1.827675216624296e-05, "loss": 0.8252, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1991 }, { "epoch": 0.2708729942888224, "grad_norm": 0.453125, "learning_rate": 1.8274086755616883e-05, "loss": 0.638, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1992 }, { "epoch": 0.2710089747076421, "grad_norm": 0.376953125, "learning_rate": 1.8271419479910492e-05, "loss": 0.5365, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1993 }, { "epoch": 0.2711449551264618, "grad_norm": 0.765625, "learning_rate": 1.8268750339725024e-05, "loss": 0.6348, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1994 }, { "epoch": 0.2712809355452815, "grad_norm": 0.330078125, "learning_rate": 1.8266079335662135e-05, "loss": 0.5845, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1995 }, { "epoch": 0.27141691596410117, "grad_norm": 0.38671875, "learning_rate": 1.8263406468323898e-05, "loss": 0.682, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1996 }, { "epoch": 0.27155289638292085, "grad_norm": 0.3984375, "learning_rate": 1.8260731738312817e-05, "loss": 0.6951, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1997 }, { "epoch": 0.27168887680174053, "grad_norm": 0.640625, "learning_rate": 1.8258055146231806e-05, "loss": 0.5145, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1998 }, { "epoch": 0.2718248572205602, "grad_norm": 0.392578125, "learning_rate": 1.82553766926842e-05, "loss": 0.5758, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 1999 }, { "epoch": 0.27196083763937995, "grad_norm": 0.40625, "learning_rate": 1.825269637827376e-05, "loss": 0.6991, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2000 }, { "epoch": 0.27209681805819963, "grad_norm": 0.63671875, "learning_rate": 1.8250014203604655e-05, "loss": 0.6502, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2001 }, { "epoch": 0.2722327984770193, "grad_norm": 0.3515625, "learning_rate": 1.8247330169281485e-05, "loss": 0.7607, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2002 }, { "epoch": 0.272368778895839, "grad_norm": 0.50390625, "learning_rate": 1.824464427590926e-05, "loss": 0.7287, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2003 }, { "epoch": 0.2725047593146587, "grad_norm": 0.2734375, "learning_rate": 1.824195652409342e-05, "loss": 0.5544, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2004 }, { "epoch": 0.27264073973347835, "grad_norm": 0.37109375, "learning_rate": 1.8239266914439806e-05, "loss": 0.6597, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2005 }, { "epoch": 0.2727767201522981, "grad_norm": 0.5859375, "learning_rate": 1.8236575447554702e-05, "loss": 0.7842, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2006 }, { "epoch": 0.2729127005711178, "grad_norm": 0.365234375, "learning_rate": 1.8233882124044786e-05, "loss": 0.7489, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2007 }, { "epoch": 0.27304868098993745, "grad_norm": 0.380859375, "learning_rate": 1.823118694451717e-05, "loss": 0.7355, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2008 }, { "epoch": 0.27318466140875713, "grad_norm": 0.5546875, "learning_rate": 1.8228489909579388e-05, "loss": 0.5291, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2009 }, { "epoch": 0.2733206418275768, "grad_norm": 0.50390625, "learning_rate": 1.8225791019839375e-05, "loss": 0.7109, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2010 }, { "epoch": 0.2734566222463965, "grad_norm": 0.52734375, "learning_rate": 1.8223090275905496e-05, "loss": 0.7505, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2011 }, { "epoch": 0.27359260266521623, "grad_norm": 1.4375, "learning_rate": 1.8220387678386536e-05, "loss": 0.6351, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2012 }, { "epoch": 0.2737285830840359, "grad_norm": 0.318359375, "learning_rate": 1.8217683227891687e-05, "loss": 0.6139, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2013 }, { "epoch": 0.2738645635028556, "grad_norm": 0.341796875, "learning_rate": 1.821497692503057e-05, "loss": 0.5594, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2014 }, { "epoch": 0.2740005439216753, "grad_norm": 0.26953125, "learning_rate": 1.8212268770413214e-05, "loss": 0.5337, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2015 }, { "epoch": 0.27413652434049496, "grad_norm": 0.408203125, "learning_rate": 1.8209558764650077e-05, "loss": 0.7782, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2016 }, { "epoch": 0.27427250475931464, "grad_norm": 0.486328125, "learning_rate": 1.8206846908352023e-05, "loss": 0.8843, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2017 }, { "epoch": 0.2744084851781344, "grad_norm": 0.35546875, "learning_rate": 1.820413320213034e-05, "loss": 0.5775, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2018 }, { "epoch": 0.27454446559695406, "grad_norm": 0.703125, "learning_rate": 1.8201417646596723e-05, "loss": 0.8452, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2019 }, { "epoch": 0.27468044601577374, "grad_norm": 0.7578125, "learning_rate": 1.81987002423633e-05, "loss": 0.4753, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2020 }, { "epoch": 0.2748164264345934, "grad_norm": 0.419921875, "learning_rate": 1.8195980990042607e-05, "loss": 0.6217, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2021 }, { "epoch": 0.2749524068534131, "grad_norm": 0.306640625, "learning_rate": 1.819325989024759e-05, "loss": 0.6099, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2022 }, { "epoch": 0.2750883872722328, "grad_norm": 0.59375, "learning_rate": 1.8190536943591627e-05, "loss": 0.764, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2023 }, { "epoch": 0.27522436769105246, "grad_norm": 0.41796875, "learning_rate": 1.8187812150688495e-05, "loss": 0.707, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2024 }, { "epoch": 0.2753603481098722, "grad_norm": 0.357421875, "learning_rate": 1.8185085512152402e-05, "loss": 0.6753, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2025 }, { "epoch": 0.2754963285286919, "grad_norm": 0.322265625, "learning_rate": 1.8182357028597963e-05, "loss": 0.4932, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2026 }, { "epoch": 0.27563230894751156, "grad_norm": 0.373046875, "learning_rate": 1.8179626700640208e-05, "loss": 0.6719, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2027 }, { "epoch": 0.27576828936633124, "grad_norm": 0.341796875, "learning_rate": 1.8176894528894595e-05, "loss": 0.5985, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2028 }, { "epoch": 0.2759042697851509, "grad_norm": 0.58203125, "learning_rate": 1.817416051397698e-05, "loss": 0.6374, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2029 }, { "epoch": 0.2760402502039706, "grad_norm": 0.73828125, "learning_rate": 1.817142465650365e-05, "loss": 0.5711, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2030 }, { "epoch": 0.27617623062279034, "grad_norm": 0.271484375, "learning_rate": 1.8168686957091295e-05, "loss": 0.474, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2031 }, { "epoch": 0.27631221104161, "grad_norm": 0.318359375, "learning_rate": 1.816594741635703e-05, "loss": 0.5368, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2032 }, { "epoch": 0.2764481914604297, "grad_norm": 0.6484375, "learning_rate": 1.816320603491838e-05, "loss": 0.7199, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2033 }, { "epoch": 0.2765841718792494, "grad_norm": 0.388671875, "learning_rate": 1.8160462813393287e-05, "loss": 0.6743, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2034 }, { "epoch": 0.27672015229806907, "grad_norm": 0.5625, "learning_rate": 1.8157717752400103e-05, "loss": 0.7402, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2035 }, { "epoch": 0.27685613271688875, "grad_norm": 0.375, "learning_rate": 1.8154970852557604e-05, "loss": 0.6146, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2036 }, { "epoch": 0.2769921131357085, "grad_norm": 0.42578125, "learning_rate": 1.815222211448497e-05, "loss": 0.6919, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2037 }, { "epoch": 0.27712809355452817, "grad_norm": 0.57421875, "learning_rate": 1.81494715388018e-05, "loss": 0.7627, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2038 }, { "epoch": 0.27726407397334785, "grad_norm": 0.50390625, "learning_rate": 1.814671912612811e-05, "loss": 0.8294, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2039 }, { "epoch": 0.2774000543921675, "grad_norm": 0.361328125, "learning_rate": 1.8143964877084327e-05, "loss": 0.6224, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2040 }, { "epoch": 0.2775360348109872, "grad_norm": 0.32421875, "learning_rate": 1.814120879229129e-05, "loss": 0.5409, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2041 }, { "epoch": 0.2776720152298069, "grad_norm": 0.51953125, "learning_rate": 1.813845087237026e-05, "loss": 0.8062, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2042 }, { "epoch": 0.27780799564862657, "grad_norm": 0.77734375, "learning_rate": 1.81356911179429e-05, "loss": 0.6432, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2043 }, { "epoch": 0.2779439760674463, "grad_norm": 0.484375, "learning_rate": 1.813292952963129e-05, "loss": 0.4993, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2044 }, { "epoch": 0.278079956486266, "grad_norm": 0.30859375, "learning_rate": 1.8130166108057936e-05, "loss": 0.5767, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2045 }, { "epoch": 0.27821593690508567, "grad_norm": 0.328125, "learning_rate": 1.812740085384574e-05, "loss": 0.7114, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2046 }, { "epoch": 0.27835191732390535, "grad_norm": 0.53515625, "learning_rate": 1.8124633767618024e-05, "loss": 0.8127, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2047 }, { "epoch": 0.27848789774272503, "grad_norm": 0.400390625, "learning_rate": 1.8121864849998526e-05, "loss": 0.6257, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2048 }, { "epoch": 0.2786238781615447, "grad_norm": 0.3515625, "learning_rate": 1.811909410161139e-05, "loss": 0.6358, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2049 }, { "epoch": 0.27875985858036445, "grad_norm": 0.6328125, "learning_rate": 1.811632152308118e-05, "loss": 0.8262, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2050 }, { "epoch": 0.27889583899918413, "grad_norm": 0.478515625, "learning_rate": 1.811354711503287e-05, "loss": 0.7145, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2051 }, { "epoch": 0.2790318194180038, "grad_norm": 0.474609375, "learning_rate": 1.811077087809184e-05, "loss": 0.8003, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2052 }, { "epoch": 0.2791677998368235, "grad_norm": 1.8359375, "learning_rate": 1.8107992812883894e-05, "loss": 0.8472, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2053 }, { "epoch": 0.2793037802556432, "grad_norm": 0.357421875, "learning_rate": 1.8105212920035238e-05, "loss": 0.6047, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2054 }, { "epoch": 0.27943976067446286, "grad_norm": 0.40234375, "learning_rate": 1.8102431200172496e-05, "loss": 0.6095, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2055 }, { "epoch": 0.2795757410932826, "grad_norm": 0.5078125, "learning_rate": 1.8099647653922697e-05, "loss": 0.7889, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2056 }, { "epoch": 0.2797117215121023, "grad_norm": 0.4921875, "learning_rate": 1.8096862281913292e-05, "loss": 0.9289, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2057 }, { "epoch": 0.27984770193092195, "grad_norm": 0.69140625, "learning_rate": 1.8094075084772137e-05, "loss": 0.7196, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2058 }, { "epoch": 0.27998368234974164, "grad_norm": 1.1484375, "learning_rate": 1.8091286063127498e-05, "loss": 0.6086, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2059 }, { "epoch": 0.2801196627685613, "grad_norm": 0.359375, "learning_rate": 1.8088495217608054e-05, "loss": 0.6092, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2060 }, { "epoch": 0.280255643187381, "grad_norm": 0.3984375, "learning_rate": 1.8085702548842897e-05, "loss": 0.5687, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2061 }, { "epoch": 0.2803916236062007, "grad_norm": 0.54296875, "learning_rate": 1.8082908057461534e-05, "loss": 0.276, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2062 }, { "epoch": 0.2805276040250204, "grad_norm": 0.2890625, "learning_rate": 1.8080111744093867e-05, "loss": 0.639, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2063 }, { "epoch": 0.2806635844438401, "grad_norm": 0.81640625, "learning_rate": 1.807731360937023e-05, "loss": 0.6637, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2064 }, { "epoch": 0.2807995648626598, "grad_norm": 0.2890625, "learning_rate": 1.8074513653921348e-05, "loss": 0.4574, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2065 }, { "epoch": 0.28093554528147946, "grad_norm": 0.361328125, "learning_rate": 1.807171187837837e-05, "loss": 0.6948, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2066 }, { "epoch": 0.28107152570029914, "grad_norm": 0.376953125, "learning_rate": 1.8068908283372847e-05, "loss": 0.6283, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2067 }, { "epoch": 0.2812075061191188, "grad_norm": 0.373046875, "learning_rate": 1.8066102869536748e-05, "loss": 0.7354, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2068 }, { "epoch": 0.28134348653793856, "grad_norm": 0.39453125, "learning_rate": 1.8063295637502444e-05, "loss": 0.6662, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2069 }, { "epoch": 0.28147946695675824, "grad_norm": 0.27734375, "learning_rate": 1.8060486587902725e-05, "loss": 0.5116, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2070 }, { "epoch": 0.2816154473755779, "grad_norm": 0.32421875, "learning_rate": 1.8057675721370775e-05, "loss": 0.6401, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2071 }, { "epoch": 0.2817514277943976, "grad_norm": 0.33203125, "learning_rate": 1.8054863038540207e-05, "loss": 0.5527, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2072 }, { "epoch": 0.2818874082132173, "grad_norm": 0.65234375, "learning_rate": 1.805204854004503e-05, "loss": 0.7959, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2073 }, { "epoch": 0.28202338863203696, "grad_norm": 0.431640625, "learning_rate": 1.8049232226519667e-05, "loss": 0.8118, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2074 }, { "epoch": 0.2821593690508567, "grad_norm": 0.318359375, "learning_rate": 1.8046414098598947e-05, "loss": 0.5879, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2075 }, { "epoch": 0.2822953494696764, "grad_norm": 0.349609375, "learning_rate": 1.8043594156918117e-05, "loss": 0.6038, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2076 }, { "epoch": 0.28243132988849606, "grad_norm": 0.359375, "learning_rate": 1.8040772402112824e-05, "loss": 0.6121, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2077 }, { "epoch": 0.28256731030731574, "grad_norm": 0.63671875, "learning_rate": 1.8037948834819122e-05, "loss": 0.577, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2078 }, { "epoch": 0.2827032907261354, "grad_norm": 0.392578125, "learning_rate": 1.803512345567348e-05, "loss": 0.6312, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2079 }, { "epoch": 0.2828392711449551, "grad_norm": 0.4140625, "learning_rate": 1.803229626531277e-05, "loss": 0.6178, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2080 }, { "epoch": 0.28297525156377484, "grad_norm": 0.4375, "learning_rate": 1.802946726437428e-05, "loss": 0.6112, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2081 }, { "epoch": 0.2831112319825945, "grad_norm": 0.609375, "learning_rate": 1.8026636453495703e-05, "loss": 0.5428, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2082 }, { "epoch": 0.2832472124014142, "grad_norm": 0.3671875, "learning_rate": 1.8023803833315133e-05, "loss": 0.5877, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2083 }, { "epoch": 0.2833831928202339, "grad_norm": 0.37890625, "learning_rate": 1.8020969404471078e-05, "loss": 0.6333, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2084 }, { "epoch": 0.28351917323905357, "grad_norm": 0.326171875, "learning_rate": 1.8018133167602453e-05, "loss": 0.5815, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2085 }, { "epoch": 0.28365515365787325, "grad_norm": 0.51953125, "learning_rate": 1.8015295123348587e-05, "loss": 0.5859, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2086 }, { "epoch": 0.28379113407669293, "grad_norm": 0.5859375, "learning_rate": 1.80124552723492e-05, "loss": 0.6647, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2087 }, { "epoch": 0.28392711449551267, "grad_norm": 0.57421875, "learning_rate": 1.8009613615244438e-05, "loss": 0.7633, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2088 }, { "epoch": 0.28406309491433235, "grad_norm": 0.470703125, "learning_rate": 1.8006770152674836e-05, "loss": 0.4507, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2089 }, { "epoch": 0.28419907533315203, "grad_norm": 2.109375, "learning_rate": 1.8003924885281355e-05, "loss": 0.8883, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2090 }, { "epoch": 0.2843350557519717, "grad_norm": 0.357421875, "learning_rate": 1.8001077813705345e-05, "loss": 0.5682, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2091 }, { "epoch": 0.2844710361707914, "grad_norm": 0.384765625, "learning_rate": 1.7998228938588572e-05, "loss": 0.7088, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2092 }, { "epoch": 0.2846070165896111, "grad_norm": 0.3515625, "learning_rate": 1.7995378260573213e-05, "loss": 0.7197, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2093 }, { "epoch": 0.2847429970084308, "grad_norm": 0.515625, "learning_rate": 1.799252578030184e-05, "loss": 0.8449, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2094 }, { "epoch": 0.2848789774272505, "grad_norm": 0.369140625, "learning_rate": 1.798967149841744e-05, "loss": 0.6597, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2095 }, { "epoch": 0.28501495784607017, "grad_norm": 0.37109375, "learning_rate": 1.79868154155634e-05, "loss": 0.6527, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2096 }, { "epoch": 0.28515093826488985, "grad_norm": 0.54296875, "learning_rate": 1.798395753238352e-05, "loss": 0.569, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2097 }, { "epoch": 0.28528691868370953, "grad_norm": 0.55859375, "learning_rate": 1.7981097849522e-05, "loss": 0.7985, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2098 }, { "epoch": 0.2854228991025292, "grad_norm": 0.62890625, "learning_rate": 1.7978236367623448e-05, "loss": 0.8136, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2099 }, { "epoch": 0.28555887952134895, "grad_norm": 0.60546875, "learning_rate": 1.797537308733287e-05, "loss": 0.7601, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2100 }, { "epoch": 0.28569485994016863, "grad_norm": 0.4296875, "learning_rate": 1.79725080092957e-05, "loss": 0.6816, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2101 }, { "epoch": 0.2858308403589883, "grad_norm": 0.4453125, "learning_rate": 1.7969641134157747e-05, "loss": 0.8571, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2102 }, { "epoch": 0.285966820777808, "grad_norm": 0.37109375, "learning_rate": 1.7966772462565244e-05, "loss": 0.7616, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2103 }, { "epoch": 0.2861028011966277, "grad_norm": 0.51171875, "learning_rate": 1.796390199516483e-05, "loss": 0.8564, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2104 }, { "epoch": 0.28623878161544736, "grad_norm": 0.83984375, "learning_rate": 1.796102973260353e-05, "loss": 0.8057, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2105 }, { "epoch": 0.28637476203426704, "grad_norm": 0.6484375, "learning_rate": 1.7958155675528807e-05, "loss": 0.6765, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2106 }, { "epoch": 0.2865107424530868, "grad_norm": 0.4296875, "learning_rate": 1.795527982458849e-05, "loss": 0.7568, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2107 }, { "epoch": 0.28664672287190646, "grad_norm": 0.400390625, "learning_rate": 1.7952402180430842e-05, "loss": 0.6777, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2108 }, { "epoch": 0.28678270329072614, "grad_norm": 0.32421875, "learning_rate": 1.7949522743704514e-05, "loss": 0.5923, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2109 }, { "epoch": 0.2869186837095458, "grad_norm": 0.279296875, "learning_rate": 1.794664151505857e-05, "loss": 0.5031, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2110 }, { "epoch": 0.2870546641283655, "grad_norm": 0.50390625, "learning_rate": 1.7943758495142472e-05, "loss": 0.8381, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2111 }, { "epoch": 0.2871906445471852, "grad_norm": 0.50390625, "learning_rate": 1.7940873684606085e-05, "loss": 0.728, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2112 }, { "epoch": 0.2873266249660049, "grad_norm": 0.5703125, "learning_rate": 1.793798708409969e-05, "loss": 0.7106, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2113 }, { "epoch": 0.2874626053848246, "grad_norm": 2.359375, "learning_rate": 1.793509869427395e-05, "loss": 0.695, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2114 }, { "epoch": 0.2875985858036443, "grad_norm": 0.1865234375, "learning_rate": 1.793220851577995e-05, "loss": 0.3076, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2115 }, { "epoch": 0.28773456622246396, "grad_norm": 0.62890625, "learning_rate": 1.7929316549269172e-05, "loss": 0.5889, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2116 }, { "epoch": 0.28787054664128364, "grad_norm": 0.322265625, "learning_rate": 1.7926422795393497e-05, "loss": 0.6318, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2117 }, { "epoch": 0.2880065270601033, "grad_norm": 0.59375, "learning_rate": 1.7923527254805216e-05, "loss": 0.7321, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2118 }, { "epoch": 0.28814250747892306, "grad_norm": 1.1171875, "learning_rate": 1.792062992815702e-05, "loss": 0.6712, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2119 }, { "epoch": 0.28827848789774274, "grad_norm": 0.314453125, "learning_rate": 1.7917730816102003e-05, "loss": 0.5985, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2120 }, { "epoch": 0.2884144683165624, "grad_norm": 0.416015625, "learning_rate": 1.7914829919293653e-05, "loss": 0.7243, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2121 }, { "epoch": 0.2885504487353821, "grad_norm": 0.4609375, "learning_rate": 1.791192723838587e-05, "loss": 0.7331, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2122 }, { "epoch": 0.2886864291542018, "grad_norm": 0.34375, "learning_rate": 1.790902277403296e-05, "loss": 0.7521, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2123 }, { "epoch": 0.28882240957302147, "grad_norm": 0.69140625, "learning_rate": 1.7906116526889616e-05, "loss": 0.7485, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2124 }, { "epoch": 0.2889583899918412, "grad_norm": 0.3828125, "learning_rate": 1.790320849761095e-05, "loss": 0.5706, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2125 }, { "epoch": 0.2890943704106609, "grad_norm": 0.45703125, "learning_rate": 1.790029868685246e-05, "loss": 0.7194, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2126 }, { "epoch": 0.28923035082948056, "grad_norm": 0.5078125, "learning_rate": 1.7897387095270058e-05, "loss": 0.6489, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2127 }, { "epoch": 0.28936633124830025, "grad_norm": 0.36328125, "learning_rate": 1.789447372352005e-05, "loss": 0.7067, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2128 }, { "epoch": 0.2895023116671199, "grad_norm": 0.302734375, "learning_rate": 1.7891558572259147e-05, "loss": 0.6237, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2129 }, { "epoch": 0.2896382920859396, "grad_norm": 0.3046875, "learning_rate": 1.7888641642144458e-05, "loss": 0.592, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2130 }, { "epoch": 0.2897742725047593, "grad_norm": 0.5625, "learning_rate": 1.7885722933833496e-05, "loss": 0.5734, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2131 }, { "epoch": 0.289910252923579, "grad_norm": 0.84375, "learning_rate": 1.7882802447984175e-05, "loss": 0.7729, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2132 }, { "epoch": 0.2900462333423987, "grad_norm": 0.314453125, "learning_rate": 1.7879880185254806e-05, "loss": 0.5316, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2133 }, { "epoch": 0.2901822137612184, "grad_norm": 0.3828125, "learning_rate": 1.78769561463041e-05, "loss": 0.5088, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2134 }, { "epoch": 0.29031819418003807, "grad_norm": 0.85546875, "learning_rate": 1.7874030331791178e-05, "loss": 0.8003, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2135 }, { "epoch": 0.29045417459885775, "grad_norm": 0.349609375, "learning_rate": 1.787110274237555e-05, "loss": 0.5327, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2136 }, { "epoch": 0.29059015501767743, "grad_norm": 0.4453125, "learning_rate": 1.786817337871713e-05, "loss": 0.688, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2137 }, { "epoch": 0.29072613543649717, "grad_norm": 0.2890625, "learning_rate": 1.7865242241476237e-05, "loss": 0.5137, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2138 }, { "epoch": 0.29086211585531685, "grad_norm": 0.30078125, "learning_rate": 1.7862309331313578e-05, "loss": 0.5898, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2139 }, { "epoch": 0.29099809627413653, "grad_norm": 0.443359375, "learning_rate": 1.785937464889027e-05, "loss": 0.7388, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2140 }, { "epoch": 0.2911340766929562, "grad_norm": 0.76171875, "learning_rate": 1.785643819486783e-05, "loss": 0.6136, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2141 }, { "epoch": 0.2912700571117759, "grad_norm": 0.416015625, "learning_rate": 1.7853499969908163e-05, "loss": 0.7539, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2142 }, { "epoch": 0.2914060375305956, "grad_norm": 0.2392578125, "learning_rate": 1.7850559974673588e-05, "loss": 0.4494, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2143 }, { "epoch": 0.2915420179494153, "grad_norm": 0.54296875, "learning_rate": 1.7847618209826813e-05, "loss": 0.6126, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2144 }, { "epoch": 0.291677998368235, "grad_norm": 1.1171875, "learning_rate": 1.784467467603095e-05, "loss": 0.8721, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2145 }, { "epoch": 0.2918139787870547, "grad_norm": 0.609375, "learning_rate": 1.78417293739495e-05, "loss": 0.7767, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2146 }, { "epoch": 0.29194995920587435, "grad_norm": 0.287109375, "learning_rate": 1.7838782304246378e-05, "loss": 0.5493, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2147 }, { "epoch": 0.29208593962469404, "grad_norm": 0.4296875, "learning_rate": 1.7835833467585887e-05, "loss": 0.6755, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2148 }, { "epoch": 0.2922219200435137, "grad_norm": 0.71875, "learning_rate": 1.7832882864632734e-05, "loss": 0.7454, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2149 }, { "epoch": 0.2923579004623334, "grad_norm": 0.40234375, "learning_rate": 1.7829930496052014e-05, "loss": 0.763, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2150 }, { "epoch": 0.29249388088115313, "grad_norm": 0.53515625, "learning_rate": 1.7826976362509236e-05, "loss": 0.6618, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2151 }, { "epoch": 0.2926298612999728, "grad_norm": 0.263671875, "learning_rate": 1.7824020464670286e-05, "loss": 0.5439, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2152 }, { "epoch": 0.2927658417187925, "grad_norm": 0.28125, "learning_rate": 1.782106280320147e-05, "loss": 0.4549, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2153 }, { "epoch": 0.2929018221376122, "grad_norm": 0.482421875, "learning_rate": 1.7818103378769474e-05, "loss": 0.7326, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2154 }, { "epoch": 0.29303780255643186, "grad_norm": 0.7734375, "learning_rate": 1.7815142192041393e-05, "loss": 0.5452, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2155 }, { "epoch": 0.29317378297525154, "grad_norm": 0.349609375, "learning_rate": 1.781217924368472e-05, "loss": 0.5301, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2156 }, { "epoch": 0.2933097633940713, "grad_norm": 0.625, "learning_rate": 1.7809214534367324e-05, "loss": 0.6291, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2157 }, { "epoch": 0.29344574381289096, "grad_norm": 0.33984375, "learning_rate": 1.78062480647575e-05, "loss": 0.5648, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2158 }, { "epoch": 0.29358172423171064, "grad_norm": 0.5234375, "learning_rate": 1.780327983552392e-05, "loss": 0.6283, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2159 }, { "epoch": 0.2937177046505303, "grad_norm": 0.455078125, "learning_rate": 1.7800309847335664e-05, "loss": 0.7257, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2160 }, { "epoch": 0.29385368506935, "grad_norm": 0.31640625, "learning_rate": 1.7797338100862204e-05, "loss": 0.5111, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2161 }, { "epoch": 0.2939896654881697, "grad_norm": 0.41015625, "learning_rate": 1.77943645967734e-05, "loss": 0.6553, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2162 }, { "epoch": 0.2941256459069894, "grad_norm": 0.52734375, "learning_rate": 1.779138933573952e-05, "loss": 0.4943, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2163 }, { "epoch": 0.2942616263258091, "grad_norm": 0.392578125, "learning_rate": 1.7788412318431232e-05, "loss": 0.7936, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2164 }, { "epoch": 0.2943976067446288, "grad_norm": 0.326171875, "learning_rate": 1.778543354551958e-05, "loss": 0.5815, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2165 }, { "epoch": 0.29453358716344846, "grad_norm": 0.3203125, "learning_rate": 1.7782453017676025e-05, "loss": 0.5498, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2166 }, { "epoch": 0.29466956758226814, "grad_norm": 0.4453125, "learning_rate": 1.777947073557241e-05, "loss": 0.613, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2167 }, { "epoch": 0.2948055480010878, "grad_norm": 0.341796875, "learning_rate": 1.777648669988098e-05, "loss": 0.7041, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2168 }, { "epoch": 0.2949415284199075, "grad_norm": 0.267578125, "learning_rate": 1.777350091127437e-05, "loss": 0.5096, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2169 }, { "epoch": 0.29507750883872724, "grad_norm": 0.294921875, "learning_rate": 1.7770513370425615e-05, "loss": 0.5145, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2170 }, { "epoch": 0.2952134892575469, "grad_norm": 1.3125, "learning_rate": 1.7767524078008144e-05, "loss": 0.9092, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2171 }, { "epoch": 0.2953494696763666, "grad_norm": 0.59375, "learning_rate": 1.776453303469578e-05, "loss": 0.6903, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2172 }, { "epoch": 0.2954854500951863, "grad_norm": 0.74609375, "learning_rate": 1.776154024116274e-05, "loss": 0.7041, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2173 }, { "epoch": 0.29562143051400597, "grad_norm": 0.310546875, "learning_rate": 1.7758545698083636e-05, "loss": 0.5462, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2174 }, { "epoch": 0.29575741093282565, "grad_norm": 0.41015625, "learning_rate": 1.7755549406133474e-05, "loss": 0.6986, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2175 }, { "epoch": 0.2958933913516454, "grad_norm": 0.578125, "learning_rate": 1.775255136598766e-05, "loss": 0.7378, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2176 }, { "epoch": 0.29602937177046507, "grad_norm": 0.65234375, "learning_rate": 1.774955157832198e-05, "loss": 0.5692, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2177 }, { "epoch": 0.29616535218928475, "grad_norm": 0.375, "learning_rate": 1.7746550043812627e-05, "loss": 0.5942, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2178 }, { "epoch": 0.29630133260810443, "grad_norm": 0.296875, "learning_rate": 1.7743546763136187e-05, "loss": 0.4686, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2179 }, { "epoch": 0.2964373130269241, "grad_norm": 0.34375, "learning_rate": 1.7740541736969635e-05, "loss": 0.4427, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2180 }, { "epoch": 0.2965732934457438, "grad_norm": 0.2890625, "learning_rate": 1.7737534965990336e-05, "loss": 0.5457, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2181 }, { "epoch": 0.2967092738645635, "grad_norm": 0.61328125, "learning_rate": 1.7734526450876057e-05, "loss": 0.8576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2182 }, { "epoch": 0.2968452542833832, "grad_norm": 0.50390625, "learning_rate": 1.7731516192304955e-05, "loss": 0.8071, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2183 }, { "epoch": 0.2969812347022029, "grad_norm": 0.63671875, "learning_rate": 1.7728504190955574e-05, "loss": 0.6466, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2184 }, { "epoch": 0.29711721512102257, "grad_norm": 0.40625, "learning_rate": 1.772549044750686e-05, "loss": 0.5923, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2185 }, { "epoch": 0.29725319553984225, "grad_norm": 0.40625, "learning_rate": 1.772247496263815e-05, "loss": 0.5275, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2186 }, { "epoch": 0.29738917595866193, "grad_norm": 0.37890625, "learning_rate": 1.7719457737029162e-05, "loss": 0.6652, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2187 }, { "epoch": 0.29752515637748167, "grad_norm": 0.6328125, "learning_rate": 1.7716438771360023e-05, "loss": 0.7243, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2188 }, { "epoch": 0.29766113679630135, "grad_norm": 0.34375, "learning_rate": 1.7713418066311243e-05, "loss": 0.6045, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2189 }, { "epoch": 0.29779711721512103, "grad_norm": 0.3671875, "learning_rate": 1.7710395622563726e-05, "loss": 0.6453, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2190 }, { "epoch": 0.2979330976339407, "grad_norm": 0.453125, "learning_rate": 1.770737144079877e-05, "loss": 0.6636, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2191 }, { "epoch": 0.2980690780527604, "grad_norm": 24.375, "learning_rate": 1.7704345521698057e-05, "loss": 0.9497, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2192 }, { "epoch": 0.2982050584715801, "grad_norm": 0.5859375, "learning_rate": 1.7701317865943675e-05, "loss": 0.9126, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2193 }, { "epoch": 0.29834103889039976, "grad_norm": 0.41015625, "learning_rate": 1.7698288474218087e-05, "loss": 0.6725, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2194 }, { "epoch": 0.2984770193092195, "grad_norm": 0.283203125, "learning_rate": 1.7695257347204157e-05, "loss": 0.6405, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2195 }, { "epoch": 0.2986129997280392, "grad_norm": 0.40625, "learning_rate": 1.7692224485585136e-05, "loss": 0.5379, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2196 }, { "epoch": 0.29874898014685886, "grad_norm": 0.44140625, "learning_rate": 1.768918989004467e-05, "loss": 0.5996, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2197 }, { "epoch": 0.29888496056567854, "grad_norm": 0.875, "learning_rate": 1.7686153561266794e-05, "loss": 0.7733, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2198 }, { "epoch": 0.2990209409844982, "grad_norm": 0.53515625, "learning_rate": 1.7683115499935936e-05, "loss": 0.6276, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2199 }, { "epoch": 0.2991569214033179, "grad_norm": 0.3125, "learning_rate": 1.768007570673691e-05, "loss": 0.6169, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2200 }, { "epoch": 0.29929290182213764, "grad_norm": 0.62890625, "learning_rate": 1.767703418235492e-05, "loss": 0.7487, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2201 }, { "epoch": 0.2994288822409573, "grad_norm": 0.466796875, "learning_rate": 1.7673990927475564e-05, "loss": 0.6548, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2202 }, { "epoch": 0.299564862659777, "grad_norm": 0.890625, "learning_rate": 1.7670945942784835e-05, "loss": 0.6946, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2203 }, { "epoch": 0.2997008430785967, "grad_norm": 0.421875, "learning_rate": 1.7667899228969104e-05, "loss": 0.6466, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2204 }, { "epoch": 0.29983682349741636, "grad_norm": 0.359375, "learning_rate": 1.766485078671514e-05, "loss": 0.5726, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2205 }, { "epoch": 0.29997280391623604, "grad_norm": 0.298828125, "learning_rate": 1.766180061671009e-05, "loss": 0.5871, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2206 }, { "epoch": 0.3001087843350558, "grad_norm": 0.306640625, "learning_rate": 1.7658748719641515e-05, "loss": 0.5509, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2207 }, { "epoch": 0.30024476475387546, "grad_norm": 0.412109375, "learning_rate": 1.765569509619734e-05, "loss": 0.68, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2208 }, { "epoch": 0.30038074517269514, "grad_norm": 0.7578125, "learning_rate": 1.7652639747065886e-05, "loss": 0.5027, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2209 }, { "epoch": 0.3005167255915148, "grad_norm": 0.453125, "learning_rate": 1.7649582672935878e-05, "loss": 0.6522, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2210 }, { "epoch": 0.3006527060103345, "grad_norm": 0.7890625, "learning_rate": 1.764652387449641e-05, "loss": 0.5099, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2211 }, { "epoch": 0.3007886864291542, "grad_norm": 0.40625, "learning_rate": 1.764346335243697e-05, "loss": 0.6177, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2212 }, { "epoch": 0.30092466684797387, "grad_norm": 0.6171875, "learning_rate": 1.764040110744744e-05, "loss": 0.7062, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2213 }, { "epoch": 0.3010606472667936, "grad_norm": 0.41796875, "learning_rate": 1.7637337140218094e-05, "loss": 0.7668, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2214 }, { "epoch": 0.3011966276856133, "grad_norm": 0.5546875, "learning_rate": 1.763427145143958e-05, "loss": 0.9679, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2215 }, { "epoch": 0.30133260810443296, "grad_norm": 0.376953125, "learning_rate": 1.7631204041802942e-05, "loss": 0.6302, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2216 }, { "epoch": 0.30146858852325265, "grad_norm": 0.4453125, "learning_rate": 1.7628134911999613e-05, "loss": 0.6004, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2217 }, { "epoch": 0.3016045689420723, "grad_norm": 0.283203125, "learning_rate": 1.7625064062721414e-05, "loss": 0.5106, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2218 }, { "epoch": 0.301740549360892, "grad_norm": 0.326171875, "learning_rate": 1.762199149466055e-05, "loss": 0.5798, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2219 }, { "epoch": 0.30187652977971174, "grad_norm": 10.1875, "learning_rate": 1.7618917208509616e-05, "loss": 0.8766, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2220 }, { "epoch": 0.3020125101985314, "grad_norm": 0.291015625, "learning_rate": 1.7615841204961594e-05, "loss": 0.4756, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2221 }, { "epoch": 0.3021484906173511, "grad_norm": 0.43359375, "learning_rate": 1.7612763484709855e-05, "loss": 0.6502, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2222 }, { "epoch": 0.3022844710361708, "grad_norm": 0.478515625, "learning_rate": 1.7609684048448148e-05, "loss": 0.7244, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2223 }, { "epoch": 0.30242045145499047, "grad_norm": 0.6171875, "learning_rate": 1.7606602896870625e-05, "loss": 0.9297, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2224 }, { "epoch": 0.30255643187381015, "grad_norm": 0.314453125, "learning_rate": 1.7603520030671805e-05, "loss": 0.6444, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2225 }, { "epoch": 0.3026924122926299, "grad_norm": 0.34765625, "learning_rate": 1.760043545054661e-05, "loss": 0.7231, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2226 }, { "epoch": 0.30282839271144957, "grad_norm": 0.369140625, "learning_rate": 1.759734915719034e-05, "loss": 0.579, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2227 }, { "epoch": 0.30296437313026925, "grad_norm": 0.86328125, "learning_rate": 1.7594261151298687e-05, "loss": 0.9325, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2228 }, { "epoch": 0.30310035354908893, "grad_norm": 0.3125, "learning_rate": 1.759117143356772e-05, "loss": 0.5729, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2229 }, { "epoch": 0.3032363339679086, "grad_norm": 0.40625, "learning_rate": 1.7588080004693905e-05, "loss": 0.6753, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2230 }, { "epoch": 0.3033723143867283, "grad_norm": 0.46484375, "learning_rate": 1.7584986865374084e-05, "loss": 0.7067, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2231 }, { "epoch": 0.303508294805548, "grad_norm": 0.412109375, "learning_rate": 1.7581892016305486e-05, "loss": 0.6842, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2232 }, { "epoch": 0.3036442752243677, "grad_norm": 0.302734375, "learning_rate": 1.757879545818573e-05, "loss": 0.571, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2233 }, { "epoch": 0.3037802556431874, "grad_norm": 0.53515625, "learning_rate": 1.7575697191712823e-05, "loss": 0.519, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2234 }, { "epoch": 0.3039162360620071, "grad_norm": 0.44140625, "learning_rate": 1.7572597217585146e-05, "loss": 0.6558, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2235 }, { "epoch": 0.30405221648082675, "grad_norm": 0.376953125, "learning_rate": 1.756949553650147e-05, "loss": 0.8545, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2236 }, { "epoch": 0.30418819689964643, "grad_norm": 0.40625, "learning_rate": 1.756639214916096e-05, "loss": 0.5576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2237 }, { "epoch": 0.3043241773184661, "grad_norm": 0.3359375, "learning_rate": 1.7563287056263147e-05, "loss": 0.5124, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2238 }, { "epoch": 0.30446015773728585, "grad_norm": 0.29296875, "learning_rate": 1.7560180258507968e-05, "loss": 0.529, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2239 }, { "epoch": 0.30459613815610553, "grad_norm": 0.53125, "learning_rate": 1.7557071756595723e-05, "loss": 0.6012, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2240 }, { "epoch": 0.3047321185749252, "grad_norm": 0.390625, "learning_rate": 1.7553961551227112e-05, "loss": 0.6462, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2241 }, { "epoch": 0.3048680989937449, "grad_norm": 0.427734375, "learning_rate": 1.755084964310321e-05, "loss": 0.8926, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2242 }, { "epoch": 0.3050040794125646, "grad_norm": 0.373046875, "learning_rate": 1.7547736032925485e-05, "loss": 0.7407, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2243 }, { "epoch": 0.30514005983138426, "grad_norm": 0.349609375, "learning_rate": 1.754462072139578e-05, "loss": 0.6595, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2244 }, { "epoch": 0.305276040250204, "grad_norm": 0.54296875, "learning_rate": 1.754150370921632e-05, "loss": 0.8905, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2245 }, { "epoch": 0.3054120206690237, "grad_norm": 1.140625, "learning_rate": 1.753838499708972e-05, "loss": 0.6564, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2246 }, { "epoch": 0.30554800108784336, "grad_norm": 0.423828125, "learning_rate": 1.753526458571898e-05, "loss": 0.5496, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2247 }, { "epoch": 0.30568398150666304, "grad_norm": 0.58203125, "learning_rate": 1.753214247580747e-05, "loss": 0.4417, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2248 }, { "epoch": 0.3058199619254827, "grad_norm": 0.625, "learning_rate": 1.7529018668058962e-05, "loss": 0.6899, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2249 }, { "epoch": 0.3059559423443024, "grad_norm": 0.453125, "learning_rate": 1.7525893163177596e-05, "loss": 0.7827, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2250 }, { "epoch": 0.30609192276312214, "grad_norm": 0.765625, "learning_rate": 1.7522765961867896e-05, "loss": 0.7653, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2251 }, { "epoch": 0.3062279031819418, "grad_norm": 0.326171875, "learning_rate": 1.7519637064834777e-05, "loss": 0.5096, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2252 }, { "epoch": 0.3063638836007615, "grad_norm": 0.447265625, "learning_rate": 1.7516506472783525e-05, "loss": 0.5884, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2253 }, { "epoch": 0.3064998640195812, "grad_norm": 0.5234375, "learning_rate": 1.7513374186419822e-05, "loss": 0.8604, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2254 }, { "epoch": 0.30663584443840086, "grad_norm": 0.3359375, "learning_rate": 1.7510240206449712e-05, "loss": 0.5282, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2255 }, { "epoch": 0.30677182485722054, "grad_norm": 0.6953125, "learning_rate": 1.750710453357964e-05, "loss": 0.8735, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2256 }, { "epoch": 0.3069078052760402, "grad_norm": 0.435546875, "learning_rate": 1.7503967168516426e-05, "loss": 0.6751, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2257 }, { "epoch": 0.30704378569485996, "grad_norm": 0.4453125, "learning_rate": 1.7500828111967266e-05, "loss": 0.7193, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2258 }, { "epoch": 0.30717976611367964, "grad_norm": 0.330078125, "learning_rate": 1.7497687364639747e-05, "loss": 0.5326, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2259 }, { "epoch": 0.3073157465324993, "grad_norm": 0.376953125, "learning_rate": 1.7494544927241828e-05, "loss": 0.6335, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2260 }, { "epoch": 0.307451726951319, "grad_norm": 0.50390625, "learning_rate": 1.7491400800481853e-05, "loss": 0.7196, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2261 }, { "epoch": 0.3075877073701387, "grad_norm": 0.3984375, "learning_rate": 1.7488254985068547e-05, "loss": 0.6906, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2262 }, { "epoch": 0.30772368778895837, "grad_norm": 0.33203125, "learning_rate": 1.7485107481711014e-05, "loss": 0.4969, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2263 }, { "epoch": 0.3078596682077781, "grad_norm": 0.65234375, "learning_rate": 1.748195829111874e-05, "loss": 0.6624, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2264 }, { "epoch": 0.3079956486265978, "grad_norm": 0.5703125, "learning_rate": 1.7478807414001595e-05, "loss": 0.8815, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2265 }, { "epoch": 0.30813162904541747, "grad_norm": 0.396484375, "learning_rate": 1.7475654851069822e-05, "loss": 0.659, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2266 }, { "epoch": 0.30826760946423715, "grad_norm": 0.275390625, "learning_rate": 1.7472500603034047e-05, "loss": 0.5151, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2267 }, { "epoch": 0.30840358988305683, "grad_norm": 0.3203125, "learning_rate": 1.7469344670605274e-05, "loss": 0.5885, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2268 }, { "epoch": 0.3085395703018765, "grad_norm": 0.349609375, "learning_rate": 1.7466187054494895e-05, "loss": 0.5451, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2269 }, { "epoch": 0.30867555072069625, "grad_norm": 0.4609375, "learning_rate": 1.746302775541467e-05, "loss": 0.4784, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2270 }, { "epoch": 0.3088115311395159, "grad_norm": 0.388671875, "learning_rate": 1.7459866774076748e-05, "loss": 0.6799, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2271 }, { "epoch": 0.3089475115583356, "grad_norm": 0.640625, "learning_rate": 1.745670411119365e-05, "loss": 0.5946, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2272 }, { "epoch": 0.3090834919771553, "grad_norm": 0.6328125, "learning_rate": 1.7453539767478276e-05, "loss": 0.7637, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2273 }, { "epoch": 0.30921947239597497, "grad_norm": 0.671875, "learning_rate": 1.745037374364391e-05, "loss": 0.8657, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2274 }, { "epoch": 0.30935545281479465, "grad_norm": 0.2353515625, "learning_rate": 1.7447206040404218e-05, "loss": 0.4271, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2275 }, { "epoch": 0.30949143323361433, "grad_norm": 0.5625, "learning_rate": 1.744403665847323e-05, "loss": 0.7925, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2276 }, { "epoch": 0.30962741365243407, "grad_norm": 0.25, "learning_rate": 1.7440865598565372e-05, "loss": 0.5208, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2277 }, { "epoch": 0.30976339407125375, "grad_norm": 0.41796875, "learning_rate": 1.7437692861395433e-05, "loss": 0.6481, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2278 }, { "epoch": 0.30989937449007343, "grad_norm": 0.2353515625, "learning_rate": 1.7434518447678586e-05, "loss": 0.4157, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2279 }, { "epoch": 0.3100353549088931, "grad_norm": 0.423828125, "learning_rate": 1.7431342358130394e-05, "loss": 0.6346, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2280 }, { "epoch": 0.3101713353277128, "grad_norm": 0.84765625, "learning_rate": 1.7428164593466772e-05, "loss": 0.6688, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2281 }, { "epoch": 0.3103073157465325, "grad_norm": 0.5390625, "learning_rate": 1.7424985154404036e-05, "loss": 0.6909, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2282 }, { "epoch": 0.3104432961653522, "grad_norm": 0.32421875, "learning_rate": 1.7421804041658867e-05, "loss": 0.493, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2283 }, { "epoch": 0.3105792765841719, "grad_norm": 0.326171875, "learning_rate": 1.7418621255948325e-05, "loss": 0.4271, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2284 }, { "epoch": 0.3107152570029916, "grad_norm": 1.296875, "learning_rate": 1.7415436797989856e-05, "loss": 0.7603, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2285 }, { "epoch": 0.31085123742181126, "grad_norm": 0.859375, "learning_rate": 1.7412250668501268e-05, "loss": 0.6764, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2286 }, { "epoch": 0.31098721784063094, "grad_norm": 0.3828125, "learning_rate": 1.7409062868200756e-05, "loss": 0.7093, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2287 }, { "epoch": 0.3111231982594506, "grad_norm": 0.375, "learning_rate": 1.740587339780689e-05, "loss": 0.5951, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2288 }, { "epoch": 0.31125917867827035, "grad_norm": 0.392578125, "learning_rate": 1.7402682258038615e-05, "loss": 0.6222, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2289 }, { "epoch": 0.31139515909709004, "grad_norm": 0.283203125, "learning_rate": 1.7399489449615258e-05, "loss": 0.5485, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2290 }, { "epoch": 0.3115311395159097, "grad_norm": 0.466796875, "learning_rate": 1.7396294973256508e-05, "loss": 0.8643, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2291 }, { "epoch": 0.3116671199347294, "grad_norm": 0.3828125, "learning_rate": 1.7393098829682445e-05, "loss": 0.6074, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2292 }, { "epoch": 0.3118031003535491, "grad_norm": 0.337890625, "learning_rate": 1.7389901019613512e-05, "loss": 0.439, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2293 }, { "epoch": 0.31193908077236876, "grad_norm": 0.359375, "learning_rate": 1.7386701543770544e-05, "loss": 0.5749, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2294 }, { "epoch": 0.3120750611911885, "grad_norm": 0.4140625, "learning_rate": 1.738350040287474e-05, "loss": 0.6035, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2295 }, { "epoch": 0.3122110416100082, "grad_norm": 0.423828125, "learning_rate": 1.738029759764767e-05, "loss": 0.694, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2296 }, { "epoch": 0.31234702202882786, "grad_norm": 0.48046875, "learning_rate": 1.737709312881129e-05, "loss": 0.7967, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2297 }, { "epoch": 0.31248300244764754, "grad_norm": 0.482421875, "learning_rate": 1.7373886997087925e-05, "loss": 0.7596, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2298 }, { "epoch": 0.3126189828664672, "grad_norm": 0.359375, "learning_rate": 1.737067920320028e-05, "loss": 0.6395, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2299 }, { "epoch": 0.3127549632852869, "grad_norm": 0.50390625, "learning_rate": 1.7367469747871428e-05, "loss": 0.7949, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2300 }, { "epoch": 0.3128909437041066, "grad_norm": 0.263671875, "learning_rate": 1.7364258631824817e-05, "loss": 0.5312, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2301 }, { "epoch": 0.3130269241229263, "grad_norm": 0.3359375, "learning_rate": 1.7361045855784277e-05, "loss": 0.5137, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2302 }, { "epoch": 0.313162904541746, "grad_norm": 0.291015625, "learning_rate": 1.7357831420474002e-05, "loss": 0.5044, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2303 }, { "epoch": 0.3132988849605657, "grad_norm": 0.55859375, "learning_rate": 1.735461532661857e-05, "loss": 0.6123, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2304 }, { "epoch": 0.31343486537938536, "grad_norm": 0.314453125, "learning_rate": 1.735139757494292e-05, "loss": 0.3621, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2305 }, { "epoch": 0.31357084579820504, "grad_norm": 0.60546875, "learning_rate": 1.7348178166172385e-05, "loss": 0.6084, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2306 }, { "epoch": 0.3137068262170247, "grad_norm": 0.458984375, "learning_rate": 1.7344957101032646e-05, "loss": 0.842, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2307 }, { "epoch": 0.31384280663584446, "grad_norm": 0.5546875, "learning_rate": 1.734173438024978e-05, "loss": 0.5597, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2308 }, { "epoch": 0.31397878705466414, "grad_norm": 0.3046875, "learning_rate": 1.7338510004550225e-05, "loss": 0.6029, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2309 }, { "epoch": 0.3141147674734838, "grad_norm": 0.244140625, "learning_rate": 1.7335283974660792e-05, "loss": 0.4443, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2310 }, { "epoch": 0.3142507478923035, "grad_norm": 0.30859375, "learning_rate": 1.7332056291308667e-05, "loss": 0.4058, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2311 }, { "epoch": 0.3143867283111232, "grad_norm": 0.30859375, "learning_rate": 1.7328826955221413e-05, "loss": 0.5728, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2312 }, { "epoch": 0.31452270872994287, "grad_norm": 0.375, "learning_rate": 1.732559596712696e-05, "loss": 0.7783, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2313 }, { "epoch": 0.3146586891487626, "grad_norm": 0.37890625, "learning_rate": 1.7322363327753616e-05, "loss": 0.6107, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2314 }, { "epoch": 0.3147946695675823, "grad_norm": 0.361328125, "learning_rate": 1.7319129037830054e-05, "loss": 0.5576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2315 }, { "epoch": 0.31493064998640197, "grad_norm": 0.375, "learning_rate": 1.731589309808532e-05, "loss": 0.6574, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2316 }, { "epoch": 0.31506663040522165, "grad_norm": 0.333984375, "learning_rate": 1.7312655509248837e-05, "loss": 0.6323, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2317 }, { "epoch": 0.31520261082404133, "grad_norm": 0.427734375, "learning_rate": 1.7309416272050395e-05, "loss": 0.5799, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2318 }, { "epoch": 0.315338591242861, "grad_norm": 0.427734375, "learning_rate": 1.7306175387220165e-05, "loss": 0.7847, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2319 }, { "epoch": 0.3154745716616807, "grad_norm": 0.484375, "learning_rate": 1.7302932855488675e-05, "loss": 0.7061, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2320 }, { "epoch": 0.31561055208050043, "grad_norm": 0.65625, "learning_rate": 1.7299688677586832e-05, "loss": 0.8216, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2321 }, { "epoch": 0.3157465324993201, "grad_norm": 0.46875, "learning_rate": 1.7296442854245915e-05, "loss": 0.5907, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2322 }, { "epoch": 0.3158825129181398, "grad_norm": 0.44921875, "learning_rate": 1.7293195386197573e-05, "loss": 0.6371, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2323 }, { "epoch": 0.31601849333695947, "grad_norm": 0.7265625, "learning_rate": 1.728994627417382e-05, "loss": 0.7026, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2324 }, { "epoch": 0.31615447375577915, "grad_norm": 1.546875, "learning_rate": 1.7286695518907052e-05, "loss": 0.7371, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2325 }, { "epoch": 0.31629045417459883, "grad_norm": 3.15625, "learning_rate": 1.7283443121130027e-05, "loss": 0.792, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2326 }, { "epoch": 0.31642643459341857, "grad_norm": 0.40625, "learning_rate": 1.7280189081575874e-05, "loss": 0.6868, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2327 }, { "epoch": 0.31656241501223825, "grad_norm": 0.345703125, "learning_rate": 1.727693340097809e-05, "loss": 0.6086, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2328 }, { "epoch": 0.31669839543105793, "grad_norm": 0.3359375, "learning_rate": 1.7273676080070554e-05, "loss": 0.5793, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2329 }, { "epoch": 0.3168343758498776, "grad_norm": 0.322265625, "learning_rate": 1.7270417119587503e-05, "loss": 0.5441, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2330 }, { "epoch": 0.3169703562686973, "grad_norm": 1.359375, "learning_rate": 1.726715652026354e-05, "loss": 0.9119, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2331 }, { "epoch": 0.317106336687517, "grad_norm": 0.34765625, "learning_rate": 1.726389428283365e-05, "loss": 0.6991, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2332 }, { "epoch": 0.3172423171063367, "grad_norm": 0.703125, "learning_rate": 1.7260630408033176e-05, "loss": 0.8462, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2333 }, { "epoch": 0.3173782975251564, "grad_norm": 0.46484375, "learning_rate": 1.7257364896597842e-05, "loss": 0.8228, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2334 }, { "epoch": 0.3175142779439761, "grad_norm": 0.546875, "learning_rate": 1.7254097749263735e-05, "loss": 0.7017, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2335 }, { "epoch": 0.31765025836279576, "grad_norm": 0.4375, "learning_rate": 1.7250828966767303e-05, "loss": 0.5753, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2336 }, { "epoch": 0.31778623878161544, "grad_norm": 0.55078125, "learning_rate": 1.7247558549845372e-05, "loss": 0.5737, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2337 }, { "epoch": 0.3179222192004351, "grad_norm": 0.37890625, "learning_rate": 1.7244286499235136e-05, "loss": 0.7391, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2338 }, { "epoch": 0.3180581996192548, "grad_norm": 0.671875, "learning_rate": 1.724101281567415e-05, "loss": 0.6374, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2339 }, { "epoch": 0.31819418003807454, "grad_norm": 0.37109375, "learning_rate": 1.723773749990035e-05, "loss": 0.7876, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2340 }, { "epoch": 0.3183301604568942, "grad_norm": 0.302734375, "learning_rate": 1.7234460552652026e-05, "loss": 0.6221, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2341 }, { "epoch": 0.3184661408757139, "grad_norm": 0.294921875, "learning_rate": 1.7231181974667846e-05, "loss": 0.5804, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2342 }, { "epoch": 0.3186021212945336, "grad_norm": 0.44921875, "learning_rate": 1.7227901766686842e-05, "loss": 0.6864, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2343 }, { "epoch": 0.31873810171335326, "grad_norm": 0.349609375, "learning_rate": 1.7224619929448406e-05, "loss": 0.5566, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2344 }, { "epoch": 0.31887408213217294, "grad_norm": 0.31640625, "learning_rate": 1.722133646369231e-05, "loss": 0.431, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2345 }, { "epoch": 0.3190100625509927, "grad_norm": 0.34375, "learning_rate": 1.721805137015869e-05, "loss": 0.5804, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2346 }, { "epoch": 0.31914604296981236, "grad_norm": 0.87109375, "learning_rate": 1.721476464958804e-05, "loss": 0.6787, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2347 }, { "epoch": 0.31928202338863204, "grad_norm": 0.451171875, "learning_rate": 1.721147630272123e-05, "loss": 0.9093, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2348 }, { "epoch": 0.3194180038074517, "grad_norm": 0.400390625, "learning_rate": 1.7208186330299496e-05, "loss": 0.6087, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2349 }, { "epoch": 0.3195539842262714, "grad_norm": 0.25390625, "learning_rate": 1.7204894733064433e-05, "loss": 0.4141, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2350 }, { "epoch": 0.3196899646450911, "grad_norm": 0.69921875, "learning_rate": 1.720160151175801e-05, "loss": 0.7166, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2351 }, { "epoch": 0.3198259450639108, "grad_norm": 0.3203125, "learning_rate": 1.7198306667122557e-05, "loss": 0.5381, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2352 }, { "epoch": 0.3199619254827305, "grad_norm": 0.353515625, "learning_rate": 1.7195010199900773e-05, "loss": 0.5728, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2353 }, { "epoch": 0.3200979059015502, "grad_norm": 0.578125, "learning_rate": 1.7191712110835728e-05, "loss": 0.6512, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2354 }, { "epoch": 0.32023388632036986, "grad_norm": 0.291015625, "learning_rate": 1.7188412400670846e-05, "loss": 0.543, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2355 }, { "epoch": 0.32036986673918955, "grad_norm": 0.357421875, "learning_rate": 1.718511107014992e-05, "loss": 0.6255, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2356 }, { "epoch": 0.3205058471580092, "grad_norm": 0.953125, "learning_rate": 1.7181808120017115e-05, "loss": 0.5781, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2357 }, { "epoch": 0.32064182757682896, "grad_norm": 0.32421875, "learning_rate": 1.717850355101695e-05, "loss": 0.5679, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2358 }, { "epoch": 0.32077780799564865, "grad_norm": 0.3125, "learning_rate": 1.7175197363894326e-05, "loss": 0.5755, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2359 }, { "epoch": 0.3209137884144683, "grad_norm": 0.416015625, "learning_rate": 1.7171889559394488e-05, "loss": 0.6462, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2360 }, { "epoch": 0.321049768833288, "grad_norm": 0.458984375, "learning_rate": 1.7168580138263064e-05, "loss": 0.8636, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2361 }, { "epoch": 0.3211857492521077, "grad_norm": 0.390625, "learning_rate": 1.7165269101246026e-05, "loss": 0.7454, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2362 }, { "epoch": 0.32132172967092737, "grad_norm": 0.8984375, "learning_rate": 1.7161956449089736e-05, "loss": 0.7472, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2363 }, { "epoch": 0.32145771008974705, "grad_norm": 0.287109375, "learning_rate": 1.7158642182540896e-05, "loss": 0.4388, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2364 }, { "epoch": 0.3215936905085668, "grad_norm": 0.37109375, "learning_rate": 1.7155326302346584e-05, "loss": 0.6234, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2365 }, { "epoch": 0.32172967092738647, "grad_norm": 0.359375, "learning_rate": 1.7152008809254247e-05, "loss": 0.6182, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2366 }, { "epoch": 0.32186565134620615, "grad_norm": 0.443359375, "learning_rate": 1.7148689704011682e-05, "loss": 0.8507, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2367 }, { "epoch": 0.32200163176502583, "grad_norm": 0.24609375, "learning_rate": 1.714536898736705e-05, "loss": 0.4012, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2368 }, { "epoch": 0.3221376121838455, "grad_norm": 0.416015625, "learning_rate": 1.7142046660068898e-05, "loss": 0.5487, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2369 }, { "epoch": 0.3222735926026652, "grad_norm": 0.38671875, "learning_rate": 1.7138722722866106e-05, "loss": 0.7354, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2370 }, { "epoch": 0.32240957302148493, "grad_norm": 0.55859375, "learning_rate": 1.7135397176507935e-05, "loss": 0.563, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2371 }, { "epoch": 0.3225455534403046, "grad_norm": 0.96875, "learning_rate": 1.7132070021744e-05, "loss": 0.8333, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2372 }, { "epoch": 0.3226815338591243, "grad_norm": 0.298828125, "learning_rate": 1.7128741259324283e-05, "loss": 0.4261, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2373 }, { "epoch": 0.322817514277944, "grad_norm": 0.427734375, "learning_rate": 1.7125410889999135e-05, "loss": 0.7869, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2374 }, { "epoch": 0.32295349469676365, "grad_norm": 0.35546875, "learning_rate": 1.7122078914519257e-05, "loss": 0.5627, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2375 }, { "epoch": 0.32308947511558334, "grad_norm": 0.353515625, "learning_rate": 1.7118745333635713e-05, "loss": 0.6208, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2376 }, { "epoch": 0.32322545553440307, "grad_norm": 0.5234375, "learning_rate": 1.7115410148099933e-05, "loss": 0.7842, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2377 }, { "epoch": 0.32336143595322275, "grad_norm": 0.423828125, "learning_rate": 1.7112073358663716e-05, "loss": 0.8057, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2378 }, { "epoch": 0.32349741637204243, "grad_norm": 0.455078125, "learning_rate": 1.7108734966079214e-05, "loss": 0.6276, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2379 }, { "epoch": 0.3236333967908621, "grad_norm": 0.416015625, "learning_rate": 1.7105394971098938e-05, "loss": 0.6134, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2380 }, { "epoch": 0.3237693772096818, "grad_norm": 0.361328125, "learning_rate": 1.710205337447576e-05, "loss": 0.4193, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2381 }, { "epoch": 0.3239053576285015, "grad_norm": 0.6875, "learning_rate": 1.7098710176962923e-05, "loss": 0.4982, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2382 }, { "epoch": 0.32404133804732116, "grad_norm": 0.380859375, "learning_rate": 1.7095365379314025e-05, "loss": 0.617, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2383 }, { "epoch": 0.3241773184661409, "grad_norm": 0.228515625, "learning_rate": 1.709201898228302e-05, "loss": 0.4136, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2384 }, { "epoch": 0.3243132988849606, "grad_norm": 0.703125, "learning_rate": 1.708867098662423e-05, "loss": 0.6724, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2385 }, { "epoch": 0.32444927930378026, "grad_norm": 0.46484375, "learning_rate": 1.7085321393092332e-05, "loss": 0.8628, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2386 }, { "epoch": 0.32458525972259994, "grad_norm": 0.32421875, "learning_rate": 1.7081970202442363e-05, "loss": 0.5786, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2387 }, { "epoch": 0.3247212401414196, "grad_norm": 0.30859375, "learning_rate": 1.707861741542973e-05, "loss": 0.6346, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2388 }, { "epoch": 0.3248572205602393, "grad_norm": 0.60546875, "learning_rate": 1.7075263032810188e-05, "loss": 0.745, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2389 }, { "epoch": 0.32499320097905904, "grad_norm": 0.279296875, "learning_rate": 1.7071907055339855e-05, "loss": 0.4753, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2390 }, { "epoch": 0.3251291813978787, "grad_norm": 0.51953125, "learning_rate": 1.706854948377521e-05, "loss": 0.5228, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2391 }, { "epoch": 0.3252651618166984, "grad_norm": 0.271484375, "learning_rate": 1.706519031887309e-05, "loss": 0.5309, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2392 }, { "epoch": 0.3254011422355181, "grad_norm": 0.31640625, "learning_rate": 1.7061829561390692e-05, "loss": 0.6768, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2393 }, { "epoch": 0.32553712265433776, "grad_norm": 0.52734375, "learning_rate": 1.7058467212085574e-05, "loss": 0.8018, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2394 }, { "epoch": 0.32567310307315744, "grad_norm": 0.46875, "learning_rate": 1.7055103271715647e-05, "loss": 0.707, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2395 }, { "epoch": 0.3258090834919772, "grad_norm": 0.357421875, "learning_rate": 1.7051737741039187e-05, "loss": 0.6274, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2396 }, { "epoch": 0.32594506391079686, "grad_norm": 0.373046875, "learning_rate": 1.7048370620814827e-05, "loss": 0.6458, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2397 }, { "epoch": 0.32608104432961654, "grad_norm": 0.3203125, "learning_rate": 1.704500191180155e-05, "loss": 0.6076, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2398 }, { "epoch": 0.3262170247484362, "grad_norm": 0.353515625, "learning_rate": 1.7041631614758716e-05, "loss": 0.6414, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2399 }, { "epoch": 0.3263530051672559, "grad_norm": 0.328125, "learning_rate": 1.703825973044602e-05, "loss": 0.6787, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2400 }, { "epoch": 0.3264889855860756, "grad_norm": 0.60546875, "learning_rate": 1.7034886259623534e-05, "loss": 0.6296, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2401 }, { "epoch": 0.32662496600489527, "grad_norm": 0.3828125, "learning_rate": 1.7031511203051675e-05, "loss": 0.6807, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2402 }, { "epoch": 0.326760946423715, "grad_norm": 0.2353515625, "learning_rate": 1.7028134561491222e-05, "loss": 0.4357, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2403 }, { "epoch": 0.3268969268425347, "grad_norm": 0.3359375, "learning_rate": 1.702475633570331e-05, "loss": 0.6722, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2404 }, { "epoch": 0.32703290726135437, "grad_norm": 0.484375, "learning_rate": 1.702137652644944e-05, "loss": 0.7891, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2405 }, { "epoch": 0.32716888768017405, "grad_norm": 0.53515625, "learning_rate": 1.701799513449145e-05, "loss": 0.8117, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2406 }, { "epoch": 0.32730486809899373, "grad_norm": 0.326171875, "learning_rate": 1.7014612160591556e-05, "loss": 0.5703, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2407 }, { "epoch": 0.3274408485178134, "grad_norm": 0.404296875, "learning_rate": 1.7011227605512325e-05, "loss": 0.7074, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2408 }, { "epoch": 0.32757682893663315, "grad_norm": 0.625, "learning_rate": 1.7007841470016667e-05, "loss": 0.6654, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2409 }, { "epoch": 0.3277128093554528, "grad_norm": 0.54296875, "learning_rate": 1.7004453754867866e-05, "loss": 0.3226, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2410 }, { "epoch": 0.3278487897742725, "grad_norm": 0.357421875, "learning_rate": 1.700106446082955e-05, "loss": 0.5833, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2411 }, { "epoch": 0.3279847701930922, "grad_norm": 2.328125, "learning_rate": 1.699767358866571e-05, "loss": 0.8271, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2412 }, { "epoch": 0.32812075061191187, "grad_norm": 0.8828125, "learning_rate": 1.699428113914069e-05, "loss": 0.8468, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2413 }, { "epoch": 0.32825673103073155, "grad_norm": 0.71875, "learning_rate": 1.6990887113019184e-05, "loss": 0.7345, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2414 }, { "epoch": 0.3283927114495513, "grad_norm": 0.427734375, "learning_rate": 1.6987491511066258e-05, "loss": 0.6855, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2415 }, { "epoch": 0.32852869186837097, "grad_norm": 0.2734375, "learning_rate": 1.6984094334047313e-05, "loss": 0.4523, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2416 }, { "epoch": 0.32866467228719065, "grad_norm": 0.275390625, "learning_rate": 1.6980695582728115e-05, "loss": 0.5337, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2417 }, { "epoch": 0.32880065270601033, "grad_norm": 0.333984375, "learning_rate": 1.6977295257874787e-05, "loss": 0.5483, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2418 }, { "epoch": 0.32893663312483, "grad_norm": 0.3203125, "learning_rate": 1.6973893360253807e-05, "loss": 0.6646, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2419 }, { "epoch": 0.3290726135436497, "grad_norm": 0.375, "learning_rate": 1.6970489890631996e-05, "loss": 0.6457, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2420 }, { "epoch": 0.32920859396246943, "grad_norm": 0.55859375, "learning_rate": 1.6967084849776543e-05, "loss": 0.7974, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2421 }, { "epoch": 0.3293445743812891, "grad_norm": 0.30859375, "learning_rate": 1.6963678238454982e-05, "loss": 0.6608, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2422 }, { "epoch": 0.3294805548001088, "grad_norm": 0.7890625, "learning_rate": 1.6960270057435212e-05, "loss": 0.6769, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2423 }, { "epoch": 0.3296165352189285, "grad_norm": 0.373046875, "learning_rate": 1.6956860307485473e-05, "loss": 0.6502, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2424 }, { "epoch": 0.32975251563774816, "grad_norm": 0.4375, "learning_rate": 1.6953448989374365e-05, "loss": 0.5676, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2425 }, { "epoch": 0.32988849605656784, "grad_norm": 0.291015625, "learning_rate": 1.695003610387084e-05, "loss": 0.4684, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2426 }, { "epoch": 0.3300244764753875, "grad_norm": 0.400390625, "learning_rate": 1.6946621651744208e-05, "loss": 0.8175, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2427 }, { "epoch": 0.33016045689420725, "grad_norm": 0.40625, "learning_rate": 1.6943205633764123e-05, "loss": 0.7331, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2428 }, { "epoch": 0.33029643731302694, "grad_norm": 0.9296875, "learning_rate": 1.69397880507006e-05, "loss": 0.7697, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2429 }, { "epoch": 0.3304324177318466, "grad_norm": 0.51953125, "learning_rate": 1.6936368903324004e-05, "loss": 0.6497, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2430 }, { "epoch": 0.3305683981506663, "grad_norm": 0.33984375, "learning_rate": 1.6932948192405055e-05, "loss": 0.578, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2431 }, { "epoch": 0.330704378569486, "grad_norm": 0.6953125, "learning_rate": 1.692952591871482e-05, "loss": 0.8696, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2432 }, { "epoch": 0.33084035898830566, "grad_norm": 0.451171875, "learning_rate": 1.6926102083024722e-05, "loss": 0.4784, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2433 }, { "epoch": 0.3309763394071254, "grad_norm": 0.486328125, "learning_rate": 1.6922676686106537e-05, "loss": 0.7728, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2434 }, { "epoch": 0.3311123198259451, "grad_norm": 0.28125, "learning_rate": 1.691924972873239e-05, "loss": 0.5137, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2435 }, { "epoch": 0.33124830024476476, "grad_norm": 0.365234375, "learning_rate": 1.6915821211674752e-05, "loss": 0.474, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2436 }, { "epoch": 0.33138428066358444, "grad_norm": 0.796875, "learning_rate": 1.691239113570646e-05, "loss": 0.7609, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2437 }, { "epoch": 0.3315202610824041, "grad_norm": 0.359375, "learning_rate": 1.6908959501600702e-05, "loss": 0.5923, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2438 }, { "epoch": 0.3316562415012238, "grad_norm": 0.41015625, "learning_rate": 1.6905526310131e-05, "loss": 0.702, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2439 }, { "epoch": 0.33179222192004354, "grad_norm": 0.5859375, "learning_rate": 1.690209156207124e-05, "loss": 0.8556, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2440 }, { "epoch": 0.3319282023388632, "grad_norm": 0.56640625, "learning_rate": 1.6898655258195654e-05, "loss": 0.7351, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2441 }, { "epoch": 0.3320641827576829, "grad_norm": 0.66015625, "learning_rate": 1.6895217399278833e-05, "loss": 0.8136, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2442 }, { "epoch": 0.3322001631765026, "grad_norm": 0.228515625, "learning_rate": 1.6891777986095706e-05, "loss": 0.3377, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2443 }, { "epoch": 0.33233614359532226, "grad_norm": 0.390625, "learning_rate": 1.6888337019421564e-05, "loss": 0.682, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2444 }, { "epoch": 0.33247212401414195, "grad_norm": 0.392578125, "learning_rate": 1.6884894500032038e-05, "loss": 0.5682, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2445 }, { "epoch": 0.3326081044329616, "grad_norm": 0.50390625, "learning_rate": 1.6881450428703118e-05, "loss": 0.8359, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2446 }, { "epoch": 0.33274408485178136, "grad_norm": 0.67578125, "learning_rate": 1.687800480621114e-05, "loss": 0.7502, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2447 }, { "epoch": 0.33288006527060104, "grad_norm": 0.81640625, "learning_rate": 1.6874557633332784e-05, "loss": 0.6213, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2448 }, { "epoch": 0.3330160456894207, "grad_norm": 0.65234375, "learning_rate": 1.687110891084509e-05, "loss": 0.7944, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2449 }, { "epoch": 0.3331520261082404, "grad_norm": 0.494140625, "learning_rate": 1.6867658639525443e-05, "loss": 0.6833, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2450 }, { "epoch": 0.3332880065270601, "grad_norm": 0.384765625, "learning_rate": 1.686420682015157e-05, "loss": 0.7544, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2451 }, { "epoch": 0.33342398694587977, "grad_norm": 0.333984375, "learning_rate": 1.686075345350156e-05, "loss": 0.681, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2452 }, { "epoch": 0.3335599673646995, "grad_norm": 0.65234375, "learning_rate": 1.685729854035384e-05, "loss": 0.6812, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2453 }, { "epoch": 0.3336959477835192, "grad_norm": 0.55859375, "learning_rate": 1.685384208148719e-05, "loss": 0.5968, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2454 }, { "epoch": 0.33383192820233887, "grad_norm": 0.30078125, "learning_rate": 1.685038407768074e-05, "loss": 0.5863, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2455 }, { "epoch": 0.33396790862115855, "grad_norm": 0.6953125, "learning_rate": 1.684692452971397e-05, "loss": 0.7419, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2456 }, { "epoch": 0.33410388903997823, "grad_norm": 0.21484375, "learning_rate": 1.6843463438366697e-05, "loss": 0.3351, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2457 }, { "epoch": 0.3342398694587979, "grad_norm": 0.30859375, "learning_rate": 1.6840000804419094e-05, "loss": 0.4541, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2458 }, { "epoch": 0.33437584987761765, "grad_norm": 0.345703125, "learning_rate": 1.6836536628651688e-05, "loss": 0.6069, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2459 }, { "epoch": 0.33451183029643733, "grad_norm": 0.376953125, "learning_rate": 1.6833070911845335e-05, "loss": 0.7435, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2460 }, { "epoch": 0.334647810715257, "grad_norm": 0.51171875, "learning_rate": 1.682960365478126e-05, "loss": 0.6746, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2461 }, { "epoch": 0.3347837911340767, "grad_norm": 0.484375, "learning_rate": 1.6826134858241027e-05, "loss": 0.771, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2462 }, { "epoch": 0.3349197715528964, "grad_norm": 0.62109375, "learning_rate": 1.6822664523006533e-05, "loss": 0.5516, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2463 }, { "epoch": 0.33505575197171605, "grad_norm": 0.6640625, "learning_rate": 1.6819192649860047e-05, "loss": 0.507, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2464 }, { "epoch": 0.3351917323905358, "grad_norm": 0.333984375, "learning_rate": 1.681571923958416e-05, "loss": 0.5713, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2465 }, { "epoch": 0.33532771280935547, "grad_norm": 0.70703125, "learning_rate": 1.6812244292961827e-05, "loss": 0.7118, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2466 }, { "epoch": 0.33546369322817515, "grad_norm": 0.515625, "learning_rate": 1.6808767810776346e-05, "loss": 0.7889, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2467 }, { "epoch": 0.33559967364699483, "grad_norm": 0.435546875, "learning_rate": 1.6805289793811356e-05, "loss": 0.6097, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2468 }, { "epoch": 0.3357356540658145, "grad_norm": 0.333984375, "learning_rate": 1.6801810242850844e-05, "loss": 0.5775, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2469 }, { "epoch": 0.3358716344846342, "grad_norm": 0.31640625, "learning_rate": 1.679832915867914e-05, "loss": 0.4359, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2470 }, { "epoch": 0.3360076149034539, "grad_norm": 0.486328125, "learning_rate": 1.6794846542080937e-05, "loss": 0.6702, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2471 }, { "epoch": 0.3361435953222736, "grad_norm": 0.376953125, "learning_rate": 1.6791362393841243e-05, "loss": 0.5252, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2472 }, { "epoch": 0.3362795757410933, "grad_norm": 0.87109375, "learning_rate": 1.6787876714745432e-05, "loss": 0.7181, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2473 }, { "epoch": 0.336415556159913, "grad_norm": 0.69921875, "learning_rate": 1.6784389505579222e-05, "loss": 0.791, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2474 }, { "epoch": 0.33655153657873266, "grad_norm": 0.62890625, "learning_rate": 1.678090076712867e-05, "loss": 0.6232, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2475 }, { "epoch": 0.33668751699755234, "grad_norm": 0.61328125, "learning_rate": 1.6777410500180183e-05, "loss": 0.7103, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2476 }, { "epoch": 0.336823497416372, "grad_norm": 0.5703125, "learning_rate": 1.6773918705520506e-05, "loss": 0.889, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2477 }, { "epoch": 0.33695947783519176, "grad_norm": 0.3359375, "learning_rate": 1.6770425383936734e-05, "loss": 0.5843, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2478 }, { "epoch": 0.33709545825401144, "grad_norm": 0.462890625, "learning_rate": 1.6766930536216307e-05, "loss": 0.7062, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2479 }, { "epoch": 0.3372314386728311, "grad_norm": 0.267578125, "learning_rate": 1.6763434163147e-05, "loss": 0.5531, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2480 }, { "epoch": 0.3373674190916508, "grad_norm": 0.353515625, "learning_rate": 1.675993626551695e-05, "loss": 0.627, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2481 }, { "epoch": 0.3375033995104705, "grad_norm": 3.71875, "learning_rate": 1.6756436844114613e-05, "loss": 0.7417, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2482 }, { "epoch": 0.33763937992929016, "grad_norm": 0.69140625, "learning_rate": 1.6752935899728808e-05, "loss": 0.7539, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2483 }, { "epoch": 0.3377753603481099, "grad_norm": 0.55859375, "learning_rate": 1.674943343314869e-05, "loss": 0.5483, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2484 }, { "epoch": 0.3379113407669296, "grad_norm": 0.28125, "learning_rate": 1.6745929445163757e-05, "loss": 0.6024, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2485 }, { "epoch": 0.33804732118574926, "grad_norm": 0.87109375, "learning_rate": 1.674242393656385e-05, "loss": 0.6911, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2486 }, { "epoch": 0.33818330160456894, "grad_norm": 0.486328125, "learning_rate": 1.673891690813915e-05, "loss": 0.6799, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2487 }, { "epoch": 0.3383192820233886, "grad_norm": 0.59375, "learning_rate": 1.6735408360680193e-05, "loss": 0.7487, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2488 }, { "epoch": 0.3384552624422083, "grad_norm": 0.58203125, "learning_rate": 1.673189829497784e-05, "loss": 0.6987, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2489 }, { "epoch": 0.338591242861028, "grad_norm": 0.74609375, "learning_rate": 1.6728386711823313e-05, "loss": 0.6479, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2490 }, { "epoch": 0.3387272232798477, "grad_norm": 0.345703125, "learning_rate": 1.6724873612008155e-05, "loss": 0.6247, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2491 }, { "epoch": 0.3388632036986674, "grad_norm": 0.45703125, "learning_rate": 1.672135899632427e-05, "loss": 0.7629, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2492 }, { "epoch": 0.3389991841174871, "grad_norm": 0.34765625, "learning_rate": 1.6717842865563886e-05, "loss": 0.6278, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2493 }, { "epoch": 0.33913516453630677, "grad_norm": 0.3671875, "learning_rate": 1.671432522051959e-05, "loss": 0.6621, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2494 }, { "epoch": 0.33927114495512645, "grad_norm": 0.412109375, "learning_rate": 1.6710806061984298e-05, "loss": 0.7808, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2495 }, { "epoch": 0.33940712537394613, "grad_norm": 0.328125, "learning_rate": 1.6707285390751275e-05, "loss": 0.575, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2496 }, { "epoch": 0.33954310579276586, "grad_norm": 0.5234375, "learning_rate": 1.6703763207614118e-05, "loss": 0.7884, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2497 }, { "epoch": 0.33967908621158555, "grad_norm": 0.419921875, "learning_rate": 1.6700239513366777e-05, "loss": 0.7782, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2498 }, { "epoch": 0.3398150666304052, "grad_norm": 0.376953125, "learning_rate": 1.669671430880353e-05, "loss": 0.7259, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2499 }, { "epoch": 0.3399510470492249, "grad_norm": 0.51953125, "learning_rate": 1.6693187594719002e-05, "loss": 0.5929, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2500 }, { "epoch": 0.3400870274680446, "grad_norm": 0.50390625, "learning_rate": 1.668965937190816e-05, "loss": 0.8244, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2501 }, { "epoch": 0.34022300788686427, "grad_norm": 0.53515625, "learning_rate": 1.6686129641166308e-05, "loss": 0.5314, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2502 }, { "epoch": 0.340358988305684, "grad_norm": 0.375, "learning_rate": 1.6682598403289086e-05, "loss": 0.5612, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2503 }, { "epoch": 0.3404949687245037, "grad_norm": 0.306640625, "learning_rate": 1.6679065659072486e-05, "loss": 0.5797, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2504 }, { "epoch": 0.34063094914332337, "grad_norm": 0.373046875, "learning_rate": 1.6675531409312827e-05, "loss": 0.6746, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2505 }, { "epoch": 0.34076692956214305, "grad_norm": 0.326171875, "learning_rate": 1.6671995654806774e-05, "loss": 0.6318, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2506 }, { "epoch": 0.34090290998096273, "grad_norm": 0.51953125, "learning_rate": 1.6668458396351327e-05, "loss": 0.8089, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2507 }, { "epoch": 0.3410388903997824, "grad_norm": 0.3828125, "learning_rate": 1.6664919634743826e-05, "loss": 0.5339, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2508 }, { "epoch": 0.3411748708186021, "grad_norm": 0.828125, "learning_rate": 1.6661379370781956e-05, "loss": 0.7388, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2509 }, { "epoch": 0.34131085123742183, "grad_norm": 0.267578125, "learning_rate": 1.6657837605263733e-05, "loss": 0.5186, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2510 }, { "epoch": 0.3414468316562415, "grad_norm": 0.34765625, "learning_rate": 1.6654294338987513e-05, "loss": 0.8431, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2511 }, { "epoch": 0.3415828120750612, "grad_norm": 0.33203125, "learning_rate": 1.6650749572751994e-05, "loss": 0.5931, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2512 }, { "epoch": 0.3417187924938809, "grad_norm": 0.578125, "learning_rate": 1.664720330735621e-05, "loss": 0.7723, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2513 }, { "epoch": 0.34185477291270056, "grad_norm": 0.30078125, "learning_rate": 1.664365554359953e-05, "loss": 0.7217, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2514 }, { "epoch": 0.34199075333152024, "grad_norm": 0.37109375, "learning_rate": 1.6640106282281665e-05, "loss": 0.5986, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2515 }, { "epoch": 0.34212673375034, "grad_norm": 0.59375, "learning_rate": 1.663655552420266e-05, "loss": 0.7482, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2516 }, { "epoch": 0.34226271416915965, "grad_norm": 0.640625, "learning_rate": 1.6633003270162903e-05, "loss": 0.6934, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2517 }, { "epoch": 0.34239869458797934, "grad_norm": 0.48828125, "learning_rate": 1.6629449520963112e-05, "loss": 0.3488, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2518 }, { "epoch": 0.342534675006799, "grad_norm": 0.29296875, "learning_rate": 1.6625894277404347e-05, "loss": 0.5462, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2519 }, { "epoch": 0.3426706554256187, "grad_norm": 0.40625, "learning_rate": 1.6622337540288005e-05, "loss": 0.6195, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2520 }, { "epoch": 0.3428066358444384, "grad_norm": 0.443359375, "learning_rate": 1.6618779310415813e-05, "loss": 0.5688, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2521 }, { "epoch": 0.3429426162632581, "grad_norm": 0.54296875, "learning_rate": 1.6615219588589844e-05, "loss": 0.7153, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2522 }, { "epoch": 0.3430785966820778, "grad_norm": 0.51953125, "learning_rate": 1.6611658375612502e-05, "loss": 0.8218, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2523 }, { "epoch": 0.3432145771008975, "grad_norm": 0.326171875, "learning_rate": 1.6608095672286527e-05, "loss": 0.529, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2524 }, { "epoch": 0.34335055751971716, "grad_norm": 0.419921875, "learning_rate": 1.6604531479414998e-05, "loss": 0.7956, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2525 }, { "epoch": 0.34348653793853684, "grad_norm": 0.455078125, "learning_rate": 1.6600965797801322e-05, "loss": 0.8755, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2526 }, { "epoch": 0.3436225183573565, "grad_norm": 0.359375, "learning_rate": 1.6597398628249256e-05, "loss": 0.5993, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2527 }, { "epoch": 0.34375849877617626, "grad_norm": 0.32421875, "learning_rate": 1.6593829971562877e-05, "loss": 0.6162, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2528 }, { "epoch": 0.34389447919499594, "grad_norm": 0.5703125, "learning_rate": 1.6590259828546603e-05, "loss": 0.7383, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2529 }, { "epoch": 0.3440304596138156, "grad_norm": 0.51171875, "learning_rate": 1.6586688200005193e-05, "loss": 0.5588, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2530 }, { "epoch": 0.3441664400326353, "grad_norm": 0.625, "learning_rate": 1.6583115086743734e-05, "loss": 0.6523, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2531 }, { "epoch": 0.344302420451455, "grad_norm": 0.458984375, "learning_rate": 1.6579540489567648e-05, "loss": 0.772, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2532 }, { "epoch": 0.34443840087027466, "grad_norm": 0.71484375, "learning_rate": 1.6575964409282695e-05, "loss": 0.5356, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2533 }, { "epoch": 0.34457438128909434, "grad_norm": 0.49609375, "learning_rate": 1.6572386846694963e-05, "loss": 0.8172, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2534 }, { "epoch": 0.3447103617079141, "grad_norm": 0.349609375, "learning_rate": 1.6568807802610886e-05, "loss": 0.5802, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2535 }, { "epoch": 0.34484634212673376, "grad_norm": 0.462890625, "learning_rate": 1.6565227277837214e-05, "loss": 0.742, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2536 }, { "epoch": 0.34498232254555344, "grad_norm": 0.265625, "learning_rate": 1.656164527318105e-05, "loss": 0.4155, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2537 }, { "epoch": 0.3451183029643731, "grad_norm": 2.21875, "learning_rate": 1.6558061789449818e-05, "loss": 1.007, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2538 }, { "epoch": 0.3452542833831928, "grad_norm": 0.482421875, "learning_rate": 1.6554476827451277e-05, "loss": 0.7458, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2539 }, { "epoch": 0.3453902638020125, "grad_norm": 0.4375, "learning_rate": 1.6550890387993525e-05, "loss": 0.8304, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2540 }, { "epoch": 0.3455262442208322, "grad_norm": 0.5234375, "learning_rate": 1.654730247188499e-05, "loss": 0.5679, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2541 }, { "epoch": 0.3456622246396519, "grad_norm": 0.61328125, "learning_rate": 1.6543713079934426e-05, "loss": 0.7515, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2542 }, { "epoch": 0.3457982050584716, "grad_norm": 0.419921875, "learning_rate": 1.654012221295093e-05, "loss": 0.501, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2543 }, { "epoch": 0.34593418547729127, "grad_norm": 0.333984375, "learning_rate": 1.653652987174393e-05, "loss": 0.6842, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2544 }, { "epoch": 0.34607016589611095, "grad_norm": 0.57421875, "learning_rate": 1.653293605712318e-05, "loss": 0.8055, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2545 }, { "epoch": 0.34620614631493063, "grad_norm": 0.380859375, "learning_rate": 1.652934076989877e-05, "loss": 0.5649, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2546 }, { "epoch": 0.34634212673375037, "grad_norm": 0.3125, "learning_rate": 1.6525744010881125e-05, "loss": 0.5708, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2547 }, { "epoch": 0.34647810715257005, "grad_norm": 0.359375, "learning_rate": 1.652214578088099e-05, "loss": 0.4259, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2548 }, { "epoch": 0.34661408757138973, "grad_norm": 0.443359375, "learning_rate": 1.651854608070946e-05, "loss": 0.6742, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2549 }, { "epoch": 0.3467500679902094, "grad_norm": 0.34765625, "learning_rate": 1.6514944911177946e-05, "loss": 0.5848, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2550 }, { "epoch": 0.3468860484090291, "grad_norm": 0.419921875, "learning_rate": 1.6511342273098195e-05, "loss": 0.679, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2551 }, { "epoch": 0.34702202882784877, "grad_norm": 0.373046875, "learning_rate": 1.6507738167282294e-05, "loss": 0.6113, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2552 }, { "epoch": 0.34715800924666845, "grad_norm": 0.5078125, "learning_rate": 1.650413259454264e-05, "loss": 0.6143, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2553 }, { "epoch": 0.3472939896654882, "grad_norm": 0.498046875, "learning_rate": 1.650052555569198e-05, "loss": 0.701, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2554 }, { "epoch": 0.34742997008430787, "grad_norm": 0.6171875, "learning_rate": 1.6496917051543386e-05, "loss": 0.7537, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2555 }, { "epoch": 0.34756595050312755, "grad_norm": 0.40625, "learning_rate": 1.649330708291025e-05, "loss": 0.6644, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2556 }, { "epoch": 0.34770193092194723, "grad_norm": 0.310546875, "learning_rate": 1.6489695650606314e-05, "loss": 0.637, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2557 }, { "epoch": 0.3478379113407669, "grad_norm": 0.369140625, "learning_rate": 1.6486082755445635e-05, "loss": 0.5994, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2558 }, { "epoch": 0.3479738917595866, "grad_norm": 0.412109375, "learning_rate": 1.6482468398242606e-05, "loss": 0.5921, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2559 }, { "epoch": 0.34810987217840633, "grad_norm": 0.50390625, "learning_rate": 1.6478852579811943e-05, "loss": 0.5596, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2560 }, { "epoch": 0.348245852597226, "grad_norm": 0.419921875, "learning_rate": 1.6475235300968696e-05, "loss": 0.6891, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2561 }, { "epoch": 0.3483818330160457, "grad_norm": 0.2578125, "learning_rate": 1.647161656252825e-05, "loss": 0.4948, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2562 }, { "epoch": 0.3485178134348654, "grad_norm": 0.328125, "learning_rate": 1.6467996365306304e-05, "loss": 0.5894, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2563 }, { "epoch": 0.34865379385368506, "grad_norm": 0.283203125, "learning_rate": 1.64643747101189e-05, "loss": 0.5549, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2564 }, { "epoch": 0.34878977427250474, "grad_norm": 0.74609375, "learning_rate": 1.6460751597782402e-05, "loss": 0.7253, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2565 }, { "epoch": 0.3489257546913245, "grad_norm": 0.40625, "learning_rate": 1.6457127029113508e-05, "loss": 0.8138, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2566 }, { "epoch": 0.34906173511014416, "grad_norm": 0.3125, "learning_rate": 1.645350100492923e-05, "loss": 0.6455, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2567 }, { "epoch": 0.34919771552896384, "grad_norm": 0.73828125, "learning_rate": 1.6449873526046928e-05, "loss": 0.8564, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2568 }, { "epoch": 0.3493336959477835, "grad_norm": 0.39453125, "learning_rate": 1.6446244593284277e-05, "loss": 0.5665, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2569 }, { "epoch": 0.3494696763666032, "grad_norm": 0.373046875, "learning_rate": 1.644261420745928e-05, "loss": 0.7184, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2570 }, { "epoch": 0.3496056567854229, "grad_norm": 0.5078125, "learning_rate": 1.6438982369390273e-05, "loss": 0.6554, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2571 }, { "epoch": 0.34974163720424256, "grad_norm": 0.5078125, "learning_rate": 1.6435349079895918e-05, "loss": 0.8664, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2572 }, { "epoch": 0.3498776176230623, "grad_norm": 0.46875, "learning_rate": 1.64317143397952e-05, "loss": 0.5039, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2573 }, { "epoch": 0.350013598041882, "grad_norm": 0.326171875, "learning_rate": 1.642807814990744e-05, "loss": 0.5815, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2574 }, { "epoch": 0.35014957846070166, "grad_norm": 0.3671875, "learning_rate": 1.6424440511052268e-05, "loss": 0.6431, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2575 }, { "epoch": 0.35028555887952134, "grad_norm": 0.310546875, "learning_rate": 1.642080142404966e-05, "loss": 0.6021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2576 }, { "epoch": 0.350421539298341, "grad_norm": 0.40625, "learning_rate": 1.6417160889719915e-05, "loss": 0.652, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2577 }, { "epoch": 0.3505575197171607, "grad_norm": 0.5546875, "learning_rate": 1.6413518908883646e-05, "loss": 0.735, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2578 }, { "epoch": 0.35069350013598044, "grad_norm": 0.47265625, "learning_rate": 1.6409875482361803e-05, "loss": 0.7581, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2579 }, { "epoch": 0.3508294805548001, "grad_norm": 0.345703125, "learning_rate": 1.6406230610975654e-05, "loss": 0.6017, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2580 }, { "epoch": 0.3509654609736198, "grad_norm": 0.41015625, "learning_rate": 1.6402584295546808e-05, "loss": 0.8162, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2581 }, { "epoch": 0.3511014413924395, "grad_norm": 0.26171875, "learning_rate": 1.6398936536897182e-05, "loss": 0.5345, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2582 }, { "epoch": 0.35123742181125917, "grad_norm": 0.462890625, "learning_rate": 1.6395287335849028e-05, "loss": 0.8127, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2583 }, { "epoch": 0.35137340223007885, "grad_norm": 0.3125, "learning_rate": 1.6391636693224916e-05, "loss": 0.5386, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2584 }, { "epoch": 0.3515093826488986, "grad_norm": 0.400390625, "learning_rate": 1.638798460984775e-05, "loss": 0.6961, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2585 }, { "epoch": 0.35164536306771826, "grad_norm": 0.53125, "learning_rate": 1.6384331086540753e-05, "loss": 0.4741, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2586 }, { "epoch": 0.35178134348653795, "grad_norm": 0.3984375, "learning_rate": 1.6380676124127472e-05, "loss": 0.6968, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2587 }, { "epoch": 0.3519173239053576, "grad_norm": 0.66015625, "learning_rate": 1.6377019723431783e-05, "loss": 0.6634, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2588 }, { "epoch": 0.3520533043241773, "grad_norm": 0.4375, "learning_rate": 1.6373361885277882e-05, "loss": 0.748, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2589 }, { "epoch": 0.352189284742997, "grad_norm": 0.193359375, "learning_rate": 1.6369702610490295e-05, "loss": 0.4435, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2590 }, { "epoch": 0.3523252651618167, "grad_norm": 0.35546875, "learning_rate": 1.636604189989386e-05, "loss": 0.7449, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2591 }, { "epoch": 0.3524612455806364, "grad_norm": 0.462890625, "learning_rate": 1.636237975431375e-05, "loss": 0.7464, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2592 }, { "epoch": 0.3525972259994561, "grad_norm": 0.8671875, "learning_rate": 1.6358716174575452e-05, "loss": 0.9137, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2593 }, { "epoch": 0.35273320641827577, "grad_norm": 0.30078125, "learning_rate": 1.635505116150479e-05, "loss": 0.4803, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2594 }, { "epoch": 0.35286918683709545, "grad_norm": 0.30859375, "learning_rate": 1.6351384715927897e-05, "loss": 0.5531, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2595 }, { "epoch": 0.35300516725591513, "grad_norm": 0.353515625, "learning_rate": 1.6347716838671238e-05, "loss": 0.5023, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2596 }, { "epoch": 0.3531411476747348, "grad_norm": 0.478515625, "learning_rate": 1.6344047530561597e-05, "loss": 0.849, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2597 }, { "epoch": 0.35327712809355455, "grad_norm": 0.53125, "learning_rate": 1.634037679242608e-05, "loss": 0.6446, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2598 }, { "epoch": 0.35341310851237423, "grad_norm": 0.3046875, "learning_rate": 1.6336704625092114e-05, "loss": 0.5228, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2599 }, { "epoch": 0.3535490889311939, "grad_norm": 0.2890625, "learning_rate": 1.633303102938745e-05, "loss": 0.6147, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2600 }, { "epoch": 0.3536850693500136, "grad_norm": 0.44140625, "learning_rate": 1.632935600614017e-05, "loss": 0.6817, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2601 }, { "epoch": 0.3538210497688333, "grad_norm": 0.5078125, "learning_rate": 1.6325679556178656e-05, "loss": 0.9048, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2602 }, { "epoch": 0.35395703018765295, "grad_norm": 0.357421875, "learning_rate": 1.6322001680331635e-05, "loss": 0.616, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2603 }, { "epoch": 0.3540930106064727, "grad_norm": 0.32421875, "learning_rate": 1.6318322379428144e-05, "loss": 0.6077, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2604 }, { "epoch": 0.3542289910252924, "grad_norm": 0.341796875, "learning_rate": 1.631464165429754e-05, "loss": 0.6692, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2605 }, { "epoch": 0.35436497144411205, "grad_norm": 0.361328125, "learning_rate": 1.6310959505769503e-05, "loss": 0.7266, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2606 }, { "epoch": 0.35450095186293173, "grad_norm": 0.546875, "learning_rate": 1.6307275934674034e-05, "loss": 0.7992, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2607 }, { "epoch": 0.3546369322817514, "grad_norm": 0.380859375, "learning_rate": 1.6303590941841457e-05, "loss": 0.6224, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2608 }, { "epoch": 0.3547729127005711, "grad_norm": 0.31640625, "learning_rate": 1.6299904528102414e-05, "loss": 0.6296, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2609 }, { "epoch": 0.35490889311939083, "grad_norm": 0.330078125, "learning_rate": 1.6296216694287864e-05, "loss": 0.6268, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2610 }, { "epoch": 0.3550448735382105, "grad_norm": 0.232421875, "learning_rate": 1.62925274412291e-05, "loss": 0.402, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2611 }, { "epoch": 0.3551808539570302, "grad_norm": 0.384765625, "learning_rate": 1.6288836769757712e-05, "loss": 0.563, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2612 }, { "epoch": 0.3553168343758499, "grad_norm": 0.328125, "learning_rate": 1.628514468070563e-05, "loss": 0.5687, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2613 }, { "epoch": 0.35545281479466956, "grad_norm": 0.357421875, "learning_rate": 1.6281451174905093e-05, "loss": 0.7492, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2614 }, { "epoch": 0.35558879521348924, "grad_norm": 1.4296875, "learning_rate": 1.6277756253188663e-05, "loss": 0.7646, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2615 }, { "epoch": 0.3557247756323089, "grad_norm": 0.4375, "learning_rate": 1.6274059916389223e-05, "loss": 0.4924, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2616 }, { "epoch": 0.35586075605112866, "grad_norm": 0.431640625, "learning_rate": 1.627036216533997e-05, "loss": 0.7158, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2617 }, { "epoch": 0.35599673646994834, "grad_norm": 0.73828125, "learning_rate": 1.6266663000874423e-05, "loss": 0.7209, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2618 }, { "epoch": 0.356132716888768, "grad_norm": 0.3984375, "learning_rate": 1.626296242382642e-05, "loss": 0.6336, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2619 }, { "epoch": 0.3562686973075877, "grad_norm": 0.6484375, "learning_rate": 1.6259260435030114e-05, "loss": 0.811, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2620 }, { "epoch": 0.3564046777264074, "grad_norm": 0.921875, "learning_rate": 1.625555703531998e-05, "loss": 0.75, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2621 }, { "epoch": 0.35654065814522706, "grad_norm": 0.875, "learning_rate": 1.625185222553081e-05, "loss": 0.6517, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2622 }, { "epoch": 0.3566766385640468, "grad_norm": 0.470703125, "learning_rate": 1.6248146006497712e-05, "loss": 0.4359, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2623 }, { "epoch": 0.3568126189828665, "grad_norm": 0.66015625, "learning_rate": 1.6244438379056115e-05, "loss": 0.6253, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2624 }, { "epoch": 0.35694859940168616, "grad_norm": 0.6484375, "learning_rate": 1.6240729344041762e-05, "loss": 0.783, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2625 }, { "epoch": 0.35708457982050584, "grad_norm": 0.40234375, "learning_rate": 1.6237018902290717e-05, "loss": 0.6021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2626 }, { "epoch": 0.3572205602393255, "grad_norm": 0.2412109375, "learning_rate": 1.6233307054639353e-05, "loss": 0.5015, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2627 }, { "epoch": 0.3573565406581452, "grad_norm": 0.298828125, "learning_rate": 1.622959380192437e-05, "loss": 0.3491, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2628 }, { "epoch": 0.35749252107696494, "grad_norm": 0.31640625, "learning_rate": 1.6225879144982783e-05, "loss": 0.5042, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2629 }, { "epoch": 0.3576285014957846, "grad_norm": 0.322265625, "learning_rate": 1.6222163084651918e-05, "loss": 0.6387, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2630 }, { "epoch": 0.3577644819146043, "grad_norm": 0.3984375, "learning_rate": 1.6218445621769424e-05, "loss": 0.6278, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2631 }, { "epoch": 0.357900462333424, "grad_norm": 0.65625, "learning_rate": 1.6214726757173255e-05, "loss": 0.8075, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2632 }, { "epoch": 0.35803644275224367, "grad_norm": 0.498046875, "learning_rate": 1.6211006491701695e-05, "loss": 0.5348, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2633 }, { "epoch": 0.35817242317106335, "grad_norm": 0.384765625, "learning_rate": 1.6207284826193334e-05, "loss": 0.5345, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2634 }, { "epoch": 0.3583084035898831, "grad_norm": 0.439453125, "learning_rate": 1.6203561761487084e-05, "loss": 0.6545, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2635 }, { "epoch": 0.35844438400870277, "grad_norm": 0.5625, "learning_rate": 1.6199837298422167e-05, "loss": 0.8231, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2636 }, { "epoch": 0.35858036442752245, "grad_norm": 0.4609375, "learning_rate": 1.6196111437838126e-05, "loss": 0.7222, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2637 }, { "epoch": 0.35871634484634213, "grad_norm": 0.40625, "learning_rate": 1.619238418057481e-05, "loss": 0.6375, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2638 }, { "epoch": 0.3588523252651618, "grad_norm": 0.466796875, "learning_rate": 1.618865552747239e-05, "loss": 0.6533, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2639 }, { "epoch": 0.3589883056839815, "grad_norm": 0.48046875, "learning_rate": 1.618492547937135e-05, "loss": 0.4797, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2640 }, { "epoch": 0.35912428610280117, "grad_norm": 0.609375, "learning_rate": 1.6181194037112495e-05, "loss": 0.6901, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2641 }, { "epoch": 0.3592602665216209, "grad_norm": 0.6484375, "learning_rate": 1.617746120153693e-05, "loss": 0.4272, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2642 }, { "epoch": 0.3593962469404406, "grad_norm": 0.58203125, "learning_rate": 1.6173726973486084e-05, "loss": 0.762, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2643 }, { "epoch": 0.35953222735926027, "grad_norm": 0.244140625, "learning_rate": 1.6169991353801696e-05, "loss": 0.4235, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2644 }, { "epoch": 0.35966820777807995, "grad_norm": 0.453125, "learning_rate": 1.6166254343325824e-05, "loss": 0.714, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2645 }, { "epoch": 0.35980418819689963, "grad_norm": 0.5546875, "learning_rate": 1.616251594290083e-05, "loss": 0.6821, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2646 }, { "epoch": 0.3599401686157193, "grad_norm": 0.55859375, "learning_rate": 1.6158776153369406e-05, "loss": 0.6506, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2647 }, { "epoch": 0.36007614903453905, "grad_norm": 0.384765625, "learning_rate": 1.6155034975574534e-05, "loss": 0.6559, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2648 }, { "epoch": 0.36021212945335873, "grad_norm": 0.3125, "learning_rate": 1.6151292410359528e-05, "loss": 0.5596, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2649 }, { "epoch": 0.3603481098721784, "grad_norm": 0.3203125, "learning_rate": 1.6147548458568007e-05, "loss": 0.5444, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2650 }, { "epoch": 0.3604840902909981, "grad_norm": 0.34375, "learning_rate": 1.6143803121043903e-05, "loss": 0.5524, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2651 }, { "epoch": 0.3606200707098178, "grad_norm": 0.376953125, "learning_rate": 1.614005639863146e-05, "loss": 0.6405, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2652 }, { "epoch": 0.36075605112863746, "grad_norm": 0.6484375, "learning_rate": 1.613630829217524e-05, "loss": 0.6126, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2653 }, { "epoch": 0.3608920315474572, "grad_norm": 0.859375, "learning_rate": 1.6132558802520103e-05, "loss": 0.5557, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2654 }, { "epoch": 0.3610280119662769, "grad_norm": 0.3671875, "learning_rate": 1.6128807930511235e-05, "loss": 0.6457, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2655 }, { "epoch": 0.36116399238509656, "grad_norm": 0.49609375, "learning_rate": 1.6125055676994132e-05, "loss": 0.8413, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2656 }, { "epoch": 0.36129997280391624, "grad_norm": 0.5078125, "learning_rate": 1.6121302042814588e-05, "loss": 0.797, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2657 }, { "epoch": 0.3614359532227359, "grad_norm": 0.404296875, "learning_rate": 1.611754702881873e-05, "loss": 0.6621, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2658 }, { "epoch": 0.3615719336415556, "grad_norm": 0.431640625, "learning_rate": 1.6113790635852974e-05, "loss": 0.6737, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2659 }, { "epoch": 0.3617079140603753, "grad_norm": 0.498046875, "learning_rate": 1.611003286476406e-05, "loss": 0.7634, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2660 }, { "epoch": 0.361843894479195, "grad_norm": 0.32421875, "learning_rate": 1.610627371639904e-05, "loss": 0.5285, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2661 }, { "epoch": 0.3619798748980147, "grad_norm": 0.330078125, "learning_rate": 1.610251319160527e-05, "loss": 0.5342, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2662 }, { "epoch": 0.3621158553168344, "grad_norm": 0.421875, "learning_rate": 1.6098751291230414e-05, "loss": 0.7539, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2663 }, { "epoch": 0.36225183573565406, "grad_norm": 0.3125, "learning_rate": 1.6094988016122454e-05, "loss": 0.6187, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2664 }, { "epoch": 0.36238781615447374, "grad_norm": 0.353515625, "learning_rate": 1.6091223367129676e-05, "loss": 0.6323, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2665 }, { "epoch": 0.3625237965732934, "grad_norm": 0.2578125, "learning_rate": 1.6087457345100678e-05, "loss": 0.4728, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2666 }, { "epoch": 0.36265977699211316, "grad_norm": 0.59375, "learning_rate": 1.608368995088437e-05, "loss": 0.7681, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2667 }, { "epoch": 0.36279575741093284, "grad_norm": 0.3203125, "learning_rate": 1.607992118532997e-05, "loss": 0.5472, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2668 }, { "epoch": 0.3629317378297525, "grad_norm": 0.310546875, "learning_rate": 1.6076151049287e-05, "loss": 0.5638, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2669 }, { "epoch": 0.3630677182485722, "grad_norm": 2.234375, "learning_rate": 1.6072379543605293e-05, "loss": 0.6829, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2670 }, { "epoch": 0.3632036986673919, "grad_norm": 0.5546875, "learning_rate": 1.6068606669135002e-05, "loss": 0.6213, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2671 }, { "epoch": 0.36333967908621156, "grad_norm": 0.265625, "learning_rate": 1.6064832426726566e-05, "loss": 0.5306, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2672 }, { "epoch": 0.3634756595050313, "grad_norm": 0.248046875, "learning_rate": 1.6061056817230754e-05, "loss": 0.4886, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2673 }, { "epoch": 0.363611639923851, "grad_norm": 0.31640625, "learning_rate": 1.6057279841498633e-05, "loss": 0.5646, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2674 }, { "epoch": 0.36374762034267066, "grad_norm": 0.482421875, "learning_rate": 1.605350150038158e-05, "loss": 0.5998, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2675 }, { "epoch": 0.36388360076149034, "grad_norm": 0.427734375, "learning_rate": 1.6049721794731276e-05, "loss": 0.3976, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2676 }, { "epoch": 0.36401958118031, "grad_norm": 0.59375, "learning_rate": 1.6045940725399715e-05, "loss": 0.5876, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2677 }, { "epoch": 0.3641555615991297, "grad_norm": 0.306640625, "learning_rate": 1.6042158293239197e-05, "loss": 0.5479, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2678 }, { "epoch": 0.3642915420179494, "grad_norm": 0.341796875, "learning_rate": 1.603837449910233e-05, "loss": 0.619, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2679 }, { "epoch": 0.3644275224367691, "grad_norm": 0.68359375, "learning_rate": 1.603458934384202e-05, "loss": 0.4867, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2680 }, { "epoch": 0.3645635028555888, "grad_norm": 0.439453125, "learning_rate": 1.6030802828311498e-05, "loss": 0.6885, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2681 }, { "epoch": 0.3646994832744085, "grad_norm": 0.24609375, "learning_rate": 1.6027014953364282e-05, "loss": 0.438, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2682 }, { "epoch": 0.36483546369322817, "grad_norm": 0.435546875, "learning_rate": 1.602322571985421e-05, "loss": 0.6851, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2683 }, { "epoch": 0.36497144411204785, "grad_norm": 0.4609375, "learning_rate": 1.6019435128635424e-05, "loss": 0.7129, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2684 }, { "epoch": 0.36510742453086753, "grad_norm": 0.58984375, "learning_rate": 1.601564318056236e-05, "loss": 0.6169, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2685 }, { "epoch": 0.36524340494968727, "grad_norm": 0.5234375, "learning_rate": 1.6011849876489777e-05, "loss": 0.6216, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2686 }, { "epoch": 0.36537938536850695, "grad_norm": 0.498046875, "learning_rate": 1.6008055217272733e-05, "loss": 0.9095, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2687 }, { "epoch": 0.36551536578732663, "grad_norm": 0.271484375, "learning_rate": 1.6004259203766586e-05, "loss": 0.4692, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2688 }, { "epoch": 0.3656513462061463, "grad_norm": 0.6796875, "learning_rate": 1.6000461836827007e-05, "loss": 0.5601, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2689 }, { "epoch": 0.365787326624966, "grad_norm": 0.890625, "learning_rate": 1.5996663117309965e-05, "loss": 0.8424, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2690 }, { "epoch": 0.3659233070437857, "grad_norm": 0.3515625, "learning_rate": 1.5992863046071744e-05, "loss": 0.6379, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2691 }, { "epoch": 0.3660592874626054, "grad_norm": 0.3203125, "learning_rate": 1.598906162396892e-05, "loss": 0.6825, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2692 }, { "epoch": 0.3661952678814251, "grad_norm": 0.31640625, "learning_rate": 1.5985258851858385e-05, "loss": 0.4961, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2693 }, { "epoch": 0.36633124830024477, "grad_norm": 0.470703125, "learning_rate": 1.598145473059733e-05, "loss": 0.6844, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2694 }, { "epoch": 0.36646722871906445, "grad_norm": 0.9375, "learning_rate": 1.5977649261043247e-05, "loss": 0.694, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2695 }, { "epoch": 0.36660320913788413, "grad_norm": 0.58203125, "learning_rate": 1.5973842444053937e-05, "loss": 0.7096, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2696 }, { "epoch": 0.3667391895567038, "grad_norm": 0.4609375, "learning_rate": 1.5970034280487504e-05, "loss": 0.6794, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2697 }, { "epoch": 0.36687516997552355, "grad_norm": 0.41015625, "learning_rate": 1.5966224771202355e-05, "loss": 0.5661, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2698 }, { "epoch": 0.36701115039434323, "grad_norm": 0.408203125, "learning_rate": 1.59624139170572e-05, "loss": 0.5386, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2699 }, { "epoch": 0.3671471308131629, "grad_norm": 0.5234375, "learning_rate": 1.5958601718911048e-05, "loss": 0.8781, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2700 }, { "epoch": 0.3672831112319826, "grad_norm": 0.5625, "learning_rate": 1.5954788177623218e-05, "loss": 0.6095, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2701 }, { "epoch": 0.3674190916508023, "grad_norm": 0.33203125, "learning_rate": 1.5950973294053332e-05, "loss": 0.5425, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2702 }, { "epoch": 0.36755507206962196, "grad_norm": 0.4296875, "learning_rate": 1.594715706906131e-05, "loss": 0.597, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2703 }, { "epoch": 0.36769105248844164, "grad_norm": 0.494140625, "learning_rate": 1.5943339503507372e-05, "loss": 0.6323, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2704 }, { "epoch": 0.3678270329072614, "grad_norm": 0.37890625, "learning_rate": 1.5939520598252042e-05, "loss": 0.6242, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2705 }, { "epoch": 0.36796301332608106, "grad_norm": 0.65234375, "learning_rate": 1.5935700354156158e-05, "loss": 0.7591, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2706 }, { "epoch": 0.36809899374490074, "grad_norm": 0.2236328125, "learning_rate": 1.593187877208084e-05, "loss": 0.416, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2707 }, { "epoch": 0.3682349741637204, "grad_norm": 0.486328125, "learning_rate": 1.5928055852887525e-05, "loss": 0.5529, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2708 }, { "epoch": 0.3683709545825401, "grad_norm": 0.333984375, "learning_rate": 1.5924231597437946e-05, "loss": 0.7039, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2709 }, { "epoch": 0.3685069350013598, "grad_norm": 0.6484375, "learning_rate": 1.592040600659413e-05, "loss": 0.5706, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2710 }, { "epoch": 0.3686429154201795, "grad_norm": 0.412109375, "learning_rate": 1.591657908121842e-05, "loss": 0.6704, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2711 }, { "epoch": 0.3687788958389992, "grad_norm": 0.375, "learning_rate": 1.5912750822173446e-05, "loss": 0.7546, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2712 }, { "epoch": 0.3689148762578189, "grad_norm": 0.294921875, "learning_rate": 1.590892123032215e-05, "loss": 0.5669, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2713 }, { "epoch": 0.36905085667663856, "grad_norm": 0.3984375, "learning_rate": 1.5905090306527763e-05, "loss": 0.4676, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2714 }, { "epoch": 0.36918683709545824, "grad_norm": 0.330078125, "learning_rate": 1.5901258051653826e-05, "loss": 0.6615, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2715 }, { "epoch": 0.3693228175142779, "grad_norm": 0.349609375, "learning_rate": 1.5897424466564176e-05, "loss": 0.6366, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2716 }, { "epoch": 0.36945879793309766, "grad_norm": 0.423828125, "learning_rate": 1.589358955212295e-05, "loss": 0.7176, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2717 }, { "epoch": 0.36959477835191734, "grad_norm": 0.255859375, "learning_rate": 1.588975330919458e-05, "loss": 0.3519, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2718 }, { "epoch": 0.369730758770737, "grad_norm": 0.287109375, "learning_rate": 1.5885915738643807e-05, "loss": 0.5052, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2719 }, { "epoch": 0.3698667391895567, "grad_norm": 0.337890625, "learning_rate": 1.5882076841335664e-05, "loss": 0.6841, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2720 }, { "epoch": 0.3700027196083764, "grad_norm": 0.3984375, "learning_rate": 1.587823661813549e-05, "loss": 0.6639, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2721 }, { "epoch": 0.37013870002719607, "grad_norm": 0.310546875, "learning_rate": 1.587439506990891e-05, "loss": 0.6451, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2722 }, { "epoch": 0.37027468044601575, "grad_norm": 0.4765625, "learning_rate": 1.587055219752186e-05, "loss": 0.7825, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2723 }, { "epoch": 0.3704106608648355, "grad_norm": 0.294921875, "learning_rate": 1.5866708001840572e-05, "loss": 0.6424, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2724 }, { "epoch": 0.37054664128365516, "grad_norm": 0.4921875, "learning_rate": 1.5862862483731574e-05, "loss": 0.7075, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2725 }, { "epoch": 0.37068262170247485, "grad_norm": 0.431640625, "learning_rate": 1.5859015644061694e-05, "loss": 0.7773, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2726 }, { "epoch": 0.3708186021212945, "grad_norm": 0.41796875, "learning_rate": 1.5855167483698056e-05, "loss": 0.6624, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2727 }, { "epoch": 0.3709545825401142, "grad_norm": 0.54296875, "learning_rate": 1.585131800350808e-05, "loss": 0.6457, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2728 }, { "epoch": 0.3710905629589339, "grad_norm": 0.380859375, "learning_rate": 1.5847467204359482e-05, "loss": 0.5843, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2729 }, { "epoch": 0.3712265433777536, "grad_norm": 0.263671875, "learning_rate": 1.584361508712029e-05, "loss": 0.5452, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2730 }, { "epoch": 0.3713625237965733, "grad_norm": 0.69140625, "learning_rate": 1.5839761652658815e-05, "loss": 0.6134, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2731 }, { "epoch": 0.371498504215393, "grad_norm": 0.431640625, "learning_rate": 1.583590690184366e-05, "loss": 0.8175, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2732 }, { "epoch": 0.37163448463421267, "grad_norm": 0.58203125, "learning_rate": 1.5832050835543742e-05, "loss": 0.5243, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2733 }, { "epoch": 0.37177046505303235, "grad_norm": 0.37890625, "learning_rate": 1.582819345462826e-05, "loss": 0.5959, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2734 }, { "epoch": 0.37190644547185203, "grad_norm": 0.8359375, "learning_rate": 1.5824334759966725e-05, "loss": 0.7362, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2735 }, { "epoch": 0.37204242589067177, "grad_norm": 0.466796875, "learning_rate": 1.582047475242892e-05, "loss": 0.7212, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2736 }, { "epoch": 0.37217840630949145, "grad_norm": 0.30078125, "learning_rate": 1.5816613432884945e-05, "loss": 0.5247, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2737 }, { "epoch": 0.37231438672831113, "grad_norm": 0.50390625, "learning_rate": 1.5812750802205187e-05, "loss": 0.7292, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2738 }, { "epoch": 0.3724503671471308, "grad_norm": 0.65234375, "learning_rate": 1.580888686126033e-05, "loss": 0.5954, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2739 }, { "epoch": 0.3725863475659505, "grad_norm": 0.416015625, "learning_rate": 1.5805021610921357e-05, "loss": 0.7202, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2740 }, { "epoch": 0.3727223279847702, "grad_norm": 0.37109375, "learning_rate": 1.5801155052059543e-05, "loss": 0.6707, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2741 }, { "epoch": 0.37285830840358986, "grad_norm": 0.40625, "learning_rate": 1.579728718554645e-05, "loss": 0.549, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2742 }, { "epoch": 0.3729942888224096, "grad_norm": 0.474609375, "learning_rate": 1.579341801225395e-05, "loss": 0.6751, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2743 }, { "epoch": 0.3731302692412293, "grad_norm": 0.50390625, "learning_rate": 1.57895475330542e-05, "loss": 0.632, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2744 }, { "epoch": 0.37326624966004895, "grad_norm": 0.41015625, "learning_rate": 1.5785675748819652e-05, "loss": 0.6543, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2745 }, { "epoch": 0.37340223007886864, "grad_norm": 0.3828125, "learning_rate": 1.5781802660423055e-05, "loss": 0.6748, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2746 }, { "epoch": 0.3735382104976883, "grad_norm": 0.353515625, "learning_rate": 1.5777928268737442e-05, "loss": 0.7225, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2747 }, { "epoch": 0.373674190916508, "grad_norm": 0.68359375, "learning_rate": 1.5774052574636165e-05, "loss": 0.792, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2748 }, { "epoch": 0.37381017133532773, "grad_norm": 0.36328125, "learning_rate": 1.577017557899284e-05, "loss": 0.6462, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2749 }, { "epoch": 0.3739461517541474, "grad_norm": 0.6875, "learning_rate": 1.5766297282681393e-05, "loss": 0.7957, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2750 }, { "epoch": 0.3740821321729671, "grad_norm": 0.38671875, "learning_rate": 1.576241768657604e-05, "loss": 0.7246, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2751 }, { "epoch": 0.3742181125917868, "grad_norm": 0.37890625, "learning_rate": 1.5758536791551286e-05, "loss": 0.6642, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2752 }, { "epoch": 0.37435409301060646, "grad_norm": 0.3515625, "learning_rate": 1.5754654598481938e-05, "loss": 0.6519, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2753 }, { "epoch": 0.37449007342942614, "grad_norm": 0.53515625, "learning_rate": 1.5750771108243085e-05, "loss": 0.7554, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2754 }, { "epoch": 0.3746260538482459, "grad_norm": 0.26953125, "learning_rate": 1.574688632171012e-05, "loss": 0.5177, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2755 }, { "epoch": 0.37476203426706556, "grad_norm": 0.6875, "learning_rate": 1.5743000239758708e-05, "loss": 0.7083, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2756 }, { "epoch": 0.37489801468588524, "grad_norm": 0.66015625, "learning_rate": 1.573911286326483e-05, "loss": 0.752, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2757 }, { "epoch": 0.3750339951047049, "grad_norm": 0.345703125, "learning_rate": 1.573522419310475e-05, "loss": 0.6418, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2758 }, { "epoch": 0.3751699755235246, "grad_norm": 0.392578125, "learning_rate": 1.5731334230155012e-05, "loss": 0.7324, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2759 }, { "epoch": 0.3753059559423443, "grad_norm": 0.51171875, "learning_rate": 1.5727442975292468e-05, "loss": 0.5601, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2760 }, { "epoch": 0.375441936361164, "grad_norm": 0.33984375, "learning_rate": 1.5723550429394255e-05, "loss": 0.5544, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2761 }, { "epoch": 0.3755779167799837, "grad_norm": 0.310546875, "learning_rate": 1.57196565933378e-05, "loss": 0.6073, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2762 }, { "epoch": 0.3757138971988034, "grad_norm": 0.255859375, "learning_rate": 1.5715761468000817e-05, "loss": 0.5267, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2763 }, { "epoch": 0.37584987761762306, "grad_norm": 0.474609375, "learning_rate": 1.571186505426132e-05, "loss": 0.6419, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2764 }, { "epoch": 0.37598585803644274, "grad_norm": 0.431640625, "learning_rate": 1.5707967352997605e-05, "loss": 0.6359, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2765 }, { "epoch": 0.3761218384552624, "grad_norm": 0.78515625, "learning_rate": 1.570406836508826e-05, "loss": 0.8308, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2766 }, { "epoch": 0.3762578188740821, "grad_norm": 0.443359375, "learning_rate": 1.570016809141217e-05, "loss": 0.7744, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2767 }, { "epoch": 0.37639379929290184, "grad_norm": 0.443359375, "learning_rate": 1.56962665328485e-05, "loss": 0.7137, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2768 }, { "epoch": 0.3765297797117215, "grad_norm": 0.287109375, "learning_rate": 1.569236369027671e-05, "loss": 0.4831, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2769 }, { "epoch": 0.3766657601305412, "grad_norm": 0.55859375, "learning_rate": 1.568845956457655e-05, "loss": 0.5529, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2770 }, { "epoch": 0.3768017405493609, "grad_norm": 0.392578125, "learning_rate": 1.5684554156628057e-05, "loss": 0.5968, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2771 }, { "epoch": 0.37693772096818057, "grad_norm": 0.341796875, "learning_rate": 1.568064746731156e-05, "loss": 0.7044, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2772 }, { "epoch": 0.37707370138700025, "grad_norm": 0.349609375, "learning_rate": 1.5676739497507666e-05, "loss": 0.7349, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2773 }, { "epoch": 0.37720968180582, "grad_norm": 0.314453125, "learning_rate": 1.567283024809729e-05, "loss": 0.5596, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2774 }, { "epoch": 0.37734566222463967, "grad_norm": 0.62890625, "learning_rate": 1.5668919719961617e-05, "loss": 0.627, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2775 }, { "epoch": 0.37748164264345935, "grad_norm": 0.458984375, "learning_rate": 1.5665007913982132e-05, "loss": 0.6385, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2776 }, { "epoch": 0.37761762306227903, "grad_norm": 0.32421875, "learning_rate": 1.56610948310406e-05, "loss": 0.6159, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2777 }, { "epoch": 0.3777536034810987, "grad_norm": 0.3671875, "learning_rate": 1.5657180472019083e-05, "loss": 0.689, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2778 }, { "epoch": 0.3778895838999184, "grad_norm": 0.5546875, "learning_rate": 1.565326483779992e-05, "loss": 0.6722, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2779 }, { "epoch": 0.3780255643187381, "grad_norm": 0.41796875, "learning_rate": 1.5649347929265746e-05, "loss": 0.569, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2780 }, { "epoch": 0.3781615447375578, "grad_norm": 0.2734375, "learning_rate": 1.564542974729948e-05, "loss": 0.4167, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2781 }, { "epoch": 0.3782975251563775, "grad_norm": 0.4765625, "learning_rate": 1.5641510292784328e-05, "loss": 0.6797, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2782 }, { "epoch": 0.37843350557519717, "grad_norm": 0.3828125, "learning_rate": 1.563758956660378e-05, "loss": 0.7992, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2783 }, { "epoch": 0.37856948599401685, "grad_norm": 0.25390625, "learning_rate": 1.563366756964162e-05, "loss": 0.499, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2784 }, { "epoch": 0.37870546641283653, "grad_norm": 0.34765625, "learning_rate": 1.5629744302781913e-05, "loss": 0.665, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2785 }, { "epoch": 0.3788414468316562, "grad_norm": 0.33203125, "learning_rate": 1.5625819766909008e-05, "loss": 0.7259, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2786 }, { "epoch": 0.37897742725047595, "grad_norm": 0.55078125, "learning_rate": 1.5621893962907547e-05, "loss": 0.6278, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2787 }, { "epoch": 0.37911340766929563, "grad_norm": 0.369140625, "learning_rate": 1.561796689166245e-05, "loss": 0.6999, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2788 }, { "epoch": 0.3792493880881153, "grad_norm": 0.333984375, "learning_rate": 1.5614038554058936e-05, "loss": 0.5584, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2789 }, { "epoch": 0.379385368506935, "grad_norm": 0.5625, "learning_rate": 1.5610108950982494e-05, "loss": 0.7563, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2790 }, { "epoch": 0.3795213489257547, "grad_norm": 0.51171875, "learning_rate": 1.56061780833189e-05, "loss": 0.7319, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2791 }, { "epoch": 0.37965732934457436, "grad_norm": 0.330078125, "learning_rate": 1.5602245951954228e-05, "loss": 0.5832, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2792 }, { "epoch": 0.3797933097633941, "grad_norm": 0.314453125, "learning_rate": 1.5598312557774823e-05, "loss": 0.5213, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2793 }, { "epoch": 0.3799292901822138, "grad_norm": 0.345703125, "learning_rate": 1.5594377901667323e-05, "loss": 0.604, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2794 }, { "epoch": 0.38006527060103346, "grad_norm": 0.51171875, "learning_rate": 1.5590441984518644e-05, "loss": 0.3914, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2795 }, { "epoch": 0.38020125101985314, "grad_norm": 0.37109375, "learning_rate": 1.5586504807215997e-05, "loss": 0.641, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2796 }, { "epoch": 0.3803372314386728, "grad_norm": 0.30859375, "learning_rate": 1.558256637064686e-05, "loss": 0.491, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2797 }, { "epoch": 0.3804732118574925, "grad_norm": 0.423828125, "learning_rate": 1.5578626675699014e-05, "loss": 0.765, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2798 }, { "epoch": 0.38060919227631224, "grad_norm": 0.53515625, "learning_rate": 1.557468572326051e-05, "loss": 0.535, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2799 }, { "epoch": 0.3807451726951319, "grad_norm": 0.5078125, "learning_rate": 1.5570743514219686e-05, "loss": 0.7161, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2800 }, { "epoch": 0.3808811531139516, "grad_norm": 0.3203125, "learning_rate": 1.5566800049465167e-05, "loss": 0.6234, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2801 }, { "epoch": 0.3810171335327713, "grad_norm": 0.51953125, "learning_rate": 1.5562855329885858e-05, "loss": 0.7213, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2802 }, { "epoch": 0.38115311395159096, "grad_norm": 0.77734375, "learning_rate": 1.5558909356370944e-05, "loss": 0.7332, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2803 }, { "epoch": 0.38128909437041064, "grad_norm": 0.4609375, "learning_rate": 1.5554962129809895e-05, "loss": 0.6341, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2804 }, { "epoch": 0.3814250747892304, "grad_norm": 0.50390625, "learning_rate": 1.555101365109247e-05, "loss": 0.7352, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2805 }, { "epoch": 0.38156105520805006, "grad_norm": 0.380859375, "learning_rate": 1.55470639211087e-05, "loss": 0.6245, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2806 }, { "epoch": 0.38169703562686974, "grad_norm": 0.53515625, "learning_rate": 1.5543112940748906e-05, "loss": 0.742, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2807 }, { "epoch": 0.3818330160456894, "grad_norm": 0.451171875, "learning_rate": 1.553916071090368e-05, "loss": 0.5397, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2808 }, { "epoch": 0.3819689964645091, "grad_norm": 0.3671875, "learning_rate": 1.553520723246391e-05, "loss": 0.6782, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2809 }, { "epoch": 0.3821049768833288, "grad_norm": 0.8359375, "learning_rate": 1.5531252506320755e-05, "loss": 0.6127, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2810 }, { "epoch": 0.38224095730214847, "grad_norm": 0.2578125, "learning_rate": 1.552729653336566e-05, "loss": 0.437, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2811 }, { "epoch": 0.3823769377209682, "grad_norm": 0.255859375, "learning_rate": 1.5523339314490348e-05, "loss": 0.5055, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2812 }, { "epoch": 0.3825129181397879, "grad_norm": 0.279296875, "learning_rate": 1.5519380850586828e-05, "loss": 0.5667, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2813 }, { "epoch": 0.38264889855860756, "grad_norm": 0.69140625, "learning_rate": 1.5515421142547383e-05, "loss": 0.7012, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2814 }, { "epoch": 0.38278487897742725, "grad_norm": 0.423828125, "learning_rate": 1.551146019126458e-05, "loss": 0.7304, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2815 }, { "epoch": 0.3829208593962469, "grad_norm": 0.341796875, "learning_rate": 1.5507497997631267e-05, "loss": 0.5103, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2816 }, { "epoch": 0.3830568398150666, "grad_norm": 0.37109375, "learning_rate": 1.5503534562540572e-05, "loss": 0.6278, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2817 }, { "epoch": 0.38319282023388634, "grad_norm": 0.3671875, "learning_rate": 1.5499569886885897e-05, "loss": 0.6901, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2818 }, { "epoch": 0.383328800652706, "grad_norm": 0.61328125, "learning_rate": 1.5495603971560932e-05, "loss": 0.9051, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2819 }, { "epoch": 0.3834647810715257, "grad_norm": 0.33203125, "learning_rate": 1.549163681745964e-05, "loss": 0.7305, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2820 }, { "epoch": 0.3836007614903454, "grad_norm": 0.314453125, "learning_rate": 1.548766842547627e-05, "loss": 0.5329, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2821 }, { "epoch": 0.38373674190916507, "grad_norm": 0.58984375, "learning_rate": 1.5483698796505345e-05, "loss": 0.5754, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2822 }, { "epoch": 0.38387272232798475, "grad_norm": 0.625, "learning_rate": 1.547972793144167e-05, "loss": 0.7661, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2823 }, { "epoch": 0.3840087027468045, "grad_norm": 0.72265625, "learning_rate": 1.5475755831180318e-05, "loss": 0.5914, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2824 }, { "epoch": 0.38414468316562417, "grad_norm": 0.515625, "learning_rate": 1.5471782496616658e-05, "loss": 0.7695, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2825 }, { "epoch": 0.38428066358444385, "grad_norm": 1.078125, "learning_rate": 1.5467807928646324e-05, "loss": 0.5741, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2826 }, { "epoch": 0.38441664400326353, "grad_norm": 0.435546875, "learning_rate": 1.546383212816523e-05, "loss": 0.7007, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2827 }, { "epoch": 0.3845526244220832, "grad_norm": 0.5, "learning_rate": 1.5459855096069577e-05, "loss": 0.6608, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2828 }, { "epoch": 0.3846886048409029, "grad_norm": 0.35546875, "learning_rate": 1.545587683325583e-05, "loss": 0.5877, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2829 }, { "epoch": 0.3848245852597226, "grad_norm": 0.47265625, "learning_rate": 1.5451897340620746e-05, "loss": 0.7152, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2830 }, { "epoch": 0.3849605656785423, "grad_norm": 0.27734375, "learning_rate": 1.544791661906134e-05, "loss": 0.5171, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2831 }, { "epoch": 0.385096546097362, "grad_norm": 0.76171875, "learning_rate": 1.5443934669474922e-05, "loss": 0.5845, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2832 }, { "epoch": 0.3852325265161817, "grad_norm": 0.4140625, "learning_rate": 1.5439951492759076e-05, "loss": 0.7458, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2833 }, { "epoch": 0.38536850693500135, "grad_norm": 0.3984375, "learning_rate": 1.543596708981165e-05, "loss": 0.6822, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2834 }, { "epoch": 0.38550448735382103, "grad_norm": 0.5546875, "learning_rate": 1.5431981461530786e-05, "loss": 0.5365, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2835 }, { "epoch": 0.3856404677726407, "grad_norm": 0.392578125, "learning_rate": 1.5427994608814884e-05, "loss": 0.6619, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2836 }, { "epoch": 0.38577644819146045, "grad_norm": 0.3359375, "learning_rate": 1.5424006532562638e-05, "loss": 0.5345, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2837 }, { "epoch": 0.38591242861028013, "grad_norm": 0.29296875, "learning_rate": 1.5420017233673e-05, "loss": 0.4639, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2838 }, { "epoch": 0.3860484090290998, "grad_norm": 0.27734375, "learning_rate": 1.5416026713045217e-05, "loss": 0.4899, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2839 }, { "epoch": 0.3861843894479195, "grad_norm": 0.625, "learning_rate": 1.5412034971578792e-05, "loss": 0.5977, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2840 }, { "epoch": 0.3863203698667392, "grad_norm": 0.451171875, "learning_rate": 1.540804201017352e-05, "loss": 0.7557, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2841 }, { "epoch": 0.38645635028555886, "grad_norm": 0.35546875, "learning_rate": 1.540404782972946e-05, "loss": 0.6324, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2842 }, { "epoch": 0.3865923307043786, "grad_norm": 0.2412109375, "learning_rate": 1.5400052431146947e-05, "loss": 0.4434, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2843 }, { "epoch": 0.3867283111231983, "grad_norm": 0.60546875, "learning_rate": 1.5396055815326593e-05, "loss": 0.6156, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2844 }, { "epoch": 0.38686429154201796, "grad_norm": 0.392578125, "learning_rate": 1.539205798316929e-05, "loss": 0.7666, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2845 }, { "epoch": 0.38700027196083764, "grad_norm": 0.53515625, "learning_rate": 1.538805893557619e-05, "loss": 0.798, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2846 }, { "epoch": 0.3871362523796573, "grad_norm": 0.63671875, "learning_rate": 1.5384058673448733e-05, "loss": 0.786, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2847 }, { "epoch": 0.387272232798477, "grad_norm": 0.384765625, "learning_rate": 1.5380057197688624e-05, "loss": 0.7054, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2848 }, { "epoch": 0.3874082132172967, "grad_norm": 0.42578125, "learning_rate": 1.5376054509197845e-05, "loss": 0.6794, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2849 }, { "epoch": 0.3875441936361164, "grad_norm": 0.455078125, "learning_rate": 1.537205060887865e-05, "loss": 0.8424, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2850 }, { "epoch": 0.3876801740549361, "grad_norm": 0.345703125, "learning_rate": 1.5368045497633573e-05, "loss": 0.7103, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2851 }, { "epoch": 0.3878161544737558, "grad_norm": 0.3671875, "learning_rate": 1.536403917636541e-05, "loss": 0.5832, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2852 }, { "epoch": 0.38795213489257546, "grad_norm": 0.310546875, "learning_rate": 1.5360031645977236e-05, "loss": 0.6273, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2853 }, { "epoch": 0.38808811531139514, "grad_norm": 0.451171875, "learning_rate": 1.5356022907372393e-05, "loss": 0.6196, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2854 }, { "epoch": 0.3882240957302148, "grad_norm": 0.451171875, "learning_rate": 1.535201296145451e-05, "loss": 0.6683, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2855 }, { "epoch": 0.38836007614903456, "grad_norm": 0.474609375, "learning_rate": 1.5348001809127463e-05, "loss": 0.8644, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2856 }, { "epoch": 0.38849605656785424, "grad_norm": 0.35546875, "learning_rate": 1.5343989451295428e-05, "loss": 0.6172, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2857 }, { "epoch": 0.3886320369866739, "grad_norm": 0.455078125, "learning_rate": 1.5339975888862833e-05, "loss": 0.4321, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2858 }, { "epoch": 0.3887680174054936, "grad_norm": 0.26171875, "learning_rate": 1.533596112273439e-05, "loss": 0.5308, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2859 }, { "epoch": 0.3889039978243133, "grad_norm": 0.3828125, "learning_rate": 1.5331945153815065e-05, "loss": 0.7323, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2860 }, { "epoch": 0.38903997824313297, "grad_norm": 0.255859375, "learning_rate": 1.5327927983010118e-05, "loss": 0.5588, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2861 }, { "epoch": 0.3891759586619527, "grad_norm": 0.337890625, "learning_rate": 1.5323909611225062e-05, "loss": 0.5674, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2862 }, { "epoch": 0.3893119390807724, "grad_norm": 0.357421875, "learning_rate": 1.531989003936569e-05, "loss": 0.5996, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2863 }, { "epoch": 0.38944791949959207, "grad_norm": 0.8203125, "learning_rate": 1.5315869268338064e-05, "loss": 0.8621, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2864 }, { "epoch": 0.38958389991841175, "grad_norm": 0.287109375, "learning_rate": 1.531184729904851e-05, "loss": 0.6279, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2865 }, { "epoch": 0.38971988033723143, "grad_norm": 0.2265625, "learning_rate": 1.530782413240363e-05, "loss": 0.4801, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2866 }, { "epoch": 0.3898558607560511, "grad_norm": 0.345703125, "learning_rate": 1.53037997693103e-05, "loss": 0.617, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2867 }, { "epoch": 0.38999184117487085, "grad_norm": 0.287109375, "learning_rate": 1.529977421067566e-05, "loss": 0.6042, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2868 }, { "epoch": 0.3901278215936905, "grad_norm": 0.39453125, "learning_rate": 1.5295747457407114e-05, "loss": 0.7373, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2869 }, { "epoch": 0.3902638020125102, "grad_norm": 0.33984375, "learning_rate": 1.529171951041234e-05, "loss": 0.6453, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2870 }, { "epoch": 0.3903997824313299, "grad_norm": 0.51953125, "learning_rate": 1.52876903705993e-05, "loss": 0.577, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2871 }, { "epoch": 0.39053576285014957, "grad_norm": 0.41796875, "learning_rate": 1.5283660038876198e-05, "loss": 0.7435, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2872 }, { "epoch": 0.39067174326896925, "grad_norm": 0.333984375, "learning_rate": 1.5279628516151522e-05, "loss": 0.54, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2873 }, { "epoch": 0.39080772368778893, "grad_norm": 0.5, "learning_rate": 1.5275595803334036e-05, "loss": 0.5837, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2874 }, { "epoch": 0.39094370410660867, "grad_norm": 0.45703125, "learning_rate": 1.527156190133275e-05, "loss": 0.6595, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2875 }, { "epoch": 0.39107968452542835, "grad_norm": 0.47265625, "learning_rate": 1.5267526811056965e-05, "loss": 0.4061, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2876 }, { "epoch": 0.39121566494424803, "grad_norm": 0.34765625, "learning_rate": 1.5263490533416234e-05, "loss": 0.5885, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2877 }, { "epoch": 0.3913516453630677, "grad_norm": 0.400390625, "learning_rate": 1.5259453069320387e-05, "loss": 0.6484, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2878 }, { "epoch": 0.3914876257818874, "grad_norm": 0.3125, "learning_rate": 1.5255414419679512e-05, "loss": 0.6519, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2879 }, { "epoch": 0.3916236062007071, "grad_norm": 0.58203125, "learning_rate": 1.5251374585403977e-05, "loss": 0.8499, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2880 }, { "epoch": 0.3917595866195268, "grad_norm": 0.326171875, "learning_rate": 1.5247333567404407e-05, "loss": 0.6917, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2881 }, { "epoch": 0.3918955670383465, "grad_norm": 0.388671875, "learning_rate": 1.5243291366591693e-05, "loss": 0.6522, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2882 }, { "epoch": 0.3920315474571662, "grad_norm": 0.34375, "learning_rate": 1.5239247983877003e-05, "loss": 0.5412, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2883 }, { "epoch": 0.39216752787598586, "grad_norm": 0.55078125, "learning_rate": 1.523520342017176e-05, "loss": 0.576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2884 }, { "epoch": 0.39230350829480554, "grad_norm": 0.353515625, "learning_rate": 1.5231157676387662e-05, "loss": 0.6154, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2885 }, { "epoch": 0.3924394887136252, "grad_norm": 0.64453125, "learning_rate": 1.5227110753436666e-05, "loss": 0.7397, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2886 }, { "epoch": 0.39257546913244495, "grad_norm": 0.291015625, "learning_rate": 1.5223062652231e-05, "loss": 0.4572, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2887 }, { "epoch": 0.39271144955126464, "grad_norm": 0.3515625, "learning_rate": 1.5219013373683154e-05, "loss": 0.63, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2888 }, { "epoch": 0.3928474299700843, "grad_norm": 0.5390625, "learning_rate": 1.5214962918705884e-05, "loss": 0.7394, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2889 }, { "epoch": 0.392983410388904, "grad_norm": 0.2490234375, "learning_rate": 1.5210911288212215e-05, "loss": 0.4793, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2890 }, { "epoch": 0.3931193908077237, "grad_norm": 0.412109375, "learning_rate": 1.5206858483115431e-05, "loss": 0.6047, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2891 }, { "epoch": 0.39325537122654336, "grad_norm": 0.427734375, "learning_rate": 1.5202804504329089e-05, "loss": 0.6011, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2892 }, { "epoch": 0.39339135164536304, "grad_norm": 0.4140625, "learning_rate": 1.5198749352767e-05, "loss": 0.7746, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2893 }, { "epoch": 0.3935273320641828, "grad_norm": 0.251953125, "learning_rate": 1.5194693029343249e-05, "loss": 0.3908, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2894 }, { "epoch": 0.39366331248300246, "grad_norm": 0.62890625, "learning_rate": 1.5190635534972173e-05, "loss": 0.8187, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2895 }, { "epoch": 0.39379929290182214, "grad_norm": 0.330078125, "learning_rate": 1.518657687056839e-05, "loss": 0.5902, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2896 }, { "epoch": 0.3939352733206418, "grad_norm": 0.310546875, "learning_rate": 1.5182517037046767e-05, "loss": 0.637, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2897 }, { "epoch": 0.3940712537394615, "grad_norm": 0.6328125, "learning_rate": 1.5178456035322445e-05, "loss": 0.4933, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2898 }, { "epoch": 0.3942072341582812, "grad_norm": 0.546875, "learning_rate": 1.517439386631082e-05, "loss": 0.6421, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2899 }, { "epoch": 0.3943432145771009, "grad_norm": 0.390625, "learning_rate": 1.5170330530927553e-05, "loss": 0.6299, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2900 }, { "epoch": 0.3944791949959206, "grad_norm": 0.63671875, "learning_rate": 1.516626603008857e-05, "loss": 0.6479, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2901 }, { "epoch": 0.3946151754147403, "grad_norm": 0.287109375, "learning_rate": 1.5162200364710065e-05, "loss": 0.5446, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2902 }, { "epoch": 0.39475115583355996, "grad_norm": 0.470703125, "learning_rate": 1.515813353570848e-05, "loss": 0.6864, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2903 }, { "epoch": 0.39488713625237964, "grad_norm": 0.40234375, "learning_rate": 1.5154065544000536e-05, "loss": 0.6665, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2904 }, { "epoch": 0.3950231166711993, "grad_norm": 0.341796875, "learning_rate": 1.5149996390503202e-05, "loss": 0.5425, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2905 }, { "epoch": 0.39515909709001906, "grad_norm": 0.55859375, "learning_rate": 1.5145926076133716e-05, "loss": 0.7397, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2906 }, { "epoch": 0.39529507750883874, "grad_norm": 0.56640625, "learning_rate": 1.5141854601809583e-05, "loss": 0.6097, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2907 }, { "epoch": 0.3954310579276584, "grad_norm": 0.453125, "learning_rate": 1.5137781968448553e-05, "loss": 0.6382, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2908 }, { "epoch": 0.3955670383464781, "grad_norm": 0.33203125, "learning_rate": 1.5133708176968656e-05, "loss": 0.5864, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2909 }, { "epoch": 0.3957030187652978, "grad_norm": 0.5390625, "learning_rate": 1.5129633228288168e-05, "loss": 0.6488, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2910 }, { "epoch": 0.39583899918411747, "grad_norm": 1.1953125, "learning_rate": 1.5125557123325636e-05, "loss": 0.7477, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2911 }, { "epoch": 0.39597497960293715, "grad_norm": 0.326171875, "learning_rate": 1.5121479862999864e-05, "loss": 0.6369, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2912 }, { "epoch": 0.3961109600217569, "grad_norm": 0.4140625, "learning_rate": 1.5117401448229911e-05, "loss": 0.5926, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2913 }, { "epoch": 0.39624694044057657, "grad_norm": 0.35546875, "learning_rate": 1.511332187993511e-05, "loss": 0.6426, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2914 }, { "epoch": 0.39638292085939625, "grad_norm": 0.423828125, "learning_rate": 1.5109241159035042e-05, "loss": 0.8258, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2915 }, { "epoch": 0.39651890127821593, "grad_norm": 0.34375, "learning_rate": 1.5105159286449548e-05, "loss": 0.7085, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2916 }, { "epoch": 0.3966548816970356, "grad_norm": 0.34375, "learning_rate": 1.5101076263098738e-05, "loss": 0.6624, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2917 }, { "epoch": 0.3967908621158553, "grad_norm": 0.404296875, "learning_rate": 1.5096992089902969e-05, "loss": 0.6989, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2918 }, { "epoch": 0.39692684253467503, "grad_norm": 0.310546875, "learning_rate": 1.5092906767782868e-05, "loss": 0.4653, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2919 }, { "epoch": 0.3970628229534947, "grad_norm": 0.66796875, "learning_rate": 1.5088820297659314e-05, "loss": 0.5485, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2920 }, { "epoch": 0.3971988033723144, "grad_norm": 0.369140625, "learning_rate": 1.5084732680453451e-05, "loss": 0.7064, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2921 }, { "epoch": 0.39733478379113407, "grad_norm": 0.31640625, "learning_rate": 1.5080643917086675e-05, "loss": 0.5934, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2922 }, { "epoch": 0.39747076420995375, "grad_norm": 0.48046875, "learning_rate": 1.5076554008480641e-05, "loss": 0.8242, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2923 }, { "epoch": 0.39760674462877343, "grad_norm": 0.26953125, "learning_rate": 1.507246295555727e-05, "loss": 0.5749, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2924 }, { "epoch": 0.39774272504759317, "grad_norm": 0.70703125, "learning_rate": 1.506837075923873e-05, "loss": 0.8934, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2925 }, { "epoch": 0.39787870546641285, "grad_norm": 0.494140625, "learning_rate": 1.5064277420447458e-05, "loss": 0.4928, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2926 }, { "epoch": 0.39801468588523253, "grad_norm": 0.236328125, "learning_rate": 1.5060182940106138e-05, "loss": 0.4823, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2927 }, { "epoch": 0.3981506663040522, "grad_norm": 0.455078125, "learning_rate": 1.5056087319137716e-05, "loss": 0.5926, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2928 }, { "epoch": 0.3982866467228719, "grad_norm": 0.76171875, "learning_rate": 1.5051990558465397e-05, "loss": 0.7873, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2929 }, { "epoch": 0.3984226271416916, "grad_norm": 0.375, "learning_rate": 1.504789265901264e-05, "loss": 0.6271, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2930 }, { "epoch": 0.3985586075605113, "grad_norm": 0.298828125, "learning_rate": 1.504379362170316e-05, "loss": 0.5999, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2931 }, { "epoch": 0.398694587979331, "grad_norm": 0.306640625, "learning_rate": 1.5039693447460933e-05, "loss": 0.6229, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2932 }, { "epoch": 0.3988305683981507, "grad_norm": 0.359375, "learning_rate": 1.5035592137210188e-05, "loss": 0.5934, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2933 }, { "epoch": 0.39896654881697036, "grad_norm": 0.57421875, "learning_rate": 1.503148969187541e-05, "loss": 0.7352, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2934 }, { "epoch": 0.39910252923579004, "grad_norm": 0.333984375, "learning_rate": 1.502738611238134e-05, "loss": 0.6274, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2935 }, { "epoch": 0.3992385096546097, "grad_norm": 0.326171875, "learning_rate": 1.502328139965297e-05, "loss": 0.4159, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2936 }, { "epoch": 0.3993744900734294, "grad_norm": 0.1796875, "learning_rate": 1.5019175554615564e-05, "loss": 0.3363, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2937 }, { "epoch": 0.39951047049224914, "grad_norm": 0.69921875, "learning_rate": 1.501506857819462e-05, "loss": 0.6636, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2938 }, { "epoch": 0.3996464509110688, "grad_norm": 0.37890625, "learning_rate": 1.5010960471315905e-05, "loss": 0.813, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2939 }, { "epoch": 0.3997824313298885, "grad_norm": 0.392578125, "learning_rate": 1.5006851234905434e-05, "loss": 0.6592, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2940 }, { "epoch": 0.3999184117487082, "grad_norm": 0.412109375, "learning_rate": 1.5002740869889483e-05, "loss": 0.7186, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2941 }, { "epoch": 0.40005439216752786, "grad_norm": 0.453125, "learning_rate": 1.4998629377194576e-05, "loss": 0.6761, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2942 }, { "epoch": 0.40019037258634754, "grad_norm": 0.44140625, "learning_rate": 1.4994516757747491e-05, "loss": 0.6712, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2943 }, { "epoch": 0.4003263530051673, "grad_norm": 0.322265625, "learning_rate": 1.4990403012475265e-05, "loss": 0.5887, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2944 }, { "epoch": 0.40046233342398696, "grad_norm": 0.427734375, "learning_rate": 1.498628814230519e-05, "loss": 0.6826, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2945 }, { "epoch": 0.40059831384280664, "grad_norm": 0.498046875, "learning_rate": 1.4982172148164804e-05, "loss": 0.634, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2946 }, { "epoch": 0.4007342942616263, "grad_norm": 0.5, "learning_rate": 1.4978055030981902e-05, "loss": 0.708, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2947 }, { "epoch": 0.400870274680446, "grad_norm": 0.2392578125, "learning_rate": 1.4973936791684534e-05, "loss": 0.373, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2948 }, { "epoch": 0.4010062550992657, "grad_norm": 0.75, "learning_rate": 1.4969817431201002e-05, "loss": 0.6379, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2949 }, { "epoch": 0.4011422355180854, "grad_norm": 0.212890625, "learning_rate": 1.496569695045986e-05, "loss": 0.3936, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2950 }, { "epoch": 0.4012782159369051, "grad_norm": 0.408203125, "learning_rate": 1.4961575350389914e-05, "loss": 0.6401, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2951 }, { "epoch": 0.4014141963557248, "grad_norm": 0.64453125, "learning_rate": 1.4957452631920224e-05, "loss": 0.7842, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2952 }, { "epoch": 0.40155017677454447, "grad_norm": 0.41796875, "learning_rate": 1.49533287959801e-05, "loss": 0.6755, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2953 }, { "epoch": 0.40168615719336415, "grad_norm": 0.458984375, "learning_rate": 1.4949203843499108e-05, "loss": 0.5465, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2954 }, { "epoch": 0.4018221376121838, "grad_norm": 0.275390625, "learning_rate": 1.4945077775407058e-05, "loss": 0.4354, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2955 }, { "epoch": 0.4019581180310035, "grad_norm": 0.4296875, "learning_rate": 1.4940950592634018e-05, "loss": 0.5848, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2956 }, { "epoch": 0.40209409844982325, "grad_norm": 0.306640625, "learning_rate": 1.4936822296110307e-05, "loss": 0.5726, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2957 }, { "epoch": 0.4022300788686429, "grad_norm": 0.400390625, "learning_rate": 1.493269288676649e-05, "loss": 0.7619, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2958 }, { "epoch": 0.4023660592874626, "grad_norm": 0.337890625, "learning_rate": 1.4928562365533393e-05, "loss": 0.6125, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2959 }, { "epoch": 0.4025020397062823, "grad_norm": 0.421875, "learning_rate": 1.4924430733342079e-05, "loss": 0.6675, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2960 }, { "epoch": 0.40263802012510197, "grad_norm": 0.56640625, "learning_rate": 1.492029799112387e-05, "loss": 0.6558, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2961 }, { "epoch": 0.40277400054392165, "grad_norm": 0.37109375, "learning_rate": 1.4916164139810343e-05, "loss": 0.6364, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2962 }, { "epoch": 0.4029099809627414, "grad_norm": 0.3984375, "learning_rate": 1.491202918033331e-05, "loss": 0.6522, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2963 }, { "epoch": 0.40304596138156107, "grad_norm": 0.341796875, "learning_rate": 1.4907893113624844e-05, "loss": 0.5752, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2964 }, { "epoch": 0.40318194180038075, "grad_norm": 0.259765625, "learning_rate": 1.4903755940617269e-05, "loss": 0.465, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2965 }, { "epoch": 0.40331792221920043, "grad_norm": 0.251953125, "learning_rate": 1.4899617662243149e-05, "loss": 0.4082, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2966 }, { "epoch": 0.4034539026380201, "grad_norm": 1.015625, "learning_rate": 1.4895478279435305e-05, "loss": 0.9027, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2967 }, { "epoch": 0.4035898830568398, "grad_norm": 0.61328125, "learning_rate": 1.4891337793126804e-05, "loss": 0.6916, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2968 }, { "epoch": 0.40372586347565953, "grad_norm": 0.33203125, "learning_rate": 1.4887196204250965e-05, "loss": 0.7126, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2969 }, { "epoch": 0.4038618438944792, "grad_norm": 0.296875, "learning_rate": 1.4883053513741347e-05, "loss": 0.5296, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2970 }, { "epoch": 0.4039978243132989, "grad_norm": 0.26953125, "learning_rate": 1.4878909722531768e-05, "loss": 0.4725, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2971 }, { "epoch": 0.4041338047321186, "grad_norm": 0.4296875, "learning_rate": 1.4874764831556285e-05, "loss": 0.756, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2972 }, { "epoch": 0.40426978515093825, "grad_norm": 0.314453125, "learning_rate": 1.4870618841749214e-05, "loss": 0.605, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2973 }, { "epoch": 0.40440576556975794, "grad_norm": 0.37109375, "learning_rate": 1.4866471754045106e-05, "loss": 0.6493, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2974 }, { "epoch": 0.4045417459885777, "grad_norm": 0.66796875, "learning_rate": 1.4862323569378765e-05, "loss": 0.84, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2975 }, { "epoch": 0.40467772640739735, "grad_norm": 0.47265625, "learning_rate": 1.4858174288685247e-05, "loss": 0.6562, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2976 }, { "epoch": 0.40481370682621703, "grad_norm": 0.58984375, "learning_rate": 1.4854023912899847e-05, "loss": 0.7114, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2977 }, { "epoch": 0.4049496872450367, "grad_norm": 0.4921875, "learning_rate": 1.484987244295811e-05, "loss": 0.8411, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2978 }, { "epoch": 0.4050856676638564, "grad_norm": 0.32421875, "learning_rate": 1.484571987979583e-05, "loss": 0.5609, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2979 }, { "epoch": 0.4052216480826761, "grad_norm": 0.447265625, "learning_rate": 1.4841566224349047e-05, "loss": 0.564, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2980 }, { "epoch": 0.40535762850149576, "grad_norm": 0.2294921875, "learning_rate": 1.483741147755404e-05, "loss": 0.4705, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2981 }, { "epoch": 0.4054936089203155, "grad_norm": 0.578125, "learning_rate": 1.4833255640347346e-05, "loss": 0.8089, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2982 }, { "epoch": 0.4056295893391352, "grad_norm": 0.41015625, "learning_rate": 1.4829098713665739e-05, "loss": 0.7199, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2983 }, { "epoch": 0.40576556975795486, "grad_norm": 0.1767578125, "learning_rate": 1.482494069844624e-05, "loss": 0.3133, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2984 }, { "epoch": 0.40590155017677454, "grad_norm": 0.2490234375, "learning_rate": 1.4820781595626116e-05, "loss": 0.4285, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2985 }, { "epoch": 0.4060375305955942, "grad_norm": 0.49609375, "learning_rate": 1.4816621406142883e-05, "loss": 0.8223, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2986 }, { "epoch": 0.4061735110144139, "grad_norm": 0.310546875, "learning_rate": 1.4812460130934297e-05, "loss": 0.5744, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2987 }, { "epoch": 0.40630949143323364, "grad_norm": 0.48046875, "learning_rate": 1.480829777093836e-05, "loss": 0.9087, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2988 }, { "epoch": 0.4064454718520533, "grad_norm": 0.388671875, "learning_rate": 1.4804134327093318e-05, "loss": 0.6715, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2989 }, { "epoch": 0.406581452270873, "grad_norm": 0.5625, "learning_rate": 1.4799969800337663e-05, "loss": 0.5966, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2990 }, { "epoch": 0.4067174326896927, "grad_norm": 0.494140625, "learning_rate": 1.4795804191610131e-05, "loss": 0.7578, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2991 }, { "epoch": 0.40685341310851236, "grad_norm": 0.53515625, "learning_rate": 1.4791637501849698e-05, "loss": 0.5728, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2992 }, { "epoch": 0.40698939352733204, "grad_norm": 0.365234375, "learning_rate": 1.4787469731995592e-05, "loss": 0.6943, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2993 }, { "epoch": 0.4071253739461518, "grad_norm": 0.298828125, "learning_rate": 1.4783300882987275e-05, "loss": 0.4992, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2994 }, { "epoch": 0.40726135436497146, "grad_norm": 0.474609375, "learning_rate": 1.4779130955764458e-05, "loss": 0.5651, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2995 }, { "epoch": 0.40739733478379114, "grad_norm": 0.404296875, "learning_rate": 1.4774959951267093e-05, "loss": 0.5068, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2996 }, { "epoch": 0.4075333152026108, "grad_norm": 0.3671875, "learning_rate": 1.4770787870435377e-05, "loss": 0.614, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2997 }, { "epoch": 0.4076692956214305, "grad_norm": 0.275390625, "learning_rate": 1.4766614714209748e-05, "loss": 0.603, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2998 }, { "epoch": 0.4078052760402502, "grad_norm": 0.515625, "learning_rate": 1.4762440483530883e-05, "loss": 0.7064, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 2999 }, { "epoch": 0.40794125645906987, "grad_norm": 0.38671875, "learning_rate": 1.4758265179339707e-05, "loss": 0.5901, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3000 }, { "epoch": 0.4080772368778896, "grad_norm": 0.314453125, "learning_rate": 1.4754088802577387e-05, "loss": 0.6592, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3001 }, { "epoch": 0.4082132172967093, "grad_norm": 0.380859375, "learning_rate": 1.4749911354185327e-05, "loss": 0.8311, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3002 }, { "epoch": 0.40834919771552897, "grad_norm": 0.337890625, "learning_rate": 1.4745732835105173e-05, "loss": 0.5758, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3003 }, { "epoch": 0.40848517813434865, "grad_norm": 0.53125, "learning_rate": 1.4741553246278817e-05, "loss": 0.4665, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3004 }, { "epoch": 0.40862115855316833, "grad_norm": 0.458984375, "learning_rate": 1.4737372588648388e-05, "loss": 0.7134, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3005 }, { "epoch": 0.408757138971988, "grad_norm": 0.478515625, "learning_rate": 1.473319086315626e-05, "loss": 0.6997, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3006 }, { "epoch": 0.40889311939080775, "grad_norm": 0.53515625, "learning_rate": 1.4729008070745038e-05, "loss": 0.5828, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3007 }, { "epoch": 0.4090290998096274, "grad_norm": 0.3671875, "learning_rate": 1.4724824212357583e-05, "loss": 0.5422, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3008 }, { "epoch": 0.4091650802284471, "grad_norm": 0.27734375, "learning_rate": 1.4720639288936985e-05, "loss": 0.467, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3009 }, { "epoch": 0.4093010606472668, "grad_norm": 0.419921875, "learning_rate": 1.4716453301426576e-05, "loss": 0.6095, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3010 }, { "epoch": 0.40943704106608647, "grad_norm": 0.44140625, "learning_rate": 1.4712266250769932e-05, "loss": 0.6929, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3011 }, { "epoch": 0.40957302148490615, "grad_norm": 0.373046875, "learning_rate": 1.4708078137910857e-05, "loss": 0.6146, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3012 }, { "epoch": 0.4097090019037259, "grad_norm": 0.37890625, "learning_rate": 1.4703888963793414e-05, "loss": 0.6309, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3013 }, { "epoch": 0.40984498232254557, "grad_norm": 0.337890625, "learning_rate": 1.4699698729361884e-05, "loss": 0.6637, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3014 }, { "epoch": 0.40998096274136525, "grad_norm": 0.435546875, "learning_rate": 1.4695507435560804e-05, "loss": 0.7677, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3015 }, { "epoch": 0.41011694316018493, "grad_norm": 0.314453125, "learning_rate": 1.4691315083334939e-05, "loss": 0.4762, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3016 }, { "epoch": 0.4102529235790046, "grad_norm": 0.353515625, "learning_rate": 1.4687121673629299e-05, "loss": 0.695, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3017 }, { "epoch": 0.4103889039978243, "grad_norm": 0.44140625, "learning_rate": 1.4682927207389126e-05, "loss": 0.7949, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3018 }, { "epoch": 0.410524884416644, "grad_norm": 0.462890625, "learning_rate": 1.4678731685559911e-05, "loss": 0.6335, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3019 }, { "epoch": 0.4106608648354637, "grad_norm": 0.6328125, "learning_rate": 1.4674535109087366e-05, "loss": 0.7618, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3020 }, { "epoch": 0.4107968452542834, "grad_norm": 0.50390625, "learning_rate": 1.4670337478917462e-05, "loss": 0.4572, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3021 }, { "epoch": 0.4109328256731031, "grad_norm": 0.34375, "learning_rate": 1.4666138795996387e-05, "loss": 0.6328, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3022 }, { "epoch": 0.41106880609192276, "grad_norm": 0.2412109375, "learning_rate": 1.466193906127058e-05, "loss": 0.3234, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3023 }, { "epoch": 0.41120478651074244, "grad_norm": 0.32421875, "learning_rate": 1.4657738275686711e-05, "loss": 0.6079, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3024 }, { "epoch": 0.4113407669295621, "grad_norm": 0.4921875, "learning_rate": 1.4653536440191687e-05, "loss": 0.7049, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3025 }, { "epoch": 0.41147674734838185, "grad_norm": 0.314453125, "learning_rate": 1.4649333555732656e-05, "loss": 0.5518, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3026 }, { "epoch": 0.41161272776720154, "grad_norm": 0.72265625, "learning_rate": 1.4645129623257e-05, "loss": 0.7435, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3027 }, { "epoch": 0.4117487081860212, "grad_norm": 0.494140625, "learning_rate": 1.4640924643712336e-05, "loss": 0.8302, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3028 }, { "epoch": 0.4118846886048409, "grad_norm": 0.56640625, "learning_rate": 1.4636718618046512e-05, "loss": 0.6335, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3029 }, { "epoch": 0.4120206690236606, "grad_norm": 0.3359375, "learning_rate": 1.4632511547207629e-05, "loss": 0.6012, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3030 }, { "epoch": 0.41215664944248026, "grad_norm": 0.466796875, "learning_rate": 1.4628303432144002e-05, "loss": 0.8123, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3031 }, { "epoch": 0.4122926298613, "grad_norm": 0.48828125, "learning_rate": 1.4624094273804196e-05, "loss": 0.6232, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3032 }, { "epoch": 0.4124286102801197, "grad_norm": 0.26953125, "learning_rate": 1.4619884073137006e-05, "loss": 0.4746, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3033 }, { "epoch": 0.41256459069893936, "grad_norm": 0.546875, "learning_rate": 1.4615672831091463e-05, "loss": 0.7739, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3034 }, { "epoch": 0.41270057111775904, "grad_norm": 0.46484375, "learning_rate": 1.461146054861683e-05, "loss": 0.7041, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3035 }, { "epoch": 0.4128365515365787, "grad_norm": 0.443359375, "learning_rate": 1.4607247226662613e-05, "loss": 0.6941, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3036 }, { "epoch": 0.4129725319553984, "grad_norm": 0.470703125, "learning_rate": 1.460303286617854e-05, "loss": 0.7713, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3037 }, { "epoch": 0.41310851237421814, "grad_norm": 0.57421875, "learning_rate": 1.4598817468114583e-05, "loss": 0.626, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3038 }, { "epoch": 0.4132444927930378, "grad_norm": 0.3984375, "learning_rate": 1.4594601033420944e-05, "loss": 0.7622, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3039 }, { "epoch": 0.4133804732118575, "grad_norm": 0.275390625, "learning_rate": 1.4590383563048055e-05, "loss": 0.4816, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3040 }, { "epoch": 0.4135164536306772, "grad_norm": 0.271484375, "learning_rate": 1.4586165057946591e-05, "loss": 0.3843, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3041 }, { "epoch": 0.41365243404949686, "grad_norm": 0.2890625, "learning_rate": 1.4581945519067452e-05, "loss": 0.3758, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3042 }, { "epoch": 0.41378841446831655, "grad_norm": 0.515625, "learning_rate": 1.4577724947361774e-05, "loss": 0.583, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3043 }, { "epoch": 0.4139243948871362, "grad_norm": 0.5234375, "learning_rate": 1.4573503343780924e-05, "loss": 0.8211, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3044 }, { "epoch": 0.41406037530595596, "grad_norm": 0.23046875, "learning_rate": 1.4569280709276506e-05, "loss": 0.4341, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3045 }, { "epoch": 0.41419635572477564, "grad_norm": 0.33984375, "learning_rate": 1.456505704480035e-05, "loss": 0.6886, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3046 }, { "epoch": 0.4143323361435953, "grad_norm": 0.330078125, "learning_rate": 1.4560832351304523e-05, "loss": 0.3815, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3047 }, { "epoch": 0.414468316562415, "grad_norm": 0.232421875, "learning_rate": 1.4556606629741326e-05, "loss": 0.4043, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3048 }, { "epoch": 0.4146042969812347, "grad_norm": 0.37890625, "learning_rate": 1.4552379881063283e-05, "loss": 0.7518, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3049 }, { "epoch": 0.41474027740005437, "grad_norm": 0.291015625, "learning_rate": 1.4548152106223157e-05, "loss": 0.5369, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3050 }, { "epoch": 0.4148762578188741, "grad_norm": 0.5078125, "learning_rate": 1.454392330617394e-05, "loss": 0.5814, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3051 }, { "epoch": 0.4150122382376938, "grad_norm": 0.44140625, "learning_rate": 1.4539693481868858e-05, "loss": 0.7544, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3052 }, { "epoch": 0.41514821865651347, "grad_norm": 0.35546875, "learning_rate": 1.4535462634261359e-05, "loss": 0.6536, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3053 }, { "epoch": 0.41528419907533315, "grad_norm": 0.267578125, "learning_rate": 1.4531230764305133e-05, "loss": 0.6021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3054 }, { "epoch": 0.41542017949415283, "grad_norm": 0.302734375, "learning_rate": 1.4526997872954094e-05, "loss": 0.5824, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3055 }, { "epoch": 0.4155561599129725, "grad_norm": 0.828125, "learning_rate": 1.4522763961162385e-05, "loss": 0.7049, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3056 }, { "epoch": 0.41569214033179225, "grad_norm": 0.578125, "learning_rate": 1.4518529029884386e-05, "loss": 0.6701, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3057 }, { "epoch": 0.41582812075061193, "grad_norm": 0.28125, "learning_rate": 1.4514293080074697e-05, "loss": 0.5763, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3058 }, { "epoch": 0.4159641011694316, "grad_norm": 0.3359375, "learning_rate": 1.4510056112688159e-05, "loss": 0.591, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3059 }, { "epoch": 0.4161000815882513, "grad_norm": 0.423828125, "learning_rate": 1.450581812867983e-05, "loss": 0.8162, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3060 }, { "epoch": 0.416236062007071, "grad_norm": 0.70703125, "learning_rate": 1.4501579129005008e-05, "loss": 0.5923, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3061 }, { "epoch": 0.41637204242589065, "grad_norm": 0.34765625, "learning_rate": 1.4497339114619214e-05, "loss": 0.5651, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3062 }, { "epoch": 0.41650802284471033, "grad_norm": 0.234375, "learning_rate": 1.4493098086478196e-05, "loss": 0.4471, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3063 }, { "epoch": 0.41664400326353007, "grad_norm": 0.4140625, "learning_rate": 1.4488856045537937e-05, "loss": 0.6746, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3064 }, { "epoch": 0.41677998368234975, "grad_norm": 0.455078125, "learning_rate": 1.4484612992754647e-05, "loss": 0.6989, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3065 }, { "epoch": 0.41691596410116943, "grad_norm": 0.39453125, "learning_rate": 1.4480368929084758e-05, "loss": 0.6852, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3066 }, { "epoch": 0.4170519445199891, "grad_norm": 0.421875, "learning_rate": 1.4476123855484931e-05, "loss": 0.71, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3067 }, { "epoch": 0.4171879249388088, "grad_norm": 0.484375, "learning_rate": 1.4471877772912067e-05, "loss": 0.707, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3068 }, { "epoch": 0.4173239053576285, "grad_norm": 0.396484375, "learning_rate": 1.4467630682323276e-05, "loss": 0.7103, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3069 }, { "epoch": 0.4174598857764482, "grad_norm": 0.466796875, "learning_rate": 1.4463382584675908e-05, "loss": 0.6206, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3070 }, { "epoch": 0.4175958661952679, "grad_norm": 0.2734375, "learning_rate": 1.4459133480927539e-05, "loss": 0.4935, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3071 }, { "epoch": 0.4177318466140876, "grad_norm": 0.380859375, "learning_rate": 1.4454883372035964e-05, "loss": 0.7091, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3072 }, { "epoch": 0.41786782703290726, "grad_norm": 0.404296875, "learning_rate": 1.445063225895921e-05, "loss": 0.7796, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3073 }, { "epoch": 0.41800380745172694, "grad_norm": 0.33203125, "learning_rate": 1.4446380142655532e-05, "loss": 0.6226, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3074 }, { "epoch": 0.4181397878705466, "grad_norm": 0.412109375, "learning_rate": 1.4442127024083407e-05, "loss": 0.6877, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3075 }, { "epoch": 0.41827576828936636, "grad_norm": 0.400390625, "learning_rate": 1.4437872904201542e-05, "loss": 0.6973, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3076 }, { "epoch": 0.41841174870818604, "grad_norm": 0.341796875, "learning_rate": 1.4433617783968868e-05, "loss": 0.6323, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3077 }, { "epoch": 0.4185477291270057, "grad_norm": 0.2216796875, "learning_rate": 1.4429361664344538e-05, "loss": 0.4355, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3078 }, { "epoch": 0.4186837095458254, "grad_norm": 0.4296875, "learning_rate": 1.4425104546287934e-05, "loss": 0.7809, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3079 }, { "epoch": 0.4188196899646451, "grad_norm": 0.494140625, "learning_rate": 1.4420846430758666e-05, "loss": 0.6714, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3080 }, { "epoch": 0.41895567038346476, "grad_norm": 0.490234375, "learning_rate": 1.4416587318716561e-05, "loss": 0.7357, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3081 }, { "epoch": 0.41909165080228444, "grad_norm": 0.3046875, "learning_rate": 1.4412327211121678e-05, "loss": 0.5457, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3082 }, { "epoch": 0.4192276312211042, "grad_norm": 0.3671875, "learning_rate": 1.4408066108934296e-05, "loss": 0.6131, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3083 }, { "epoch": 0.41936361163992386, "grad_norm": 1.34375, "learning_rate": 1.4403804013114921e-05, "loss": 0.8003, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3084 }, { "epoch": 0.41949959205874354, "grad_norm": 0.63671875, "learning_rate": 1.4399540924624278e-05, "loss": 0.724, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3085 }, { "epoch": 0.4196355724775632, "grad_norm": 0.302734375, "learning_rate": 1.4395276844423322e-05, "loss": 0.6592, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3086 }, { "epoch": 0.4197715528963829, "grad_norm": 0.458984375, "learning_rate": 1.439101177347323e-05, "loss": 0.8862, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3087 }, { "epoch": 0.4199075333152026, "grad_norm": 0.36328125, "learning_rate": 1.4386745712735398e-05, "loss": 0.6701, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3088 }, { "epoch": 0.4200435137340223, "grad_norm": 0.515625, "learning_rate": 1.4382478663171449e-05, "loss": 0.8861, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3089 }, { "epoch": 0.420179494152842, "grad_norm": 0.40234375, "learning_rate": 1.4378210625743228e-05, "loss": 0.6484, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3090 }, { "epoch": 0.4203154745716617, "grad_norm": 0.349609375, "learning_rate": 1.4373941601412807e-05, "loss": 0.6969, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3091 }, { "epoch": 0.42045145499048137, "grad_norm": 0.69921875, "learning_rate": 1.4369671591142469e-05, "loss": 0.8148, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3092 }, { "epoch": 0.42058743540930105, "grad_norm": 0.2060546875, "learning_rate": 1.4365400595894732e-05, "loss": 0.3413, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3093 }, { "epoch": 0.42072341582812073, "grad_norm": 0.66796875, "learning_rate": 1.4361128616632329e-05, "loss": 0.8372, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3094 }, { "epoch": 0.42085939624694046, "grad_norm": 0.26953125, "learning_rate": 1.4356855654318215e-05, "loss": 0.6174, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3095 }, { "epoch": 0.42099537666576015, "grad_norm": 0.63671875, "learning_rate": 1.4352581709915572e-05, "loss": 0.646, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3096 }, { "epoch": 0.4211313570845798, "grad_norm": 0.44140625, "learning_rate": 1.4348306784387796e-05, "loss": 0.6401, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3097 }, { "epoch": 0.4212673375033995, "grad_norm": 0.39453125, "learning_rate": 1.4344030878698506e-05, "loss": 0.7096, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3098 }, { "epoch": 0.4214033179222192, "grad_norm": 0.84375, "learning_rate": 1.4339753993811545e-05, "loss": 0.7539, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3099 }, { "epoch": 0.42153929834103887, "grad_norm": 0.41015625, "learning_rate": 1.4335476130690978e-05, "loss": 0.6834, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3100 }, { "epoch": 0.4216752787598586, "grad_norm": 0.59375, "learning_rate": 1.4331197290301084e-05, "loss": 0.8639, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3101 }, { "epoch": 0.4218112591786783, "grad_norm": 0.75390625, "learning_rate": 1.4326917473606368e-05, "loss": 0.765, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3102 }, { "epoch": 0.42194723959749797, "grad_norm": 0.384765625, "learning_rate": 1.4322636681571552e-05, "loss": 0.6766, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3103 }, { "epoch": 0.42208322001631765, "grad_norm": 0.33984375, "learning_rate": 1.4318354915161578e-05, "loss": 0.6958, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3104 }, { "epoch": 0.42221920043513733, "grad_norm": 0.390625, "learning_rate": 1.4314072175341611e-05, "loss": 0.7438, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3105 }, { "epoch": 0.422355180853957, "grad_norm": 0.314453125, "learning_rate": 1.4309788463077033e-05, "loss": 0.5329, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3106 }, { "epoch": 0.4224911612727767, "grad_norm": 0.357421875, "learning_rate": 1.4305503779333441e-05, "loss": 0.7113, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3107 }, { "epoch": 0.42262714169159643, "grad_norm": 0.388671875, "learning_rate": 1.4301218125076656e-05, "loss": 0.6338, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3108 }, { "epoch": 0.4227631221104161, "grad_norm": 0.34375, "learning_rate": 1.4296931501272721e-05, "loss": 0.6549, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3109 }, { "epoch": 0.4228991025292358, "grad_norm": 0.4921875, "learning_rate": 1.4292643908887893e-05, "loss": 0.6248, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3110 }, { "epoch": 0.4230350829480555, "grad_norm": 0.39453125, "learning_rate": 1.4288355348888643e-05, "loss": 0.6214, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3111 }, { "epoch": 0.42317106336687516, "grad_norm": 0.349609375, "learning_rate": 1.4284065822241669e-05, "loss": 0.4924, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3112 }, { "epoch": 0.42330704378569484, "grad_norm": 0.423828125, "learning_rate": 1.4279775329913878e-05, "loss": 0.6886, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3113 }, { "epoch": 0.4234430242045146, "grad_norm": 0.267578125, "learning_rate": 1.4275483872872407e-05, "loss": 0.5378, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3114 }, { "epoch": 0.42357900462333425, "grad_norm": 0.470703125, "learning_rate": 1.4271191452084598e-05, "loss": 0.6966, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3115 }, { "epoch": 0.42371498504215394, "grad_norm": 0.9375, "learning_rate": 1.4266898068518013e-05, "loss": 0.9907, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3116 }, { "epoch": 0.4238509654609736, "grad_norm": 0.4609375, "learning_rate": 1.4262603723140438e-05, "loss": 0.4619, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3117 }, { "epoch": 0.4239869458797933, "grad_norm": 0.41015625, "learning_rate": 1.4258308416919868e-05, "loss": 0.5962, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3118 }, { "epoch": 0.424122926298613, "grad_norm": 0.59765625, "learning_rate": 1.4254012150824522e-05, "loss": 0.492, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3119 }, { "epoch": 0.4242589067174327, "grad_norm": 1.390625, "learning_rate": 1.4249714925822824e-05, "loss": 0.903, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3120 }, { "epoch": 0.4243948871362524, "grad_norm": 0.34765625, "learning_rate": 1.4245416742883422e-05, "loss": 0.5871, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3121 }, { "epoch": 0.4245308675550721, "grad_norm": 0.4609375, "learning_rate": 1.4241117602975186e-05, "loss": 0.4814, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3122 }, { "epoch": 0.42466684797389176, "grad_norm": 0.39453125, "learning_rate": 1.4236817507067189e-05, "loss": 0.7089, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3123 }, { "epoch": 0.42480282839271144, "grad_norm": 0.62890625, "learning_rate": 1.4232516456128725e-05, "loss": 0.7145, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3124 }, { "epoch": 0.4249388088115311, "grad_norm": 0.59765625, "learning_rate": 1.4228214451129306e-05, "loss": 0.9424, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3125 }, { "epoch": 0.4250747892303508, "grad_norm": 0.39453125, "learning_rate": 1.422391149303865e-05, "loss": 0.8651, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3126 }, { "epoch": 0.42521076964917054, "grad_norm": 0.30859375, "learning_rate": 1.421960758282671e-05, "loss": 0.6387, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3127 }, { "epoch": 0.4253467500679902, "grad_norm": 0.486328125, "learning_rate": 1.4215302721463624e-05, "loss": 0.5686, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3128 }, { "epoch": 0.4254827304868099, "grad_norm": 0.40625, "learning_rate": 1.4210996909919769e-05, "loss": 0.7134, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3129 }, { "epoch": 0.4256187109056296, "grad_norm": 0.54296875, "learning_rate": 1.4206690149165731e-05, "loss": 0.8548, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3130 }, { "epoch": 0.42575469132444926, "grad_norm": 0.55859375, "learning_rate": 1.42023824401723e-05, "loss": 0.6069, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3131 }, { "epoch": 0.42589067174326894, "grad_norm": 0.55078125, "learning_rate": 1.4198073783910488e-05, "loss": 0.5871, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3132 }, { "epoch": 0.4260266521620887, "grad_norm": 0.51171875, "learning_rate": 1.4193764181351519e-05, "loss": 0.5169, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3133 }, { "epoch": 0.42616263258090836, "grad_norm": 0.5234375, "learning_rate": 1.418945363346683e-05, "loss": 0.7511, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3134 }, { "epoch": 0.42629861299972804, "grad_norm": 0.29296875, "learning_rate": 1.4185142141228072e-05, "loss": 0.5177, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3135 }, { "epoch": 0.4264345934185477, "grad_norm": 0.37109375, "learning_rate": 1.418082970560711e-05, "loss": 0.5568, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3136 }, { "epoch": 0.4265705738373674, "grad_norm": 0.37890625, "learning_rate": 1.4176516327576014e-05, "loss": 0.6214, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3137 }, { "epoch": 0.4267065542561871, "grad_norm": 0.396484375, "learning_rate": 1.4172202008107076e-05, "loss": 0.5851, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3138 }, { "epoch": 0.4268425346750068, "grad_norm": 0.380859375, "learning_rate": 1.4167886748172797e-05, "loss": 0.6571, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3139 }, { "epoch": 0.4269785150938265, "grad_norm": 0.478515625, "learning_rate": 1.4163570548745886e-05, "loss": 0.6455, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3140 }, { "epoch": 0.4271144955126462, "grad_norm": 0.5390625, "learning_rate": 1.4159253410799272e-05, "loss": 0.5622, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3141 }, { "epoch": 0.42725047593146587, "grad_norm": 0.458984375, "learning_rate": 1.4154935335306084e-05, "loss": 0.8169, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3142 }, { "epoch": 0.42738645635028555, "grad_norm": 0.2373046875, "learning_rate": 1.4150616323239677e-05, "loss": 0.4801, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3143 }, { "epoch": 0.42752243676910523, "grad_norm": 0.326171875, "learning_rate": 1.41462963755736e-05, "loss": 0.5978, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3144 }, { "epoch": 0.42765841718792497, "grad_norm": 0.44140625, "learning_rate": 1.414197549328163e-05, "loss": 0.5617, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3145 }, { "epoch": 0.42779439760674465, "grad_norm": 0.294921875, "learning_rate": 1.4137653677337743e-05, "loss": 0.5027, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3146 }, { "epoch": 0.42793037802556433, "grad_norm": 0.412109375, "learning_rate": 1.4133330928716132e-05, "loss": 0.7606, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3147 }, { "epoch": 0.428066358444384, "grad_norm": 0.41796875, "learning_rate": 1.4129007248391193e-05, "loss": 0.5588, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3148 }, { "epoch": 0.4282023388632037, "grad_norm": 0.322265625, "learning_rate": 1.4124682637337538e-05, "loss": 0.4953, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3149 }, { "epoch": 0.42833831928202337, "grad_norm": 0.357421875, "learning_rate": 1.412035709652999e-05, "loss": 0.528, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3150 }, { "epoch": 0.42847429970084305, "grad_norm": 0.55078125, "learning_rate": 1.4116030626943576e-05, "loss": 0.6704, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3151 }, { "epoch": 0.4286102801196628, "grad_norm": 0.291015625, "learning_rate": 1.4111703229553536e-05, "loss": 0.6686, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3152 }, { "epoch": 0.42874626053848247, "grad_norm": 0.455078125, "learning_rate": 1.4107374905335316e-05, "loss": 0.5565, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3153 }, { "epoch": 0.42888224095730215, "grad_norm": 0.4140625, "learning_rate": 1.4103045655264576e-05, "loss": 0.7463, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3154 }, { "epoch": 0.42901822137612183, "grad_norm": 0.37890625, "learning_rate": 1.4098715480317182e-05, "loss": 0.6683, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3155 }, { "epoch": 0.4291542017949415, "grad_norm": 0.310546875, "learning_rate": 1.4094384381469209e-05, "loss": 0.6221, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3156 }, { "epoch": 0.4292901822137612, "grad_norm": 0.36328125, "learning_rate": 1.4090052359696935e-05, "loss": 0.5749, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3157 }, { "epoch": 0.42942616263258093, "grad_norm": 0.671875, "learning_rate": 1.4085719415976852e-05, "loss": 0.6336, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3158 }, { "epoch": 0.4295621430514006, "grad_norm": 0.40625, "learning_rate": 1.4081385551285664e-05, "loss": 0.5244, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3159 }, { "epoch": 0.4296981234702203, "grad_norm": 0.4921875, "learning_rate": 1.4077050766600273e-05, "loss": 0.6128, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3160 }, { "epoch": 0.42983410388904, "grad_norm": 0.4140625, "learning_rate": 1.4072715062897793e-05, "loss": 0.6994, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3161 }, { "epoch": 0.42997008430785966, "grad_norm": 0.4296875, "learning_rate": 1.4068378441155544e-05, "loss": 0.6561, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3162 }, { "epoch": 0.43010606472667934, "grad_norm": 0.470703125, "learning_rate": 1.4064040902351055e-05, "loss": 0.721, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3163 }, { "epoch": 0.4302420451454991, "grad_norm": 0.5234375, "learning_rate": 1.4059702447462059e-05, "loss": 0.5426, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3164 }, { "epoch": 0.43037802556431876, "grad_norm": 0.244140625, "learning_rate": 1.4055363077466497e-05, "loss": 0.4284, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3165 }, { "epoch": 0.43051400598313844, "grad_norm": 0.359375, "learning_rate": 1.4051022793342515e-05, "loss": 0.6978, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3166 }, { "epoch": 0.4306499864019581, "grad_norm": 0.296875, "learning_rate": 1.4046681596068468e-05, "loss": 0.5936, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3167 }, { "epoch": 0.4307859668207778, "grad_norm": 0.443359375, "learning_rate": 1.4042339486622913e-05, "loss": 0.597, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3168 }, { "epoch": 0.4309219472395975, "grad_norm": 0.44921875, "learning_rate": 1.4037996465984618e-05, "loss": 0.6214, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3169 }, { "epoch": 0.43105792765841716, "grad_norm": 0.455078125, "learning_rate": 1.4033652535132549e-05, "loss": 0.7832, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3170 }, { "epoch": 0.4311939080772369, "grad_norm": 0.357421875, "learning_rate": 1.4029307695045884e-05, "loss": 0.6999, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3171 }, { "epoch": 0.4313298884960566, "grad_norm": 0.484375, "learning_rate": 1.4024961946704e-05, "loss": 0.6995, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3172 }, { "epoch": 0.43146586891487626, "grad_norm": 0.4921875, "learning_rate": 1.4020615291086488e-05, "loss": 0.7965, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3173 }, { "epoch": 0.43160184933369594, "grad_norm": 0.328125, "learning_rate": 1.401626772917313e-05, "loss": 0.5656, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3174 }, { "epoch": 0.4317378297525156, "grad_norm": 0.275390625, "learning_rate": 1.4011919261943923e-05, "loss": 0.5827, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3175 }, { "epoch": 0.4318738101713353, "grad_norm": 0.349609375, "learning_rate": 1.4007569890379062e-05, "loss": 0.6029, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3176 }, { "epoch": 0.43200979059015504, "grad_norm": 1.1640625, "learning_rate": 1.4003219615458954e-05, "loss": 0.7808, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3177 }, { "epoch": 0.4321457710089747, "grad_norm": 0.6328125, "learning_rate": 1.3998868438164199e-05, "loss": 0.7796, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3178 }, { "epoch": 0.4322817514277944, "grad_norm": 0.421875, "learning_rate": 1.399451635947561e-05, "loss": 0.6943, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3179 }, { "epoch": 0.4324177318466141, "grad_norm": 0.443359375, "learning_rate": 1.3990163380374195e-05, "loss": 0.6929, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3180 }, { "epoch": 0.43255371226543377, "grad_norm": 2.9375, "learning_rate": 1.398580950184117e-05, "loss": 0.7948, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3181 }, { "epoch": 0.43268969268425345, "grad_norm": 0.26171875, "learning_rate": 1.3981454724857952e-05, "loss": 0.4744, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3182 }, { "epoch": 0.4328256731030732, "grad_norm": 0.279296875, "learning_rate": 1.3977099050406163e-05, "loss": 0.4585, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3183 }, { "epoch": 0.43296165352189286, "grad_norm": 0.470703125, "learning_rate": 1.3972742479467622e-05, "loss": 0.481, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3184 }, { "epoch": 0.43309763394071255, "grad_norm": 0.396484375, "learning_rate": 1.3968385013024355e-05, "loss": 0.6242, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3185 }, { "epoch": 0.4332336143595322, "grad_norm": 0.390625, "learning_rate": 1.3964026652058589e-05, "loss": 0.6969, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3186 }, { "epoch": 0.4333695947783519, "grad_norm": 0.333984375, "learning_rate": 1.395966739755275e-05, "loss": 0.6379, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3187 }, { "epoch": 0.4335055751971716, "grad_norm": 0.40234375, "learning_rate": 1.3955307250489466e-05, "loss": 0.7524, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3188 }, { "epoch": 0.43364155561599127, "grad_norm": 0.353515625, "learning_rate": 1.3950946211851574e-05, "loss": 0.6268, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3189 }, { "epoch": 0.433777536034811, "grad_norm": 0.3125, "learning_rate": 1.3946584282622102e-05, "loss": 0.686, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3190 }, { "epoch": 0.4339135164536307, "grad_norm": 0.7109375, "learning_rate": 1.394222146378428e-05, "loss": 0.9004, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3191 }, { "epoch": 0.43404949687245037, "grad_norm": 0.6953125, "learning_rate": 1.3937857756321544e-05, "loss": 0.6538, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3192 }, { "epoch": 0.43418547729127005, "grad_norm": 0.349609375, "learning_rate": 1.3933493161217521e-05, "loss": 0.6481, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3193 }, { "epoch": 0.43432145771008973, "grad_norm": 1.5, "learning_rate": 1.3929127679456051e-05, "loss": 0.7821, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3194 }, { "epoch": 0.4344574381289094, "grad_norm": 0.458984375, "learning_rate": 1.3924761312021165e-05, "loss": 0.9728, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3195 }, { "epoch": 0.43459341854772915, "grad_norm": 0.45703125, "learning_rate": 1.3920394059897093e-05, "loss": 0.6387, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3196 }, { "epoch": 0.43472939896654883, "grad_norm": 0.42578125, "learning_rate": 1.391602592406827e-05, "loss": 0.7362, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3197 }, { "epoch": 0.4348653793853685, "grad_norm": 0.515625, "learning_rate": 1.3911656905519326e-05, "loss": 0.7638, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3198 }, { "epoch": 0.4350013598041882, "grad_norm": 0.353515625, "learning_rate": 1.3907287005235088e-05, "loss": 0.6914, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3199 }, { "epoch": 0.4351373402230079, "grad_norm": 0.318359375, "learning_rate": 1.3902916224200593e-05, "loss": 0.4989, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3200 }, { "epoch": 0.43527332064182755, "grad_norm": 0.35546875, "learning_rate": 1.3898544563401062e-05, "loss": 0.7401, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3201 }, { "epoch": 0.4354093010606473, "grad_norm": 0.486328125, "learning_rate": 1.389417202382192e-05, "loss": 0.6637, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3202 }, { "epoch": 0.435545281479467, "grad_norm": 0.240234375, "learning_rate": 1.3889798606448797e-05, "loss": 0.4222, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3203 }, { "epoch": 0.43568126189828665, "grad_norm": 0.41796875, "learning_rate": 1.388542431226751e-05, "loss": 0.7694, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3204 }, { "epoch": 0.43581724231710633, "grad_norm": 0.34375, "learning_rate": 1.3881049142264078e-05, "loss": 0.6102, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3205 }, { "epoch": 0.435953222735926, "grad_norm": 0.416015625, "learning_rate": 1.387667309742472e-05, "loss": 0.6064, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3206 }, { "epoch": 0.4360892031547457, "grad_norm": 0.318359375, "learning_rate": 1.3872296178735851e-05, "loss": 0.5199, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3207 }, { "epoch": 0.43622518357356543, "grad_norm": 0.337890625, "learning_rate": 1.3867918387184079e-05, "loss": 0.6381, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3208 }, { "epoch": 0.4363611639923851, "grad_norm": 0.5625, "learning_rate": 1.3863539723756211e-05, "loss": 0.4092, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3209 }, { "epoch": 0.4364971444112048, "grad_norm": 0.72265625, "learning_rate": 1.3859160189439256e-05, "loss": 0.437, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3210 }, { "epoch": 0.4366331248300245, "grad_norm": 0.2373046875, "learning_rate": 1.3854779785220412e-05, "loss": 0.4232, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3211 }, { "epoch": 0.43676910524884416, "grad_norm": 0.36328125, "learning_rate": 1.3850398512087075e-05, "loss": 0.7031, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3212 }, { "epoch": 0.43690508566766384, "grad_norm": 0.421875, "learning_rate": 1.3846016371026838e-05, "loss": 0.5482, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3213 }, { "epoch": 0.4370410660864835, "grad_norm": 0.4375, "learning_rate": 1.384163336302749e-05, "loss": 0.7078, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3214 }, { "epoch": 0.43717704650530326, "grad_norm": 0.44921875, "learning_rate": 1.3837249489077014e-05, "loss": 0.7925, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3215 }, { "epoch": 0.43731302692412294, "grad_norm": 0.65234375, "learning_rate": 1.3832864750163585e-05, "loss": 0.6415, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3216 }, { "epoch": 0.4374490073429426, "grad_norm": 0.47265625, "learning_rate": 1.3828479147275584e-05, "loss": 0.8081, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3217 }, { "epoch": 0.4375849877617623, "grad_norm": 0.322265625, "learning_rate": 1.3824092681401574e-05, "loss": 0.6753, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3218 }, { "epoch": 0.437720968180582, "grad_norm": 1.734375, "learning_rate": 1.3819705353530322e-05, "loss": 0.7401, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3219 }, { "epoch": 0.43785694859940166, "grad_norm": 0.373046875, "learning_rate": 1.3815317164650782e-05, "loss": 0.6624, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3220 }, { "epoch": 0.4379929290182214, "grad_norm": 0.296875, "learning_rate": 1.3810928115752104e-05, "loss": 0.5413, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3221 }, { "epoch": 0.4381289094370411, "grad_norm": 0.365234375, "learning_rate": 1.3806538207823641e-05, "loss": 0.6089, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3222 }, { "epoch": 0.43826488985586076, "grad_norm": 0.455078125, "learning_rate": 1.3802147441854925e-05, "loss": 0.8076, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3223 }, { "epoch": 0.43840087027468044, "grad_norm": 0.31640625, "learning_rate": 1.379775581883569e-05, "loss": 0.6953, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3224 }, { "epoch": 0.4385368506935001, "grad_norm": 0.263671875, "learning_rate": 1.3793363339755863e-05, "loss": 0.4259, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3225 }, { "epoch": 0.4386728311123198, "grad_norm": 0.34375, "learning_rate": 1.378897000560556e-05, "loss": 0.658, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3226 }, { "epoch": 0.43880881153113954, "grad_norm": 0.4921875, "learning_rate": 1.3784575817375092e-05, "loss": 0.8607, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3227 }, { "epoch": 0.4389447919499592, "grad_norm": 0.32421875, "learning_rate": 1.3780180776054969e-05, "loss": 0.6738, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3228 }, { "epoch": 0.4390807723687789, "grad_norm": 0.58984375, "learning_rate": 1.3775784882635877e-05, "loss": 0.6865, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3229 }, { "epoch": 0.4392167527875986, "grad_norm": 0.396484375, "learning_rate": 1.3771388138108712e-05, "loss": 0.7373, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3230 }, { "epoch": 0.43935273320641827, "grad_norm": 0.57421875, "learning_rate": 1.3766990543464552e-05, "loss": 0.6479, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3231 }, { "epoch": 0.43948871362523795, "grad_norm": 0.37890625, "learning_rate": 1.3762592099694666e-05, "loss": 0.7673, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3232 }, { "epoch": 0.43962469404405763, "grad_norm": 0.390625, "learning_rate": 1.375819280779052e-05, "loss": 0.7567, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3233 }, { "epoch": 0.43976067446287737, "grad_norm": 0.56640625, "learning_rate": 1.3753792668743766e-05, "loss": 0.8455, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3234 }, { "epoch": 0.43989665488169705, "grad_norm": 0.3828125, "learning_rate": 1.3749391683546252e-05, "loss": 0.6899, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3235 }, { "epoch": 0.44003263530051673, "grad_norm": 0.462890625, "learning_rate": 1.3744989853190012e-05, "loss": 0.832, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3236 }, { "epoch": 0.4401686157193364, "grad_norm": 0.32421875, "learning_rate": 1.3740587178667273e-05, "loss": 0.6211, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3237 }, { "epoch": 0.4403045961381561, "grad_norm": 0.3828125, "learning_rate": 1.3736183660970449e-05, "loss": 0.5557, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3238 }, { "epoch": 0.44044057655697577, "grad_norm": 0.482421875, "learning_rate": 1.373177930109215e-05, "loss": 0.6654, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3239 }, { "epoch": 0.4405765569757955, "grad_norm": 0.462890625, "learning_rate": 1.3727374100025173e-05, "loss": 0.7804, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3240 }, { "epoch": 0.4407125373946152, "grad_norm": 0.56640625, "learning_rate": 1.37229680587625e-05, "loss": 0.7345, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3241 }, { "epoch": 0.44084851781343487, "grad_norm": 0.56640625, "learning_rate": 1.3718561178297311e-05, "loss": 0.6986, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3242 }, { "epoch": 0.44098449823225455, "grad_norm": 0.453125, "learning_rate": 1.371415345962297e-05, "loss": 0.8136, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3243 }, { "epoch": 0.44112047865107423, "grad_norm": 0.451171875, "learning_rate": 1.3709744903733027e-05, "loss": 0.8169, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3244 }, { "epoch": 0.4412564590698939, "grad_norm": 0.294921875, "learning_rate": 1.3705335511621229e-05, "loss": 0.5054, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3245 }, { "epoch": 0.44139243948871365, "grad_norm": 0.34375, "learning_rate": 1.3700925284281504e-05, "loss": 0.6803, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3246 }, { "epoch": 0.44152841990753333, "grad_norm": 0.28515625, "learning_rate": 1.3696514222707972e-05, "loss": 0.6169, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3247 }, { "epoch": 0.441664400326353, "grad_norm": 0.494140625, "learning_rate": 1.3692102327894942e-05, "loss": 0.7635, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3248 }, { "epoch": 0.4418003807451727, "grad_norm": 0.58203125, "learning_rate": 1.3687689600836906e-05, "loss": 0.9323, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3249 }, { "epoch": 0.4419363611639924, "grad_norm": 0.337890625, "learning_rate": 1.3683276042528548e-05, "loss": 0.6141, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3250 }, { "epoch": 0.44207234158281206, "grad_norm": 0.69921875, "learning_rate": 1.3678861653964738e-05, "loss": 0.6899, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3251 }, { "epoch": 0.44220832200163174, "grad_norm": 0.37890625, "learning_rate": 1.3674446436140534e-05, "loss": 0.6226, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3252 }, { "epoch": 0.4423443024204515, "grad_norm": 0.345703125, "learning_rate": 1.367003039005118e-05, "loss": 0.7049, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3253 }, { "epoch": 0.44248028283927116, "grad_norm": 0.2265625, "learning_rate": 1.3665613516692103e-05, "loss": 0.4149, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3254 }, { "epoch": 0.44261626325809084, "grad_norm": 0.80078125, "learning_rate": 1.3661195817058923e-05, "loss": 0.7856, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3255 }, { "epoch": 0.4427522436769105, "grad_norm": 0.2373046875, "learning_rate": 1.3656777292147445e-05, "loss": 0.445, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3256 }, { "epoch": 0.4428882240957302, "grad_norm": 0.609375, "learning_rate": 1.3652357942953658e-05, "loss": 0.7466, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3257 }, { "epoch": 0.4430242045145499, "grad_norm": 0.412109375, "learning_rate": 1.3647937770473739e-05, "loss": 0.66, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3258 }, { "epoch": 0.4431601849333696, "grad_norm": 0.447265625, "learning_rate": 1.3643516775704041e-05, "loss": 0.6603, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3259 }, { "epoch": 0.4432961653521893, "grad_norm": 0.361328125, "learning_rate": 1.3639094959641122e-05, "loss": 0.5946, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3260 }, { "epoch": 0.443432145771009, "grad_norm": 0.3515625, "learning_rate": 1.3634672323281702e-05, "loss": 0.5243, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3261 }, { "epoch": 0.44356812618982866, "grad_norm": 0.52734375, "learning_rate": 1.3630248867622705e-05, "loss": 0.5692, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3262 }, { "epoch": 0.44370410660864834, "grad_norm": 0.431640625, "learning_rate": 1.362582459366123e-05, "loss": 0.5273, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3263 }, { "epoch": 0.443840087027468, "grad_norm": 0.353515625, "learning_rate": 1.362139950239456e-05, "loss": 0.4847, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3264 }, { "epoch": 0.44397606744628776, "grad_norm": 0.73046875, "learning_rate": 1.361697359482017e-05, "loss": 0.5187, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3265 }, { "epoch": 0.44411204786510744, "grad_norm": 0.86328125, "learning_rate": 1.3612546871935705e-05, "loss": 0.7987, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3266 }, { "epoch": 0.4442480282839271, "grad_norm": 0.388671875, "learning_rate": 1.3608119334739011e-05, "loss": 0.6633, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3267 }, { "epoch": 0.4443840087027468, "grad_norm": 0.2294921875, "learning_rate": 1.3603690984228106e-05, "loss": 0.3774, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3268 }, { "epoch": 0.4445199891215665, "grad_norm": 0.4453125, "learning_rate": 1.3599261821401195e-05, "loss": 0.6763, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3269 }, { "epoch": 0.44465596954038616, "grad_norm": 0.39453125, "learning_rate": 1.3594831847256664e-05, "loss": 0.8634, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3270 }, { "epoch": 0.4447919499592059, "grad_norm": 0.283203125, "learning_rate": 1.3590401062793084e-05, "loss": 0.5978, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3271 }, { "epoch": 0.4449279303780256, "grad_norm": 0.470703125, "learning_rate": 1.3585969469009206e-05, "loss": 0.6686, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3272 }, { "epoch": 0.44506391079684526, "grad_norm": 0.34375, "learning_rate": 1.3581537066903966e-05, "loss": 0.6064, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3273 }, { "epoch": 0.44519989121566494, "grad_norm": 0.5078125, "learning_rate": 1.3577103857476488e-05, "loss": 0.8535, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3274 }, { "epoch": 0.4453358716344846, "grad_norm": 0.37890625, "learning_rate": 1.3572669841726062e-05, "loss": 0.71, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3275 }, { "epoch": 0.4454718520533043, "grad_norm": 0.578125, "learning_rate": 1.3568235020652178e-05, "loss": 0.7422, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3276 }, { "epoch": 0.445607832472124, "grad_norm": 0.5078125, "learning_rate": 1.3563799395254494e-05, "loss": 0.7619, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3277 }, { "epoch": 0.4457438128909437, "grad_norm": 0.3671875, "learning_rate": 1.355936296653285e-05, "loss": 0.6982, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3278 }, { "epoch": 0.4458797933097634, "grad_norm": 0.373046875, "learning_rate": 1.3554925735487281e-05, "loss": 0.6361, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3279 }, { "epoch": 0.4460157737285831, "grad_norm": 0.65625, "learning_rate": 1.3550487703117987e-05, "loss": 0.6579, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3280 }, { "epoch": 0.44615175414740277, "grad_norm": 0.30078125, "learning_rate": 1.3546048870425356e-05, "loss": 0.5132, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3281 }, { "epoch": 0.44628773456622245, "grad_norm": 0.35546875, "learning_rate": 1.3541609238409956e-05, "loss": 0.5277, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3282 }, { "epoch": 0.44642371498504213, "grad_norm": 0.74609375, "learning_rate": 1.3537168808072534e-05, "loss": 0.7974, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3283 }, { "epoch": 0.44655969540386187, "grad_norm": 0.55859375, "learning_rate": 1.3532727580414018e-05, "loss": 0.6515, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3284 }, { "epoch": 0.44669567582268155, "grad_norm": 0.2890625, "learning_rate": 1.3528285556435512e-05, "loss": 0.57, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3285 }, { "epoch": 0.44683165624150123, "grad_norm": 0.369140625, "learning_rate": 1.3523842737138306e-05, "loss": 0.6042, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3286 }, { "epoch": 0.4469676366603209, "grad_norm": 0.76171875, "learning_rate": 1.3519399123523862e-05, "loss": 0.5876, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3287 }, { "epoch": 0.4471036170791406, "grad_norm": 0.443359375, "learning_rate": 1.3514954716593828e-05, "loss": 0.7323, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3288 }, { "epoch": 0.4472395974979603, "grad_norm": 0.435546875, "learning_rate": 1.3510509517350028e-05, "loss": 0.576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3289 }, { "epoch": 0.44737557791678, "grad_norm": 0.515625, "learning_rate": 1.3506063526794462e-05, "loss": 0.738, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3290 }, { "epoch": 0.4475115583355997, "grad_norm": 0.337890625, "learning_rate": 1.3501616745929314e-05, "loss": 0.5824, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3291 }, { "epoch": 0.44764753875441937, "grad_norm": 0.60546875, "learning_rate": 1.3497169175756936e-05, "loss": 0.6276, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3292 }, { "epoch": 0.44778351917323905, "grad_norm": 0.44140625, "learning_rate": 1.3492720817279872e-05, "loss": 0.6252, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3293 }, { "epoch": 0.44791949959205873, "grad_norm": 0.462890625, "learning_rate": 1.3488271671500832e-05, "loss": 0.7464, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3294 }, { "epoch": 0.4480554800108784, "grad_norm": 0.45703125, "learning_rate": 1.348382173942271e-05, "loss": 0.8244, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3295 }, { "epoch": 0.4481914604296981, "grad_norm": 0.2578125, "learning_rate": 1.3479371022048577e-05, "loss": 0.382, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3296 }, { "epoch": 0.44832744084851783, "grad_norm": 0.28125, "learning_rate": 1.3474919520381673e-05, "loss": 0.5392, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3297 }, { "epoch": 0.4484634212673375, "grad_norm": 0.474609375, "learning_rate": 1.3470467235425426e-05, "loss": 0.9256, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3298 }, { "epoch": 0.4485994016861572, "grad_norm": 0.37890625, "learning_rate": 1.3466014168183434e-05, "loss": 0.6237, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3299 }, { "epoch": 0.4487353821049769, "grad_norm": 0.45703125, "learning_rate": 1.3461560319659472e-05, "loss": 0.6963, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3300 }, { "epoch": 0.44887136252379656, "grad_norm": 0.322265625, "learning_rate": 1.3457105690857494e-05, "loss": 0.5807, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3301 }, { "epoch": 0.44900734294261624, "grad_norm": 0.337890625, "learning_rate": 1.3452650282781625e-05, "loss": 0.606, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3302 }, { "epoch": 0.449143323361436, "grad_norm": 0.388671875, "learning_rate": 1.3448194096436171e-05, "loss": 0.6914, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3303 }, { "epoch": 0.44927930378025566, "grad_norm": 0.53515625, "learning_rate": 1.3443737132825609e-05, "loss": 0.5555, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3304 }, { "epoch": 0.44941528419907534, "grad_norm": 0.328125, "learning_rate": 1.3439279392954594e-05, "loss": 0.5967, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3305 }, { "epoch": 0.449551264617895, "grad_norm": 0.33984375, "learning_rate": 1.3434820877827957e-05, "loss": 0.5793, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3306 }, { "epoch": 0.4496872450367147, "grad_norm": 0.36328125, "learning_rate": 1.3430361588450696e-05, "loss": 0.6374, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3307 }, { "epoch": 0.4498232254555344, "grad_norm": 0.28515625, "learning_rate": 1.3425901525827993e-05, "loss": 0.5117, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3308 }, { "epoch": 0.4499592058743541, "grad_norm": 0.271484375, "learning_rate": 1.3421440690965203e-05, "loss": 0.432, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3309 }, { "epoch": 0.4500951862931738, "grad_norm": 0.27734375, "learning_rate": 1.3416979084867851e-05, "loss": 0.562, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3310 }, { "epoch": 0.4502311667119935, "grad_norm": 0.546875, "learning_rate": 1.3412516708541637e-05, "loss": 0.6413, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3311 }, { "epoch": 0.45036714713081316, "grad_norm": 0.8828125, "learning_rate": 1.3408053562992433e-05, "loss": 0.779, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3312 }, { "epoch": 0.45050312754963284, "grad_norm": 0.298828125, "learning_rate": 1.3403589649226292e-05, "loss": 0.5223, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3313 }, { "epoch": 0.4506391079684525, "grad_norm": 1.078125, "learning_rate": 1.3399124968249428e-05, "loss": 0.5498, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3314 }, { "epoch": 0.45077508838727226, "grad_norm": 0.59375, "learning_rate": 1.3394659521068239e-05, "loss": 0.6901, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3315 }, { "epoch": 0.45091106880609194, "grad_norm": 0.62890625, "learning_rate": 1.3390193308689293e-05, "loss": 0.8263, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3316 }, { "epoch": 0.4510470492249116, "grad_norm": 0.359375, "learning_rate": 1.3385726332119324e-05, "loss": 0.6478, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3317 }, { "epoch": 0.4511830296437313, "grad_norm": 0.474609375, "learning_rate": 1.3381258592365245e-05, "loss": 0.6605, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3318 }, { "epoch": 0.451319010062551, "grad_norm": 0.3359375, "learning_rate": 1.3376790090434141e-05, "loss": 0.6076, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3319 }, { "epoch": 0.45145499048137067, "grad_norm": 0.63671875, "learning_rate": 1.3372320827333267e-05, "loss": 0.6628, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3320 }, { "epoch": 0.45159097090019035, "grad_norm": 0.3203125, "learning_rate": 1.3367850804070046e-05, "loss": 0.7681, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3321 }, { "epoch": 0.4517269513190101, "grad_norm": 0.59375, "learning_rate": 1.336338002165208e-05, "loss": 0.3981, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3322 }, { "epoch": 0.45186293173782976, "grad_norm": 0.396484375, "learning_rate": 1.3358908481087133e-05, "loss": 0.7611, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3323 }, { "epoch": 0.45199891215664945, "grad_norm": 0.46875, "learning_rate": 1.3354436183383152e-05, "loss": 0.6867, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3324 }, { "epoch": 0.4521348925754691, "grad_norm": 0.87109375, "learning_rate": 1.3349963129548243e-05, "loss": 0.6686, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3325 }, { "epoch": 0.4522708729942888, "grad_norm": 0.388671875, "learning_rate": 1.3345489320590686e-05, "loss": 0.7262, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3326 }, { "epoch": 0.4524068534131085, "grad_norm": 0.52734375, "learning_rate": 1.3341014757518935e-05, "loss": 0.5101, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3327 }, { "epoch": 0.4525428338319282, "grad_norm": 0.3359375, "learning_rate": 1.333653944134161e-05, "loss": 0.5275, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3328 }, { "epoch": 0.4526788142507479, "grad_norm": 0.359375, "learning_rate": 1.33320633730675e-05, "loss": 0.494, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3329 }, { "epoch": 0.4528147946695676, "grad_norm": 0.388671875, "learning_rate": 1.332758655370557e-05, "loss": 0.6659, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3330 }, { "epoch": 0.45295077508838727, "grad_norm": 0.28515625, "learning_rate": 1.3323108984264948e-05, "loss": 0.5129, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3331 }, { "epoch": 0.45308675550720695, "grad_norm": 0.26171875, "learning_rate": 1.331863066575493e-05, "loss": 0.4533, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3332 }, { "epoch": 0.45322273592602663, "grad_norm": 0.466796875, "learning_rate": 1.3314151599184988e-05, "loss": 0.4948, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3333 }, { "epoch": 0.45335871634484637, "grad_norm": 0.265625, "learning_rate": 1.3309671785564757e-05, "loss": 0.4624, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3334 }, { "epoch": 0.45349469676366605, "grad_norm": 0.388671875, "learning_rate": 1.3305191225904039e-05, "loss": 0.6467, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3335 }, { "epoch": 0.45363067718248573, "grad_norm": 0.51953125, "learning_rate": 1.330070992121281e-05, "loss": 0.8799, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3336 }, { "epoch": 0.4537666576013054, "grad_norm": 0.412109375, "learning_rate": 1.329622787250121e-05, "loss": 0.5452, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3337 }, { "epoch": 0.4539026380201251, "grad_norm": 0.58203125, "learning_rate": 1.3291745080779545e-05, "loss": 0.5687, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3338 }, { "epoch": 0.4540386184389448, "grad_norm": 0.4296875, "learning_rate": 1.3287261547058298e-05, "loss": 0.6997, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3339 }, { "epoch": 0.45417459885776446, "grad_norm": 0.361328125, "learning_rate": 1.3282777272348106e-05, "loss": 0.756, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3340 }, { "epoch": 0.4543105792765842, "grad_norm": 0.5546875, "learning_rate": 1.3278292257659783e-05, "loss": 0.7824, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3341 }, { "epoch": 0.4544465596954039, "grad_norm": 0.37890625, "learning_rate": 1.3273806504004304e-05, "loss": 0.6743, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3342 }, { "epoch": 0.45458254011422355, "grad_norm": 0.3984375, "learning_rate": 1.326932001239281e-05, "loss": 0.5205, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3343 }, { "epoch": 0.45471852053304324, "grad_norm": 0.462890625, "learning_rate": 1.3264832783836618e-05, "loss": 0.8091, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3344 }, { "epoch": 0.4548545009518629, "grad_norm": 0.4453125, "learning_rate": 1.3260344819347199e-05, "loss": 0.832, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3345 }, { "epoch": 0.4549904813706826, "grad_norm": 0.443359375, "learning_rate": 1.3255856119936197e-05, "loss": 0.8051, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3346 }, { "epoch": 0.45512646178950233, "grad_norm": 0.326171875, "learning_rate": 1.3251366686615419e-05, "loss": 0.6535, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3347 }, { "epoch": 0.455262442208322, "grad_norm": 0.353515625, "learning_rate": 1.3246876520396837e-05, "loss": 0.6903, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3348 }, { "epoch": 0.4553984226271417, "grad_norm": 0.353515625, "learning_rate": 1.3242385622292593e-05, "loss": 0.6276, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3349 }, { "epoch": 0.4555344030459614, "grad_norm": 0.271484375, "learning_rate": 1.3237893993314988e-05, "loss": 0.5812, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3350 }, { "epoch": 0.45567038346478106, "grad_norm": 0.2734375, "learning_rate": 1.3233401634476487e-05, "loss": 0.5187, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3351 }, { "epoch": 0.45580636388360074, "grad_norm": 0.419921875, "learning_rate": 1.3228908546789729e-05, "loss": 0.6667, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3352 }, { "epoch": 0.4559423443024205, "grad_norm": 0.34375, "learning_rate": 1.3224414731267508e-05, "loss": 0.5986, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3353 }, { "epoch": 0.45607832472124016, "grad_norm": 0.50390625, "learning_rate": 1.321992018892278e-05, "loss": 0.5542, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3354 }, { "epoch": 0.45621430514005984, "grad_norm": 0.345703125, "learning_rate": 1.3215424920768678e-05, "loss": 0.5841, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3355 }, { "epoch": 0.4563502855588795, "grad_norm": 0.365234375, "learning_rate": 1.3210928927818487e-05, "loss": 0.6087, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3356 }, { "epoch": 0.4564862659776992, "grad_norm": 0.28125, "learning_rate": 1.3206432211085657e-05, "loss": 0.6559, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3357 }, { "epoch": 0.4566222463965189, "grad_norm": 0.330078125, "learning_rate": 1.3201934771583807e-05, "loss": 0.6193, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3358 }, { "epoch": 0.45675822681533856, "grad_norm": 0.392578125, "learning_rate": 1.319743661032671e-05, "loss": 0.5288, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3359 }, { "epoch": 0.4568942072341583, "grad_norm": 0.3203125, "learning_rate": 1.319293772832831e-05, "loss": 0.6074, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3360 }, { "epoch": 0.457030187652978, "grad_norm": 0.390625, "learning_rate": 1.3188438126602709e-05, "loss": 0.6575, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3361 }, { "epoch": 0.45716616807179766, "grad_norm": 0.2294921875, "learning_rate": 1.3183937806164174e-05, "loss": 0.3724, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3362 }, { "epoch": 0.45730214849061734, "grad_norm": 1.453125, "learning_rate": 1.3179436768027128e-05, "loss": 0.8695, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3363 }, { "epoch": 0.457438128909437, "grad_norm": 0.39453125, "learning_rate": 1.317493501320616e-05, "loss": 0.6305, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3364 }, { "epoch": 0.4575741093282567, "grad_norm": 0.380859375, "learning_rate": 1.3170432542716028e-05, "loss": 0.6968, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3365 }, { "epoch": 0.45771008974707644, "grad_norm": 0.3515625, "learning_rate": 1.3165929357571636e-05, "loss": 0.6362, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3366 }, { "epoch": 0.4578460701658961, "grad_norm": 0.3828125, "learning_rate": 1.316142545878806e-05, "loss": 0.6834, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3367 }, { "epoch": 0.4579820505847158, "grad_norm": 0.375, "learning_rate": 1.3156920847380535e-05, "loss": 0.541, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3368 }, { "epoch": 0.4581180310035355, "grad_norm": 0.58203125, "learning_rate": 1.3152415524364454e-05, "loss": 0.7355, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3369 }, { "epoch": 0.45825401142235517, "grad_norm": 0.59375, "learning_rate": 1.314790949075537e-05, "loss": 0.7855, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3370 }, { "epoch": 0.45838999184117485, "grad_norm": 0.470703125, "learning_rate": 1.3143402747569003e-05, "loss": 0.849, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3371 }, { "epoch": 0.4585259722599946, "grad_norm": 0.40625, "learning_rate": 1.3138895295821224e-05, "loss": 0.7869, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3372 }, { "epoch": 0.45866195267881427, "grad_norm": 1.8828125, "learning_rate": 1.3134387136528069e-05, "loss": 0.91, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3373 }, { "epoch": 0.45879793309763395, "grad_norm": 0.310546875, "learning_rate": 1.3129878270705732e-05, "loss": 0.5207, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3374 }, { "epoch": 0.45893391351645363, "grad_norm": 0.345703125, "learning_rate": 1.3125368699370567e-05, "loss": 0.569, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3375 }, { "epoch": 0.4590698939352733, "grad_norm": 0.349609375, "learning_rate": 1.3120858423539086e-05, "loss": 0.7427, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3376 }, { "epoch": 0.459205874354093, "grad_norm": 0.31640625, "learning_rate": 1.3116347444227961e-05, "loss": 0.5851, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3377 }, { "epoch": 0.4593418547729127, "grad_norm": 0.263671875, "learning_rate": 1.311183576245402e-05, "loss": 0.6541, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3378 }, { "epoch": 0.4594778351917324, "grad_norm": 0.35546875, "learning_rate": 1.3107323379234254e-05, "loss": 0.5882, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3379 }, { "epoch": 0.4596138156105521, "grad_norm": 0.43359375, "learning_rate": 1.3102810295585808e-05, "loss": 0.5355, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3380 }, { "epoch": 0.45974979602937177, "grad_norm": 0.498046875, "learning_rate": 1.3098296512525986e-05, "loss": 0.7643, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3381 }, { "epoch": 0.45988577644819145, "grad_norm": 0.42578125, "learning_rate": 1.3093782031072249e-05, "loss": 0.5396, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3382 }, { "epoch": 0.46002175686701113, "grad_norm": 0.349609375, "learning_rate": 1.3089266852242217e-05, "loss": 0.7212, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3383 }, { "epoch": 0.4601577372858308, "grad_norm": 0.55078125, "learning_rate": 1.3084750977053669e-05, "loss": 0.5926, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3384 }, { "epoch": 0.46029371770465055, "grad_norm": 0.455078125, "learning_rate": 1.3080234406524534e-05, "loss": 0.7699, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3385 }, { "epoch": 0.46042969812347023, "grad_norm": 0.43359375, "learning_rate": 1.3075717141672905e-05, "loss": 0.7285, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3386 }, { "epoch": 0.4605656785422899, "grad_norm": 0.384765625, "learning_rate": 1.3071199183517028e-05, "loss": 0.626, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3387 }, { "epoch": 0.4607016589611096, "grad_norm": 0.318359375, "learning_rate": 1.3066680533075312e-05, "loss": 0.6533, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3388 }, { "epoch": 0.4608376393799293, "grad_norm": 0.373046875, "learning_rate": 1.3062161191366303e-05, "loss": 0.6657, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3389 }, { "epoch": 0.46097361979874896, "grad_norm": 0.376953125, "learning_rate": 1.3057641159408726e-05, "loss": 0.6825, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3390 }, { "epoch": 0.4611096002175687, "grad_norm": 0.490234375, "learning_rate": 1.3053120438221449e-05, "loss": 0.7477, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3391 }, { "epoch": 0.4612455806363884, "grad_norm": 0.5703125, "learning_rate": 1.30485990288235e-05, "loss": 0.7694, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3392 }, { "epoch": 0.46138156105520806, "grad_norm": 0.44140625, "learning_rate": 1.3044076932234054e-05, "loss": 0.59, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3393 }, { "epoch": 0.46151754147402774, "grad_norm": 0.330078125, "learning_rate": 1.3039554149472449e-05, "loss": 0.6675, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3394 }, { "epoch": 0.4616535218928474, "grad_norm": 0.26171875, "learning_rate": 1.3035030681558178e-05, "loss": 0.4139, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3395 }, { "epoch": 0.4617895023116671, "grad_norm": 0.408203125, "learning_rate": 1.3030506529510887e-05, "loss": 0.7949, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3396 }, { "epoch": 0.46192548273048684, "grad_norm": 0.68359375, "learning_rate": 1.3025981694350371e-05, "loss": 0.7352, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3397 }, { "epoch": 0.4620614631493065, "grad_norm": 0.3828125, "learning_rate": 1.3021456177096585e-05, "loss": 0.7213, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3398 }, { "epoch": 0.4621974435681262, "grad_norm": 0.26953125, "learning_rate": 1.3016929978769637e-05, "loss": 0.542, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3399 }, { "epoch": 0.4623334239869459, "grad_norm": 0.671875, "learning_rate": 1.3012403100389785e-05, "loss": 0.6346, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3400 }, { "epoch": 0.46246940440576556, "grad_norm": 0.578125, "learning_rate": 1.3007875542977448e-05, "loss": 0.5745, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3401 }, { "epoch": 0.46260538482458524, "grad_norm": 0.5859375, "learning_rate": 1.3003347307553187e-05, "loss": 0.5815, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3402 }, { "epoch": 0.4627413652434049, "grad_norm": 0.32421875, "learning_rate": 1.2998818395137725e-05, "loss": 0.687, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3403 }, { "epoch": 0.46287734566222466, "grad_norm": 0.33203125, "learning_rate": 1.2994288806751932e-05, "loss": 0.6507, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3404 }, { "epoch": 0.46301332608104434, "grad_norm": 0.3046875, "learning_rate": 1.2989758543416832e-05, "loss": 0.576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3405 }, { "epoch": 0.463149306499864, "grad_norm": 0.5078125, "learning_rate": 1.2985227606153604e-05, "loss": 0.5977, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3406 }, { "epoch": 0.4632852869186837, "grad_norm": 0.734375, "learning_rate": 1.2980695995983576e-05, "loss": 0.7155, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3407 }, { "epoch": 0.4634212673375034, "grad_norm": 0.5078125, "learning_rate": 1.297616371392823e-05, "loss": 0.9053, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3408 }, { "epoch": 0.46355724775632307, "grad_norm": 0.486328125, "learning_rate": 1.2971630761009193e-05, "loss": 0.9463, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3409 }, { "epoch": 0.4636932281751428, "grad_norm": 0.46484375, "learning_rate": 1.2967097138248256e-05, "loss": 0.7352, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3410 }, { "epoch": 0.4638292085939625, "grad_norm": 0.3125, "learning_rate": 1.2962562846667344e-05, "loss": 0.6576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3411 }, { "epoch": 0.46396518901278216, "grad_norm": 0.341796875, "learning_rate": 1.2958027887288546e-05, "loss": 0.5643, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3412 }, { "epoch": 0.46410116943160185, "grad_norm": 0.369140625, "learning_rate": 1.2953492261134097e-05, "loss": 0.7072, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3413 }, { "epoch": 0.4642371498504215, "grad_norm": 0.390625, "learning_rate": 1.2948955969226384e-05, "loss": 0.6997, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3414 }, { "epoch": 0.4643731302692412, "grad_norm": 0.482421875, "learning_rate": 1.2944419012587941e-05, "loss": 0.7284, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3415 }, { "epoch": 0.46450911068806094, "grad_norm": 0.423828125, "learning_rate": 1.2939881392241453e-05, "loss": 0.7165, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3416 }, { "epoch": 0.4646450911068806, "grad_norm": 0.44140625, "learning_rate": 1.293534310920976e-05, "loss": 0.5685, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3417 }, { "epoch": 0.4647810715257003, "grad_norm": 0.322265625, "learning_rate": 1.2930804164515837e-05, "loss": 0.3879, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3418 }, { "epoch": 0.46491705194452, "grad_norm": 0.61328125, "learning_rate": 1.2926264559182825e-05, "loss": 0.7778, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3419 }, { "epoch": 0.46505303236333967, "grad_norm": 1.1484375, "learning_rate": 1.2921724294234005e-05, "loss": 0.8136, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3420 }, { "epoch": 0.46518901278215935, "grad_norm": 0.43359375, "learning_rate": 1.291718337069281e-05, "loss": 0.6374, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3421 }, { "epoch": 0.46532499320097903, "grad_norm": 0.384765625, "learning_rate": 1.2912641789582813e-05, "loss": 0.724, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3422 }, { "epoch": 0.46546097361979877, "grad_norm": 0.41015625, "learning_rate": 1.290809955192775e-05, "loss": 0.6989, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3423 }, { "epoch": 0.46559695403861845, "grad_norm": 0.57421875, "learning_rate": 1.2903556658751493e-05, "loss": 0.5936, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3424 }, { "epoch": 0.46573293445743813, "grad_norm": 0.6640625, "learning_rate": 1.2899013111078065e-05, "loss": 0.8094, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3425 }, { "epoch": 0.4658689148762578, "grad_norm": 0.44921875, "learning_rate": 1.2894468909931636e-05, "loss": 0.765, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3426 }, { "epoch": 0.4660048952950775, "grad_norm": 1.0234375, "learning_rate": 1.2889924056336531e-05, "loss": 0.6065, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3427 }, { "epoch": 0.4661408757138972, "grad_norm": 0.578125, "learning_rate": 1.288537855131721e-05, "loss": 0.5422, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3428 }, { "epoch": 0.4662768561327169, "grad_norm": 0.6796875, "learning_rate": 1.2880832395898285e-05, "loss": 0.733, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3429 }, { "epoch": 0.4664128365515366, "grad_norm": 0.365234375, "learning_rate": 1.2876285591104519e-05, "loss": 0.5693, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3430 }, { "epoch": 0.4665488169703563, "grad_norm": 0.7890625, "learning_rate": 1.2871738137960811e-05, "loss": 0.6823, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3431 }, { "epoch": 0.46668479738917595, "grad_norm": 0.32421875, "learning_rate": 1.2867190037492221e-05, "loss": 0.6053, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3432 }, { "epoch": 0.46682077780799563, "grad_norm": 0.306640625, "learning_rate": 1.286264129072394e-05, "loss": 0.5692, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3433 }, { "epoch": 0.4669567582268153, "grad_norm": 0.640625, "learning_rate": 1.2858091898681313e-05, "loss": 0.6858, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3434 }, { "epoch": 0.46709273864563505, "grad_norm": 0.29296875, "learning_rate": 1.285354186238983e-05, "loss": 0.4582, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3435 }, { "epoch": 0.46722871906445473, "grad_norm": 0.55859375, "learning_rate": 1.2848991182875124e-05, "loss": 0.5817, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3436 }, { "epoch": 0.4673646994832744, "grad_norm": 0.32421875, "learning_rate": 1.2844439861162972e-05, "loss": 0.6484, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3437 }, { "epoch": 0.4675006799020941, "grad_norm": 0.7734375, "learning_rate": 1.2839887898279298e-05, "loss": 0.7653, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3438 }, { "epoch": 0.4676366603209138, "grad_norm": 0.390625, "learning_rate": 1.2835335295250172e-05, "loss": 0.674, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3439 }, { "epoch": 0.46777264073973346, "grad_norm": 0.3359375, "learning_rate": 1.2830782053101807e-05, "loss": 0.6182, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3440 }, { "epoch": 0.4679086211585532, "grad_norm": 0.30078125, "learning_rate": 1.2826228172860555e-05, "loss": 0.6004, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3441 }, { "epoch": 0.4680446015773729, "grad_norm": 0.427734375, "learning_rate": 1.2821673655552922e-05, "loss": 0.652, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3442 }, { "epoch": 0.46818058199619256, "grad_norm": 0.49609375, "learning_rate": 1.2817118502205547e-05, "loss": 0.8846, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3443 }, { "epoch": 0.46831656241501224, "grad_norm": 0.5, "learning_rate": 1.281256271384522e-05, "loss": 0.6413, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3444 }, { "epoch": 0.4684525428338319, "grad_norm": 0.45703125, "learning_rate": 1.2808006291498872e-05, "loss": 0.7471, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3445 }, { "epoch": 0.4685885232526516, "grad_norm": 0.48046875, "learning_rate": 1.2803449236193573e-05, "loss": 0.569, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3446 }, { "epoch": 0.4687245036714713, "grad_norm": 0.470703125, "learning_rate": 1.2798891548956543e-05, "loss": 0.3838, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3447 }, { "epoch": 0.468860484090291, "grad_norm": 0.330078125, "learning_rate": 1.2794333230815137e-05, "loss": 0.5667, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3448 }, { "epoch": 0.4689964645091107, "grad_norm": 0.326171875, "learning_rate": 1.2789774282796857e-05, "loss": 0.6711, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3449 }, { "epoch": 0.4691324449279304, "grad_norm": 0.19921875, "learning_rate": 1.2785214705929345e-05, "loss": 0.3374, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3450 }, { "epoch": 0.46926842534675006, "grad_norm": 0.365234375, "learning_rate": 1.2780654501240387e-05, "loss": 0.7368, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3451 }, { "epoch": 0.46940440576556974, "grad_norm": 0.6484375, "learning_rate": 1.2776093669757907e-05, "loss": 0.6839, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3452 }, { "epoch": 0.4695403861843894, "grad_norm": 0.34375, "learning_rate": 1.2771532212509974e-05, "loss": 0.6045, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3453 }, { "epoch": 0.46967636660320916, "grad_norm": 0.373046875, "learning_rate": 1.2766970130524797e-05, "loss": 0.5672, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3454 }, { "epoch": 0.46981234702202884, "grad_norm": 0.3359375, "learning_rate": 1.276240742483072e-05, "loss": 0.481, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3455 }, { "epoch": 0.4699483274408485, "grad_norm": 0.2734375, "learning_rate": 1.2757844096456238e-05, "loss": 0.4946, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3456 }, { "epoch": 0.4700843078596682, "grad_norm": 0.412109375, "learning_rate": 1.2753280146429978e-05, "loss": 0.693, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3457 }, { "epoch": 0.4702202882784879, "grad_norm": 0.52734375, "learning_rate": 1.274871557578071e-05, "loss": 0.6838, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3458 }, { "epoch": 0.47035626869730757, "grad_norm": 0.318359375, "learning_rate": 1.2744150385537346e-05, "loss": 0.6693, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3459 }, { "epoch": 0.4704922491161273, "grad_norm": 0.2470703125, "learning_rate": 1.2739584576728935e-05, "loss": 0.55, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3460 }, { "epoch": 0.470628229534947, "grad_norm": 0.375, "learning_rate": 1.2735018150384667e-05, "loss": 0.6353, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3461 }, { "epoch": 0.47076420995376667, "grad_norm": 0.337890625, "learning_rate": 1.273045110753387e-05, "loss": 0.6527, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3462 }, { "epoch": 0.47090019037258635, "grad_norm": 0.62890625, "learning_rate": 1.2725883449206009e-05, "loss": 0.8512, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3463 }, { "epoch": 0.47103617079140603, "grad_norm": 0.32421875, "learning_rate": 1.2721315176430692e-05, "loss": 0.708, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3464 }, { "epoch": 0.4711721512102257, "grad_norm": 0.267578125, "learning_rate": 1.2716746290237664e-05, "loss": 0.5679, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3465 }, { "epoch": 0.4713081316290454, "grad_norm": 0.34375, "learning_rate": 1.2712176791656807e-05, "loss": 0.6888, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3466 }, { "epoch": 0.4714441120478651, "grad_norm": 0.33203125, "learning_rate": 1.2707606681718143e-05, "loss": 0.6815, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3467 }, { "epoch": 0.4715800924666848, "grad_norm": 0.41796875, "learning_rate": 1.2703035961451829e-05, "loss": 0.6921, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3468 }, { "epoch": 0.4717160728855045, "grad_norm": 0.396484375, "learning_rate": 1.2698464631888161e-05, "loss": 0.6681, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3469 }, { "epoch": 0.47185205330432417, "grad_norm": 0.40234375, "learning_rate": 1.2693892694057573e-05, "loss": 0.6919, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3470 }, { "epoch": 0.47198803372314385, "grad_norm": 0.314453125, "learning_rate": 1.2689320148990636e-05, "loss": 0.5942, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3471 }, { "epoch": 0.47212401414196353, "grad_norm": 1.3125, "learning_rate": 1.2684746997718059e-05, "loss": 0.7541, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3472 }, { "epoch": 0.47225999456078327, "grad_norm": 0.451171875, "learning_rate": 1.2680173241270683e-05, "loss": 0.5527, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3473 }, { "epoch": 0.47239597497960295, "grad_norm": 0.62890625, "learning_rate": 1.267559888067949e-05, "loss": 0.6144, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3474 }, { "epoch": 0.47253195539842263, "grad_norm": 0.337890625, "learning_rate": 1.2671023916975598e-05, "loss": 0.6681, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3475 }, { "epoch": 0.4726679358172423, "grad_norm": 0.3515625, "learning_rate": 1.2666448351190259e-05, "loss": 0.7482, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3476 }, { "epoch": 0.472803916236062, "grad_norm": 0.298828125, "learning_rate": 1.2661872184354862e-05, "loss": 0.6095, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3477 }, { "epoch": 0.4729398966548817, "grad_norm": 0.61328125, "learning_rate": 1.265729541750093e-05, "loss": 0.6854, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3478 }, { "epoch": 0.4730758770737014, "grad_norm": 0.369140625, "learning_rate": 1.2652718051660122e-05, "loss": 0.6364, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3479 }, { "epoch": 0.4732118574925211, "grad_norm": 0.81640625, "learning_rate": 1.2648140087864237e-05, "loss": 0.7674, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3480 }, { "epoch": 0.4733478379113408, "grad_norm": 0.4140625, "learning_rate": 1.2643561527145194e-05, "loss": 0.6349, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3481 }, { "epoch": 0.47348381833016046, "grad_norm": 0.53125, "learning_rate": 1.2638982370535064e-05, "loss": 0.6972, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3482 }, { "epoch": 0.47361979874898014, "grad_norm": 0.46875, "learning_rate": 1.2634402619066047e-05, "loss": 0.7992, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3483 }, { "epoch": 0.4737557791677998, "grad_norm": 0.28125, "learning_rate": 1.2629822273770467e-05, "loss": 0.6055, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3484 }, { "epoch": 0.47389175958661955, "grad_norm": 0.73828125, "learning_rate": 1.2625241335680796e-05, "loss": 0.9945, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3485 }, { "epoch": 0.47402774000543924, "grad_norm": 0.4296875, "learning_rate": 1.2620659805829631e-05, "loss": 0.6488, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3486 }, { "epoch": 0.4741637204242589, "grad_norm": 0.31640625, "learning_rate": 1.2616077685249705e-05, "loss": 0.5368, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3487 }, { "epoch": 0.4742997008430786, "grad_norm": 0.330078125, "learning_rate": 1.2611494974973884e-05, "loss": 0.6333, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3488 }, { "epoch": 0.4744356812618983, "grad_norm": 0.4921875, "learning_rate": 1.2606911676035165e-05, "loss": 0.7301, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3489 }, { "epoch": 0.47457166168071796, "grad_norm": 0.294921875, "learning_rate": 1.2602327789466686e-05, "loss": 0.5049, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3490 }, { "epoch": 0.47470764209953764, "grad_norm": 0.2578125, "learning_rate": 1.2597743316301705e-05, "loss": 0.3498, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3491 }, { "epoch": 0.4748436225183574, "grad_norm": 0.30078125, "learning_rate": 1.259315825757362e-05, "loss": 0.4194, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3492 }, { "epoch": 0.47497960293717706, "grad_norm": 0.54296875, "learning_rate": 1.2588572614315961e-05, "loss": 0.8433, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3493 }, { "epoch": 0.47511558335599674, "grad_norm": 0.59375, "learning_rate": 1.2583986387562388e-05, "loss": 0.5638, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3494 }, { "epoch": 0.4752515637748164, "grad_norm": 0.6875, "learning_rate": 1.2579399578346689e-05, "loss": 0.527, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3495 }, { "epoch": 0.4753875441936361, "grad_norm": 0.353515625, "learning_rate": 1.2574812187702789e-05, "loss": 0.5864, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3496 }, { "epoch": 0.4755235246124558, "grad_norm": 0.5, "learning_rate": 1.2570224216664744e-05, "loss": 0.6958, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3497 }, { "epoch": 0.4756595050312755, "grad_norm": 0.55859375, "learning_rate": 1.2565635666266736e-05, "loss": 0.6053, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3498 }, { "epoch": 0.4757954854500952, "grad_norm": 0.55078125, "learning_rate": 1.2561046537543078e-05, "loss": 0.7085, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3499 }, { "epoch": 0.4759314658689149, "grad_norm": 0.384765625, "learning_rate": 1.2556456831528223e-05, "loss": 0.7171, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3500 }, { "epoch": 0.47606744628773456, "grad_norm": 0.34765625, "learning_rate": 1.2551866549256739e-05, "loss": 0.6973, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3501 }, { "epoch": 0.47620342670655424, "grad_norm": 0.48828125, "learning_rate": 1.2547275691763339e-05, "loss": 0.6112, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3502 }, { "epoch": 0.4763394071253739, "grad_norm": 0.310546875, "learning_rate": 1.2542684260082853e-05, "loss": 0.3626, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3503 }, { "epoch": 0.47647538754419366, "grad_norm": 0.41015625, "learning_rate": 1.2538092255250249e-05, "loss": 0.7322, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3504 }, { "epoch": 0.47661136796301334, "grad_norm": 0.431640625, "learning_rate": 1.2533499678300618e-05, "loss": 0.6277, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3505 }, { "epoch": 0.476747348381833, "grad_norm": 0.92578125, "learning_rate": 1.2528906530269187e-05, "loss": 0.8543, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3506 }, { "epoch": 0.4768833288006527, "grad_norm": 0.4765625, "learning_rate": 1.2524312812191303e-05, "loss": 0.7212, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3507 }, { "epoch": 0.4770193092194724, "grad_norm": 0.45703125, "learning_rate": 1.2519718525102452e-05, "loss": 0.6921, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3508 }, { "epoch": 0.47715528963829207, "grad_norm": 0.78125, "learning_rate": 1.2515123670038236e-05, "loss": 0.5863, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3509 }, { "epoch": 0.47729127005711175, "grad_norm": 0.431640625, "learning_rate": 1.2510528248034395e-05, "loss": 0.7708, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3510 }, { "epoch": 0.4774272504759315, "grad_norm": 0.4375, "learning_rate": 1.2505932260126794e-05, "loss": 0.6893, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3511 }, { "epoch": 0.47756323089475117, "grad_norm": 0.443359375, "learning_rate": 1.2501335707351423e-05, "loss": 0.692, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3512 }, { "epoch": 0.47769921131357085, "grad_norm": 0.251953125, "learning_rate": 1.2496738590744402e-05, "loss": 0.4588, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3513 }, { "epoch": 0.47783519173239053, "grad_norm": 0.5, "learning_rate": 1.2492140911341979e-05, "loss": 0.8299, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3514 }, { "epoch": 0.4779711721512102, "grad_norm": 0.322265625, "learning_rate": 1.2487542670180523e-05, "loss": 0.5848, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3515 }, { "epoch": 0.4781071525700299, "grad_norm": 0.59765625, "learning_rate": 1.2482943868296537e-05, "loss": 0.8222, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3516 }, { "epoch": 0.47824313298884963, "grad_norm": 0.337890625, "learning_rate": 1.247834450672665e-05, "loss": 0.6502, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3517 }, { "epoch": 0.4783791134076693, "grad_norm": 0.55859375, "learning_rate": 1.2473744586507606e-05, "loss": 0.7021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3518 }, { "epoch": 0.478515093826489, "grad_norm": 0.474609375, "learning_rate": 1.2469144108676288e-05, "loss": 0.8353, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3519 }, { "epoch": 0.47865107424530867, "grad_norm": 0.455078125, "learning_rate": 1.2464543074269702e-05, "loss": 0.596, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3520 }, { "epoch": 0.47878705466412835, "grad_norm": 0.59375, "learning_rate": 1.2459941484324975e-05, "loss": 0.6224, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3521 }, { "epoch": 0.47892303508294803, "grad_norm": 0.46484375, "learning_rate": 1.2455339339879362e-05, "loss": 0.7783, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3522 }, { "epoch": 0.47905901550176777, "grad_norm": 0.54296875, "learning_rate": 1.2450736641970243e-05, "loss": 0.792, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3523 }, { "epoch": 0.47919499592058745, "grad_norm": 0.447265625, "learning_rate": 1.244613339163512e-05, "loss": 0.6134, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3524 }, { "epoch": 0.47933097633940713, "grad_norm": 0.60546875, "learning_rate": 1.2441529589911628e-05, "loss": 0.7316, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3525 }, { "epoch": 0.4794669567582268, "grad_norm": 0.359375, "learning_rate": 1.2436925237837512e-05, "loss": 0.6561, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3526 }, { "epoch": 0.4796029371770465, "grad_norm": 0.419921875, "learning_rate": 1.2432320336450656e-05, "loss": 0.6877, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3527 }, { "epoch": 0.4797389175958662, "grad_norm": 0.6015625, "learning_rate": 1.2427714886789057e-05, "loss": 0.8042, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3528 }, { "epoch": 0.47987489801468586, "grad_norm": 0.5, "learning_rate": 1.242310888989084e-05, "loss": 0.8115, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3529 }, { "epoch": 0.4800108784335056, "grad_norm": 1.0859375, "learning_rate": 1.2418502346794257e-05, "loss": 0.745, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3530 }, { "epoch": 0.4801468588523253, "grad_norm": 0.3984375, "learning_rate": 1.2413895258537676e-05, "loss": 0.5402, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3531 }, { "epoch": 0.48028283927114496, "grad_norm": 0.470703125, "learning_rate": 1.240928762615959e-05, "loss": 0.7262, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3532 }, { "epoch": 0.48041881968996464, "grad_norm": 0.328125, "learning_rate": 1.2404679450698616e-05, "loss": 0.3812, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3533 }, { "epoch": 0.4805548001087843, "grad_norm": 0.4296875, "learning_rate": 1.2400070733193494e-05, "loss": 0.7209, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3534 }, { "epoch": 0.480690780527604, "grad_norm": 0.34375, "learning_rate": 1.2395461474683086e-05, "loss": 0.6426, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3535 }, { "epoch": 0.48082676094642374, "grad_norm": 0.447265625, "learning_rate": 1.2390851676206376e-05, "loss": 0.6919, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3536 }, { "epoch": 0.4809627413652434, "grad_norm": 0.359375, "learning_rate": 1.2386241338802468e-05, "loss": 0.6789, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3537 }, { "epoch": 0.4810987217840631, "grad_norm": 0.400390625, "learning_rate": 1.2381630463510585e-05, "loss": 0.5814, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3538 }, { "epoch": 0.4812347022028828, "grad_norm": 0.58203125, "learning_rate": 1.237701905137008e-05, "loss": 0.789, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3539 }, { "epoch": 0.48137068262170246, "grad_norm": 0.294921875, "learning_rate": 1.2372407103420418e-05, "loss": 0.4854, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3540 }, { "epoch": 0.48150666304052214, "grad_norm": 0.435546875, "learning_rate": 1.236779462070119e-05, "loss": 0.5747, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3541 }, { "epoch": 0.4816426434593419, "grad_norm": 0.306640625, "learning_rate": 1.2363181604252104e-05, "loss": 0.6022, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3542 }, { "epoch": 0.48177862387816156, "grad_norm": 0.65625, "learning_rate": 1.2358568055112995e-05, "loss": 0.8739, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3543 }, { "epoch": 0.48191460429698124, "grad_norm": 0.5859375, "learning_rate": 1.2353953974323807e-05, "loss": 0.7518, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3544 }, { "epoch": 0.4820505847158009, "grad_norm": 0.314453125, "learning_rate": 1.2349339362924614e-05, "loss": 0.6022, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3545 }, { "epoch": 0.4821865651346206, "grad_norm": 0.2890625, "learning_rate": 1.2344724221955607e-05, "loss": 0.5241, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3546 }, { "epoch": 0.4823225455534403, "grad_norm": 0.3671875, "learning_rate": 1.2340108552457093e-05, "loss": 0.5562, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3547 }, { "epoch": 0.48245852597226, "grad_norm": 0.43359375, "learning_rate": 1.2335492355469501e-05, "loss": 0.6354, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3548 }, { "epoch": 0.4825945063910797, "grad_norm": 0.455078125, "learning_rate": 1.2330875632033377e-05, "loss": 0.8029, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3549 }, { "epoch": 0.4827304868098994, "grad_norm": 0.55859375, "learning_rate": 1.2326258383189388e-05, "loss": 0.5912, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3550 }, { "epoch": 0.48286646722871907, "grad_norm": 0.640625, "learning_rate": 1.232164060997832e-05, "loss": 0.6141, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3551 }, { "epoch": 0.48300244764753875, "grad_norm": 0.4375, "learning_rate": 1.2317022313441074e-05, "loss": 0.6768, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3552 }, { "epoch": 0.4831384280663584, "grad_norm": 0.609375, "learning_rate": 1.231240349461867e-05, "loss": 0.6825, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3553 }, { "epoch": 0.4832744084851781, "grad_norm": 0.375, "learning_rate": 1.2307784154552244e-05, "loss": 0.651, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3554 }, { "epoch": 0.48341038890399785, "grad_norm": 0.302734375, "learning_rate": 1.2303164294283059e-05, "loss": 0.6027, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3555 }, { "epoch": 0.4835463693228175, "grad_norm": 0.39453125, "learning_rate": 1.2298543914852482e-05, "loss": 0.6771, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3556 }, { "epoch": 0.4836823497416372, "grad_norm": 0.361328125, "learning_rate": 1.2293923017302004e-05, "loss": 0.8223, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3557 }, { "epoch": 0.4838183301604569, "grad_norm": 0.2734375, "learning_rate": 1.2289301602673231e-05, "loss": 0.4939, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3558 }, { "epoch": 0.48395431057927657, "grad_norm": 0.41015625, "learning_rate": 1.2284679672007889e-05, "loss": 0.6146, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3559 }, { "epoch": 0.48409029099809625, "grad_norm": 0.41796875, "learning_rate": 1.2280057226347814e-05, "loss": 0.8631, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3560 }, { "epoch": 0.484226271416916, "grad_norm": 0.44140625, "learning_rate": 1.2275434266734967e-05, "loss": 0.6432, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3561 }, { "epoch": 0.48436225183573567, "grad_norm": 0.546875, "learning_rate": 1.2270810794211415e-05, "loss": 0.7161, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3562 }, { "epoch": 0.48449823225455535, "grad_norm": 0.2421875, "learning_rate": 1.2266186809819347e-05, "loss": 0.5002, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3563 }, { "epoch": 0.48463421267337503, "grad_norm": 0.5546875, "learning_rate": 1.2261562314601068e-05, "loss": 0.7285, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3564 }, { "epoch": 0.4847701930921947, "grad_norm": 0.44921875, "learning_rate": 1.2256937309598991e-05, "loss": 0.5083, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3565 }, { "epoch": 0.4849061735110144, "grad_norm": 0.271484375, "learning_rate": 1.225231179585565e-05, "loss": 0.5011, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3566 }, { "epoch": 0.48504215392983413, "grad_norm": 0.345703125, "learning_rate": 1.2247685774413694e-05, "loss": 0.4548, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3567 }, { "epoch": 0.4851781343486538, "grad_norm": 0.66015625, "learning_rate": 1.2243059246315884e-05, "loss": 0.832, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3568 }, { "epoch": 0.4853141147674735, "grad_norm": 0.333984375, "learning_rate": 1.2238432212605097e-05, "loss": 0.7493, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3569 }, { "epoch": 0.4854500951862932, "grad_norm": 0.75, "learning_rate": 1.223380467432432e-05, "loss": 0.7575, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3570 }, { "epoch": 0.48558607560511285, "grad_norm": 0.345703125, "learning_rate": 1.2229176632516662e-05, "loss": 0.6037, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3571 }, { "epoch": 0.48572205602393254, "grad_norm": 0.40625, "learning_rate": 1.2224548088225335e-05, "loss": 0.7059, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3572 }, { "epoch": 0.4858580364427522, "grad_norm": 0.5625, "learning_rate": 1.2219919042493671e-05, "loss": 0.4671, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3573 }, { "epoch": 0.48599401686157195, "grad_norm": 0.33984375, "learning_rate": 1.2215289496365115e-05, "loss": 0.6392, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3574 }, { "epoch": 0.48612999728039163, "grad_norm": 0.3359375, "learning_rate": 1.2210659450883221e-05, "loss": 0.5879, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3575 }, { "epoch": 0.4862659776992113, "grad_norm": 0.26953125, "learning_rate": 1.220602890709166e-05, "loss": 0.5734, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3576 }, { "epoch": 0.486401958118031, "grad_norm": 0.3671875, "learning_rate": 1.2201397866034212e-05, "loss": 0.7441, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3577 }, { "epoch": 0.4865379385368507, "grad_norm": 0.244140625, "learning_rate": 1.219676632875477e-05, "loss": 0.515, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3578 }, { "epoch": 0.48667391895567036, "grad_norm": 0.375, "learning_rate": 1.219213429629734e-05, "loss": 0.6401, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3579 }, { "epoch": 0.4868098993744901, "grad_norm": 0.28125, "learning_rate": 1.2187501769706038e-05, "loss": 0.5785, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3580 }, { "epoch": 0.4869458797933098, "grad_norm": 0.33203125, "learning_rate": 1.2182868750025092e-05, "loss": 0.562, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3581 }, { "epoch": 0.48708186021212946, "grad_norm": 0.3359375, "learning_rate": 1.2178235238298844e-05, "loss": 0.5544, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3582 }, { "epoch": 0.48721784063094914, "grad_norm": 0.349609375, "learning_rate": 1.217360123557174e-05, "loss": 0.5788, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3583 }, { "epoch": 0.4873538210497688, "grad_norm": 0.357421875, "learning_rate": 1.2168966742888344e-05, "loss": 0.5365, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3584 }, { "epoch": 0.4874898014685885, "grad_norm": 0.61328125, "learning_rate": 1.2164331761293326e-05, "loss": 0.7075, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3585 }, { "epoch": 0.48762578188740824, "grad_norm": 0.51953125, "learning_rate": 1.2159696291831469e-05, "loss": 0.6419, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3586 }, { "epoch": 0.4877617623062279, "grad_norm": 0.357421875, "learning_rate": 1.215506033554766e-05, "loss": 0.6717, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3587 }, { "epoch": 0.4878977427250476, "grad_norm": 0.263671875, "learning_rate": 1.2150423893486907e-05, "loss": 0.5861, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3588 }, { "epoch": 0.4880337231438673, "grad_norm": 0.4453125, "learning_rate": 1.2145786966694315e-05, "loss": 0.6986, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3589 }, { "epoch": 0.48816970356268696, "grad_norm": 0.3125, "learning_rate": 1.2141149556215107e-05, "loss": 0.5869, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3590 }, { "epoch": 0.48830568398150664, "grad_norm": 0.365234375, "learning_rate": 1.213651166309461e-05, "loss": 0.5343, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3591 }, { "epoch": 0.4884416644003263, "grad_norm": 0.34765625, "learning_rate": 1.2131873288378267e-05, "loss": 0.7316, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3592 }, { "epoch": 0.48857764481914606, "grad_norm": 0.359375, "learning_rate": 1.2127234433111616e-05, "loss": 0.6969, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3593 }, { "epoch": 0.48871362523796574, "grad_norm": 0.2470703125, "learning_rate": 1.2122595098340318e-05, "loss": 0.3893, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3594 }, { "epoch": 0.4888496056567854, "grad_norm": 0.5, "learning_rate": 1.2117955285110133e-05, "loss": 0.7992, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3595 }, { "epoch": 0.4889855860756051, "grad_norm": 0.294921875, "learning_rate": 1.211331499446693e-05, "loss": 0.3494, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3596 }, { "epoch": 0.4891215664944248, "grad_norm": 0.279296875, "learning_rate": 1.2108674227456693e-05, "loss": 0.4673, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3597 }, { "epoch": 0.48925754691324447, "grad_norm": 0.25390625, "learning_rate": 1.2104032985125505e-05, "loss": 0.5444, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3598 }, { "epoch": 0.4893935273320642, "grad_norm": 2.71875, "learning_rate": 1.2099391268519556e-05, "loss": 0.9268, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3599 }, { "epoch": 0.4895295077508839, "grad_norm": 0.380859375, "learning_rate": 1.2094749078685148e-05, "loss": 0.6509, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3600 }, { "epoch": 0.48966548816970357, "grad_norm": 0.3125, "learning_rate": 1.2090106416668688e-05, "loss": 0.6545, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3601 }, { "epoch": 0.48980146858852325, "grad_norm": 0.640625, "learning_rate": 1.2085463283516688e-05, "loss": 0.7209, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3602 }, { "epoch": 0.48993744900734293, "grad_norm": 0.31640625, "learning_rate": 1.2080819680275767e-05, "loss": 0.6286, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3603 }, { "epoch": 0.4900734294261626, "grad_norm": 0.1796875, "learning_rate": 1.2076175607992648e-05, "loss": 0.2826, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3604 }, { "epoch": 0.49020940984498235, "grad_norm": 0.53515625, "learning_rate": 1.2071531067714164e-05, "loss": 0.6494, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3605 }, { "epoch": 0.49034539026380203, "grad_norm": 0.345703125, "learning_rate": 1.2066886060487251e-05, "loss": 0.6222, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3606 }, { "epoch": 0.4904813706826217, "grad_norm": 0.40234375, "learning_rate": 1.2062240587358952e-05, "loss": 0.799, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3607 }, { "epoch": 0.4906173511014414, "grad_norm": 0.2578125, "learning_rate": 1.2057594649376408e-05, "loss": 0.5256, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3608 }, { "epoch": 0.49075333152026107, "grad_norm": 0.30859375, "learning_rate": 1.2052948247586872e-05, "loss": 0.4849, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3609 }, { "epoch": 0.49088931193908075, "grad_norm": 0.412109375, "learning_rate": 1.2048301383037706e-05, "loss": 0.7384, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3610 }, { "epoch": 0.4910252923579005, "grad_norm": 0.61328125, "learning_rate": 1.2043654056776363e-05, "loss": 0.7855, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3611 }, { "epoch": 0.49116127277672017, "grad_norm": 0.333984375, "learning_rate": 1.2039006269850413e-05, "loss": 0.5698, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3612 }, { "epoch": 0.49129725319553985, "grad_norm": 0.453125, "learning_rate": 1.2034358023307517e-05, "loss": 0.7458, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3613 }, { "epoch": 0.49143323361435953, "grad_norm": 0.458984375, "learning_rate": 1.2029709318195452e-05, "loss": 0.7856, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3614 }, { "epoch": 0.4915692140331792, "grad_norm": 0.52734375, "learning_rate": 1.2025060155562092e-05, "loss": 0.6027, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3615 }, { "epoch": 0.4917051944519989, "grad_norm": 0.6875, "learning_rate": 1.2020410536455413e-05, "loss": 0.7495, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3616 }, { "epoch": 0.4918411748708186, "grad_norm": 0.59375, "learning_rate": 1.2015760461923499e-05, "loss": 0.6714, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3617 }, { "epoch": 0.4919771552896383, "grad_norm": 0.7109375, "learning_rate": 1.201110993301453e-05, "loss": 0.5073, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3618 }, { "epoch": 0.492113135708458, "grad_norm": 0.484375, "learning_rate": 1.2006458950776797e-05, "loss": 0.5507, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3619 }, { "epoch": 0.4922491161272777, "grad_norm": 0.58203125, "learning_rate": 1.2001807516258684e-05, "loss": 0.4731, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3620 }, { "epoch": 0.49238509654609736, "grad_norm": 0.34765625, "learning_rate": 1.199715563050868e-05, "loss": 0.626, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3621 }, { "epoch": 0.49252107696491704, "grad_norm": 0.384765625, "learning_rate": 1.1992503294575385e-05, "loss": 0.7297, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3622 }, { "epoch": 0.4926570573837367, "grad_norm": 0.1826171875, "learning_rate": 1.1987850509507481e-05, "loss": 0.258, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3623 }, { "epoch": 0.49279303780255646, "grad_norm": 0.341796875, "learning_rate": 1.1983197276353773e-05, "loss": 0.4006, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3624 }, { "epoch": 0.49292901822137614, "grad_norm": 0.302734375, "learning_rate": 1.197854359616315e-05, "loss": 0.6068, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3625 }, { "epoch": 0.4930649986401958, "grad_norm": 0.54296875, "learning_rate": 1.197388946998461e-05, "loss": 0.6696, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3626 }, { "epoch": 0.4932009790590155, "grad_norm": 0.330078125, "learning_rate": 1.1969234898867252e-05, "loss": 0.5669, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3627 }, { "epoch": 0.4933369594778352, "grad_norm": 0.427734375, "learning_rate": 1.196457988386027e-05, "loss": 0.9554, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3628 }, { "epoch": 0.49347293989665486, "grad_norm": 0.439453125, "learning_rate": 1.1959924426012965e-05, "loss": 0.647, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3629 }, { "epoch": 0.4936089203154746, "grad_norm": 0.3515625, "learning_rate": 1.1955268526374733e-05, "loss": 0.6997, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3630 }, { "epoch": 0.4937449007342943, "grad_norm": 0.267578125, "learning_rate": 1.1950612185995067e-05, "loss": 0.4819, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3631 }, { "epoch": 0.49388088115311396, "grad_norm": 0.5390625, "learning_rate": 1.1945955405923568e-05, "loss": 0.5853, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3632 }, { "epoch": 0.49401686157193364, "grad_norm": 0.453125, "learning_rate": 1.194129818720993e-05, "loss": 0.6206, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3633 }, { "epoch": 0.4941528419907533, "grad_norm": 0.1845703125, "learning_rate": 1.1936640530903942e-05, "loss": 0.3558, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3634 }, { "epoch": 0.494288822409573, "grad_norm": 0.392578125, "learning_rate": 1.1931982438055506e-05, "loss": 0.66, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3635 }, { "epoch": 0.4944248028283927, "grad_norm": 0.51953125, "learning_rate": 1.1927323909714603e-05, "loss": 0.585, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3636 }, { "epoch": 0.4945607832472124, "grad_norm": 0.353515625, "learning_rate": 1.192266494693133e-05, "loss": 0.7534, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3637 }, { "epoch": 0.4946967636660321, "grad_norm": 0.6484375, "learning_rate": 1.1918005550755871e-05, "loss": 0.6014, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3638 }, { "epoch": 0.4948327440848518, "grad_norm": 0.318359375, "learning_rate": 1.1913345722238509e-05, "loss": 0.6107, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3639 }, { "epoch": 0.49496872450367146, "grad_norm": 0.34765625, "learning_rate": 1.190868546242963e-05, "loss": 0.5951, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3640 }, { "epoch": 0.49510470492249115, "grad_norm": 0.6953125, "learning_rate": 1.1904024772379713e-05, "loss": 0.9429, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3641 }, { "epoch": 0.4952406853413108, "grad_norm": 3.921875, "learning_rate": 1.1899363653139334e-05, "loss": 0.6525, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3642 }, { "epoch": 0.49537666576013056, "grad_norm": 0.296875, "learning_rate": 1.1894702105759164e-05, "loss": 0.5895, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3643 }, { "epoch": 0.49551264617895024, "grad_norm": 0.478515625, "learning_rate": 1.1890040131289975e-05, "loss": 0.5579, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3644 }, { "epoch": 0.4956486265977699, "grad_norm": 0.71875, "learning_rate": 1.1885377730782632e-05, "loss": 0.4544, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3645 }, { "epoch": 0.4957846070165896, "grad_norm": 0.390625, "learning_rate": 1.1880714905288102e-05, "loss": 0.6772, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3646 }, { "epoch": 0.4959205874354093, "grad_norm": 0.388671875, "learning_rate": 1.1876051655857434e-05, "loss": 0.5907, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3647 }, { "epoch": 0.49605656785422897, "grad_norm": 0.58203125, "learning_rate": 1.1871387983541789e-05, "loss": 0.4346, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3648 }, { "epoch": 0.4961925482730487, "grad_norm": 0.3515625, "learning_rate": 1.186672388939241e-05, "loss": 0.6649, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3649 }, { "epoch": 0.4963285286918684, "grad_norm": 0.4453125, "learning_rate": 1.1862059374460644e-05, "loss": 0.3963, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3650 }, { "epoch": 0.49646450911068807, "grad_norm": 0.412109375, "learning_rate": 1.185739443979793e-05, "loss": 0.7865, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3651 }, { "epoch": 0.49660048952950775, "grad_norm": 0.41015625, "learning_rate": 1.1852729086455801e-05, "loss": 0.6331, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3652 }, { "epoch": 0.49673646994832743, "grad_norm": 0.25, "learning_rate": 1.1848063315485884e-05, "loss": 0.4756, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3653 }, { "epoch": 0.4968724503671471, "grad_norm": 0.8359375, "learning_rate": 1.1843397127939901e-05, "loss": 0.7158, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3654 }, { "epoch": 0.49700843078596685, "grad_norm": 0.416015625, "learning_rate": 1.1838730524869667e-05, "loss": 0.71, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3655 }, { "epoch": 0.49714441120478653, "grad_norm": 0.37109375, "learning_rate": 1.1834063507327093e-05, "loss": 0.6234, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3656 }, { "epoch": 0.4972803916236062, "grad_norm": 0.423828125, "learning_rate": 1.182939607636418e-05, "loss": 0.8034, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3657 }, { "epoch": 0.4974163720424259, "grad_norm": 0.330078125, "learning_rate": 1.1824728233033027e-05, "loss": 0.6196, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3658 }, { "epoch": 0.4975523524612456, "grad_norm": 0.4921875, "learning_rate": 1.1820059978385821e-05, "loss": 0.7686, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3659 }, { "epoch": 0.49768833288006525, "grad_norm": 0.36328125, "learning_rate": 1.1815391313474845e-05, "loss": 0.6455, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3660 }, { "epoch": 0.49782431329888494, "grad_norm": 0.330078125, "learning_rate": 1.1810722239352468e-05, "loss": 0.5964, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3661 }, { "epoch": 0.49796029371770467, "grad_norm": 0.361328125, "learning_rate": 1.1806052757071163e-05, "loss": 0.7799, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3662 }, { "epoch": 0.49809627413652435, "grad_norm": 0.4609375, "learning_rate": 1.1801382867683484e-05, "loss": 0.7588, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3663 }, { "epoch": 0.49823225455534403, "grad_norm": 0.375, "learning_rate": 1.1796712572242082e-05, "loss": 0.6896, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3664 }, { "epoch": 0.4983682349741637, "grad_norm": 0.396484375, "learning_rate": 1.1792041871799701e-05, "loss": 0.5539, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3665 }, { "epoch": 0.4985042153929834, "grad_norm": 0.2734375, "learning_rate": 1.1787370767409171e-05, "loss": 0.4163, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3666 }, { "epoch": 0.4986401958118031, "grad_norm": 0.28515625, "learning_rate": 1.178269926012342e-05, "loss": 0.6519, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3667 }, { "epoch": 0.4987761762306228, "grad_norm": 0.29296875, "learning_rate": 1.177802735099546e-05, "loss": 0.5021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3668 }, { "epoch": 0.4989121566494425, "grad_norm": 0.625, "learning_rate": 1.1773355041078394e-05, "loss": 0.6131, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3669 }, { "epoch": 0.4990481370682622, "grad_norm": 0.609375, "learning_rate": 1.176868233142542e-05, "loss": 0.9064, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3670 }, { "epoch": 0.49918411748708186, "grad_norm": 0.470703125, "learning_rate": 1.1764009223089827e-05, "loss": 0.7738, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3671 }, { "epoch": 0.49932009790590154, "grad_norm": 0.3203125, "learning_rate": 1.1759335717124988e-05, "loss": 0.5897, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3672 }, { "epoch": 0.4994560783247212, "grad_norm": 0.6171875, "learning_rate": 1.175466181458437e-05, "loss": 0.6401, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3673 }, { "epoch": 0.49959205874354096, "grad_norm": 0.3828125, "learning_rate": 1.1749987516521523e-05, "loss": 0.6619, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3674 }, { "epoch": 0.49972803916236064, "grad_norm": 0.50390625, "learning_rate": 1.1745312823990099e-05, "loss": 0.863, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3675 }, { "epoch": 0.4998640195811803, "grad_norm": 0.3359375, "learning_rate": 1.1740637738043822e-05, "loss": 0.6934, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3676 }, { "epoch": 0.5, "grad_norm": 0.68359375, "learning_rate": 1.173596225973652e-05, "loss": 0.771, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3677 }, { "epoch": 0.5001359804188197, "grad_norm": 0.38671875, "learning_rate": 1.1731286390122099e-05, "loss": 0.5417, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3678 }, { "epoch": 0.5002719608376394, "grad_norm": 0.94140625, "learning_rate": 1.172661013025456e-05, "loss": 0.7946, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3679 }, { "epoch": 0.500407941256459, "grad_norm": 0.56640625, "learning_rate": 1.1721933481187988e-05, "loss": 0.7559, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3680 }, { "epoch": 0.5005439216752787, "grad_norm": 0.384765625, "learning_rate": 1.1717256443976555e-05, "loss": 0.5374, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3681 }, { "epoch": 0.5006799020940984, "grad_norm": 0.65234375, "learning_rate": 1.1712579019674527e-05, "loss": 0.5111, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3682 }, { "epoch": 0.5008158825129181, "grad_norm": 0.34375, "learning_rate": 1.1707901209336247e-05, "loss": 0.5981, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3683 }, { "epoch": 0.5009518629317379, "grad_norm": 0.42578125, "learning_rate": 1.1703223014016154e-05, "loss": 0.6361, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3684 }, { "epoch": 0.5010878433505576, "grad_norm": 0.3984375, "learning_rate": 1.1698544434768773e-05, "loss": 0.7518, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3685 }, { "epoch": 0.5012238237693772, "grad_norm": 0.34375, "learning_rate": 1.1693865472648707e-05, "loss": 0.7311, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3686 }, { "epoch": 0.5013598041881969, "grad_norm": 0.40234375, "learning_rate": 1.1689186128710654e-05, "loss": 0.7487, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3687 }, { "epoch": 0.5014957846070166, "grad_norm": 0.341796875, "learning_rate": 1.1684506404009397e-05, "loss": 0.5781, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3688 }, { "epoch": 0.5016317650258363, "grad_norm": 0.3359375, "learning_rate": 1.1679826299599801e-05, "loss": 0.5524, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3689 }, { "epoch": 0.501767745444656, "grad_norm": 0.357421875, "learning_rate": 1.167514581653682e-05, "loss": 0.542, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3690 }, { "epoch": 0.5019037258634756, "grad_norm": 0.30078125, "learning_rate": 1.167046495587549e-05, "loss": 0.5382, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3691 }, { "epoch": 0.5020397062822953, "grad_norm": 0.7109375, "learning_rate": 1.1665783718670936e-05, "loss": 0.7295, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3692 }, { "epoch": 0.502175686701115, "grad_norm": 0.73046875, "learning_rate": 1.1661102105978367e-05, "loss": 0.9139, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3693 }, { "epoch": 0.5023116671199347, "grad_norm": 0.447265625, "learning_rate": 1.1656420118853073e-05, "loss": 0.7463, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3694 }, { "epoch": 0.5024476475387544, "grad_norm": 0.48046875, "learning_rate": 1.1651737758350432e-05, "loss": 0.6141, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3695 }, { "epoch": 0.5025836279575742, "grad_norm": 0.6015625, "learning_rate": 1.1647055025525908e-05, "loss": 0.8273, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3696 }, { "epoch": 0.5027196083763938, "grad_norm": 0.5859375, "learning_rate": 1.1642371921435045e-05, "loss": 0.6346, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3697 }, { "epoch": 0.5028555887952135, "grad_norm": 0.326171875, "learning_rate": 1.1637688447133468e-05, "loss": 0.5643, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3698 }, { "epoch": 0.5029915692140332, "grad_norm": 0.2890625, "learning_rate": 1.1633004603676895e-05, "loss": 0.5369, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3699 }, { "epoch": 0.5031275496328529, "grad_norm": 1.65625, "learning_rate": 1.1628320392121118e-05, "loss": 0.8101, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3700 }, { "epoch": 0.5032635300516726, "grad_norm": 0.59375, "learning_rate": 1.1623635813522019e-05, "loss": 0.7257, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3701 }, { "epoch": 0.5033995104704923, "grad_norm": 0.267578125, "learning_rate": 1.1618950868935554e-05, "loss": 0.4998, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3702 }, { "epoch": 0.5035354908893119, "grad_norm": 0.384765625, "learning_rate": 1.161426555941777e-05, "loss": 0.6232, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3703 }, { "epoch": 0.5036714713081316, "grad_norm": 0.36328125, "learning_rate": 1.1609579886024793e-05, "loss": 0.6374, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3704 }, { "epoch": 0.5038074517269513, "grad_norm": 0.2734375, "learning_rate": 1.1604893849812832e-05, "loss": 0.4243, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3705 }, { "epoch": 0.503943432145771, "grad_norm": 0.2275390625, "learning_rate": 1.1600207451838174e-05, "loss": 0.4012, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3706 }, { "epoch": 0.5040794125645907, "grad_norm": 0.35546875, "learning_rate": 1.1595520693157194e-05, "loss": 0.6738, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3707 }, { "epoch": 0.5042153929834103, "grad_norm": 0.412109375, "learning_rate": 1.1590833574826341e-05, "loss": 0.6529, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3708 }, { "epoch": 0.5043513734022301, "grad_norm": 0.375, "learning_rate": 1.1586146097902153e-05, "loss": 0.6027, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3709 }, { "epoch": 0.5044873538210498, "grad_norm": 0.267578125, "learning_rate": 1.158145826344124e-05, "loss": 0.4334, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3710 }, { "epoch": 0.5046233342398695, "grad_norm": 0.322265625, "learning_rate": 1.1576770072500301e-05, "loss": 0.5334, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3711 }, { "epoch": 0.5047593146586892, "grad_norm": 0.69921875, "learning_rate": 1.1572081526136108e-05, "loss": 0.7253, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3712 }, { "epoch": 0.5048952950775089, "grad_norm": 0.298828125, "learning_rate": 1.156739262540552e-05, "loss": 0.5163, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3713 }, { "epoch": 0.5050312754963285, "grad_norm": 0.291015625, "learning_rate": 1.1562703371365472e-05, "loss": 0.5236, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3714 }, { "epoch": 0.5051672559151482, "grad_norm": 0.37109375, "learning_rate": 1.1558013765072974e-05, "loss": 0.7214, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3715 }, { "epoch": 0.5053032363339679, "grad_norm": 0.390625, "learning_rate": 1.1553323807585128e-05, "loss": 0.6831, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3716 }, { "epoch": 0.5054392167527876, "grad_norm": 0.87890625, "learning_rate": 1.1548633499959105e-05, "loss": 0.7275, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3717 }, { "epoch": 0.5055751971716073, "grad_norm": 1.1953125, "learning_rate": 1.1543942843252158e-05, "loss": 0.5711, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3718 }, { "epoch": 0.5057111775904269, "grad_norm": 0.4375, "learning_rate": 1.1539251838521617e-05, "loss": 0.7262, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3719 }, { "epoch": 0.5058471580092466, "grad_norm": 0.375, "learning_rate": 1.1534560486824892e-05, "loss": 0.609, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3720 }, { "epoch": 0.5059831384280663, "grad_norm": 0.39453125, "learning_rate": 1.1529868789219474e-05, "loss": 0.6802, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3721 }, { "epoch": 0.5061191188468861, "grad_norm": 0.435546875, "learning_rate": 1.1525176746762922e-05, "loss": 0.7549, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3722 }, { "epoch": 0.5062550992657058, "grad_norm": 0.7421875, "learning_rate": 1.152048436051289e-05, "loss": 0.6824, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3723 }, { "epoch": 0.5063910796845255, "grad_norm": 0.326171875, "learning_rate": 1.1515791631527089e-05, "loss": 0.5762, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3724 }, { "epoch": 0.5065270601033451, "grad_norm": 0.69921875, "learning_rate": 1.1511098560863325e-05, "loss": 0.7282, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3725 }, { "epoch": 0.5066630405221648, "grad_norm": 0.310546875, "learning_rate": 1.150640514957947e-05, "loss": 0.6559, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3726 }, { "epoch": 0.5067990209409845, "grad_norm": 0.392578125, "learning_rate": 1.1501711398733474e-05, "loss": 0.6496, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3727 }, { "epoch": 0.5069350013598042, "grad_norm": 0.58984375, "learning_rate": 1.1497017309383372e-05, "loss": 0.5516, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3728 }, { "epoch": 0.5070709817786239, "grad_norm": 0.400390625, "learning_rate": 1.149232288258726e-05, "loss": 0.4657, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3729 }, { "epoch": 0.5072069621974435, "grad_norm": 1.34375, "learning_rate": 1.148762811940333e-05, "loss": 0.779, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3730 }, { "epoch": 0.5073429426162632, "grad_norm": 0.578125, "learning_rate": 1.1482933020889835e-05, "loss": 0.5806, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3731 }, { "epoch": 0.5074789230350829, "grad_norm": 0.2373046875, "learning_rate": 1.1478237588105105e-05, "loss": 0.3823, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3732 }, { "epoch": 0.5076149034539026, "grad_norm": 0.3828125, "learning_rate": 1.147354182210755e-05, "loss": 0.5736, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3733 }, { "epoch": 0.5077508838727224, "grad_norm": 0.6484375, "learning_rate": 1.1468845723955652e-05, "loss": 0.5877, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3734 }, { "epoch": 0.5078868642915421, "grad_norm": 0.396484375, "learning_rate": 1.1464149294707972e-05, "loss": 0.6899, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3735 }, { "epoch": 0.5080228447103617, "grad_norm": 0.2734375, "learning_rate": 1.1459452535423137e-05, "loss": 0.5475, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3736 }, { "epoch": 0.5081588251291814, "grad_norm": 0.50390625, "learning_rate": 1.145475544715986e-05, "loss": 0.7004, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3737 }, { "epoch": 0.5082948055480011, "grad_norm": 0.357421875, "learning_rate": 1.1450058030976916e-05, "loss": 0.6139, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3738 }, { "epoch": 0.5084307859668208, "grad_norm": 0.578125, "learning_rate": 1.1445360287933165e-05, "loss": 0.5726, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3739 }, { "epoch": 0.5085667663856405, "grad_norm": 0.33984375, "learning_rate": 1.1440662219087535e-05, "loss": 0.7021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3740 }, { "epoch": 0.5087027468044601, "grad_norm": 0.5546875, "learning_rate": 1.1435963825499026e-05, "loss": 0.5425, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3741 }, { "epoch": 0.5088387272232798, "grad_norm": 0.21875, "learning_rate": 1.1431265108226715e-05, "loss": 0.3704, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3742 }, { "epoch": 0.5089747076420995, "grad_norm": 0.353515625, "learning_rate": 1.142656606832975e-05, "loss": 0.6177, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3743 }, { "epoch": 0.5091106880609192, "grad_norm": 0.4296875, "learning_rate": 1.142186670686735e-05, "loss": 0.7082, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3744 }, { "epoch": 0.5092466684797389, "grad_norm": 0.470703125, "learning_rate": 1.1417167024898812e-05, "loss": 0.7269, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3745 }, { "epoch": 0.5093826488985586, "grad_norm": 0.416015625, "learning_rate": 1.14124670234835e-05, "loss": 0.7173, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3746 }, { "epoch": 0.5095186293173783, "grad_norm": 0.578125, "learning_rate": 1.1407766703680852e-05, "loss": 0.6255, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3747 }, { "epoch": 0.509654609736198, "grad_norm": 0.341796875, "learning_rate": 1.1403066066550377e-05, "loss": 0.5801, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3748 }, { "epoch": 0.5097905901550177, "grad_norm": 0.462890625, "learning_rate": 1.1398365113151657e-05, "loss": 0.6353, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3749 }, { "epoch": 0.5099265705738374, "grad_norm": 0.60546875, "learning_rate": 1.1393663844544345e-05, "loss": 0.5837, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3750 }, { "epoch": 0.5100625509926571, "grad_norm": 0.259765625, "learning_rate": 1.1388962261788163e-05, "loss": 0.4305, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3751 }, { "epoch": 0.5101985314114768, "grad_norm": 0.44140625, "learning_rate": 1.1384260365942905e-05, "loss": 0.8978, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3752 }, { "epoch": 0.5103345118302964, "grad_norm": 0.388671875, "learning_rate": 1.1379558158068436e-05, "loss": 0.7295, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3753 }, { "epoch": 0.5104704922491161, "grad_norm": 0.4296875, "learning_rate": 1.1374855639224691e-05, "loss": 0.7607, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3754 }, { "epoch": 0.5106064726679358, "grad_norm": 0.41015625, "learning_rate": 1.1370152810471677e-05, "loss": 0.7866, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3755 }, { "epoch": 0.5107424530867555, "grad_norm": 0.3828125, "learning_rate": 1.1365449672869469e-05, "loss": 0.6625, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3756 }, { "epoch": 0.5108784335055752, "grad_norm": 0.2373046875, "learning_rate": 1.136074622747821e-05, "loss": 0.4714, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3757 }, { "epoch": 0.5110144139243948, "grad_norm": 0.279296875, "learning_rate": 1.1356042475358113e-05, "loss": 0.5765, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3758 }, { "epoch": 0.5111503943432146, "grad_norm": 0.310546875, "learning_rate": 1.1351338417569467e-05, "loss": 0.6092, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3759 }, { "epoch": 0.5112863747620343, "grad_norm": 0.318359375, "learning_rate": 1.1346634055172616e-05, "loss": 0.5627, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3760 }, { "epoch": 0.511422355180854, "grad_norm": 0.240234375, "learning_rate": 1.1341929389227985e-05, "loss": 0.3564, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3761 }, { "epoch": 0.5115583355996737, "grad_norm": 3.390625, "learning_rate": 1.1337224420796064e-05, "loss": 0.734, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3762 }, { "epoch": 0.5116943160184934, "grad_norm": 0.365234375, "learning_rate": 1.1332519150937411e-05, "loss": 0.6464, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3763 }, { "epoch": 0.511830296437313, "grad_norm": 0.380859375, "learning_rate": 1.132781358071265e-05, "loss": 0.7054, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3764 }, { "epoch": 0.5119662768561327, "grad_norm": 0.341796875, "learning_rate": 1.1323107711182474e-05, "loss": 0.6548, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3765 }, { "epoch": 0.5121022572749524, "grad_norm": 0.359375, "learning_rate": 1.131840154340764e-05, "loss": 0.7119, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3766 }, { "epoch": 0.5122382376937721, "grad_norm": 0.275390625, "learning_rate": 1.131369507844898e-05, "loss": 0.5706, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3767 }, { "epoch": 0.5123742181125918, "grad_norm": 0.287109375, "learning_rate": 1.130898831736739e-05, "loss": 0.5397, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3768 }, { "epoch": 0.5125101985314114, "grad_norm": 0.37109375, "learning_rate": 1.130428126122383e-05, "loss": 0.5381, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3769 }, { "epoch": 0.5126461789502311, "grad_norm": 0.69140625, "learning_rate": 1.1299573911079326e-05, "loss": 0.7012, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3770 }, { "epoch": 0.5127821593690508, "grad_norm": 0.50390625, "learning_rate": 1.1294866267994975e-05, "loss": 0.7144, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3771 }, { "epoch": 0.5129181397878706, "grad_norm": 0.33203125, "learning_rate": 1.1290158333031936e-05, "loss": 0.6217, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3772 }, { "epoch": 0.5130541202066903, "grad_norm": 0.2412109375, "learning_rate": 1.1285450107251434e-05, "loss": 0.5339, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3773 }, { "epoch": 0.51319010062551, "grad_norm": 0.494140625, "learning_rate": 1.1280741591714761e-05, "loss": 0.8332, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3774 }, { "epoch": 0.5133260810443296, "grad_norm": 0.32421875, "learning_rate": 1.1276032787483278e-05, "loss": 0.6408, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3775 }, { "epoch": 0.5134620614631493, "grad_norm": 2.6875, "learning_rate": 1.12713236956184e-05, "loss": 0.5524, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3776 }, { "epoch": 0.513598041881969, "grad_norm": 0.330078125, "learning_rate": 1.1266614317181619e-05, "loss": 0.6522, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3777 }, { "epoch": 0.5137340223007887, "grad_norm": 0.32421875, "learning_rate": 1.1261904653234486e-05, "loss": 0.5527, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3778 }, { "epoch": 0.5138700027196084, "grad_norm": 0.54296875, "learning_rate": 1.1257194704838616e-05, "loss": 0.618, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3779 }, { "epoch": 0.514005983138428, "grad_norm": 0.5703125, "learning_rate": 1.125248447305569e-05, "loss": 0.7586, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3780 }, { "epoch": 0.5141419635572477, "grad_norm": 0.27734375, "learning_rate": 1.1247773958947447e-05, "loss": 0.5207, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3781 }, { "epoch": 0.5142779439760674, "grad_norm": 0.57421875, "learning_rate": 1.1243063163575702e-05, "loss": 0.5876, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3782 }, { "epoch": 0.5144139243948871, "grad_norm": 0.482421875, "learning_rate": 1.123835208800232e-05, "loss": 0.5591, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3783 }, { "epoch": 0.5145499048137068, "grad_norm": 0.279296875, "learning_rate": 1.123364073328924e-05, "loss": 0.4932, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3784 }, { "epoch": 0.5146858852325266, "grad_norm": 0.306640625, "learning_rate": 1.1228929100498453e-05, "loss": 0.5202, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3785 }, { "epoch": 0.5148218656513462, "grad_norm": 0.439453125, "learning_rate": 1.1224217190692022e-05, "loss": 0.5488, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3786 }, { "epoch": 0.5149578460701659, "grad_norm": 0.341796875, "learning_rate": 1.121950500493207e-05, "loss": 0.6883, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3787 }, { "epoch": 0.5150938264889856, "grad_norm": 0.400390625, "learning_rate": 1.1214792544280781e-05, "loss": 0.6069, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3788 }, { "epoch": 0.5152298069078053, "grad_norm": 0.392578125, "learning_rate": 1.1210079809800397e-05, "loss": 0.7044, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3789 }, { "epoch": 0.515365787326625, "grad_norm": 0.314453125, "learning_rate": 1.1205366802553231e-05, "loss": 0.4863, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3790 }, { "epoch": 0.5155017677454447, "grad_norm": 0.306640625, "learning_rate": 1.1200653523601652e-05, "loss": 0.4512, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3791 }, { "epoch": 0.5156377481642643, "grad_norm": 0.431640625, "learning_rate": 1.1195939974008088e-05, "loss": 0.7327, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3792 }, { "epoch": 0.515773728583084, "grad_norm": 0.26953125, "learning_rate": 1.1191226154835033e-05, "loss": 0.5163, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3793 }, { "epoch": 0.5159097090019037, "grad_norm": 0.4921875, "learning_rate": 1.1186512067145039e-05, "loss": 0.6289, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3794 }, { "epoch": 0.5160456894207234, "grad_norm": 0.310546875, "learning_rate": 1.1181797712000719e-05, "loss": 0.6341, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3795 }, { "epoch": 0.5161816698395431, "grad_norm": 0.345703125, "learning_rate": 1.1177083090464744e-05, "loss": 0.4954, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3796 }, { "epoch": 0.5163176502583628, "grad_norm": 0.353515625, "learning_rate": 1.1172368203599853e-05, "loss": 0.5055, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3797 }, { "epoch": 0.5164536306771825, "grad_norm": 0.59765625, "learning_rate": 1.1167653052468832e-05, "loss": 0.5282, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3798 }, { "epoch": 0.5165896110960022, "grad_norm": 0.427734375, "learning_rate": 1.1162937638134538e-05, "loss": 0.6634, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3799 }, { "epoch": 0.5167255915148219, "grad_norm": 0.3671875, "learning_rate": 1.1158221961659883e-05, "loss": 0.7186, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3800 }, { "epoch": 0.5168615719336416, "grad_norm": 0.38671875, "learning_rate": 1.1153506024107836e-05, "loss": 0.6792, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3801 }, { "epoch": 0.5169975523524613, "grad_norm": 0.53515625, "learning_rate": 1.1148789826541429e-05, "loss": 0.7046, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3802 }, { "epoch": 0.5171335327712809, "grad_norm": 0.283203125, "learning_rate": 1.1144073370023751e-05, "loss": 0.4119, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3803 }, { "epoch": 0.5172695131901006, "grad_norm": 0.625, "learning_rate": 1.1139356655617945e-05, "loss": 0.7735, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3804 }, { "epoch": 0.5174054936089203, "grad_norm": 0.423828125, "learning_rate": 1.1134639684387222e-05, "loss": 0.6986, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3805 }, { "epoch": 0.51754147402774, "grad_norm": 0.80078125, "learning_rate": 1.1129922457394842e-05, "loss": 0.6807, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3806 }, { "epoch": 0.5176774544465597, "grad_norm": 0.302734375, "learning_rate": 1.1125204975704122e-05, "loss": 0.6546, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3807 }, { "epoch": 0.5178134348653793, "grad_norm": 0.3671875, "learning_rate": 1.1120487240378448e-05, "loss": 0.5271, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3808 }, { "epoch": 0.517949415284199, "grad_norm": 0.23828125, "learning_rate": 1.1115769252481248e-05, "loss": 0.3934, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3809 }, { "epoch": 0.5180853957030188, "grad_norm": 0.55859375, "learning_rate": 1.1111051013076017e-05, "loss": 0.8613, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3810 }, { "epoch": 0.5182213761218385, "grad_norm": 0.482421875, "learning_rate": 1.1106332523226307e-05, "loss": 0.7669, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3811 }, { "epoch": 0.5183573565406582, "grad_norm": 0.38671875, "learning_rate": 1.1101613783995718e-05, "loss": 0.5754, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3812 }, { "epoch": 0.5184933369594779, "grad_norm": 0.62109375, "learning_rate": 1.1096894796447912e-05, "loss": 0.4733, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3813 }, { "epoch": 0.5186293173782975, "grad_norm": 0.287109375, "learning_rate": 1.109217556164661e-05, "loss": 0.5373, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3814 }, { "epoch": 0.5187652977971172, "grad_norm": 0.423828125, "learning_rate": 1.1087456080655583e-05, "loss": 0.6657, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3815 }, { "epoch": 0.5189012782159369, "grad_norm": 0.423828125, "learning_rate": 1.108273635453866e-05, "loss": 0.7352, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3816 }, { "epoch": 0.5190372586347566, "grad_norm": 0.388671875, "learning_rate": 1.1078016384359725e-05, "loss": 0.5605, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3817 }, { "epoch": 0.5191732390535763, "grad_norm": 0.69921875, "learning_rate": 1.1073296171182716e-05, "loss": 0.7045, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3818 }, { "epoch": 0.5193092194723959, "grad_norm": 0.400390625, "learning_rate": 1.106857571607163e-05, "loss": 0.7743, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3819 }, { "epoch": 0.5194451998912156, "grad_norm": 0.6484375, "learning_rate": 1.1063855020090513e-05, "loss": 0.8784, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3820 }, { "epoch": 0.5195811803100353, "grad_norm": 0.369140625, "learning_rate": 1.1059134084303467e-05, "loss": 0.5503, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3821 }, { "epoch": 0.5197171607288551, "grad_norm": 0.310546875, "learning_rate": 1.105441290977465e-05, "loss": 0.6576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3822 }, { "epoch": 0.5198531411476748, "grad_norm": 0.458984375, "learning_rate": 1.104969149756827e-05, "loss": 0.5633, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3823 }, { "epoch": 0.5199891215664945, "grad_norm": 0.30078125, "learning_rate": 1.1044969848748594e-05, "loss": 0.6776, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3824 }, { "epoch": 0.5201251019853141, "grad_norm": 0.306640625, "learning_rate": 1.1040247964379938e-05, "loss": 0.6047, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3825 }, { "epoch": 0.5202610824041338, "grad_norm": 0.275390625, "learning_rate": 1.1035525845526673e-05, "loss": 0.4893, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3826 }, { "epoch": 0.5203970628229535, "grad_norm": 0.490234375, "learning_rate": 1.103080349325322e-05, "loss": 0.6851, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3827 }, { "epoch": 0.5205330432417732, "grad_norm": 0.33984375, "learning_rate": 1.102608090862406e-05, "loss": 0.7065, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3828 }, { "epoch": 0.5206690236605929, "grad_norm": 0.52734375, "learning_rate": 1.1021358092703718e-05, "loss": 0.6686, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3829 }, { "epoch": 0.5208050040794125, "grad_norm": 0.44921875, "learning_rate": 1.1016635046556773e-05, "loss": 0.4887, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3830 }, { "epoch": 0.5209409844982322, "grad_norm": 0.396484375, "learning_rate": 1.1011911771247858e-05, "loss": 0.6143, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3831 }, { "epoch": 0.5210769649170519, "grad_norm": 0.349609375, "learning_rate": 1.1007188267841661e-05, "loss": 0.7474, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3832 }, { "epoch": 0.5212129453358716, "grad_norm": 0.33984375, "learning_rate": 1.1002464537402913e-05, "loss": 0.5887, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3833 }, { "epoch": 0.5213489257546913, "grad_norm": 0.283203125, "learning_rate": 1.0997740580996403e-05, "loss": 0.5646, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3834 }, { "epoch": 0.5214849061735111, "grad_norm": 0.44921875, "learning_rate": 1.0993016399686967e-05, "loss": 0.7082, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3835 }, { "epoch": 0.5216208865923307, "grad_norm": 0.419921875, "learning_rate": 1.0988291994539493e-05, "loss": 0.6965, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3836 }, { "epoch": 0.5217568670111504, "grad_norm": 0.435546875, "learning_rate": 1.0983567366618922e-05, "loss": 0.6139, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3837 }, { "epoch": 0.5218928474299701, "grad_norm": 0.333984375, "learning_rate": 1.097884251699024e-05, "loss": 0.5438, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3838 }, { "epoch": 0.5220288278487898, "grad_norm": 0.494140625, "learning_rate": 1.0974117446718489e-05, "loss": 0.6784, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3839 }, { "epoch": 0.5221648082676095, "grad_norm": 0.38671875, "learning_rate": 1.0969392156868754e-05, "loss": 0.8081, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3840 }, { "epoch": 0.5223007886864292, "grad_norm": 0.421875, "learning_rate": 1.0964666648506175e-05, "loss": 0.617, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3841 }, { "epoch": 0.5224367691052488, "grad_norm": 0.294921875, "learning_rate": 1.095994092269594e-05, "loss": 0.5786, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3842 }, { "epoch": 0.5225727495240685, "grad_norm": 0.56640625, "learning_rate": 1.0955214980503284e-05, "loss": 0.6204, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3843 }, { "epoch": 0.5227087299428882, "grad_norm": 0.255859375, "learning_rate": 1.0950488822993495e-05, "loss": 0.5065, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3844 }, { "epoch": 0.5228447103617079, "grad_norm": 0.59375, "learning_rate": 1.0945762451231902e-05, "loss": 0.6097, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3845 }, { "epoch": 0.5229806907805276, "grad_norm": 0.35546875, "learning_rate": 1.0941035866283892e-05, "loss": 0.7129, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3846 }, { "epoch": 0.5231166711993473, "grad_norm": 0.2177734375, "learning_rate": 1.093630906921489e-05, "loss": 0.4232, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3847 }, { "epoch": 0.523252651618167, "grad_norm": 0.38671875, "learning_rate": 1.0931582061090377e-05, "loss": 0.4854, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3848 }, { "epoch": 0.5233886320369867, "grad_norm": 0.2236328125, "learning_rate": 1.0926854842975877e-05, "loss": 0.2568, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3849 }, { "epoch": 0.5235246124558064, "grad_norm": 0.390625, "learning_rate": 1.0922127415936966e-05, "loss": 0.5355, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3850 }, { "epoch": 0.5236605928746261, "grad_norm": 0.259765625, "learning_rate": 1.0917399781039259e-05, "loss": 0.5353, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3851 }, { "epoch": 0.5237965732934458, "grad_norm": 0.328125, "learning_rate": 1.0912671939348428e-05, "loss": 0.4437, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3852 }, { "epoch": 0.5239325537122654, "grad_norm": 0.265625, "learning_rate": 1.0907943891930181e-05, "loss": 0.4996, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3853 }, { "epoch": 0.5240685341310851, "grad_norm": 0.375, "learning_rate": 1.0903215639850283e-05, "loss": 0.596, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3854 }, { "epoch": 0.5242045145499048, "grad_norm": 0.4609375, "learning_rate": 1.0898487184174533e-05, "loss": 0.7412, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3855 }, { "epoch": 0.5243404949687245, "grad_norm": 0.294921875, "learning_rate": 1.0893758525968789e-05, "loss": 0.4181, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3856 }, { "epoch": 0.5244764753875442, "grad_norm": 0.314453125, "learning_rate": 1.0889029666298947e-05, "loss": 0.603, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3857 }, { "epoch": 0.5246124558063638, "grad_norm": 0.5234375, "learning_rate": 1.0884300606230949e-05, "loss": 0.5799, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3858 }, { "epoch": 0.5247484362251835, "grad_norm": 0.482421875, "learning_rate": 1.087957134683078e-05, "loss": 0.5814, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3859 }, { "epoch": 0.5248844166440033, "grad_norm": 0.482421875, "learning_rate": 1.0874841889164478e-05, "loss": 0.7645, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3860 }, { "epoch": 0.525020397062823, "grad_norm": 0.33203125, "learning_rate": 1.0870112234298118e-05, "loss": 0.5365, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3861 }, { "epoch": 0.5251563774816427, "grad_norm": 0.26953125, "learning_rate": 1.0865382383297823e-05, "loss": 0.3485, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3862 }, { "epoch": 0.5252923579004624, "grad_norm": 0.36328125, "learning_rate": 1.0860652337229755e-05, "loss": 0.757, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3863 }, { "epoch": 0.525428338319282, "grad_norm": 0.3046875, "learning_rate": 1.0855922097160131e-05, "loss": 0.5223, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3864 }, { "epoch": 0.5255643187381017, "grad_norm": 0.6875, "learning_rate": 1.0851191664155202e-05, "loss": 0.674, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3865 }, { "epoch": 0.5257002991569214, "grad_norm": 0.40625, "learning_rate": 1.0846461039281265e-05, "loss": 0.6564, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3866 }, { "epoch": 0.5258362795757411, "grad_norm": 0.412109375, "learning_rate": 1.0841730223604659e-05, "loss": 0.8179, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3867 }, { "epoch": 0.5259722599945608, "grad_norm": 0.546875, "learning_rate": 1.0836999218191771e-05, "loss": 0.9536, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3868 }, { "epoch": 0.5261082404133804, "grad_norm": 0.33984375, "learning_rate": 1.0832268024109025e-05, "loss": 0.6976, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3869 }, { "epoch": 0.5262442208322001, "grad_norm": 0.333984375, "learning_rate": 1.0827536642422894e-05, "loss": 0.6587, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3870 }, { "epoch": 0.5263802012510198, "grad_norm": 0.59375, "learning_rate": 1.0822805074199882e-05, "loss": 0.6742, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3871 }, { "epoch": 0.5265161816698395, "grad_norm": 0.39453125, "learning_rate": 1.0818073320506547e-05, "loss": 0.5993, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3872 }, { "epoch": 0.5266521620886593, "grad_norm": 0.337890625, "learning_rate": 1.0813341382409488e-05, "loss": 0.6086, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3873 }, { "epoch": 0.526788142507479, "grad_norm": 0.82421875, "learning_rate": 1.0808609260975333e-05, "loss": 0.625, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3874 }, { "epoch": 0.5269241229262986, "grad_norm": 0.3046875, "learning_rate": 1.0803876957270766e-05, "loss": 0.4312, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3875 }, { "epoch": 0.5270601033451183, "grad_norm": 0.61328125, "learning_rate": 1.0799144472362506e-05, "loss": 0.7194, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3876 }, { "epoch": 0.527196083763938, "grad_norm": 0.40234375, "learning_rate": 1.0794411807317312e-05, "loss": 0.7563, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3877 }, { "epoch": 0.5273320641827577, "grad_norm": 0.625, "learning_rate": 1.0789678963201982e-05, "loss": 0.624, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3878 }, { "epoch": 0.5274680446015774, "grad_norm": 0.6484375, "learning_rate": 1.0784945941083358e-05, "loss": 0.6683, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3879 }, { "epoch": 0.527604025020397, "grad_norm": 0.490234375, "learning_rate": 1.0780212742028324e-05, "loss": 0.8656, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3880 }, { "epoch": 0.5277400054392167, "grad_norm": 0.353515625, "learning_rate": 1.0775479367103798e-05, "loss": 0.5485, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3881 }, { "epoch": 0.5278759858580364, "grad_norm": 0.40625, "learning_rate": 1.0770745817376741e-05, "loss": 0.6626, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3882 }, { "epoch": 0.5280119662768561, "grad_norm": 2.828125, "learning_rate": 1.0766012093914156e-05, "loss": 0.6289, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3883 }, { "epoch": 0.5281479466956758, "grad_norm": 0.28515625, "learning_rate": 1.0761278197783079e-05, "loss": 0.5542, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3884 }, { "epoch": 0.5282839271144956, "grad_norm": 0.53515625, "learning_rate": 1.0756544130050586e-05, "loss": 0.9645, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3885 }, { "epoch": 0.5284199075333152, "grad_norm": 0.41796875, "learning_rate": 1.0751809891783799e-05, "loss": 0.6099, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3886 }, { "epoch": 0.5285558879521349, "grad_norm": 0.396484375, "learning_rate": 1.0747075484049872e-05, "loss": 0.7419, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3887 }, { "epoch": 0.5286918683709546, "grad_norm": 0.447265625, "learning_rate": 1.0742340907915993e-05, "loss": 0.7349, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3888 }, { "epoch": 0.5288278487897743, "grad_norm": 0.205078125, "learning_rate": 1.07376061644494e-05, "loss": 0.3188, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3889 }, { "epoch": 0.528963829208594, "grad_norm": 0.365234375, "learning_rate": 1.073287125471736e-05, "loss": 0.7174, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3890 }, { "epoch": 0.5290998096274137, "grad_norm": 0.2578125, "learning_rate": 1.0728136179787178e-05, "loss": 0.4561, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3891 }, { "epoch": 0.5292357900462333, "grad_norm": 0.45703125, "learning_rate": 1.07234009407262e-05, "loss": 0.7082, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3892 }, { "epoch": 0.529371770465053, "grad_norm": 0.5234375, "learning_rate": 1.0718665538601804e-05, "loss": 0.5298, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3893 }, { "epoch": 0.5295077508838727, "grad_norm": 0.26953125, "learning_rate": 1.071392997448141e-05, "loss": 0.465, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3894 }, { "epoch": 0.5296437313026924, "grad_norm": 0.3359375, "learning_rate": 1.0709194249432471e-05, "loss": 0.6157, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3895 }, { "epoch": 0.5297797117215121, "grad_norm": 0.435546875, "learning_rate": 1.070445836452248e-05, "loss": 0.7625, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3896 }, { "epoch": 0.5299156921403317, "grad_norm": 0.330078125, "learning_rate": 1.0699722320818957e-05, "loss": 0.6764, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3897 }, { "epoch": 0.5300516725591515, "grad_norm": 0.33984375, "learning_rate": 1.069498611938947e-05, "loss": 0.652, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3898 }, { "epoch": 0.5301876529779712, "grad_norm": 0.62109375, "learning_rate": 1.0690249761301614e-05, "loss": 0.6825, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3899 }, { "epoch": 0.5303236333967909, "grad_norm": 0.28515625, "learning_rate": 1.0685513247623023e-05, "loss": 0.585, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3900 }, { "epoch": 0.5304596138156106, "grad_norm": 0.353515625, "learning_rate": 1.0680776579421363e-05, "loss": 0.6341, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3901 }, { "epoch": 0.5305955942344303, "grad_norm": 0.37890625, "learning_rate": 1.0676039757764342e-05, "loss": 0.7407, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3902 }, { "epoch": 0.5307315746532499, "grad_norm": 0.80078125, "learning_rate": 1.0671302783719692e-05, "loss": 0.8491, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3903 }, { "epoch": 0.5308675550720696, "grad_norm": 0.26953125, "learning_rate": 1.0666565658355187e-05, "loss": 0.5996, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3904 }, { "epoch": 0.5310035354908893, "grad_norm": 0.41796875, "learning_rate": 1.0661828382738637e-05, "loss": 0.5622, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3905 }, { "epoch": 0.531139515909709, "grad_norm": 0.44140625, "learning_rate": 1.0657090957937874e-05, "loss": 0.6578, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3906 }, { "epoch": 0.5312754963285287, "grad_norm": 0.443359375, "learning_rate": 1.0652353385020777e-05, "loss": 0.6932, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3907 }, { "epoch": 0.5314114767473483, "grad_norm": 0.49609375, "learning_rate": 1.0647615665055251e-05, "loss": 0.5837, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3908 }, { "epoch": 0.531547457166168, "grad_norm": 0.31640625, "learning_rate": 1.0642877799109234e-05, "loss": 0.5329, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3909 }, { "epoch": 0.5316834375849878, "grad_norm": 0.330078125, "learning_rate": 1.0638139788250703e-05, "loss": 0.7259, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3910 }, { "epoch": 0.5318194180038075, "grad_norm": 0.2578125, "learning_rate": 1.0633401633547663e-05, "loss": 0.4451, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3911 }, { "epoch": 0.5319553984226272, "grad_norm": 0.341796875, "learning_rate": 1.0628663336068148e-05, "loss": 0.5413, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3912 }, { "epoch": 0.5320913788414469, "grad_norm": 0.439453125, "learning_rate": 1.0623924896880234e-05, "loss": 0.7988, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3913 }, { "epoch": 0.5322273592602665, "grad_norm": 0.458984375, "learning_rate": 1.061918631705202e-05, "loss": 0.8092, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3914 }, { "epoch": 0.5323633396790862, "grad_norm": 0.609375, "learning_rate": 1.0614447597651638e-05, "loss": 0.6973, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3915 }, { "epoch": 0.5324993200979059, "grad_norm": 0.27734375, "learning_rate": 1.0609708739747256e-05, "loss": 0.4943, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3916 }, { "epoch": 0.5326353005167256, "grad_norm": 0.326171875, "learning_rate": 1.060496974440707e-05, "loss": 0.6167, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3917 }, { "epoch": 0.5327712809355453, "grad_norm": 0.7578125, "learning_rate": 1.0600230612699306e-05, "loss": 0.7939, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3918 }, { "epoch": 0.532907261354365, "grad_norm": 0.40625, "learning_rate": 1.0595491345692226e-05, "loss": 0.9072, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3919 }, { "epoch": 0.5330432417731846, "grad_norm": 0.68359375, "learning_rate": 1.0590751944454114e-05, "loss": 0.8514, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3920 }, { "epoch": 0.5331792221920043, "grad_norm": 0.37890625, "learning_rate": 1.0586012410053293e-05, "loss": 0.6803, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3921 }, { "epoch": 0.533315202610824, "grad_norm": 0.326171875, "learning_rate": 1.0581272743558106e-05, "loss": 0.7028, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3922 }, { "epoch": 0.5334511830296438, "grad_norm": 0.5546875, "learning_rate": 1.0576532946036938e-05, "loss": 0.6618, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3923 }, { "epoch": 0.5335871634484635, "grad_norm": 0.54296875, "learning_rate": 1.0571793018558199e-05, "loss": 0.6351, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3924 }, { "epoch": 0.5337231438672831, "grad_norm": 0.310546875, "learning_rate": 1.0567052962190318e-05, "loss": 0.6483, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3925 }, { "epoch": 0.5338591242861028, "grad_norm": 0.318359375, "learning_rate": 1.056231277800177e-05, "loss": 0.3605, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3926 }, { "epoch": 0.5339951047049225, "grad_norm": 0.330078125, "learning_rate": 1.0557572467061044e-05, "loss": 0.6125, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3927 }, { "epoch": 0.5341310851237422, "grad_norm": 0.421875, "learning_rate": 1.0552832030436668e-05, "loss": 0.6738, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3928 }, { "epoch": 0.5342670655425619, "grad_norm": 0.341796875, "learning_rate": 1.0548091469197194e-05, "loss": 0.6291, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3929 }, { "epoch": 0.5344030459613816, "grad_norm": 0.65234375, "learning_rate": 1.0543350784411201e-05, "loss": 0.6253, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3930 }, { "epoch": 0.5345390263802012, "grad_norm": 0.625, "learning_rate": 1.05386099771473e-05, "loss": 0.7451, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3931 }, { "epoch": 0.5346750067990209, "grad_norm": 0.5703125, "learning_rate": 1.0533869048474123e-05, "loss": 0.5508, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3932 }, { "epoch": 0.5348109872178406, "grad_norm": 0.41796875, "learning_rate": 1.0529127999460335e-05, "loss": 0.7516, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3933 }, { "epoch": 0.5349469676366603, "grad_norm": 0.31640625, "learning_rate": 1.0524386831174629e-05, "loss": 0.6452, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3934 }, { "epoch": 0.53508294805548, "grad_norm": 0.3359375, "learning_rate": 1.0519645544685719e-05, "loss": 0.5462, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3935 }, { "epoch": 0.5352189284742997, "grad_norm": 0.3046875, "learning_rate": 1.051490414106235e-05, "loss": 0.5653, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3936 }, { "epoch": 0.5353549088931194, "grad_norm": 0.283203125, "learning_rate": 1.0510162621373295e-05, "loss": 0.6198, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3937 }, { "epoch": 0.5354908893119391, "grad_norm": 0.37109375, "learning_rate": 1.0505420986687344e-05, "loss": 0.6242, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3938 }, { "epoch": 0.5356268697307588, "grad_norm": 0.4921875, "learning_rate": 1.0500679238073326e-05, "loss": 0.7417, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3939 }, { "epoch": 0.5357628501495785, "grad_norm": 0.421875, "learning_rate": 1.0495937376600081e-05, "loss": 0.7161, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3940 }, { "epoch": 0.5358988305683982, "grad_norm": 0.4921875, "learning_rate": 1.0491195403336491e-05, "loss": 0.7384, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3941 }, { "epoch": 0.5360348109872178, "grad_norm": 0.4296875, "learning_rate": 1.0486453319351454e-05, "loss": 0.8078, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3942 }, { "epoch": 0.5361707914060375, "grad_norm": 0.625, "learning_rate": 1.0481711125713889e-05, "loss": 0.7179, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3943 }, { "epoch": 0.5363067718248572, "grad_norm": 0.58984375, "learning_rate": 1.0476968823492749e-05, "loss": 0.7062, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3944 }, { "epoch": 0.5364427522436769, "grad_norm": 0.9765625, "learning_rate": 1.0472226413757003e-05, "loss": 0.743, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3945 }, { "epoch": 0.5365787326624966, "grad_norm": 0.59375, "learning_rate": 1.046748389757565e-05, "loss": 0.7988, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3946 }, { "epoch": 0.5367147130813162, "grad_norm": 0.3359375, "learning_rate": 1.0462741276017711e-05, "loss": 0.7279, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3947 }, { "epoch": 0.536850693500136, "grad_norm": 0.796875, "learning_rate": 1.0457998550152231e-05, "loss": 0.6704, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3948 }, { "epoch": 0.5369866739189557, "grad_norm": 0.9296875, "learning_rate": 1.045325572104828e-05, "loss": 0.8242, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3949 }, { "epoch": 0.5371226543377754, "grad_norm": 0.54296875, "learning_rate": 1.0448512789774944e-05, "loss": 0.4077, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3950 }, { "epoch": 0.5372586347565951, "grad_norm": 0.376953125, "learning_rate": 1.0443769757401346e-05, "loss": 0.6667, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3951 }, { "epoch": 0.5373946151754148, "grad_norm": 0.494140625, "learning_rate": 1.0439026624996615e-05, "loss": 0.7303, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3952 }, { "epoch": 0.5375305955942344, "grad_norm": 1.796875, "learning_rate": 1.0434283393629918e-05, "loss": 1.0303, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3953 }, { "epoch": 0.5376665760130541, "grad_norm": 0.412109375, "learning_rate": 1.0429540064370433e-05, "loss": 0.5605, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3954 }, { "epoch": 0.5378025564318738, "grad_norm": 0.365234375, "learning_rate": 1.0424796638287367e-05, "loss": 0.561, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3955 }, { "epoch": 0.5379385368506935, "grad_norm": 0.76171875, "learning_rate": 1.0420053116449944e-05, "loss": 0.7869, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3956 }, { "epoch": 0.5380745172695132, "grad_norm": 0.40234375, "learning_rate": 1.0415309499927415e-05, "loss": 0.6405, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3957 }, { "epoch": 0.5382104976883328, "grad_norm": 0.337890625, "learning_rate": 1.0410565789789044e-05, "loss": 0.6222, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3958 }, { "epoch": 0.5383464781071525, "grad_norm": 0.60546875, "learning_rate": 1.0405821987104128e-05, "loss": 0.5444, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3959 }, { "epoch": 0.5384824585259722, "grad_norm": 0.51171875, "learning_rate": 1.0401078092941972e-05, "loss": 0.32, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3960 }, { "epoch": 0.538618438944792, "grad_norm": 0.66015625, "learning_rate": 1.039633410837191e-05, "loss": 0.7965, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3961 }, { "epoch": 0.5387544193636117, "grad_norm": 0.458984375, "learning_rate": 1.0391590034463295e-05, "loss": 0.7186, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3962 }, { "epoch": 0.5388903997824314, "grad_norm": 0.240234375, "learning_rate": 1.03868458722855e-05, "loss": 0.4222, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3963 }, { "epoch": 0.539026380201251, "grad_norm": 0.5234375, "learning_rate": 1.0382101622907914e-05, "loss": 0.7902, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3964 }, { "epoch": 0.5391623606200707, "grad_norm": 0.38671875, "learning_rate": 1.0377357287399952e-05, "loss": 0.7197, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3965 }, { "epoch": 0.5392983410388904, "grad_norm": 0.28515625, "learning_rate": 1.0372612866831042e-05, "loss": 0.6113, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3966 }, { "epoch": 0.5394343214577101, "grad_norm": 0.5546875, "learning_rate": 1.0367868362270639e-05, "loss": 0.8021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3967 }, { "epoch": 0.5395703018765298, "grad_norm": 0.30859375, "learning_rate": 1.0363123774788206e-05, "loss": 0.6143, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3968 }, { "epoch": 0.5397062822953494, "grad_norm": 0.4609375, "learning_rate": 1.0358379105453239e-05, "loss": 0.6806, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3969 }, { "epoch": 0.5398422627141691, "grad_norm": 0.53125, "learning_rate": 1.0353634355335234e-05, "loss": 0.6118, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3970 }, { "epoch": 0.5399782431329888, "grad_norm": 0.400390625, "learning_rate": 1.0348889525503725e-05, "loss": 0.5378, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3971 }, { "epoch": 0.5401142235518085, "grad_norm": 0.462890625, "learning_rate": 1.034414461702825e-05, "loss": 0.6626, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3972 }, { "epoch": 0.5402502039706283, "grad_norm": 0.3984375, "learning_rate": 1.0339399630978373e-05, "loss": 0.7798, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3973 }, { "epoch": 0.540386184389448, "grad_norm": 0.41796875, "learning_rate": 1.033465456842367e-05, "loss": 0.6217, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3974 }, { "epoch": 0.5405221648082676, "grad_norm": 0.357421875, "learning_rate": 1.0329909430433733e-05, "loss": 0.4829, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3975 }, { "epoch": 0.5406581452270873, "grad_norm": 0.37890625, "learning_rate": 1.032516421807818e-05, "loss": 0.6428, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3976 }, { "epoch": 0.540794125645907, "grad_norm": 0.1884765625, "learning_rate": 1.0320418932426634e-05, "loss": 0.3923, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3977 }, { "epoch": 0.5409301060647267, "grad_norm": 0.421875, "learning_rate": 1.0315673574548746e-05, "loss": 0.5282, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3978 }, { "epoch": 0.5410660864835464, "grad_norm": 0.318359375, "learning_rate": 1.0310928145514173e-05, "loss": 0.6077, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3979 }, { "epoch": 0.541202066902366, "grad_norm": 0.490234375, "learning_rate": 1.0306182646392594e-05, "loss": 0.6942, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3980 }, { "epoch": 0.5413380473211857, "grad_norm": 0.53515625, "learning_rate": 1.0301437078253707e-05, "loss": 0.8532, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3981 }, { "epoch": 0.5414740277400054, "grad_norm": 0.271484375, "learning_rate": 1.0296691442167212e-05, "loss": 0.5358, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3982 }, { "epoch": 0.5416100081588251, "grad_norm": 0.58203125, "learning_rate": 1.0291945739202843e-05, "loss": 0.5859, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3983 }, { "epoch": 0.5417459885776448, "grad_norm": 0.53125, "learning_rate": 1.0287199970430331e-05, "loss": 0.8643, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3984 }, { "epoch": 0.5418819689964645, "grad_norm": 0.349609375, "learning_rate": 1.0282454136919438e-05, "loss": 0.6543, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3985 }, { "epoch": 0.5420179494152843, "grad_norm": 0.3515625, "learning_rate": 1.0277708239739925e-05, "loss": 0.5866, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3986 }, { "epoch": 0.5421539298341039, "grad_norm": 3.015625, "learning_rate": 1.0272962279961583e-05, "loss": 0.8009, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3987 }, { "epoch": 0.5422899102529236, "grad_norm": 0.294921875, "learning_rate": 1.0268216258654204e-05, "loss": 0.4469, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3988 }, { "epoch": 0.5424258906717433, "grad_norm": 0.50390625, "learning_rate": 1.0263470176887602e-05, "loss": 0.71, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3989 }, { "epoch": 0.542561871090563, "grad_norm": 0.36328125, "learning_rate": 1.0258724035731602e-05, "loss": 0.5049, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3990 }, { "epoch": 0.5426978515093827, "grad_norm": 0.248046875, "learning_rate": 1.0253977836256041e-05, "loss": 0.5116, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3991 }, { "epoch": 0.5428338319282023, "grad_norm": 0.45703125, "learning_rate": 1.0249231579530769e-05, "loss": 0.7915, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3992 }, { "epoch": 0.542969812347022, "grad_norm": 0.6875, "learning_rate": 1.0244485266625652e-05, "loss": 0.5161, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3993 }, { "epoch": 0.5431057927658417, "grad_norm": 0.33984375, "learning_rate": 1.0239738898610569e-05, "loss": 0.6112, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3994 }, { "epoch": 0.5432417731846614, "grad_norm": 0.318359375, "learning_rate": 1.0234992476555406e-05, "loss": 0.578, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3995 }, { "epoch": 0.5433777536034811, "grad_norm": 0.287109375, "learning_rate": 1.0230246001530065e-05, "loss": 0.5176, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3996 }, { "epoch": 0.5435137340223007, "grad_norm": 0.431640625, "learning_rate": 1.0225499474604459e-05, "loss": 0.6855, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3997 }, { "epoch": 0.5436497144411204, "grad_norm": 0.2158203125, "learning_rate": 1.0220752896848516e-05, "loss": 0.4141, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3998 }, { "epoch": 0.5437856948599402, "grad_norm": 0.326171875, "learning_rate": 1.021600626933217e-05, "loss": 0.6696, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 3999 }, { "epoch": 0.5439216752787599, "grad_norm": 0.40625, "learning_rate": 1.021125959312537e-05, "loss": 0.7953, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4000 }, { "epoch": 0.5440576556975796, "grad_norm": 0.232421875, "learning_rate": 1.0206512869298075e-05, "loss": 0.3942, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4001 }, { "epoch": 0.5441936361163993, "grad_norm": 0.357421875, "learning_rate": 1.0201766098920256e-05, "loss": 0.5529, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4002 }, { "epoch": 0.5443296165352189, "grad_norm": 0.5859375, "learning_rate": 1.0197019283061888e-05, "loss": 0.6716, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4003 }, { "epoch": 0.5444655969540386, "grad_norm": 0.421875, "learning_rate": 1.0192272422792968e-05, "loss": 0.7103, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4004 }, { "epoch": 0.5446015773728583, "grad_norm": 0.3203125, "learning_rate": 1.0187525519183494e-05, "loss": 0.689, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4005 }, { "epoch": 0.544737557791678, "grad_norm": 0.39453125, "learning_rate": 1.0182778573303473e-05, "loss": 0.6117, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4006 }, { "epoch": 0.5448735382104977, "grad_norm": 0.326171875, "learning_rate": 1.0178031586222927e-05, "loss": 0.6258, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4007 }, { "epoch": 0.5450095186293173, "grad_norm": 0.54296875, "learning_rate": 1.0173284559011886e-05, "loss": 0.812, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4008 }, { "epoch": 0.545145499048137, "grad_norm": 0.72265625, "learning_rate": 1.0168537492740387e-05, "loss": 0.8203, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4009 }, { "epoch": 0.5452814794669567, "grad_norm": 0.37890625, "learning_rate": 1.0163790388478476e-05, "loss": 0.6855, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4010 }, { "epoch": 0.5454174598857765, "grad_norm": 0.6328125, "learning_rate": 1.015904324729621e-05, "loss": 0.5991, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4011 }, { "epoch": 0.5455534403045962, "grad_norm": 0.423828125, "learning_rate": 1.0154296070263649e-05, "loss": 0.7508, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4012 }, { "epoch": 0.5456894207234159, "grad_norm": 0.228515625, "learning_rate": 1.0149548858450871e-05, "loss": 0.5124, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4013 }, { "epoch": 0.5458254011422355, "grad_norm": 0.34375, "learning_rate": 1.0144801612927948e-05, "loss": 0.6241, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4014 }, { "epoch": 0.5459613815610552, "grad_norm": 0.2255859375, "learning_rate": 1.0140054334764974e-05, "loss": 0.4373, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4015 }, { "epoch": 0.5460973619798749, "grad_norm": 0.462890625, "learning_rate": 1.013530702503204e-05, "loss": 0.6986, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4016 }, { "epoch": 0.5462333423986946, "grad_norm": 0.390625, "learning_rate": 1.0130559684799247e-05, "loss": 0.7274, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4017 }, { "epoch": 0.5463693228175143, "grad_norm": 0.75, "learning_rate": 1.0125812315136706e-05, "loss": 0.5562, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4018 }, { "epoch": 0.546505303236334, "grad_norm": 0.6875, "learning_rate": 1.0121064917114529e-05, "loss": 0.6951, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4019 }, { "epoch": 0.5466412836551536, "grad_norm": 0.6015625, "learning_rate": 1.011631749180284e-05, "loss": 0.638, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4020 }, { "epoch": 0.5467772640739733, "grad_norm": 0.25390625, "learning_rate": 1.0111570040271768e-05, "loss": 0.3813, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4021 }, { "epoch": 0.546913244492793, "grad_norm": 0.435546875, "learning_rate": 1.0106822563591445e-05, "loss": 0.6209, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4022 }, { "epoch": 0.5470492249116127, "grad_norm": 0.263671875, "learning_rate": 1.0102075062832009e-05, "loss": 0.4585, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4023 }, { "epoch": 0.5471852053304325, "grad_norm": 0.396484375, "learning_rate": 1.0097327539063605e-05, "loss": 0.744, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4024 }, { "epoch": 0.5473211857492521, "grad_norm": 0.42578125, "learning_rate": 1.0092579993356386e-05, "loss": 0.6647, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4025 }, { "epoch": 0.5474571661680718, "grad_norm": 0.75390625, "learning_rate": 1.0087832426780504e-05, "loss": 0.762, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4026 }, { "epoch": 0.5475931465868915, "grad_norm": 0.5, "learning_rate": 1.0083084840406119e-05, "loss": 0.7295, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4027 }, { "epoch": 0.5477291270057112, "grad_norm": 0.60546875, "learning_rate": 1.0078337235303396e-05, "loss": 0.7795, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4028 }, { "epoch": 0.5478651074245309, "grad_norm": 0.373046875, "learning_rate": 1.00735896125425e-05, "loss": 0.5564, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4029 }, { "epoch": 0.5480010878433506, "grad_norm": 0.54296875, "learning_rate": 1.0068841973193604e-05, "loss": 0.7176, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4030 }, { "epoch": 0.5481370682621702, "grad_norm": 0.4921875, "learning_rate": 1.0064094318326888e-05, "loss": 0.5215, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4031 }, { "epoch": 0.5482730486809899, "grad_norm": 0.3125, "learning_rate": 1.0059346649012529e-05, "loss": 0.5073, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4032 }, { "epoch": 0.5484090290998096, "grad_norm": 0.4296875, "learning_rate": 1.0054598966320712e-05, "loss": 0.798, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4033 }, { "epoch": 0.5485450095186293, "grad_norm": 0.244140625, "learning_rate": 1.0049851271321618e-05, "loss": 0.498, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4034 }, { "epoch": 0.548680989937449, "grad_norm": 0.30078125, "learning_rate": 1.0045103565085438e-05, "loss": 0.5417, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4035 }, { "epoch": 0.5488169703562688, "grad_norm": 0.31640625, "learning_rate": 1.0040355848682364e-05, "loss": 0.5309, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4036 }, { "epoch": 0.5489529507750884, "grad_norm": 0.5703125, "learning_rate": 1.0035608123182588e-05, "loss": 0.8174, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4037 }, { "epoch": 0.5490889311939081, "grad_norm": 0.32421875, "learning_rate": 1.0030860389656306e-05, "loss": 0.4373, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4038 }, { "epoch": 0.5492249116127278, "grad_norm": 0.287109375, "learning_rate": 1.0026112649173713e-05, "loss": 0.5634, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4039 }, { "epoch": 0.5493608920315475, "grad_norm": 0.5, "learning_rate": 1.0021364902805014e-05, "loss": 0.8991, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4040 }, { "epoch": 0.5494968724503672, "grad_norm": 0.62109375, "learning_rate": 1.00166171516204e-05, "loss": 0.6829, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4041 }, { "epoch": 0.5496328528691868, "grad_norm": 0.388671875, "learning_rate": 1.001186939669008e-05, "loss": 0.7223, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4042 }, { "epoch": 0.5497688332880065, "grad_norm": 0.3828125, "learning_rate": 1.000712163908425e-05, "loss": 0.5729, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4043 }, { "epoch": 0.5499048137068262, "grad_norm": 0.384765625, "learning_rate": 1.0002373879873118e-05, "loss": 0.5999, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4044 }, { "epoch": 0.5500407941256459, "grad_norm": 0.359375, "learning_rate": 9.997626120126885e-06, "loss": 0.5566, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4045 }, { "epoch": 0.5501767745444656, "grad_norm": 0.416015625, "learning_rate": 9.992878360915752e-06, "loss": 0.736, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4046 }, { "epoch": 0.5503127549632852, "grad_norm": 0.5, "learning_rate": 9.988130603309924e-06, "loss": 0.6559, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4047 }, { "epoch": 0.5504487353821049, "grad_norm": 0.70703125, "learning_rate": 9.983382848379602e-06, "loss": 0.7225, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4048 }, { "epoch": 0.5505847158009247, "grad_norm": 0.3125, "learning_rate": 9.978635097194991e-06, "loss": 0.4549, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4049 }, { "epoch": 0.5507206962197444, "grad_norm": 0.478515625, "learning_rate": 9.973887350826289e-06, "loss": 0.7248, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4050 }, { "epoch": 0.5508566766385641, "grad_norm": 0.3671875, "learning_rate": 9.969139610343696e-06, "loss": 0.7899, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4051 }, { "epoch": 0.5509926570573838, "grad_norm": 0.4140625, "learning_rate": 9.964391876817413e-06, "loss": 0.6403, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4052 }, { "epoch": 0.5511286374762034, "grad_norm": 0.466796875, "learning_rate": 9.959644151317639e-06, "loss": 0.7433, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4053 }, { "epoch": 0.5512646178950231, "grad_norm": 0.38671875, "learning_rate": 9.954896434914563e-06, "loss": 0.6927, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4054 }, { "epoch": 0.5514005983138428, "grad_norm": 0.33203125, "learning_rate": 9.950148728678385e-06, "loss": 0.6221, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4055 }, { "epoch": 0.5515365787326625, "grad_norm": 0.427734375, "learning_rate": 9.945401033679291e-06, "loss": 0.6364, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4056 }, { "epoch": 0.5516725591514822, "grad_norm": 0.28515625, "learning_rate": 9.940653350987473e-06, "loss": 0.4832, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4057 }, { "epoch": 0.5518085395703018, "grad_norm": 0.6875, "learning_rate": 9.935905681673115e-06, "loss": 0.763, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4058 }, { "epoch": 0.5519445199891215, "grad_norm": 0.609375, "learning_rate": 9.931158026806398e-06, "loss": 0.5938, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4059 }, { "epoch": 0.5520805004079412, "grad_norm": 0.375, "learning_rate": 9.926410387457506e-06, "loss": 0.479, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4060 }, { "epoch": 0.5522164808267609, "grad_norm": 0.3515625, "learning_rate": 9.921662764696611e-06, "loss": 0.6958, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4061 }, { "epoch": 0.5523524612455807, "grad_norm": 0.61328125, "learning_rate": 9.916915159593886e-06, "loss": 0.8275, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4062 }, { "epoch": 0.5524884416644004, "grad_norm": 0.349609375, "learning_rate": 9.9121675732195e-06, "loss": 0.661, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4063 }, { "epoch": 0.55262442208322, "grad_norm": 0.2275390625, "learning_rate": 9.907420006643619e-06, "loss": 0.4111, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4064 }, { "epoch": 0.5527604025020397, "grad_norm": 0.361328125, "learning_rate": 9.9026724609364e-06, "loss": 0.7183, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4065 }, { "epoch": 0.5528963829208594, "grad_norm": 0.193359375, "learning_rate": 9.897924937167996e-06, "loss": 0.3841, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4066 }, { "epoch": 0.5530323633396791, "grad_norm": 0.236328125, "learning_rate": 9.89317743640856e-06, "loss": 0.5156, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4067 }, { "epoch": 0.5531683437584988, "grad_norm": 0.33984375, "learning_rate": 9.888429959728236e-06, "loss": 0.554, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4068 }, { "epoch": 0.5533043241773185, "grad_norm": 0.75, "learning_rate": 9.883682508197163e-06, "loss": 0.7323, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4069 }, { "epoch": 0.5534403045961381, "grad_norm": 0.447265625, "learning_rate": 9.878935082885476e-06, "loss": 0.7046, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4070 }, { "epoch": 0.5535762850149578, "grad_norm": 0.47265625, "learning_rate": 9.8741876848633e-06, "loss": 0.6901, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4071 }, { "epoch": 0.5537122654337775, "grad_norm": 0.33984375, "learning_rate": 9.869440315200758e-06, "loss": 0.611, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4072 }, { "epoch": 0.5538482458525972, "grad_norm": 0.28515625, "learning_rate": 9.864692974967967e-06, "loss": 0.5171, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4073 }, { "epoch": 0.553984226271417, "grad_norm": 0.8046875, "learning_rate": 9.859945665235032e-06, "loss": 0.577, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4074 }, { "epoch": 0.5541202066902366, "grad_norm": 0.51171875, "learning_rate": 9.855198387072052e-06, "loss": 0.5692, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4075 }, { "epoch": 0.5542561871090563, "grad_norm": 0.41015625, "learning_rate": 9.85045114154913e-06, "loss": 0.7863, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4076 }, { "epoch": 0.554392167527876, "grad_norm": 0.388671875, "learning_rate": 9.845703929736351e-06, "loss": 0.6927, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4077 }, { "epoch": 0.5545281479466957, "grad_norm": 0.4609375, "learning_rate": 9.840956752703791e-06, "loss": 0.7654, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4078 }, { "epoch": 0.5546641283655154, "grad_norm": 0.5234375, "learning_rate": 9.836209611521525e-06, "loss": 0.9243, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4079 }, { "epoch": 0.554800108784335, "grad_norm": 0.283203125, "learning_rate": 9.831462507259614e-06, "loss": 0.5093, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4080 }, { "epoch": 0.5549360892031547, "grad_norm": 0.427734375, "learning_rate": 9.826715440988113e-06, "loss": 0.5748, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4081 }, { "epoch": 0.5550720696219744, "grad_norm": 0.56640625, "learning_rate": 9.821968413777073e-06, "loss": 0.5127, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4082 }, { "epoch": 0.5552080500407941, "grad_norm": 0.427734375, "learning_rate": 9.817221426696527e-06, "loss": 0.674, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4083 }, { "epoch": 0.5553440304596138, "grad_norm": 0.47265625, "learning_rate": 9.812474480816508e-06, "loss": 0.5775, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4084 }, { "epoch": 0.5554800108784335, "grad_norm": 0.412109375, "learning_rate": 9.80772757720703e-06, "loss": 0.6809, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4085 }, { "epoch": 0.5556159912972531, "grad_norm": 0.39453125, "learning_rate": 9.80298071693811e-06, "loss": 0.8293, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4086 }, { "epoch": 0.5557519717160729, "grad_norm": 0.55078125, "learning_rate": 9.798233901079745e-06, "loss": 0.7503, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4087 }, { "epoch": 0.5558879521348926, "grad_norm": 0.23828125, "learning_rate": 9.793487130701925e-06, "loss": 0.4471, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4088 }, { "epoch": 0.5560239325537123, "grad_norm": 0.396484375, "learning_rate": 9.788740406874632e-06, "loss": 0.6509, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4089 }, { "epoch": 0.556159912972532, "grad_norm": 0.423828125, "learning_rate": 9.783993730667833e-06, "loss": 0.6709, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4090 }, { "epoch": 0.5562958933913517, "grad_norm": 0.6171875, "learning_rate": 9.779247103151487e-06, "loss": 0.8538, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4091 }, { "epoch": 0.5564318738101713, "grad_norm": 0.341796875, "learning_rate": 9.774500525395544e-06, "loss": 0.6948, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4092 }, { "epoch": 0.556567854228991, "grad_norm": 0.337890625, "learning_rate": 9.76975399846994e-06, "loss": 0.4705, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4093 }, { "epoch": 0.5567038346478107, "grad_norm": 0.455078125, "learning_rate": 9.765007523444599e-06, "loss": 0.6924, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4094 }, { "epoch": 0.5568398150666304, "grad_norm": 0.37890625, "learning_rate": 9.760261101389434e-06, "loss": 0.7192, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4095 }, { "epoch": 0.5569757954854501, "grad_norm": 0.306640625, "learning_rate": 9.75551473337435e-06, "loss": 0.613, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4096 }, { "epoch": 0.5571117759042697, "grad_norm": 0.287109375, "learning_rate": 9.750768420469233e-06, "loss": 0.5566, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4097 }, { "epoch": 0.5572477563230894, "grad_norm": 0.80859375, "learning_rate": 9.74602216374396e-06, "loss": 0.7288, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4098 }, { "epoch": 0.5573837367419092, "grad_norm": 0.91015625, "learning_rate": 9.741275964268402e-06, "loss": 0.8944, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4099 }, { "epoch": 0.5575197171607289, "grad_norm": 0.34765625, "learning_rate": 9.7365298231124e-06, "loss": 0.6315, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4100 }, { "epoch": 0.5576556975795486, "grad_norm": 0.63671875, "learning_rate": 9.731783741345798e-06, "loss": 0.8652, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4101 }, { "epoch": 0.5577916779983683, "grad_norm": 0.357421875, "learning_rate": 9.72703772003842e-06, "loss": 0.559, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4102 }, { "epoch": 0.5579276584171879, "grad_norm": 0.5390625, "learning_rate": 9.722291760260077e-06, "loss": 0.3755, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4103 }, { "epoch": 0.5580636388360076, "grad_norm": 0.33203125, "learning_rate": 9.717545863080567e-06, "loss": 0.6276, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4104 }, { "epoch": 0.5581996192548273, "grad_norm": 0.26171875, "learning_rate": 9.71280002956967e-06, "loss": 0.4788, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4105 }, { "epoch": 0.558335599673647, "grad_norm": 0.416015625, "learning_rate": 9.708054260797162e-06, "loss": 0.6597, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4106 }, { "epoch": 0.5584715800924667, "grad_norm": 0.671875, "learning_rate": 9.70330855783279e-06, "loss": 0.4637, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4107 }, { "epoch": 0.5586075605112863, "grad_norm": 1.1953125, "learning_rate": 9.698562921746298e-06, "loss": 0.9836, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4108 }, { "epoch": 0.558743540930106, "grad_norm": 0.384765625, "learning_rate": 9.693817353607407e-06, "loss": 0.6074, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4109 }, { "epoch": 0.5588795213489257, "grad_norm": 0.412109375, "learning_rate": 9.68907185448583e-06, "loss": 0.6398, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4110 }, { "epoch": 0.5590155017677454, "grad_norm": 0.5859375, "learning_rate": 9.684326425451257e-06, "loss": 0.8696, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4111 }, { "epoch": 0.5591514821865652, "grad_norm": 0.26953125, "learning_rate": 9.679581067573367e-06, "loss": 0.5042, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4112 }, { "epoch": 0.5592874626053849, "grad_norm": 0.6796875, "learning_rate": 9.674835781921823e-06, "loss": 0.7684, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4113 }, { "epoch": 0.5594234430242045, "grad_norm": 0.69921875, "learning_rate": 9.670090569566269e-06, "loss": 0.6082, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4114 }, { "epoch": 0.5595594234430242, "grad_norm": 0.451171875, "learning_rate": 9.665345431576335e-06, "loss": 0.6672, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4115 }, { "epoch": 0.5596954038618439, "grad_norm": 0.63671875, "learning_rate": 9.66060036902163e-06, "loss": 0.6476, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4116 }, { "epoch": 0.5598313842806636, "grad_norm": 0.75, "learning_rate": 9.655855382971751e-06, "loss": 0.609, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4117 }, { "epoch": 0.5599673646994833, "grad_norm": 0.40625, "learning_rate": 9.651110474496279e-06, "loss": 0.7952, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4118 }, { "epoch": 0.560103345118303, "grad_norm": 0.3828125, "learning_rate": 9.646365644664771e-06, "loss": 0.7482, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4119 }, { "epoch": 0.5602393255371226, "grad_norm": 0.59375, "learning_rate": 9.641620894546768e-06, "loss": 0.7826, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4120 }, { "epoch": 0.5603753059559423, "grad_norm": 0.32421875, "learning_rate": 9.636876225211799e-06, "loss": 0.5706, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4121 }, { "epoch": 0.560511286374762, "grad_norm": 0.6015625, "learning_rate": 9.632131637729366e-06, "loss": 0.7253, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4122 }, { "epoch": 0.5606472667935817, "grad_norm": 0.380859375, "learning_rate": 9.627387133168963e-06, "loss": 0.6982, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4123 }, { "epoch": 0.5607832472124014, "grad_norm": 0.337890625, "learning_rate": 9.622642712600053e-06, "loss": 0.6735, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4124 }, { "epoch": 0.5609192276312212, "grad_norm": 0.2451171875, "learning_rate": 9.617898377092091e-06, "loss": 0.5394, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4125 }, { "epoch": 0.5610552080500408, "grad_norm": 0.423828125, "learning_rate": 9.613154127714505e-06, "loss": 0.751, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4126 }, { "epoch": 0.5611911884688605, "grad_norm": 0.404296875, "learning_rate": 9.60840996553671e-06, "loss": 0.7251, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4127 }, { "epoch": 0.5613271688876802, "grad_norm": 0.62890625, "learning_rate": 9.603665891628094e-06, "loss": 0.5959, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4128 }, { "epoch": 0.5614631493064999, "grad_norm": 0.62890625, "learning_rate": 9.598921907058033e-06, "loss": 0.6554, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4129 }, { "epoch": 0.5615991297253196, "grad_norm": 0.38671875, "learning_rate": 9.594178012895879e-06, "loss": 0.4884, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4130 }, { "epoch": 0.5617351101441392, "grad_norm": 0.30078125, "learning_rate": 9.58943421021096e-06, "loss": 0.5869, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4131 }, { "epoch": 0.5618710905629589, "grad_norm": 0.4921875, "learning_rate": 9.584690500072591e-06, "loss": 0.5516, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4132 }, { "epoch": 0.5620070709817786, "grad_norm": 0.416015625, "learning_rate": 9.579946883550061e-06, "loss": 0.6631, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4133 }, { "epoch": 0.5621430514005983, "grad_norm": 0.44921875, "learning_rate": 9.575203361712638e-06, "loss": 0.6535, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4134 }, { "epoch": 0.562279031819418, "grad_norm": 0.37890625, "learning_rate": 9.570459935629567e-06, "loss": 0.6991, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4135 }, { "epoch": 0.5624150122382376, "grad_norm": 0.427734375, "learning_rate": 9.565716606370083e-06, "loss": 0.6258, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4136 }, { "epoch": 0.5625509926570574, "grad_norm": 1.1953125, "learning_rate": 9.560973375003385e-06, "loss": 0.8779, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4137 }, { "epoch": 0.5626869730758771, "grad_norm": 0.45703125, "learning_rate": 9.556230242598656e-06, "loss": 0.874, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4138 }, { "epoch": 0.5628229534946968, "grad_norm": 0.310546875, "learning_rate": 9.551487210225057e-06, "loss": 0.5915, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4139 }, { "epoch": 0.5629589339135165, "grad_norm": 0.5234375, "learning_rate": 9.546744278951722e-06, "loss": 0.7132, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4140 }, { "epoch": 0.5630949143323362, "grad_norm": 0.30859375, "learning_rate": 9.542001449847769e-06, "loss": 0.5555, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4141 }, { "epoch": 0.5632308947511558, "grad_norm": 0.5703125, "learning_rate": 9.53725872398229e-06, "loss": 0.887, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4142 }, { "epoch": 0.5633668751699755, "grad_norm": 0.37109375, "learning_rate": 9.53251610242435e-06, "loss": 0.7855, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4143 }, { "epoch": 0.5635028555887952, "grad_norm": 0.40234375, "learning_rate": 9.527773586242999e-06, "loss": 0.4206, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4144 }, { "epoch": 0.5636388360076149, "grad_norm": 0.4296875, "learning_rate": 9.523031176507253e-06, "loss": 0.6909, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4145 }, { "epoch": 0.5637748164264346, "grad_norm": 0.65625, "learning_rate": 9.518288874286111e-06, "loss": 0.5381, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4146 }, { "epoch": 0.5639107968452542, "grad_norm": 0.314453125, "learning_rate": 9.513546680648546e-06, "loss": 0.6392, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4147 }, { "epoch": 0.5640467772640739, "grad_norm": 0.5859375, "learning_rate": 9.508804596663509e-06, "loss": 0.8521, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4148 }, { "epoch": 0.5641827576828936, "grad_norm": 0.279296875, "learning_rate": 9.504062623399919e-06, "loss": 0.6694, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4149 }, { "epoch": 0.5643187381017134, "grad_norm": 0.8046875, "learning_rate": 9.499320761926679e-06, "loss": 0.7124, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4150 }, { "epoch": 0.5644547185205331, "grad_norm": 0.345703125, "learning_rate": 9.494579013312657e-06, "loss": 0.668, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4151 }, { "epoch": 0.5645906989393528, "grad_norm": 0.283203125, "learning_rate": 9.489837378626709e-06, "loss": 0.6061, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4152 }, { "epoch": 0.5647266793581724, "grad_norm": 0.375, "learning_rate": 9.485095858937653e-06, "loss": 0.6621, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4153 }, { "epoch": 0.5648626597769921, "grad_norm": 0.283203125, "learning_rate": 9.480354455314283e-06, "loss": 0.5352, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4154 }, { "epoch": 0.5649986401958118, "grad_norm": 0.390625, "learning_rate": 9.475613168825374e-06, "loss": 0.6814, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4155 }, { "epoch": 0.5651346206146315, "grad_norm": 0.64453125, "learning_rate": 9.470872000539666e-06, "loss": 0.8195, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4156 }, { "epoch": 0.5652706010334512, "grad_norm": 0.318359375, "learning_rate": 9.466130951525879e-06, "loss": 0.6701, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4157 }, { "epoch": 0.5654065814522709, "grad_norm": 0.4375, "learning_rate": 9.461390022852703e-06, "loss": 0.6266, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4158 }, { "epoch": 0.5655425618710905, "grad_norm": 0.251953125, "learning_rate": 9.4566492155888e-06, "loss": 0.5247, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4159 }, { "epoch": 0.5656785422899102, "grad_norm": 0.392578125, "learning_rate": 9.451908530802809e-06, "loss": 0.7336, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4160 }, { "epoch": 0.5658145227087299, "grad_norm": 7.15625, "learning_rate": 9.447167969563334e-06, "loss": 0.8049, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4161 }, { "epoch": 0.5659505031275497, "grad_norm": 0.45703125, "learning_rate": 9.442427532938958e-06, "loss": 0.4959, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4162 }, { "epoch": 0.5660864835463694, "grad_norm": 0.36328125, "learning_rate": 9.437687221998232e-06, "loss": 0.7135, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4163 }, { "epoch": 0.566222463965189, "grad_norm": 0.408203125, "learning_rate": 9.432947037809683e-06, "loss": 0.8081, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4164 }, { "epoch": 0.5663584443840087, "grad_norm": 0.9765625, "learning_rate": 9.428206981441804e-06, "loss": 0.6777, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4165 }, { "epoch": 0.5664944248028284, "grad_norm": 0.46484375, "learning_rate": 9.423467053963063e-06, "loss": 0.7674, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4166 }, { "epoch": 0.5666304052216481, "grad_norm": 0.390625, "learning_rate": 9.418727256441895e-06, "loss": 0.5936, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4167 }, { "epoch": 0.5667663856404678, "grad_norm": 0.291015625, "learning_rate": 9.41398758994671e-06, "loss": 0.4957, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4168 }, { "epoch": 0.5669023660592875, "grad_norm": 0.357421875, "learning_rate": 9.409248055545889e-06, "loss": 0.7266, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4169 }, { "epoch": 0.5670383464781071, "grad_norm": 0.330078125, "learning_rate": 9.404508654307778e-06, "loss": 0.4956, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4170 }, { "epoch": 0.5671743268969268, "grad_norm": 0.37109375, "learning_rate": 9.399769387300697e-06, "loss": 0.4767, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4171 }, { "epoch": 0.5673103073157465, "grad_norm": 0.423828125, "learning_rate": 9.395030255592933e-06, "loss": 0.5936, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4172 }, { "epoch": 0.5674462877345662, "grad_norm": 0.32421875, "learning_rate": 9.390291260252748e-06, "loss": 0.4805, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4173 }, { "epoch": 0.5675822681533859, "grad_norm": 0.2890625, "learning_rate": 9.385552402348364e-06, "loss": 0.5845, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4174 }, { "epoch": 0.5677182485722057, "grad_norm": 0.29296875, "learning_rate": 9.380813682947985e-06, "loss": 0.5545, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4175 }, { "epoch": 0.5678542289910253, "grad_norm": 0.45703125, "learning_rate": 9.37607510311977e-06, "loss": 0.5936, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4176 }, { "epoch": 0.567990209409845, "grad_norm": 0.6640625, "learning_rate": 9.371336663931854e-06, "loss": 0.6735, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4177 }, { "epoch": 0.5681261898286647, "grad_norm": 0.734375, "learning_rate": 9.36659836645234e-06, "loss": 0.8677, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4178 }, { "epoch": 0.5682621702474844, "grad_norm": 0.4609375, "learning_rate": 9.361860211749299e-06, "loss": 0.7718, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4179 }, { "epoch": 0.5683981506663041, "grad_norm": 0.267578125, "learning_rate": 9.35712220089077e-06, "loss": 0.5239, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4180 }, { "epoch": 0.5685341310851237, "grad_norm": 0.333984375, "learning_rate": 9.352384334944754e-06, "loss": 0.5492, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4181 }, { "epoch": 0.5686701115039434, "grad_norm": 0.5390625, "learning_rate": 9.34764661497923e-06, "loss": 0.7573, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4182 }, { "epoch": 0.5688060919227631, "grad_norm": 0.9296875, "learning_rate": 9.342909042062131e-06, "loss": 0.7091, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4183 }, { "epoch": 0.5689420723415828, "grad_norm": 0.451171875, "learning_rate": 9.33817161726137e-06, "loss": 0.6722, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4184 }, { "epoch": 0.5690780527604025, "grad_norm": 0.4375, "learning_rate": 9.333434341644816e-06, "loss": 0.7166, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4185 }, { "epoch": 0.5692140331792221, "grad_norm": 0.4921875, "learning_rate": 9.328697216280313e-06, "loss": 0.6663, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4186 }, { "epoch": 0.5693500135980419, "grad_norm": 0.546875, "learning_rate": 9.323960242235663e-06, "loss": 0.6644, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4187 }, { "epoch": 0.5694859940168616, "grad_norm": 0.35546875, "learning_rate": 9.319223420578642e-06, "loss": 0.7788, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4188 }, { "epoch": 0.5696219744356813, "grad_norm": 5.09375, "learning_rate": 9.314486752376984e-06, "loss": 0.7094, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4189 }, { "epoch": 0.569757954854501, "grad_norm": 0.4140625, "learning_rate": 9.309750238698393e-06, "loss": 0.7199, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4190 }, { "epoch": 0.5698939352733207, "grad_norm": 0.50390625, "learning_rate": 9.305013880610536e-06, "loss": 0.797, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4191 }, { "epoch": 0.5700299156921403, "grad_norm": 0.33984375, "learning_rate": 9.30027767918105e-06, "loss": 0.5021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4192 }, { "epoch": 0.57016589611096, "grad_norm": 0.3359375, "learning_rate": 9.295541635477527e-06, "loss": 0.6388, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4193 }, { "epoch": 0.5703018765297797, "grad_norm": 0.52734375, "learning_rate": 9.290805750567532e-06, "loss": 0.8442, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4194 }, { "epoch": 0.5704378569485994, "grad_norm": 0.73046875, "learning_rate": 9.286070025518589e-06, "loss": 0.6255, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4195 }, { "epoch": 0.5705738373674191, "grad_norm": 0.44921875, "learning_rate": 9.281334461398196e-06, "loss": 0.6479, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4196 }, { "epoch": 0.5707098177862387, "grad_norm": 0.384765625, "learning_rate": 9.276599059273801e-06, "loss": 0.6291, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4197 }, { "epoch": 0.5708457982050584, "grad_norm": 0.2314453125, "learning_rate": 9.271863820212822e-06, "loss": 0.3545, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4198 }, { "epoch": 0.5709817786238781, "grad_norm": 0.68359375, "learning_rate": 9.267128745282642e-06, "loss": 0.5549, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4199 }, { "epoch": 0.5711177590426979, "grad_norm": 0.28515625, "learning_rate": 9.262393835550601e-06, "loss": 0.5024, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4200 }, { "epoch": 0.5712537394615176, "grad_norm": 0.310546875, "learning_rate": 9.257659092084007e-06, "loss": 0.5586, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4201 }, { "epoch": 0.5713897198803373, "grad_norm": 0.423828125, "learning_rate": 9.25292451595013e-06, "loss": 0.7, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4202 }, { "epoch": 0.571525700299157, "grad_norm": 0.390625, "learning_rate": 9.248190108216203e-06, "loss": 0.8376, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4203 }, { "epoch": 0.5716616807179766, "grad_norm": 0.32421875, "learning_rate": 9.243455869949414e-06, "loss": 0.6156, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4204 }, { "epoch": 0.5717976611367963, "grad_norm": 0.490234375, "learning_rate": 9.238721802216923e-06, "loss": 0.6387, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4205 }, { "epoch": 0.571933641555616, "grad_norm": 0.38671875, "learning_rate": 9.233987906085844e-06, "loss": 0.5303, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4206 }, { "epoch": 0.5720696219744357, "grad_norm": 0.361328125, "learning_rate": 9.22925418262326e-06, "loss": 0.5913, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4207 }, { "epoch": 0.5722056023932554, "grad_norm": 0.341796875, "learning_rate": 9.224520632896206e-06, "loss": 0.5505, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4208 }, { "epoch": 0.572341582812075, "grad_norm": 0.51953125, "learning_rate": 9.219787257971679e-06, "loss": 0.436, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4209 }, { "epoch": 0.5724775632308947, "grad_norm": 0.32421875, "learning_rate": 9.215054058916646e-06, "loss": 0.5641, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4210 }, { "epoch": 0.5726135436497144, "grad_norm": 0.37890625, "learning_rate": 9.210321036798023e-06, "loss": 0.6438, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4211 }, { "epoch": 0.5727495240685341, "grad_norm": 1.6171875, "learning_rate": 9.205588192682693e-06, "loss": 0.5544, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4212 }, { "epoch": 0.5728855044873539, "grad_norm": 0.482421875, "learning_rate": 9.200855527637496e-06, "loss": 0.8208, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4213 }, { "epoch": 0.5730214849061736, "grad_norm": 1.90625, "learning_rate": 9.196123042729236e-06, "loss": 0.8584, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4214 }, { "epoch": 0.5731574653249932, "grad_norm": 0.330078125, "learning_rate": 9.19139073902467e-06, "loss": 0.5225, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4215 }, { "epoch": 0.5732934457438129, "grad_norm": 0.4296875, "learning_rate": 9.186658617590516e-06, "loss": 0.6891, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4216 }, { "epoch": 0.5734294261626326, "grad_norm": 0.388671875, "learning_rate": 9.181926679493454e-06, "loss": 0.6756, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4217 }, { "epoch": 0.5735654065814523, "grad_norm": 0.361328125, "learning_rate": 9.177194925800121e-06, "loss": 0.5354, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4218 }, { "epoch": 0.573701387000272, "grad_norm": 0.361328125, "learning_rate": 9.172463357577111e-06, "loss": 0.5697, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4219 }, { "epoch": 0.5738373674190916, "grad_norm": 0.54296875, "learning_rate": 9.167731975890977e-06, "loss": 0.6506, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4220 }, { "epoch": 0.5739733478379113, "grad_norm": 0.28515625, "learning_rate": 9.163000781808232e-06, "loss": 0.6061, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4221 }, { "epoch": 0.574109328256731, "grad_norm": 0.87109375, "learning_rate": 9.158269776395343e-06, "loss": 0.6868, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4222 }, { "epoch": 0.5742453086755507, "grad_norm": 0.404296875, "learning_rate": 9.153538960718738e-06, "loss": 0.818, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4223 }, { "epoch": 0.5743812890943704, "grad_norm": 0.35546875, "learning_rate": 9.1488083358448e-06, "loss": 0.5785, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4224 }, { "epoch": 0.5745172695131902, "grad_norm": 0.3125, "learning_rate": 9.14407790283987e-06, "loss": 0.5957, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4225 }, { "epoch": 0.5746532499320098, "grad_norm": 0.349609375, "learning_rate": 9.139347662770248e-06, "loss": 0.6675, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4226 }, { "epoch": 0.5747892303508295, "grad_norm": 0.3046875, "learning_rate": 9.134617616702182e-06, "loss": 0.5786, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4227 }, { "epoch": 0.5749252107696492, "grad_norm": 0.30078125, "learning_rate": 9.129887765701885e-06, "loss": 0.3937, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4228 }, { "epoch": 0.5750611911884689, "grad_norm": 0.31640625, "learning_rate": 9.125158110835525e-06, "loss": 0.7004, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4229 }, { "epoch": 0.5751971716072886, "grad_norm": 0.40625, "learning_rate": 9.120428653169222e-06, "loss": 0.812, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4230 }, { "epoch": 0.5753331520261082, "grad_norm": 0.48828125, "learning_rate": 9.115699393769055e-06, "loss": 0.6243, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4231 }, { "epoch": 0.5754691324449279, "grad_norm": 0.28515625, "learning_rate": 9.110970333701057e-06, "loss": 0.5602, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4232 }, { "epoch": 0.5756051128637476, "grad_norm": 0.2197265625, "learning_rate": 9.106241474031213e-06, "loss": 0.4012, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4233 }, { "epoch": 0.5757410932825673, "grad_norm": 0.30078125, "learning_rate": 9.101512815825469e-06, "loss": 0.5374, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4234 }, { "epoch": 0.575877073701387, "grad_norm": 0.375, "learning_rate": 9.096784360149722e-06, "loss": 0.588, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4235 }, { "epoch": 0.5760130541202066, "grad_norm": 0.34765625, "learning_rate": 9.092056108069822e-06, "loss": 0.6406, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4236 }, { "epoch": 0.5761490345390263, "grad_norm": 0.51953125, "learning_rate": 9.087328060651576e-06, "loss": 0.6392, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4237 }, { "epoch": 0.5762850149578461, "grad_norm": 0.40625, "learning_rate": 9.082600218960743e-06, "loss": 0.7349, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4238 }, { "epoch": 0.5764209953766658, "grad_norm": 0.30859375, "learning_rate": 9.077872584063037e-06, "loss": 0.6543, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4239 }, { "epoch": 0.5765569757954855, "grad_norm": 0.515625, "learning_rate": 9.073145157024126e-06, "loss": 0.784, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4240 }, { "epoch": 0.5766929562143052, "grad_norm": 0.5703125, "learning_rate": 9.068417938909628e-06, "loss": 0.695, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4241 }, { "epoch": 0.5768289366331248, "grad_norm": 0.365234375, "learning_rate": 9.063690930785116e-06, "loss": 0.7038, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4242 }, { "epoch": 0.5769649170519445, "grad_norm": 0.345703125, "learning_rate": 9.058964133716113e-06, "loss": 0.5802, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4243 }, { "epoch": 0.5771008974707642, "grad_norm": 0.486328125, "learning_rate": 9.054237548768103e-06, "loss": 0.6692, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4244 }, { "epoch": 0.5772368778895839, "grad_norm": 0.439453125, "learning_rate": 9.049511177006512e-06, "loss": 0.5453, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4245 }, { "epoch": 0.5773728583084036, "grad_norm": 0.41015625, "learning_rate": 9.04478501949672e-06, "loss": 0.6683, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4246 }, { "epoch": 0.5775088387272233, "grad_norm": 0.44921875, "learning_rate": 9.040059077304065e-06, "loss": 0.7368, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4247 }, { "epoch": 0.5776448191460429, "grad_norm": 1.65625, "learning_rate": 9.03533335149383e-06, "loss": 0.7036, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4248 }, { "epoch": 0.5777807995648626, "grad_norm": 0.703125, "learning_rate": 9.030607843131251e-06, "loss": 0.7962, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4249 }, { "epoch": 0.5779167799836824, "grad_norm": 0.30859375, "learning_rate": 9.025882553281518e-06, "loss": 0.6872, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4250 }, { "epoch": 0.5780527604025021, "grad_norm": 0.6640625, "learning_rate": 9.021157483009766e-06, "loss": 0.8265, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4251 }, { "epoch": 0.5781887408213218, "grad_norm": 0.306640625, "learning_rate": 9.016432633381083e-06, "loss": 0.5815, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4252 }, { "epoch": 0.5783247212401414, "grad_norm": 0.443359375, "learning_rate": 9.011708005460512e-06, "loss": 0.6868, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4253 }, { "epoch": 0.5784607016589611, "grad_norm": 0.72265625, "learning_rate": 9.006983600313038e-06, "loss": 0.7259, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4254 }, { "epoch": 0.5785966820777808, "grad_norm": 0.291015625, "learning_rate": 9.002259419003602e-06, "loss": 0.715, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4255 }, { "epoch": 0.5787326624966005, "grad_norm": 0.60546875, "learning_rate": 8.997535462597087e-06, "loss": 0.533, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4256 }, { "epoch": 0.5788686429154202, "grad_norm": 0.40234375, "learning_rate": 8.99281173215834e-06, "loss": 0.6929, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4257 }, { "epoch": 0.5790046233342399, "grad_norm": 0.298828125, "learning_rate": 8.98808822875214e-06, "loss": 0.6243, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4258 }, { "epoch": 0.5791406037530595, "grad_norm": 0.69921875, "learning_rate": 8.983364953443227e-06, "loss": 0.7183, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4259 }, { "epoch": 0.5792765841718792, "grad_norm": 0.423828125, "learning_rate": 8.978641907296283e-06, "loss": 0.4939, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4260 }, { "epoch": 0.5794125645906989, "grad_norm": 0.314453125, "learning_rate": 8.973919091375942e-06, "loss": 0.6141, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4261 }, { "epoch": 0.5795485450095186, "grad_norm": 0.376953125, "learning_rate": 8.969196506746781e-06, "loss": 0.2116, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4262 }, { "epoch": 0.5796845254283384, "grad_norm": 0.42578125, "learning_rate": 8.96447415447333e-06, "loss": 0.7725, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4263 }, { "epoch": 0.579820505847158, "grad_norm": 0.38671875, "learning_rate": 8.959752035620065e-06, "loss": 0.7773, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4264 }, { "epoch": 0.5799564862659777, "grad_norm": 0.359375, "learning_rate": 8.955030151251409e-06, "loss": 0.6966, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4265 }, { "epoch": 0.5800924666847974, "grad_norm": 0.326171875, "learning_rate": 8.950308502431733e-06, "loss": 0.6108, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4266 }, { "epoch": 0.5802284471036171, "grad_norm": 3.796875, "learning_rate": 8.945587090225354e-06, "loss": 0.8626, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4267 }, { "epoch": 0.5803644275224368, "grad_norm": 0.447265625, "learning_rate": 8.940865915696537e-06, "loss": 0.6172, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4268 }, { "epoch": 0.5805004079412565, "grad_norm": 0.62109375, "learning_rate": 8.93614497990949e-06, "loss": 0.5013, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4269 }, { "epoch": 0.5806363883600761, "grad_norm": 0.40234375, "learning_rate": 8.93142428392837e-06, "loss": 0.777, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4270 }, { "epoch": 0.5807723687788958, "grad_norm": 1.21875, "learning_rate": 8.926703828817285e-06, "loss": 0.7124, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4271 }, { "epoch": 0.5809083491977155, "grad_norm": 0.330078125, "learning_rate": 8.921983615640277e-06, "loss": 0.6639, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4272 }, { "epoch": 0.5810443296165352, "grad_norm": 0.68359375, "learning_rate": 8.917263645461343e-06, "loss": 0.738, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4273 }, { "epoch": 0.5811803100353549, "grad_norm": 0.365234375, "learning_rate": 8.91254391934442e-06, "loss": 0.5991, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4274 }, { "epoch": 0.5813162904541745, "grad_norm": 0.625, "learning_rate": 8.907824438353393e-06, "loss": 0.5526, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4275 }, { "epoch": 0.5814522708729943, "grad_norm": 0.408203125, "learning_rate": 8.90310520355209e-06, "loss": 0.9068, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4276 }, { "epoch": 0.581588251291814, "grad_norm": 0.466796875, "learning_rate": 8.898386216004287e-06, "loss": 0.8197, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4277 }, { "epoch": 0.5817242317106337, "grad_norm": 0.380859375, "learning_rate": 8.893667476773698e-06, "loss": 0.7713, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4278 }, { "epoch": 0.5818602121294534, "grad_norm": 0.384765625, "learning_rate": 8.888948986923984e-06, "loss": 0.5668, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4279 }, { "epoch": 0.5819961925482731, "grad_norm": 0.5625, "learning_rate": 8.884230747518755e-06, "loss": 0.8693, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4280 }, { "epoch": 0.5821321729670927, "grad_norm": 0.421875, "learning_rate": 8.879512759621556e-06, "loss": 0.6628, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4281 }, { "epoch": 0.5822681533859124, "grad_norm": 0.3046875, "learning_rate": 8.87479502429588e-06, "loss": 0.5501, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4282 }, { "epoch": 0.5824041338047321, "grad_norm": 0.484375, "learning_rate": 8.870077542605163e-06, "loss": 0.8542, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4283 }, { "epoch": 0.5825401142235518, "grad_norm": 0.34375, "learning_rate": 8.865360315612781e-06, "loss": 0.6047, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4284 }, { "epoch": 0.5826760946423715, "grad_norm": 0.59375, "learning_rate": 8.860643344382057e-06, "loss": 0.5851, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4285 }, { "epoch": 0.5828120750611911, "grad_norm": 0.265625, "learning_rate": 8.855926629976252e-06, "loss": 0.5215, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4286 }, { "epoch": 0.5829480554800108, "grad_norm": 0.38671875, "learning_rate": 8.851210173458573e-06, "loss": 0.743, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4287 }, { "epoch": 0.5830840358988306, "grad_norm": 0.376953125, "learning_rate": 8.846493975892166e-06, "loss": 0.5718, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4288 }, { "epoch": 0.5832200163176503, "grad_norm": 0.421875, "learning_rate": 8.84177803834012e-06, "loss": 0.6702, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4289 }, { "epoch": 0.58335599673647, "grad_norm": 0.55078125, "learning_rate": 8.837062361865465e-06, "loss": 0.7555, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4290 }, { "epoch": 0.5834919771552897, "grad_norm": 0.35546875, "learning_rate": 8.832346947531171e-06, "loss": 0.4993, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4291 }, { "epoch": 0.5836279575741093, "grad_norm": 0.51171875, "learning_rate": 8.827631796400152e-06, "loss": 0.6738, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4292 }, { "epoch": 0.583763937992929, "grad_norm": 0.54296875, "learning_rate": 8.822916909535258e-06, "loss": 0.8556, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4293 }, { "epoch": 0.5838999184117487, "grad_norm": 0.40625, "learning_rate": 8.818202287999285e-06, "loss": 0.6657, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4294 }, { "epoch": 0.5840358988305684, "grad_norm": 0.39453125, "learning_rate": 8.813487932854963e-06, "loss": 0.4635, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4295 }, { "epoch": 0.5841718792493881, "grad_norm": 0.384765625, "learning_rate": 8.808773845164969e-06, "loss": 0.6849, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4296 }, { "epoch": 0.5843078596682078, "grad_norm": 0.470703125, "learning_rate": 8.804060025991914e-06, "loss": 0.6341, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4297 }, { "epoch": 0.5844438400870274, "grad_norm": 0.3203125, "learning_rate": 8.799346476398351e-06, "loss": 0.6717, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4298 }, { "epoch": 0.5845798205058471, "grad_norm": 0.54296875, "learning_rate": 8.79463319744677e-06, "loss": 0.5046, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4299 }, { "epoch": 0.5847158009246668, "grad_norm": 0.38671875, "learning_rate": 8.789920190199607e-06, "loss": 0.6584, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4300 }, { "epoch": 0.5848517813434866, "grad_norm": 0.3984375, "learning_rate": 8.785207455719224e-06, "loss": 0.6838, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4301 }, { "epoch": 0.5849877617623063, "grad_norm": 0.263671875, "learning_rate": 8.780494995067934e-06, "loss": 0.5404, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4302 }, { "epoch": 0.585123742181126, "grad_norm": 0.244140625, "learning_rate": 8.775782809307983e-06, "loss": 0.564, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4303 }, { "epoch": 0.5852597225999456, "grad_norm": 1.109375, "learning_rate": 8.771070899501552e-06, "loss": 0.7622, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4304 }, { "epoch": 0.5853957030187653, "grad_norm": 0.291015625, "learning_rate": 8.766359266710767e-06, "loss": 0.5247, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4305 }, { "epoch": 0.585531683437585, "grad_norm": 0.341796875, "learning_rate": 8.761647911997684e-06, "loss": 0.6003, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4306 }, { "epoch": 0.5856676638564047, "grad_norm": 0.330078125, "learning_rate": 8.756936836424303e-06, "loss": 0.633, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4307 }, { "epoch": 0.5858036442752244, "grad_norm": 0.52734375, "learning_rate": 8.752226041052556e-06, "loss": 0.6021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4308 }, { "epoch": 0.585939624694044, "grad_norm": 0.44921875, "learning_rate": 8.747515526944318e-06, "loss": 0.7816, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4309 }, { "epoch": 0.5860756051128637, "grad_norm": 0.328125, "learning_rate": 8.74280529516139e-06, "loss": 0.7419, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4310 }, { "epoch": 0.5862115855316834, "grad_norm": 0.5703125, "learning_rate": 8.738095346765519e-06, "loss": 0.8221, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4311 }, { "epoch": 0.5863475659505031, "grad_norm": 0.33203125, "learning_rate": 8.733385682818384e-06, "loss": 0.7314, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4312 }, { "epoch": 0.5864835463693229, "grad_norm": 0.4609375, "learning_rate": 8.728676304381604e-06, "loss": 0.7769, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4313 }, { "epoch": 0.5866195267881426, "grad_norm": 0.51171875, "learning_rate": 8.723967212516729e-06, "loss": 0.7122, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4314 }, { "epoch": 0.5867555072069622, "grad_norm": 0.34765625, "learning_rate": 8.719258408285244e-06, "loss": 0.6463, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4315 }, { "epoch": 0.5868914876257819, "grad_norm": 0.416015625, "learning_rate": 8.714549892748569e-06, "loss": 0.7202, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4316 }, { "epoch": 0.5870274680446016, "grad_norm": 0.51171875, "learning_rate": 8.709841666968069e-06, "loss": 0.7687, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4317 }, { "epoch": 0.5871634484634213, "grad_norm": 0.498046875, "learning_rate": 8.705133732005029e-06, "loss": 0.8013, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4318 }, { "epoch": 0.587299428882241, "grad_norm": 0.4609375, "learning_rate": 8.700426088920677e-06, "loss": 0.8218, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4319 }, { "epoch": 0.5874354093010606, "grad_norm": 0.33984375, "learning_rate": 8.695718738776173e-06, "loss": 0.706, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4320 }, { "epoch": 0.5875713897198803, "grad_norm": 0.421875, "learning_rate": 8.691011682632613e-06, "loss": 0.6822, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4321 }, { "epoch": 0.5877073701387, "grad_norm": 0.318359375, "learning_rate": 8.686304921551021e-06, "loss": 0.6807, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4322 }, { "epoch": 0.5878433505575197, "grad_norm": 0.37890625, "learning_rate": 8.681598456592363e-06, "loss": 0.6712, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4323 }, { "epoch": 0.5879793309763394, "grad_norm": 0.3359375, "learning_rate": 8.676892288817531e-06, "loss": 0.6652, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4324 }, { "epoch": 0.588115311395159, "grad_norm": 0.66796875, "learning_rate": 8.672186419287353e-06, "loss": 0.7925, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4325 }, { "epoch": 0.5882512918139788, "grad_norm": 0.5078125, "learning_rate": 8.667480849062592e-06, "loss": 0.5887, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4326 }, { "epoch": 0.5883872722327985, "grad_norm": 0.84375, "learning_rate": 8.662775579203937e-06, "loss": 0.8407, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4327 }, { "epoch": 0.5885232526516182, "grad_norm": 0.34375, "learning_rate": 8.658070610772016e-06, "loss": 0.7215, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4328 }, { "epoch": 0.5886592330704379, "grad_norm": 0.69140625, "learning_rate": 8.653365944827387e-06, "loss": 0.751, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4329 }, { "epoch": 0.5887952134892576, "grad_norm": 0.3515625, "learning_rate": 8.648661582430538e-06, "loss": 0.7306, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4330 }, { "epoch": 0.5889311939080772, "grad_norm": 0.4140625, "learning_rate": 8.643957524641889e-06, "loss": 0.7249, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4331 }, { "epoch": 0.5890671743268969, "grad_norm": 0.3984375, "learning_rate": 8.639253772521794e-06, "loss": 0.4883, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4332 }, { "epoch": 0.5892031547457166, "grad_norm": 0.376953125, "learning_rate": 8.634550327130533e-06, "loss": 0.7738, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4333 }, { "epoch": 0.5893391351645363, "grad_norm": 0.3671875, "learning_rate": 8.629847189528325e-06, "loss": 0.5889, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4334 }, { "epoch": 0.589475115583356, "grad_norm": 0.322265625, "learning_rate": 8.62514436077531e-06, "loss": 0.5895, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4335 }, { "epoch": 0.5896110960021756, "grad_norm": 0.2431640625, "learning_rate": 8.620441841931566e-06, "loss": 0.459, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4336 }, { "epoch": 0.5897470764209953, "grad_norm": 0.91015625, "learning_rate": 8.615739634057098e-06, "loss": 0.6761, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4337 }, { "epoch": 0.589883056839815, "grad_norm": 0.279296875, "learning_rate": 8.61103773821184e-06, "loss": 0.5949, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4338 }, { "epoch": 0.5900190372586348, "grad_norm": 0.447265625, "learning_rate": 8.606336155455658e-06, "loss": 0.8014, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4339 }, { "epoch": 0.5901550176774545, "grad_norm": 0.341796875, "learning_rate": 8.601634886848345e-06, "loss": 0.5387, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4340 }, { "epoch": 0.5902909980962742, "grad_norm": 0.3671875, "learning_rate": 8.596933933449625e-06, "loss": 0.6478, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4341 }, { "epoch": 0.5904269785150938, "grad_norm": 0.71875, "learning_rate": 8.59223329631915e-06, "loss": 0.7043, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4342 }, { "epoch": 0.5905629589339135, "grad_norm": 0.62890625, "learning_rate": 8.587532976516503e-06, "loss": 0.6181, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4343 }, { "epoch": 0.5906989393527332, "grad_norm": 0.28515625, "learning_rate": 8.582832975101191e-06, "loss": 0.5757, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4344 }, { "epoch": 0.5908349197715529, "grad_norm": 0.380859375, "learning_rate": 8.578133293132652e-06, "loss": 0.7505, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4345 }, { "epoch": 0.5909709001903726, "grad_norm": 0.45703125, "learning_rate": 8.573433931670255e-06, "loss": 0.8218, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4346 }, { "epoch": 0.5911068806091923, "grad_norm": 0.34765625, "learning_rate": 8.568734891773288e-06, "loss": 0.7186, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4347 }, { "epoch": 0.5912428610280119, "grad_norm": 0.5859375, "learning_rate": 8.564036174500977e-06, "loss": 0.8208, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4348 }, { "epoch": 0.5913788414468316, "grad_norm": 0.3203125, "learning_rate": 8.559337780912469e-06, "loss": 0.5183, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4349 }, { "epoch": 0.5915148218656513, "grad_norm": 10.5, "learning_rate": 8.554639712066837e-06, "loss": 0.8151, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4350 }, { "epoch": 0.5916508022844711, "grad_norm": 0.462890625, "learning_rate": 8.549941969023086e-06, "loss": 0.5962, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4351 }, { "epoch": 0.5917867827032908, "grad_norm": 1.0859375, "learning_rate": 8.545244552840144e-06, "loss": 0.6079, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4352 }, { "epoch": 0.5919227631221105, "grad_norm": 0.3359375, "learning_rate": 8.540547464576864e-06, "loss": 0.6313, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4353 }, { "epoch": 0.5920587435409301, "grad_norm": 0.294921875, "learning_rate": 8.535850705292032e-06, "loss": 0.521, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4354 }, { "epoch": 0.5921947239597498, "grad_norm": 0.326171875, "learning_rate": 8.53115427604435e-06, "loss": 0.5423, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4355 }, { "epoch": 0.5923307043785695, "grad_norm": 0.34375, "learning_rate": 8.526458177892452e-06, "loss": 0.464, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4356 }, { "epoch": 0.5924666847973892, "grad_norm": 0.2490234375, "learning_rate": 8.521762411894897e-06, "loss": 0.4318, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4357 }, { "epoch": 0.5926026652162089, "grad_norm": 0.443359375, "learning_rate": 8.517066979110168e-06, "loss": 0.5768, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4358 }, { "epoch": 0.5927386456350285, "grad_norm": 0.388671875, "learning_rate": 8.51237188059667e-06, "loss": 0.7653, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4359 }, { "epoch": 0.5928746260538482, "grad_norm": 0.3359375, "learning_rate": 8.507677117412741e-06, "loss": 0.6343, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4360 }, { "epoch": 0.5930106064726679, "grad_norm": 0.349609375, "learning_rate": 8.502982690616633e-06, "loss": 0.6672, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4361 }, { "epoch": 0.5931465868914876, "grad_norm": 0.66796875, "learning_rate": 8.49828860126653e-06, "loss": 0.5044, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4362 }, { "epoch": 0.5932825673103073, "grad_norm": 0.373046875, "learning_rate": 8.493594850420537e-06, "loss": 0.5015, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4363 }, { "epoch": 0.593418547729127, "grad_norm": 0.30859375, "learning_rate": 8.488901439136681e-06, "loss": 0.5877, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4364 }, { "epoch": 0.5935545281479467, "grad_norm": 0.66796875, "learning_rate": 8.484208368472916e-06, "loss": 0.6864, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4365 }, { "epoch": 0.5936905085667664, "grad_norm": 0.37109375, "learning_rate": 8.479515639487115e-06, "loss": 0.5514, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4366 }, { "epoch": 0.5938264889855861, "grad_norm": 0.6875, "learning_rate": 8.474823253237081e-06, "loss": 0.6877, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4367 }, { "epoch": 0.5939624694044058, "grad_norm": 0.41796875, "learning_rate": 8.470131210780532e-06, "loss": 0.8192, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4368 }, { "epoch": 0.5940984498232255, "grad_norm": 0.2041015625, "learning_rate": 8.465439513175111e-06, "loss": 0.3698, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4369 }, { "epoch": 0.5942344302420451, "grad_norm": 0.396484375, "learning_rate": 8.460748161478386e-06, "loss": 0.625, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4370 }, { "epoch": 0.5943704106608648, "grad_norm": 0.48828125, "learning_rate": 8.456057156747847e-06, "loss": 0.6188, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4371 }, { "epoch": 0.5945063910796845, "grad_norm": 0.45703125, "learning_rate": 8.451366500040898e-06, "loss": 0.6388, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4372 }, { "epoch": 0.5946423714985042, "grad_norm": 0.7578125, "learning_rate": 8.446676192414875e-06, "loss": 0.6593, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4373 }, { "epoch": 0.5947783519173239, "grad_norm": 0.33203125, "learning_rate": 8.44198623492703e-06, "loss": 0.6131, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4374 }, { "epoch": 0.5949143323361435, "grad_norm": 0.263671875, "learning_rate": 8.437296628634535e-06, "loss": 0.4801, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4375 }, { "epoch": 0.5950503127549633, "grad_norm": 0.33203125, "learning_rate": 8.432607374594484e-06, "loss": 0.569, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4376 }, { "epoch": 0.595186293173783, "grad_norm": 0.46484375, "learning_rate": 8.427918473863894e-06, "loss": 0.6616, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4377 }, { "epoch": 0.5953222735926027, "grad_norm": 0.337890625, "learning_rate": 8.423229927499704e-06, "loss": 0.6427, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4378 }, { "epoch": 0.5954582540114224, "grad_norm": 0.296875, "learning_rate": 8.418541736558763e-06, "loss": 0.6318, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4379 }, { "epoch": 0.5955942344302421, "grad_norm": 0.279296875, "learning_rate": 8.41385390209785e-06, "loss": 0.508, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4380 }, { "epoch": 0.5957302148490617, "grad_norm": 0.55859375, "learning_rate": 8.40916642517366e-06, "loss": 0.6634, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4381 }, { "epoch": 0.5958661952678814, "grad_norm": 0.498046875, "learning_rate": 8.40447930684281e-06, "loss": 0.5083, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4382 }, { "epoch": 0.5960021756867011, "grad_norm": 0.40234375, "learning_rate": 8.399792548161827e-06, "loss": 0.6497, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4383 }, { "epoch": 0.5961381561055208, "grad_norm": 0.255859375, "learning_rate": 8.395106150187171e-06, "loss": 0.4961, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4384 }, { "epoch": 0.5962741365243405, "grad_norm": 0.5078125, "learning_rate": 8.39042011397521e-06, "loss": 0.9159, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4385 }, { "epoch": 0.5964101169431602, "grad_norm": 0.412109375, "learning_rate": 8.385734440582232e-06, "loss": 0.7658, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4386 }, { "epoch": 0.5965460973619798, "grad_norm": 0.2431640625, "learning_rate": 8.38104913106445e-06, "loss": 0.4033, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4387 }, { "epoch": 0.5966820777807995, "grad_norm": 0.30859375, "learning_rate": 8.376364186477986e-06, "loss": 0.6517, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4388 }, { "epoch": 0.5968180581996193, "grad_norm": 0.41015625, "learning_rate": 8.371679607878884e-06, "loss": 0.5265, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4389 }, { "epoch": 0.596954038618439, "grad_norm": 0.35546875, "learning_rate": 8.366995396323107e-06, "loss": 0.5065, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4390 }, { "epoch": 0.5970900190372587, "grad_norm": 0.314453125, "learning_rate": 8.362311552866534e-06, "loss": 0.6048, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4391 }, { "epoch": 0.5972259994560783, "grad_norm": 0.37890625, "learning_rate": 8.357628078564958e-06, "loss": 0.5752, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4392 }, { "epoch": 0.597361979874898, "grad_norm": 0.259765625, "learning_rate": 8.352944974474094e-06, "loss": 0.4359, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4393 }, { "epoch": 0.5974979602937177, "grad_norm": 0.458984375, "learning_rate": 8.348262241649569e-06, "loss": 0.5798, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4394 }, { "epoch": 0.5976339407125374, "grad_norm": 0.64453125, "learning_rate": 8.34357988114693e-06, "loss": 0.6753, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4395 }, { "epoch": 0.5977699211313571, "grad_norm": 0.427734375, "learning_rate": 8.338897894021637e-06, "loss": 0.6468, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4396 }, { "epoch": 0.5979059015501768, "grad_norm": 0.62109375, "learning_rate": 8.334216281329067e-06, "loss": 0.5547, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4397 }, { "epoch": 0.5980418819689964, "grad_norm": 0.359375, "learning_rate": 8.329535044124514e-06, "loss": 0.7944, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4398 }, { "epoch": 0.5981778623878161, "grad_norm": 0.404296875, "learning_rate": 8.324854183463184e-06, "loss": 0.7181, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4399 }, { "epoch": 0.5983138428066358, "grad_norm": 0.357421875, "learning_rate": 8.320173700400202e-06, "loss": 0.4834, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4400 }, { "epoch": 0.5984498232254555, "grad_norm": 0.5234375, "learning_rate": 8.315493595990608e-06, "loss": 0.5768, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4401 }, { "epoch": 0.5985858036442753, "grad_norm": 0.234375, "learning_rate": 8.310813871289349e-06, "loss": 0.4102, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4402 }, { "epoch": 0.598721784063095, "grad_norm": 0.412109375, "learning_rate": 8.306134527351296e-06, "loss": 0.6398, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4403 }, { "epoch": 0.5988577644819146, "grad_norm": 0.296875, "learning_rate": 8.30145556523123e-06, "loss": 0.3846, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4404 }, { "epoch": 0.5989937449007343, "grad_norm": 1.515625, "learning_rate": 8.296776985983848e-06, "loss": 0.791, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4405 }, { "epoch": 0.599129725319554, "grad_norm": 0.44921875, "learning_rate": 8.292098790663756e-06, "loss": 0.5479, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4406 }, { "epoch": 0.5992657057383737, "grad_norm": 0.32421875, "learning_rate": 8.287420980325477e-06, "loss": 0.6147, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4407 }, { "epoch": 0.5994016861571934, "grad_norm": 0.3203125, "learning_rate": 8.282743556023447e-06, "loss": 0.5509, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4408 }, { "epoch": 0.599537666576013, "grad_norm": 0.365234375, "learning_rate": 8.278066518812015e-06, "loss": 0.6807, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4409 }, { "epoch": 0.5996736469948327, "grad_norm": 0.30078125, "learning_rate": 8.273389869745442e-06, "loss": 0.5601, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4410 }, { "epoch": 0.5998096274136524, "grad_norm": 0.33203125, "learning_rate": 8.268713609877905e-06, "loss": 0.603, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4411 }, { "epoch": 0.5999456078324721, "grad_norm": 0.37890625, "learning_rate": 8.264037740263484e-06, "loss": 0.6707, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4412 }, { "epoch": 0.6000815882512918, "grad_norm": 0.2890625, "learning_rate": 8.259362261956181e-06, "loss": 0.3776, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4413 }, { "epoch": 0.6002175686701116, "grad_norm": 0.37109375, "learning_rate": 8.254687176009906e-06, "loss": 0.4992, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4414 }, { "epoch": 0.6003535490889312, "grad_norm": 0.59765625, "learning_rate": 8.250012483478478e-06, "loss": 0.5479, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4415 }, { "epoch": 0.6004895295077509, "grad_norm": 0.34765625, "learning_rate": 8.245338185415634e-06, "loss": 0.6024, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4416 }, { "epoch": 0.6006255099265706, "grad_norm": 0.625, "learning_rate": 8.240664282875013e-06, "loss": 0.8981, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4417 }, { "epoch": 0.6007614903453903, "grad_norm": 0.58984375, "learning_rate": 8.235990776910174e-06, "loss": 0.5978, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4418 }, { "epoch": 0.60089747076421, "grad_norm": 0.3046875, "learning_rate": 8.231317668574581e-06, "loss": 0.6204, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4419 }, { "epoch": 0.6010334511830296, "grad_norm": 0.443359375, "learning_rate": 8.22664495892161e-06, "loss": 0.7791, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4420 }, { "epoch": 0.6011694316018493, "grad_norm": 0.416015625, "learning_rate": 8.221972649004546e-06, "loss": 0.6108, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4421 }, { "epoch": 0.601305412020669, "grad_norm": 0.326171875, "learning_rate": 8.217300739876585e-06, "loss": 0.6909, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4422 }, { "epoch": 0.6014413924394887, "grad_norm": 0.251953125, "learning_rate": 8.212629232590832e-06, "loss": 0.5524, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4423 }, { "epoch": 0.6015773728583084, "grad_norm": 0.75390625, "learning_rate": 8.207958128200304e-06, "loss": 0.8994, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4424 }, { "epoch": 0.601713353277128, "grad_norm": 0.396484375, "learning_rate": 8.203287427757923e-06, "loss": 0.7098, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4425 }, { "epoch": 0.6018493336959477, "grad_norm": 0.61328125, "learning_rate": 8.198617132316522e-06, "loss": 0.9502, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4426 }, { "epoch": 0.6019853141147675, "grad_norm": 0.3828125, "learning_rate": 8.193947242928844e-06, "loss": 0.8159, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4427 }, { "epoch": 0.6021212945335872, "grad_norm": 0.212890625, "learning_rate": 8.189277760647537e-06, "loss": 0.3991, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4428 }, { "epoch": 0.6022572749524069, "grad_norm": 0.255859375, "learning_rate": 8.184608686525162e-06, "loss": 0.6178, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4429 }, { "epoch": 0.6023932553712266, "grad_norm": 0.412109375, "learning_rate": 8.179940021614184e-06, "loss": 0.6816, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4430 }, { "epoch": 0.6025292357900462, "grad_norm": 0.232421875, "learning_rate": 8.175271766966977e-06, "loss": 0.4289, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4431 }, { "epoch": 0.6026652162088659, "grad_norm": 0.3515625, "learning_rate": 8.170603923635823e-06, "loss": 0.7526, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4432 }, { "epoch": 0.6028011966276856, "grad_norm": 0.51171875, "learning_rate": 8.165936492672912e-06, "loss": 0.5472, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4433 }, { "epoch": 0.6029371770465053, "grad_norm": 0.439453125, "learning_rate": 8.161269475130338e-06, "loss": 0.8791, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4434 }, { "epoch": 0.603073157465325, "grad_norm": 0.212890625, "learning_rate": 8.156602872060104e-06, "loss": 0.306, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4435 }, { "epoch": 0.6032091378841447, "grad_norm": 0.6015625, "learning_rate": 8.151936684514119e-06, "loss": 0.5602, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4436 }, { "epoch": 0.6033451183029643, "grad_norm": 0.287109375, "learning_rate": 8.147270913544202e-06, "loss": 0.5411, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4437 }, { "epoch": 0.603481098721784, "grad_norm": 0.251953125, "learning_rate": 8.142605560202073e-06, "loss": 0.5802, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4438 }, { "epoch": 0.6036170791406038, "grad_norm": 0.3046875, "learning_rate": 8.137940625539357e-06, "loss": 0.5493, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4439 }, { "epoch": 0.6037530595594235, "grad_norm": 0.443359375, "learning_rate": 8.133276110607593e-06, "loss": 0.6914, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4440 }, { "epoch": 0.6038890399782432, "grad_norm": 0.4453125, "learning_rate": 8.128612016458214e-06, "loss": 0.7244, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4441 }, { "epoch": 0.6040250203970629, "grad_norm": 0.341796875, "learning_rate": 8.123948344142568e-06, "loss": 0.715, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4442 }, { "epoch": 0.6041610008158825, "grad_norm": 0.50390625, "learning_rate": 8.119285094711901e-06, "loss": 0.5348, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4443 }, { "epoch": 0.6042969812347022, "grad_norm": 0.240234375, "learning_rate": 8.114622269217368e-06, "loss": 0.4035, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4444 }, { "epoch": 0.6044329616535219, "grad_norm": 0.3828125, "learning_rate": 8.109959868710026e-06, "loss": 0.6265, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4445 }, { "epoch": 0.6045689420723416, "grad_norm": 0.37890625, "learning_rate": 8.105297894240838e-06, "loss": 0.6971, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4446 }, { "epoch": 0.6047049224911613, "grad_norm": 0.328125, "learning_rate": 8.100636346860668e-06, "loss": 0.5187, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4447 }, { "epoch": 0.6048409029099809, "grad_norm": 0.373046875, "learning_rate": 8.095975227620289e-06, "loss": 0.6637, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4448 }, { "epoch": 0.6049768833288006, "grad_norm": 0.3125, "learning_rate": 8.091314537570371e-06, "loss": 0.561, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4449 }, { "epoch": 0.6051128637476203, "grad_norm": 0.546875, "learning_rate": 8.086654277761493e-06, "loss": 0.6434, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4450 }, { "epoch": 0.60524884416644, "grad_norm": 0.5234375, "learning_rate": 8.081994449244132e-06, "loss": 0.9176, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4451 }, { "epoch": 0.6053848245852598, "grad_norm": 0.57421875, "learning_rate": 8.077335053068673e-06, "loss": 0.724, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4452 }, { "epoch": 0.6055208050040795, "grad_norm": 0.357421875, "learning_rate": 8.0726760902854e-06, "loss": 0.5744, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4453 }, { "epoch": 0.6056567854228991, "grad_norm": 2.796875, "learning_rate": 8.0680175619445e-06, "loss": 0.8841, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4454 }, { "epoch": 0.6057927658417188, "grad_norm": 1.296875, "learning_rate": 8.063359469096061e-06, "loss": 0.7038, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4455 }, { "epoch": 0.6059287462605385, "grad_norm": 0.322265625, "learning_rate": 8.058701812790076e-06, "loss": 0.638, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4456 }, { "epoch": 0.6060647266793582, "grad_norm": 0.5, "learning_rate": 8.054044594076435e-06, "loss": 0.6185, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4457 }, { "epoch": 0.6062007070981779, "grad_norm": 0.328125, "learning_rate": 8.049387814004936e-06, "loss": 0.5716, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4458 }, { "epoch": 0.6063366875169975, "grad_norm": 0.3125, "learning_rate": 8.044731473625272e-06, "loss": 0.6408, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4459 }, { "epoch": 0.6064726679358172, "grad_norm": 0.515625, "learning_rate": 8.040075573987038e-06, "loss": 0.638, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4460 }, { "epoch": 0.6066086483546369, "grad_norm": 0.57421875, "learning_rate": 8.035420116139732e-06, "loss": 0.7892, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4461 }, { "epoch": 0.6067446287734566, "grad_norm": 0.37890625, "learning_rate": 8.030765101132751e-06, "loss": 0.6732, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4462 }, { "epoch": 0.6068806091922763, "grad_norm": 0.373046875, "learning_rate": 8.026110530015391e-06, "loss": 0.7114, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4463 }, { "epoch": 0.607016589611096, "grad_norm": 0.57421875, "learning_rate": 8.021456403836853e-06, "loss": 0.4694, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4464 }, { "epoch": 0.6071525700299157, "grad_norm": 0.4921875, "learning_rate": 8.01680272364623e-06, "loss": 0.7061, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4465 }, { "epoch": 0.6072885504487354, "grad_norm": 0.30078125, "learning_rate": 8.01214949049252e-06, "loss": 0.6169, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4466 }, { "epoch": 0.6074245308675551, "grad_norm": 0.6640625, "learning_rate": 8.00749670542462e-06, "loss": 0.6969, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4467 }, { "epoch": 0.6075605112863748, "grad_norm": 0.4140625, "learning_rate": 8.00284436949132e-06, "loss": 0.5095, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4468 }, { "epoch": 0.6076964917051945, "grad_norm": 0.326171875, "learning_rate": 7.99819248374132e-06, "loss": 0.6404, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4469 }, { "epoch": 0.6078324721240141, "grad_norm": 0.6015625, "learning_rate": 7.993541049223205e-06, "loss": 0.8314, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4470 }, { "epoch": 0.6079684525428338, "grad_norm": 0.4609375, "learning_rate": 7.988890066985471e-06, "loss": 0.599, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4471 }, { "epoch": 0.6081044329616535, "grad_norm": 0.25390625, "learning_rate": 7.984239538076503e-06, "loss": 0.4587, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4472 }, { "epoch": 0.6082404133804732, "grad_norm": 0.44921875, "learning_rate": 7.97958946354459e-06, "loss": 0.5008, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4473 }, { "epoch": 0.6083763937992929, "grad_norm": 0.396484375, "learning_rate": 7.974939844437912e-06, "loss": 0.529, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4474 }, { "epoch": 0.6085123742181126, "grad_norm": 0.31640625, "learning_rate": 7.97029068180455e-06, "loss": 0.6169, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4475 }, { "epoch": 0.6086483546369322, "grad_norm": 0.32421875, "learning_rate": 7.965641976692486e-06, "loss": 0.5291, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4476 }, { "epoch": 0.608784335055752, "grad_norm": 0.65625, "learning_rate": 7.960993730149592e-06, "loss": 0.6848, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4477 }, { "epoch": 0.6089203154745717, "grad_norm": 0.58984375, "learning_rate": 7.956345943223639e-06, "loss": 0.7736, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4478 }, { "epoch": 0.6090562958933914, "grad_norm": 0.455078125, "learning_rate": 7.951698616962299e-06, "loss": 0.5688, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4479 }, { "epoch": 0.6091922763122111, "grad_norm": 0.39453125, "learning_rate": 7.947051752413131e-06, "loss": 0.7234, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4480 }, { "epoch": 0.6093282567310307, "grad_norm": 0.486328125, "learning_rate": 7.942405350623597e-06, "loss": 0.6217, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4481 }, { "epoch": 0.6094642371498504, "grad_norm": 0.6015625, "learning_rate": 7.937759412641055e-06, "loss": 0.6452, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4482 }, { "epoch": 0.6096002175686701, "grad_norm": 0.3359375, "learning_rate": 7.933113939512754e-06, "loss": 0.6307, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4483 }, { "epoch": 0.6097361979874898, "grad_norm": 0.388671875, "learning_rate": 7.92846893228584e-06, "loss": 0.5202, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4484 }, { "epoch": 0.6098721784063095, "grad_norm": 0.384765625, "learning_rate": 7.923824392007357e-06, "loss": 0.7417, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4485 }, { "epoch": 0.6100081588251292, "grad_norm": 0.4140625, "learning_rate": 7.91918031972424e-06, "loss": 0.555, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4486 }, { "epoch": 0.6101441392439488, "grad_norm": 0.423828125, "learning_rate": 7.914536716483317e-06, "loss": 0.7526, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4487 }, { "epoch": 0.6102801196627685, "grad_norm": 0.4296875, "learning_rate": 7.909893583331316e-06, "loss": 0.6947, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4488 }, { "epoch": 0.6104161000815882, "grad_norm": 0.41796875, "learning_rate": 7.905250921314857e-06, "loss": 0.6252, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4489 }, { "epoch": 0.610552080500408, "grad_norm": 0.62109375, "learning_rate": 7.900608731480449e-06, "loss": 0.5804, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4490 }, { "epoch": 0.6106880609192277, "grad_norm": 0.2138671875, "learning_rate": 7.8959670148745e-06, "loss": 0.4299, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4491 }, { "epoch": 0.6108240413380474, "grad_norm": 0.50390625, "learning_rate": 7.89132577254331e-06, "loss": 0.6951, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4492 }, { "epoch": 0.610960021756867, "grad_norm": 0.32421875, "learning_rate": 7.886685005533073e-06, "loss": 0.7323, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4493 }, { "epoch": 0.6110960021756867, "grad_norm": 0.328125, "learning_rate": 7.882044714889872e-06, "loss": 0.4743, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4494 }, { "epoch": 0.6112319825945064, "grad_norm": 0.376953125, "learning_rate": 7.877404901659687e-06, "loss": 0.5672, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4495 }, { "epoch": 0.6113679630133261, "grad_norm": 0.349609375, "learning_rate": 7.872765566888387e-06, "loss": 0.6159, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4496 }, { "epoch": 0.6115039434321458, "grad_norm": 0.51171875, "learning_rate": 7.868126711621737e-06, "loss": 0.6764, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4497 }, { "epoch": 0.6116399238509654, "grad_norm": 0.46875, "learning_rate": 7.86348833690539e-06, "loss": 0.6473, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4498 }, { "epoch": 0.6117759042697851, "grad_norm": 0.609375, "learning_rate": 7.858850443784893e-06, "loss": 0.6618, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4499 }, { "epoch": 0.6119118846886048, "grad_norm": 0.28125, "learning_rate": 7.854213033305685e-06, "loss": 0.502, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4500 }, { "epoch": 0.6120478651074245, "grad_norm": 0.86328125, "learning_rate": 7.849576106513095e-06, "loss": 0.7354, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4501 }, { "epoch": 0.6121838455262443, "grad_norm": 0.69140625, "learning_rate": 7.844939664452341e-06, "loss": 0.6014, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4502 }, { "epoch": 0.612319825945064, "grad_norm": 0.361328125, "learning_rate": 7.840303708168533e-06, "loss": 0.5962, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4503 }, { "epoch": 0.6124558063638836, "grad_norm": 0.26953125, "learning_rate": 7.835668238706674e-06, "loss": 0.5628, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4504 }, { "epoch": 0.6125917867827033, "grad_norm": 0.400390625, "learning_rate": 7.831033257111656e-06, "loss": 0.4979, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4505 }, { "epoch": 0.612727767201523, "grad_norm": 0.44140625, "learning_rate": 7.826398764428261e-06, "loss": 0.8197, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4506 }, { "epoch": 0.6128637476203427, "grad_norm": 0.73828125, "learning_rate": 7.821764761701158e-06, "loss": 0.6486, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4507 }, { "epoch": 0.6129997280391624, "grad_norm": 0.32421875, "learning_rate": 7.817131249974908e-06, "loss": 0.6346, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4508 }, { "epoch": 0.613135708457982, "grad_norm": 0.53515625, "learning_rate": 7.812498230293964e-06, "loss": 0.7145, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4509 }, { "epoch": 0.6132716888768017, "grad_norm": 0.361328125, "learning_rate": 7.807865703702661e-06, "loss": 0.6916, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4510 }, { "epoch": 0.6134076692956214, "grad_norm": 0.34375, "learning_rate": 7.803233671245232e-06, "loss": 0.4925, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4511 }, { "epoch": 0.6135436497144411, "grad_norm": 0.5703125, "learning_rate": 7.79860213396579e-06, "loss": 0.6647, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4512 }, { "epoch": 0.6136796301332608, "grad_norm": 0.462890625, "learning_rate": 7.793971092908343e-06, "loss": 0.7985, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4513 }, { "epoch": 0.6138156105520804, "grad_norm": 0.5, "learning_rate": 7.789340549116782e-06, "loss": 0.7329, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4514 }, { "epoch": 0.6139515909709002, "grad_norm": 0.57421875, "learning_rate": 7.784710503634888e-06, "loss": 0.6904, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4515 }, { "epoch": 0.6140875713897199, "grad_norm": 0.31640625, "learning_rate": 7.78008095750633e-06, "loss": 0.6211, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4516 }, { "epoch": 0.6142235518085396, "grad_norm": 0.408203125, "learning_rate": 7.775451911774667e-06, "loss": 0.6616, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4517 }, { "epoch": 0.6143595322273593, "grad_norm": 0.482421875, "learning_rate": 7.770823367483342e-06, "loss": 0.7207, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4518 }, { "epoch": 0.614495512646179, "grad_norm": 0.390625, "learning_rate": 7.766195325675681e-06, "loss": 0.661, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4519 }, { "epoch": 0.6146314930649986, "grad_norm": 0.5078125, "learning_rate": 7.761567787394906e-06, "loss": 0.7305, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4520 }, { "epoch": 0.6147674734838183, "grad_norm": 0.53515625, "learning_rate": 7.756940753684117e-06, "loss": 0.7301, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4521 }, { "epoch": 0.614903453902638, "grad_norm": 0.68359375, "learning_rate": 7.752314225586307e-06, "loss": 0.5573, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4522 }, { "epoch": 0.6150394343214577, "grad_norm": 0.337890625, "learning_rate": 7.747688204144352e-06, "loss": 0.4884, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4523 }, { "epoch": 0.6151754147402774, "grad_norm": 0.404296875, "learning_rate": 7.743062690401014e-06, "loss": 0.5399, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4524 }, { "epoch": 0.615311395159097, "grad_norm": 0.314453125, "learning_rate": 7.738437685398937e-06, "loss": 0.5446, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4525 }, { "epoch": 0.6154473755779167, "grad_norm": 0.3203125, "learning_rate": 7.733813190180655e-06, "loss": 0.5794, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4526 }, { "epoch": 0.6155833559967365, "grad_norm": 0.3515625, "learning_rate": 7.729189205788588e-06, "loss": 0.5954, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4527 }, { "epoch": 0.6157193364155562, "grad_norm": 0.435546875, "learning_rate": 7.724565733265036e-06, "loss": 0.8335, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4528 }, { "epoch": 0.6158553168343759, "grad_norm": 0.56640625, "learning_rate": 7.719942773652188e-06, "loss": 0.5558, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4529 }, { "epoch": 0.6159912972531956, "grad_norm": 0.40234375, "learning_rate": 7.715320327992115e-06, "loss": 0.888, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4530 }, { "epoch": 0.6161272776720152, "grad_norm": 0.2890625, "learning_rate": 7.710698397326772e-06, "loss": 0.3887, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4531 }, { "epoch": 0.6162632580908349, "grad_norm": 0.42578125, "learning_rate": 7.706076982698e-06, "loss": 0.6571, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4532 }, { "epoch": 0.6163992385096546, "grad_norm": 0.279296875, "learning_rate": 7.701456085147522e-06, "loss": 0.4595, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4533 }, { "epoch": 0.6165352189284743, "grad_norm": 0.4609375, "learning_rate": 7.696835705716945e-06, "loss": 0.8136, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4534 }, { "epoch": 0.616671199347294, "grad_norm": 0.330078125, "learning_rate": 7.692215845447757e-06, "loss": 0.515, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4535 }, { "epoch": 0.6168071797661137, "grad_norm": 0.470703125, "learning_rate": 7.687596505381334e-06, "loss": 0.484, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4536 }, { "epoch": 0.6169431601849333, "grad_norm": 0.5859375, "learning_rate": 7.68297768655893e-06, "loss": 0.687, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4537 }, { "epoch": 0.617079140603753, "grad_norm": 0.3125, "learning_rate": 7.678359390021683e-06, "loss": 0.5786, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4538 }, { "epoch": 0.6172151210225727, "grad_norm": 0.392578125, "learning_rate": 7.673741616810615e-06, "loss": 0.73, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4539 }, { "epoch": 0.6173511014413925, "grad_norm": 0.3828125, "learning_rate": 7.669124367966626e-06, "loss": 0.8057, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4540 }, { "epoch": 0.6174870818602122, "grad_norm": 0.65625, "learning_rate": 7.664507644530504e-06, "loss": 0.3857, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4541 }, { "epoch": 0.6176230622790319, "grad_norm": 0.408203125, "learning_rate": 7.659891447542912e-06, "loss": 0.6102, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4542 }, { "epoch": 0.6177590426978515, "grad_norm": 0.3125, "learning_rate": 7.655275778044398e-06, "loss": 0.5671, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4543 }, { "epoch": 0.6178950231166712, "grad_norm": 0.29296875, "learning_rate": 7.65066063707539e-06, "loss": 0.5622, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4544 }, { "epoch": 0.6180310035354909, "grad_norm": 0.58203125, "learning_rate": 7.646046025676198e-06, "loss": 0.7386, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4545 }, { "epoch": 0.6181669839543106, "grad_norm": 0.8359375, "learning_rate": 7.64143194488701e-06, "loss": 0.6685, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4546 }, { "epoch": 0.6183029643731303, "grad_norm": 0.28125, "learning_rate": 7.6368183957479e-06, "loss": 0.5795, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4547 }, { "epoch": 0.6184389447919499, "grad_norm": 0.375, "learning_rate": 7.632205379298816e-06, "loss": 0.6388, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4548 }, { "epoch": 0.6185749252107696, "grad_norm": 0.43359375, "learning_rate": 7.6275928965795875e-06, "loss": 0.7165, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4549 }, { "epoch": 0.6187109056295893, "grad_norm": 0.294921875, "learning_rate": 7.622980948629925e-06, "loss": 0.488, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4550 }, { "epoch": 0.618846886048409, "grad_norm": 0.86328125, "learning_rate": 7.618369536489418e-06, "loss": 0.6094, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4551 }, { "epoch": 0.6189828664672287, "grad_norm": 0.35546875, "learning_rate": 7.613758661197537e-06, "loss": 0.6445, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4552 }, { "epoch": 0.6191188468860485, "grad_norm": 0.396484375, "learning_rate": 7.609148323793627e-06, "loss": 0.8978, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4553 }, { "epoch": 0.6192548273048681, "grad_norm": 0.349609375, "learning_rate": 7.604538525316915e-06, "loss": 0.6123, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4554 }, { "epoch": 0.6193908077236878, "grad_norm": 0.283203125, "learning_rate": 7.599929266806508e-06, "loss": 0.5597, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4555 }, { "epoch": 0.6195267881425075, "grad_norm": 0.38671875, "learning_rate": 7.595320549301385e-06, "loss": 0.6092, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4556 }, { "epoch": 0.6196627685613272, "grad_norm": 0.4921875, "learning_rate": 7.5907123738404124e-06, "loss": 0.8594, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4557 }, { "epoch": 0.6197987489801469, "grad_norm": 0.25390625, "learning_rate": 7.586104741462326e-06, "loss": 0.4398, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4558 }, { "epoch": 0.6199347293989665, "grad_norm": 0.5546875, "learning_rate": 7.5814976532057435e-06, "loss": 0.8172, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4559 }, { "epoch": 0.6200707098177862, "grad_norm": 0.40625, "learning_rate": 7.576891110109159e-06, "loss": 0.5931, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4560 }, { "epoch": 0.6202066902366059, "grad_norm": 0.322265625, "learning_rate": 7.572285113210944e-06, "loss": 0.5111, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4561 }, { "epoch": 0.6203426706554256, "grad_norm": 0.275390625, "learning_rate": 7.567679663549346e-06, "loss": 0.6278, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4562 }, { "epoch": 0.6204786510742453, "grad_norm": 0.341796875, "learning_rate": 7.563074762162488e-06, "loss": 0.6668, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4563 }, { "epoch": 0.620614631493065, "grad_norm": 0.427734375, "learning_rate": 7.558470410088374e-06, "loss": 0.7054, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4564 }, { "epoch": 0.6207506119118847, "grad_norm": 0.294921875, "learning_rate": 7.5538666083648796e-06, "loss": 0.6108, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4565 }, { "epoch": 0.6208865923307044, "grad_norm": 0.357421875, "learning_rate": 7.549263358029758e-06, "loss": 0.6403, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4566 }, { "epoch": 0.6210225727495241, "grad_norm": 0.30078125, "learning_rate": 7.5446606601206395e-06, "loss": 0.5944, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4567 }, { "epoch": 0.6211585531683438, "grad_norm": 0.234375, "learning_rate": 7.540058515675026e-06, "loss": 0.3859, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4568 }, { "epoch": 0.6212945335871635, "grad_norm": 0.5859375, "learning_rate": 7.535456925730299e-06, "loss": 0.5885, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4569 }, { "epoch": 0.6214305140059831, "grad_norm": 0.25, "learning_rate": 7.530855891323713e-06, "loss": 0.425, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4570 }, { "epoch": 0.6215664944248028, "grad_norm": 0.314453125, "learning_rate": 7.526255413492396e-06, "loss": 0.5581, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4571 }, { "epoch": 0.6217024748436225, "grad_norm": 0.51953125, "learning_rate": 7.521655493273354e-06, "loss": 0.7148, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4572 }, { "epoch": 0.6218384552624422, "grad_norm": 0.34375, "learning_rate": 7.517056131703464e-06, "loss": 0.5624, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4573 }, { "epoch": 0.6219744356812619, "grad_norm": 0.466796875, "learning_rate": 7.512457329819478e-06, "loss": 0.7578, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4574 }, { "epoch": 0.6221104161000816, "grad_norm": 0.494140625, "learning_rate": 7.507859088658024e-06, "loss": 0.8123, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4575 }, { "epoch": 0.6222463965189012, "grad_norm": 0.2353515625, "learning_rate": 7.5032614092556e-06, "loss": 0.4097, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4576 }, { "epoch": 0.6223823769377209, "grad_norm": 0.42578125, "learning_rate": 7.498664292648579e-06, "loss": 0.5464, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4577 }, { "epoch": 0.6225183573565407, "grad_norm": 0.71484375, "learning_rate": 7.494067739873209e-06, "loss": 0.8197, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4578 }, { "epoch": 0.6226543377753604, "grad_norm": 0.40625, "learning_rate": 7.489471751965607e-06, "loss": 0.6032, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4579 }, { "epoch": 0.6227903181941801, "grad_norm": 0.75, "learning_rate": 7.484876329961768e-06, "loss": 0.8659, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4580 }, { "epoch": 0.6229262986129998, "grad_norm": 0.3984375, "learning_rate": 7.480281474897553e-06, "loss": 0.6787, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4581 }, { "epoch": 0.6230622790318194, "grad_norm": 0.5390625, "learning_rate": 7.475687187808699e-06, "loss": 0.7, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4582 }, { "epoch": 0.6231982594506391, "grad_norm": 0.671875, "learning_rate": 7.471093469730817e-06, "loss": 0.619, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4583 }, { "epoch": 0.6233342398694588, "grad_norm": 0.341796875, "learning_rate": 7.4665003216993835e-06, "loss": 0.6325, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4584 }, { "epoch": 0.6234702202882785, "grad_norm": 0.453125, "learning_rate": 7.461907744749754e-06, "loss": 0.6985, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4585 }, { "epoch": 0.6236062007070982, "grad_norm": 0.318359375, "learning_rate": 7.457315739917149e-06, "loss": 0.6453, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4586 }, { "epoch": 0.6237421811259178, "grad_norm": 0.70703125, "learning_rate": 7.452724308236663e-06, "loss": 0.6701, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4587 }, { "epoch": 0.6238781615447375, "grad_norm": 0.59765625, "learning_rate": 7.448133450743262e-06, "loss": 0.5715, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4588 }, { "epoch": 0.6240141419635572, "grad_norm": 0.296875, "learning_rate": 7.443543168471781e-06, "loss": 0.4954, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4589 }, { "epoch": 0.624150122382377, "grad_norm": 0.66015625, "learning_rate": 7.438953462456924e-06, "loss": 0.6969, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4590 }, { "epoch": 0.6242861028011967, "grad_norm": 0.400390625, "learning_rate": 7.434364333733268e-06, "loss": 0.812, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4591 }, { "epoch": 0.6244220832200164, "grad_norm": 0.40234375, "learning_rate": 7.42977578333526e-06, "loss": 0.675, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4592 }, { "epoch": 0.624558063638836, "grad_norm": 0.275390625, "learning_rate": 7.425187812297214e-06, "loss": 0.484, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4593 }, { "epoch": 0.6246940440576557, "grad_norm": 0.359375, "learning_rate": 7.420600421653314e-06, "loss": 0.6315, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4594 }, { "epoch": 0.6248300244764754, "grad_norm": 0.423828125, "learning_rate": 7.416013612437616e-06, "loss": 0.7021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4595 }, { "epoch": 0.6249660048952951, "grad_norm": 0.32421875, "learning_rate": 7.4114273856840405e-06, "loss": 0.661, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4596 }, { "epoch": 0.6251019853141148, "grad_norm": 0.5546875, "learning_rate": 7.40684174242638e-06, "loss": 0.7837, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4597 }, { "epoch": 0.6252379657329344, "grad_norm": 0.55859375, "learning_rate": 7.4022566836982964e-06, "loss": 0.7965, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4598 }, { "epoch": 0.6253739461517541, "grad_norm": 0.43359375, "learning_rate": 7.3976722105333175e-06, "loss": 0.8158, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4599 }, { "epoch": 0.6255099265705738, "grad_norm": 0.392578125, "learning_rate": 7.393088323964836e-06, "loss": 0.772, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4600 }, { "epoch": 0.6256459069893935, "grad_norm": 0.361328125, "learning_rate": 7.38850502502612e-06, "loss": 0.6896, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4601 }, { "epoch": 0.6257818874082132, "grad_norm": 0.27734375, "learning_rate": 7.3839223147503e-06, "loss": 0.557, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4602 }, { "epoch": 0.625917867827033, "grad_norm": 0.484375, "learning_rate": 7.3793401941703745e-06, "loss": 0.6257, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4603 }, { "epoch": 0.6260538482458526, "grad_norm": 0.5546875, "learning_rate": 7.374758664319208e-06, "loss": 0.383, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4604 }, { "epoch": 0.6261898286646723, "grad_norm": 0.34765625, "learning_rate": 7.370177726229537e-06, "loss": 0.6084, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4605 }, { "epoch": 0.626325809083492, "grad_norm": 0.578125, "learning_rate": 7.3655973809339585e-06, "loss": 0.637, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4606 }, { "epoch": 0.6264617895023117, "grad_norm": 0.3671875, "learning_rate": 7.361017629464938e-06, "loss": 0.6559, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4607 }, { "epoch": 0.6265977699211314, "grad_norm": 0.42578125, "learning_rate": 7.356438472854809e-06, "loss": 0.661, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4608 }, { "epoch": 0.626733750339951, "grad_norm": 0.38671875, "learning_rate": 7.3518599121357685e-06, "loss": 0.6393, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4609 }, { "epoch": 0.6268697307587707, "grad_norm": 0.400390625, "learning_rate": 7.34728194833988e-06, "loss": 0.7485, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4610 }, { "epoch": 0.6270057111775904, "grad_norm": 0.3203125, "learning_rate": 7.342704582499072e-06, "loss": 0.5846, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4611 }, { "epoch": 0.6271416915964101, "grad_norm": 0.33203125, "learning_rate": 7.33812781564514e-06, "loss": 0.6209, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4612 }, { "epoch": 0.6272776720152298, "grad_norm": 0.6015625, "learning_rate": 7.333551648809743e-06, "loss": 0.7131, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4613 }, { "epoch": 0.6274136524340495, "grad_norm": 0.3203125, "learning_rate": 7.328976083024404e-06, "loss": 0.576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4614 }, { "epoch": 0.6275496328528691, "grad_norm": 0.333984375, "learning_rate": 7.324401119320512e-06, "loss": 0.4836, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4615 }, { "epoch": 0.6276856132716889, "grad_norm": 0.388671875, "learning_rate": 7.319826758729318e-06, "loss": 0.6458, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4616 }, { "epoch": 0.6278215936905086, "grad_norm": 0.416015625, "learning_rate": 7.315253002281944e-06, "loss": 0.7266, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4617 }, { "epoch": 0.6279575741093283, "grad_norm": 0.8046875, "learning_rate": 7.310679851009366e-06, "loss": 0.8835, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4618 }, { "epoch": 0.628093554528148, "grad_norm": 0.333984375, "learning_rate": 7.306107305942428e-06, "loss": 0.5841, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4619 }, { "epoch": 0.6282295349469676, "grad_norm": 0.474609375, "learning_rate": 7.30153536811184e-06, "loss": 0.5556, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4620 }, { "epoch": 0.6283655153657873, "grad_norm": 0.38671875, "learning_rate": 7.2969640385481734e-06, "loss": 0.4364, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4621 }, { "epoch": 0.628501495784607, "grad_norm": 0.72265625, "learning_rate": 7.292393318281858e-06, "loss": 0.7785, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4622 }, { "epoch": 0.6286374762034267, "grad_norm": 0.63671875, "learning_rate": 7.287823208343192e-06, "loss": 0.4269, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4623 }, { "epoch": 0.6287734566222464, "grad_norm": 0.37109375, "learning_rate": 7.283253709762335e-06, "loss": 0.5229, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4624 }, { "epoch": 0.628909437041066, "grad_norm": 0.76171875, "learning_rate": 7.278684823569306e-06, "loss": 0.5793, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4625 }, { "epoch": 0.6290454174598857, "grad_norm": 0.33203125, "learning_rate": 7.2741165507939905e-06, "loss": 0.5731, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4626 }, { "epoch": 0.6291813978787054, "grad_norm": 0.4375, "learning_rate": 7.26954889246613e-06, "loss": 0.8371, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4627 }, { "epoch": 0.6293173782975252, "grad_norm": 0.279296875, "learning_rate": 7.2649818496153335e-06, "loss": 0.5096, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4628 }, { "epoch": 0.6294533587163449, "grad_norm": 0.388671875, "learning_rate": 7.260415423271065e-06, "loss": 0.3589, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4629 }, { "epoch": 0.6295893391351646, "grad_norm": 0.4609375, "learning_rate": 7.255849614462654e-06, "loss": 0.5508, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4630 }, { "epoch": 0.6297253195539843, "grad_norm": 0.48046875, "learning_rate": 7.2512844242192915e-06, "loss": 0.6992, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4631 }, { "epoch": 0.6298612999728039, "grad_norm": 0.4296875, "learning_rate": 7.2467198535700256e-06, "loss": 0.7077, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4632 }, { "epoch": 0.6299972803916236, "grad_norm": 0.55078125, "learning_rate": 7.242155903543766e-06, "loss": 0.6551, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4633 }, { "epoch": 0.6301332608104433, "grad_norm": 0.27734375, "learning_rate": 7.237592575169284e-06, "loss": 0.5555, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4634 }, { "epoch": 0.630269241229263, "grad_norm": 0.8046875, "learning_rate": 7.233029869475207e-06, "loss": 0.6818, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4635 }, { "epoch": 0.6304052216480827, "grad_norm": 0.3671875, "learning_rate": 7.2284677874900285e-06, "loss": 0.6087, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4636 }, { "epoch": 0.6305412020669023, "grad_norm": 0.66796875, "learning_rate": 7.223906330242097e-06, "loss": 0.5845, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4637 }, { "epoch": 0.630677182485722, "grad_norm": 0.392578125, "learning_rate": 7.219345498759616e-06, "loss": 0.8062, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4638 }, { "epoch": 0.6308131629045417, "grad_norm": 0.267578125, "learning_rate": 7.214785294070659e-06, "loss": 0.5161, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4639 }, { "epoch": 0.6309491433233614, "grad_norm": 0.283203125, "learning_rate": 7.210225717203147e-06, "loss": 0.623, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4640 }, { "epoch": 0.6310851237421812, "grad_norm": 0.388671875, "learning_rate": 7.205666769184867e-06, "loss": 0.6972, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4641 }, { "epoch": 0.6312211041610009, "grad_norm": 0.3359375, "learning_rate": 7.201108451043461e-06, "loss": 0.6403, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4642 }, { "epoch": 0.6313570845798205, "grad_norm": 0.427734375, "learning_rate": 7.196550763806429e-06, "loss": 0.7297, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4643 }, { "epoch": 0.6314930649986402, "grad_norm": 0.56640625, "learning_rate": 7.191993708501131e-06, "loss": 0.6242, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4644 }, { "epoch": 0.6316290454174599, "grad_norm": 0.357421875, "learning_rate": 7.187437286154782e-06, "loss": 0.7313, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4645 }, { "epoch": 0.6317650258362796, "grad_norm": 0.349609375, "learning_rate": 7.182881497794455e-06, "loss": 0.7371, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4646 }, { "epoch": 0.6319010062550993, "grad_norm": 0.494140625, "learning_rate": 7.178326344447081e-06, "loss": 0.5394, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4647 }, { "epoch": 0.6320369866739189, "grad_norm": 0.416015625, "learning_rate": 7.173771827139446e-06, "loss": 0.5667, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4648 }, { "epoch": 0.6321729670927386, "grad_norm": 0.38671875, "learning_rate": 7.169217946898197e-06, "loss": 0.6419, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4649 }, { "epoch": 0.6323089475115583, "grad_norm": 0.3671875, "learning_rate": 7.164664704749831e-06, "loss": 0.7826, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4650 }, { "epoch": 0.632444927930378, "grad_norm": 0.447265625, "learning_rate": 7.160112101720704e-06, "loss": 0.6924, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4651 }, { "epoch": 0.6325809083491977, "grad_norm": 0.30078125, "learning_rate": 7.155560138837032e-06, "loss": 0.5174, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4652 }, { "epoch": 0.6327168887680175, "grad_norm": 0.5078125, "learning_rate": 7.1510088171248805e-06, "loss": 0.7021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4653 }, { "epoch": 0.6328528691868371, "grad_norm": 0.578125, "learning_rate": 7.146458137610173e-06, "loss": 0.7184, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4654 }, { "epoch": 0.6329888496056568, "grad_norm": 0.32421875, "learning_rate": 7.141908101318688e-06, "loss": 0.6081, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4655 }, { "epoch": 0.6331248300244765, "grad_norm": 0.3515625, "learning_rate": 7.137358709276063e-06, "loss": 0.651, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4656 }, { "epoch": 0.6332608104432962, "grad_norm": 0.5078125, "learning_rate": 7.132809962507783e-06, "loss": 0.5958, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4657 }, { "epoch": 0.6333967908621159, "grad_norm": 0.53125, "learning_rate": 7.12826186203919e-06, "loss": 0.7139, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4658 }, { "epoch": 0.6335327712809355, "grad_norm": 0.361328125, "learning_rate": 7.123714408895486e-06, "loss": 0.6265, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4659 }, { "epoch": 0.6336687516997552, "grad_norm": 0.416015625, "learning_rate": 7.119167604101718e-06, "loss": 0.7023, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4660 }, { "epoch": 0.6338047321185749, "grad_norm": 0.435546875, "learning_rate": 7.114621448682794e-06, "loss": 0.7536, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4661 }, { "epoch": 0.6339407125373946, "grad_norm": 0.48046875, "learning_rate": 7.110075943663473e-06, "loss": 0.5636, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4662 }, { "epoch": 0.6340766929562143, "grad_norm": 0.32421875, "learning_rate": 7.105531090068365e-06, "loss": 0.4614, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4663 }, { "epoch": 0.634212673375034, "grad_norm": 0.7265625, "learning_rate": 7.100986888921939e-06, "loss": 0.821, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4664 }, { "epoch": 0.6343486537938536, "grad_norm": 0.42578125, "learning_rate": 7.096443341248511e-06, "loss": 0.6657, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4665 }, { "epoch": 0.6344846342126734, "grad_norm": 0.41015625, "learning_rate": 7.091900448072253e-06, "loss": 0.806, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4666 }, { "epoch": 0.6346206146314931, "grad_norm": 0.49609375, "learning_rate": 7.087358210417188e-06, "loss": 0.8659, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4667 }, { "epoch": 0.6347565950503128, "grad_norm": 0.291015625, "learning_rate": 7.0828166293071945e-06, "loss": 0.5523, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4668 }, { "epoch": 0.6348925754691325, "grad_norm": 0.73828125, "learning_rate": 7.078275705765996e-06, "loss": 0.8856, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4669 }, { "epoch": 0.6350285558879522, "grad_norm": 0.2314453125, "learning_rate": 7.073735440817176e-06, "loss": 0.4639, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4670 }, { "epoch": 0.6351645363067718, "grad_norm": 0.35546875, "learning_rate": 7.069195835484165e-06, "loss": 0.651, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4671 }, { "epoch": 0.6353005167255915, "grad_norm": 0.365234375, "learning_rate": 7.064656890790245e-06, "loss": 0.5407, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4672 }, { "epoch": 0.6354364971444112, "grad_norm": 0.30078125, "learning_rate": 7.060118607758548e-06, "loss": 0.7121, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4673 }, { "epoch": 0.6355724775632309, "grad_norm": 0.59765625, "learning_rate": 7.055580987412061e-06, "loss": 0.6685, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4674 }, { "epoch": 0.6357084579820506, "grad_norm": 0.318359375, "learning_rate": 7.051044030773619e-06, "loss": 0.64, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4675 }, { "epoch": 0.6358444384008702, "grad_norm": 0.349609375, "learning_rate": 7.046507738865904e-06, "loss": 0.6188, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4676 }, { "epoch": 0.6359804188196899, "grad_norm": 0.40234375, "learning_rate": 7.0419721127114545e-06, "loss": 0.5496, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4677 }, { "epoch": 0.6361163992385096, "grad_norm": 0.4453125, "learning_rate": 7.037437153332658e-06, "loss": 0.6203, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4678 }, { "epoch": 0.6362523796573294, "grad_norm": 0.330078125, "learning_rate": 7.032902861751747e-06, "loss": 0.6604, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4679 }, { "epoch": 0.6363883600761491, "grad_norm": 0.306640625, "learning_rate": 7.0283692389908065e-06, "loss": 0.5841, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4680 }, { "epoch": 0.6365243404949688, "grad_norm": 0.359375, "learning_rate": 7.023836286071771e-06, "loss": 0.6351, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4681 }, { "epoch": 0.6366603209137884, "grad_norm": 0.828125, "learning_rate": 7.019304004016424e-06, "loss": 0.4543, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4682 }, { "epoch": 0.6367963013326081, "grad_norm": 0.3515625, "learning_rate": 7.014772393846396e-06, "loss": 0.7044, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4683 }, { "epoch": 0.6369322817514278, "grad_norm": 0.392578125, "learning_rate": 7.010241456583169e-06, "loss": 0.6245, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4684 }, { "epoch": 0.6370682621702475, "grad_norm": 0.416015625, "learning_rate": 7.0057111932480705e-06, "loss": 0.764, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4685 }, { "epoch": 0.6372042425890672, "grad_norm": 0.494140625, "learning_rate": 7.001181604862277e-06, "loss": 0.5112, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4686 }, { "epoch": 0.6373402230078868, "grad_norm": 0.5234375, "learning_rate": 6.996652692446815e-06, "loss": 0.8058, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4687 }, { "epoch": 0.6374762034267065, "grad_norm": 0.380859375, "learning_rate": 6.9921244570225536e-06, "loss": 0.5443, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4688 }, { "epoch": 0.6376121838455262, "grad_norm": 0.5703125, "learning_rate": 6.987596899610214e-06, "loss": 0.6579, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4689 }, { "epoch": 0.6377481642643459, "grad_norm": 0.337890625, "learning_rate": 6.983070021230364e-06, "loss": 0.5916, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4690 }, { "epoch": 0.6378841446831657, "grad_norm": 0.58984375, "learning_rate": 6.978543822903417e-06, "loss": 0.665, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4691 }, { "epoch": 0.6380201251019854, "grad_norm": 0.39453125, "learning_rate": 6.974018305649631e-06, "loss": 0.8439, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4692 }, { "epoch": 0.638156105520805, "grad_norm": 0.91015625, "learning_rate": 6.969493470489116e-06, "loss": 0.9058, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4693 }, { "epoch": 0.6382920859396247, "grad_norm": 0.48046875, "learning_rate": 6.9649693184418235e-06, "loss": 0.7477, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4694 }, { "epoch": 0.6384280663584444, "grad_norm": 0.369140625, "learning_rate": 6.960445850527553e-06, "loss": 0.7062, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4695 }, { "epoch": 0.6385640467772641, "grad_norm": 0.5078125, "learning_rate": 6.95592306776595e-06, "loss": 0.5906, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4696 }, { "epoch": 0.6387000271960838, "grad_norm": 0.546875, "learning_rate": 6.951400971176505e-06, "loss": 0.6548, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4697 }, { "epoch": 0.6388360076149034, "grad_norm": 0.73828125, "learning_rate": 6.946879561778553e-06, "loss": 1.0086, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4698 }, { "epoch": 0.6389719880337231, "grad_norm": 0.392578125, "learning_rate": 6.942358840591277e-06, "loss": 0.522, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4699 }, { "epoch": 0.6391079684525428, "grad_norm": 0.33203125, "learning_rate": 6.937838808633699e-06, "loss": 0.5929, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4700 }, { "epoch": 0.6392439488713625, "grad_norm": 0.318359375, "learning_rate": 6.933319466924693e-06, "loss": 0.48, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4701 }, { "epoch": 0.6393799292901822, "grad_norm": 0.71484375, "learning_rate": 6.928800816482972e-06, "loss": 0.5955, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4702 }, { "epoch": 0.6395159097090019, "grad_norm": 0.60546875, "learning_rate": 6.924282858327096e-06, "loss": 0.4232, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4703 }, { "epoch": 0.6396518901278216, "grad_norm": 0.369140625, "learning_rate": 6.919765593475468e-06, "loss": 0.6737, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4704 }, { "epoch": 0.6397878705466413, "grad_norm": 0.328125, "learning_rate": 6.9152490229463345e-06, "loss": 0.6457, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4705 }, { "epoch": 0.639923850965461, "grad_norm": 0.419921875, "learning_rate": 6.910733147757785e-06, "loss": 0.617, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4706 }, { "epoch": 0.6400598313842807, "grad_norm": 0.388671875, "learning_rate": 6.906217968927754e-06, "loss": 0.668, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4707 }, { "epoch": 0.6401958118031004, "grad_norm": 0.369140625, "learning_rate": 6.901703487474018e-06, "loss": 0.7254, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4708 }, { "epoch": 0.64033179222192, "grad_norm": 0.46875, "learning_rate": 6.8971897044141954e-06, "loss": 0.577, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4709 }, { "epoch": 0.6404677726407397, "grad_norm": 0.44921875, "learning_rate": 6.892676620765748e-06, "loss": 0.7472, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4710 }, { "epoch": 0.6406037530595594, "grad_norm": 0.45703125, "learning_rate": 6.888164237545982e-06, "loss": 0.6961, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4711 }, { "epoch": 0.6407397334783791, "grad_norm": 0.33203125, "learning_rate": 6.883652555772042e-06, "loss": 0.6317, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4712 }, { "epoch": 0.6408757138971988, "grad_norm": 0.318359375, "learning_rate": 6.879141576460917e-06, "loss": 0.5041, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4713 }, { "epoch": 0.6410116943160185, "grad_norm": 0.34765625, "learning_rate": 6.874631300629435e-06, "loss": 0.5547, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4714 }, { "epoch": 0.6411476747348381, "grad_norm": 0.44921875, "learning_rate": 6.87012172929427e-06, "loss": 0.4612, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4715 }, { "epoch": 0.6412836551536579, "grad_norm": 0.359375, "learning_rate": 6.8656128634719334e-06, "loss": 0.5729, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4716 }, { "epoch": 0.6414196355724776, "grad_norm": 0.33984375, "learning_rate": 6.8611047041787785e-06, "loss": 0.6559, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4717 }, { "epoch": 0.6415556159912973, "grad_norm": 0.5234375, "learning_rate": 6.8565972524309995e-06, "loss": 0.8003, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4718 }, { "epoch": 0.641691596410117, "grad_norm": 0.36328125, "learning_rate": 6.852090509244631e-06, "loss": 0.7116, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4719 }, { "epoch": 0.6418275768289367, "grad_norm": 0.625, "learning_rate": 6.847584475635549e-06, "loss": 0.7497, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4720 }, { "epoch": 0.6419635572477563, "grad_norm": 0.53515625, "learning_rate": 6.843079152619467e-06, "loss": 0.6802, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4721 }, { "epoch": 0.642099537666576, "grad_norm": 0.466796875, "learning_rate": 6.838574541211942e-06, "loss": 0.6567, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4722 }, { "epoch": 0.6422355180853957, "grad_norm": 0.57421875, "learning_rate": 6.834070642428368e-06, "loss": 0.849, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4723 }, { "epoch": 0.6423714985042154, "grad_norm": 0.259765625, "learning_rate": 6.829567457283975e-06, "loss": 0.5135, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4724 }, { "epoch": 0.6425074789230351, "grad_norm": 0.255859375, "learning_rate": 6.825064986793841e-06, "loss": 0.4372, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4725 }, { "epoch": 0.6426434593418547, "grad_norm": 0.314453125, "learning_rate": 6.820563231972876e-06, "loss": 0.3709, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4726 }, { "epoch": 0.6427794397606744, "grad_norm": 0.498046875, "learning_rate": 6.81606219383583e-06, "loss": 0.6361, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4727 }, { "epoch": 0.6429154201794941, "grad_norm": 0.19140625, "learning_rate": 6.811561873397293e-06, "loss": 0.3547, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4728 }, { "epoch": 0.6430514005983139, "grad_norm": 0.37109375, "learning_rate": 6.807062271671692e-06, "loss": 0.6807, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4729 }, { "epoch": 0.6431873810171336, "grad_norm": 0.296875, "learning_rate": 6.802563389673291e-06, "loss": 0.4731, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4730 }, { "epoch": 0.6433233614359533, "grad_norm": 0.3828125, "learning_rate": 6.798065228416195e-06, "loss": 0.6864, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4731 }, { "epoch": 0.6434593418547729, "grad_norm": 0.4375, "learning_rate": 6.793567788914343e-06, "loss": 0.7303, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4732 }, { "epoch": 0.6435953222735926, "grad_norm": 0.408203125, "learning_rate": 6.789071072181516e-06, "loss": 0.6878, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4733 }, { "epoch": 0.6437313026924123, "grad_norm": 0.419921875, "learning_rate": 6.784575079231324e-06, "loss": 0.5929, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4734 }, { "epoch": 0.643867283111232, "grad_norm": 0.431640625, "learning_rate": 6.780079811077223e-06, "loss": 0.5827, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4735 }, { "epoch": 0.6440032635300517, "grad_norm": 0.384765625, "learning_rate": 6.775585268732498e-06, "loss": 0.6709, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4736 }, { "epoch": 0.6441392439488713, "grad_norm": 0.330078125, "learning_rate": 6.771091453210272e-06, "loss": 0.5461, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4737 }, { "epoch": 0.644275224367691, "grad_norm": 0.333984375, "learning_rate": 6.766598365523513e-06, "loss": 0.6299, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4738 }, { "epoch": 0.6444112047865107, "grad_norm": 0.408203125, "learning_rate": 6.762106006685014e-06, "loss": 0.7375, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4739 }, { "epoch": 0.6445471852053304, "grad_norm": 0.55078125, "learning_rate": 6.757614377707409e-06, "loss": 0.6377, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4740 }, { "epoch": 0.6446831656241501, "grad_norm": 0.349609375, "learning_rate": 6.753123479603163e-06, "loss": 0.6641, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4741 }, { "epoch": 0.6448191460429699, "grad_norm": 1.421875, "learning_rate": 6.748633313384583e-06, "loss": 0.6631, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4742 }, { "epoch": 0.6449551264617895, "grad_norm": 0.263671875, "learning_rate": 6.744143880063805e-06, "loss": 0.4547, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4743 }, { "epoch": 0.6450911068806092, "grad_norm": 0.376953125, "learning_rate": 6.739655180652802e-06, "loss": 0.6706, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4744 }, { "epoch": 0.6452270872994289, "grad_norm": 0.51171875, "learning_rate": 6.735167216163383e-06, "loss": 0.4295, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4745 }, { "epoch": 0.6453630677182486, "grad_norm": 0.39453125, "learning_rate": 6.730679987607189e-06, "loss": 0.7867, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4746 }, { "epoch": 0.6454990481370683, "grad_norm": 0.365234375, "learning_rate": 6.726193495995699e-06, "loss": 0.658, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4747 }, { "epoch": 0.645635028555888, "grad_norm": 0.73046875, "learning_rate": 6.7217077423402174e-06, "loss": 0.7476, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4748 }, { "epoch": 0.6457710089747076, "grad_norm": 0.50390625, "learning_rate": 6.717222727651894e-06, "loss": 0.5138, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4749 }, { "epoch": 0.6459069893935273, "grad_norm": 0.3671875, "learning_rate": 6.712738452941703e-06, "loss": 0.6691, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4750 }, { "epoch": 0.646042969812347, "grad_norm": 0.2890625, "learning_rate": 6.708254919220454e-06, "loss": 0.3997, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4751 }, { "epoch": 0.6461789502311667, "grad_norm": 0.416015625, "learning_rate": 6.703772127498792e-06, "loss": 0.7186, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4752 }, { "epoch": 0.6463149306499864, "grad_norm": 0.294921875, "learning_rate": 6.699290078787193e-06, "loss": 0.5757, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4753 }, { "epoch": 0.6464509110688061, "grad_norm": 0.28515625, "learning_rate": 6.6948087740959634e-06, "loss": 0.5448, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4754 }, { "epoch": 0.6465868914876258, "grad_norm": 0.2578125, "learning_rate": 6.690328214435246e-06, "loss": 0.498, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4755 }, { "epoch": 0.6467228719064455, "grad_norm": 0.46875, "learning_rate": 6.685848400815014e-06, "loss": 0.4811, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4756 }, { "epoch": 0.6468588523252652, "grad_norm": 0.54296875, "learning_rate": 6.681369334245071e-06, "loss": 0.5679, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4757 }, { "epoch": 0.6469948327440849, "grad_norm": 0.361328125, "learning_rate": 6.676891015735055e-06, "loss": 0.4999, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4758 }, { "epoch": 0.6471308131629046, "grad_norm": 0.29296875, "learning_rate": 6.672413446294433e-06, "loss": 0.5783, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4759 }, { "epoch": 0.6472667935817242, "grad_norm": 0.365234375, "learning_rate": 6.667936626932502e-06, "loss": 0.6118, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4760 }, { "epoch": 0.6474027740005439, "grad_norm": 0.427734375, "learning_rate": 6.663460558658394e-06, "loss": 0.7093, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4761 }, { "epoch": 0.6475387544193636, "grad_norm": 0.2734375, "learning_rate": 6.658985242481069e-06, "loss": 0.383, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4762 }, { "epoch": 0.6476747348381833, "grad_norm": 0.5859375, "learning_rate": 6.6545106794093175e-06, "loss": 0.5941, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4763 }, { "epoch": 0.647810715257003, "grad_norm": 0.181640625, "learning_rate": 6.650036870451762e-06, "loss": 0.327, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4764 }, { "epoch": 0.6479466956758226, "grad_norm": 0.359375, "learning_rate": 6.645563816616851e-06, "loss": 0.576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4765 }, { "epoch": 0.6480826760946423, "grad_norm": 0.4140625, "learning_rate": 6.6410915189128675e-06, "loss": 0.7944, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4766 }, { "epoch": 0.6482186565134621, "grad_norm": 0.76953125, "learning_rate": 6.636619978347923e-06, "loss": 0.6925, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4767 }, { "epoch": 0.6483546369322818, "grad_norm": 0.2197265625, "learning_rate": 6.632149195929957e-06, "loss": 0.3659, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4768 }, { "epoch": 0.6484906173511015, "grad_norm": 0.65625, "learning_rate": 6.627679172666736e-06, "loss": 0.5545, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4769 }, { "epoch": 0.6486265977699212, "grad_norm": 0.244140625, "learning_rate": 6.623209909565862e-06, "loss": 0.4116, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4770 }, { "epoch": 0.6487625781887408, "grad_norm": 0.478515625, "learning_rate": 6.618741407634757e-06, "loss": 0.6157, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4771 }, { "epoch": 0.6488985586075605, "grad_norm": 0.40625, "learning_rate": 6.6142736678806794e-06, "loss": 0.7222, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4772 }, { "epoch": 0.6490345390263802, "grad_norm": 0.373046875, "learning_rate": 6.609806691310712e-06, "loss": 0.7378, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4773 }, { "epoch": 0.6491705194451999, "grad_norm": 0.28515625, "learning_rate": 6.605340478931763e-06, "loss": 0.575, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4774 }, { "epoch": 0.6493064998640196, "grad_norm": 0.30859375, "learning_rate": 6.600875031750576e-06, "loss": 0.7765, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4775 }, { "epoch": 0.6494424802828392, "grad_norm": 0.419921875, "learning_rate": 6.596410350773712e-06, "loss": 0.6117, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4776 }, { "epoch": 0.6495784607016589, "grad_norm": 0.37109375, "learning_rate": 6.591946437007569e-06, "loss": 0.6296, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4777 }, { "epoch": 0.6497144411204786, "grad_norm": 0.359375, "learning_rate": 6.5874832914583665e-06, "loss": 0.5269, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4778 }, { "epoch": 0.6498504215392984, "grad_norm": 0.41796875, "learning_rate": 6.5830209151321525e-06, "loss": 0.7046, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4779 }, { "epoch": 0.6499864019581181, "grad_norm": 0.328125, "learning_rate": 6.5785593090347975e-06, "loss": 0.6019, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4780 }, { "epoch": 0.6501223823769378, "grad_norm": 0.396484375, "learning_rate": 6.574098474172008e-06, "loss": 0.5622, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4781 }, { "epoch": 0.6502583627957574, "grad_norm": 0.2734375, "learning_rate": 6.569638411549306e-06, "loss": 0.45, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4782 }, { "epoch": 0.6503943432145771, "grad_norm": 0.48046875, "learning_rate": 6.565179122172047e-06, "loss": 0.8919, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4783 }, { "epoch": 0.6505303236333968, "grad_norm": 0.283203125, "learning_rate": 6.560720607045407e-06, "loss": 0.4865, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4784 }, { "epoch": 0.6506663040522165, "grad_norm": 0.451171875, "learning_rate": 6.556262867174393e-06, "loss": 0.6706, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4785 }, { "epoch": 0.6508022844710362, "grad_norm": 0.546875, "learning_rate": 6.551805903563831e-06, "loss": 0.4225, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4786 }, { "epoch": 0.6509382648898558, "grad_norm": 0.302734375, "learning_rate": 6.547349717218377e-06, "loss": 0.4751, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4787 }, { "epoch": 0.6510742453086755, "grad_norm": 1.1171875, "learning_rate": 6.542894309142509e-06, "loss": 0.7121, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4788 }, { "epoch": 0.6512102257274952, "grad_norm": 0.5390625, "learning_rate": 6.538439680340532e-06, "loss": 0.7612, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4789 }, { "epoch": 0.6513462061463149, "grad_norm": 0.3671875, "learning_rate": 6.533985831816571e-06, "loss": 0.5889, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4790 }, { "epoch": 0.6514821865651346, "grad_norm": 0.369140625, "learning_rate": 6.529532764574578e-06, "loss": 0.7453, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4791 }, { "epoch": 0.6516181669839544, "grad_norm": 0.341796875, "learning_rate": 6.525080479618331e-06, "loss": 0.6668, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4792 }, { "epoch": 0.651754147402774, "grad_norm": 0.62890625, "learning_rate": 6.520628977951428e-06, "loss": 0.7782, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4793 }, { "epoch": 0.6518901278215937, "grad_norm": 0.384765625, "learning_rate": 6.516178260577293e-06, "loss": 0.7059, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4794 }, { "epoch": 0.6520261082404134, "grad_norm": 0.314453125, "learning_rate": 6.511728328499172e-06, "loss": 0.6147, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4795 }, { "epoch": 0.6521620886592331, "grad_norm": 0.59375, "learning_rate": 6.507279182720132e-06, "loss": 0.4567, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4796 }, { "epoch": 0.6522980690780528, "grad_norm": 0.298828125, "learning_rate": 6.5028308242430635e-06, "loss": 0.651, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4797 }, { "epoch": 0.6524340494968724, "grad_norm": 0.232421875, "learning_rate": 6.498383254070688e-06, "loss": 0.4531, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4798 }, { "epoch": 0.6525700299156921, "grad_norm": 0.30078125, "learning_rate": 6.493936473205538e-06, "loss": 0.4617, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4799 }, { "epoch": 0.6527060103345118, "grad_norm": 0.466796875, "learning_rate": 6.489490482649972e-06, "loss": 0.6908, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4800 }, { "epoch": 0.6528419907533315, "grad_norm": 0.337890625, "learning_rate": 6.4850452834061716e-06, "loss": 0.6533, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4801 }, { "epoch": 0.6529779711721512, "grad_norm": 0.333984375, "learning_rate": 6.480600876476137e-06, "loss": 0.7132, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4802 }, { "epoch": 0.6531139515909709, "grad_norm": 0.40234375, "learning_rate": 6.476157262861695e-06, "loss": 0.6359, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4803 }, { "epoch": 0.6532499320097905, "grad_norm": 0.546875, "learning_rate": 6.471714443564488e-06, "loss": 0.5122, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4804 }, { "epoch": 0.6533859124286103, "grad_norm": 0.2451171875, "learning_rate": 6.467272419585984e-06, "loss": 0.388, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4805 }, { "epoch": 0.65352189284743, "grad_norm": 0.345703125, "learning_rate": 6.462831191927466e-06, "loss": 0.6466, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4806 }, { "epoch": 0.6536578732662497, "grad_norm": 0.251953125, "learning_rate": 6.458390761590043e-06, "loss": 0.445, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4807 }, { "epoch": 0.6537938536850694, "grad_norm": 0.431640625, "learning_rate": 6.453951129574644e-06, "loss": 0.7588, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4808 }, { "epoch": 0.653929834103889, "grad_norm": 0.3359375, "learning_rate": 6.449512296882013e-06, "loss": 0.5968, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4809 }, { "epoch": 0.6540658145227087, "grad_norm": 0.484375, "learning_rate": 6.445074264512721e-06, "loss": 0.7793, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4810 }, { "epoch": 0.6542017949415284, "grad_norm": 0.2041015625, "learning_rate": 6.4406370334671495e-06, "loss": 0.3737, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4811 }, { "epoch": 0.6543377753603481, "grad_norm": 0.50390625, "learning_rate": 6.4362006047455105e-06, "loss": 0.8027, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4812 }, { "epoch": 0.6544737557791678, "grad_norm": 0.42578125, "learning_rate": 6.431764979347826e-06, "loss": 0.6615, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4813 }, { "epoch": 0.6546097361979875, "grad_norm": 0.494140625, "learning_rate": 6.427330158273939e-06, "loss": 0.5493, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4814 }, { "epoch": 0.6547457166168071, "grad_norm": 0.416015625, "learning_rate": 6.422896142523515e-06, "loss": 0.6929, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4815 }, { "epoch": 0.6548816970356268, "grad_norm": 0.33984375, "learning_rate": 6.418462933096035e-06, "loss": 0.5399, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4816 }, { "epoch": 0.6550176774544466, "grad_norm": 0.373046875, "learning_rate": 6.414030530990797e-06, "loss": 0.6748, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4817 }, { "epoch": 0.6551536578732663, "grad_norm": 0.36328125, "learning_rate": 6.40959893720692e-06, "loss": 0.7388, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4818 }, { "epoch": 0.655289638292086, "grad_norm": 0.412109375, "learning_rate": 6.40516815274334e-06, "loss": 0.5765, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4819 }, { "epoch": 0.6554256187109057, "grad_norm": 0.3671875, "learning_rate": 6.400738178598809e-06, "loss": 0.6082, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4820 }, { "epoch": 0.6555615991297253, "grad_norm": 0.3125, "learning_rate": 6.3963090157718955e-06, "loss": 0.5355, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4821 }, { "epoch": 0.655697579548545, "grad_norm": 0.322265625, "learning_rate": 6.39188066526099e-06, "loss": 0.5571, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4822 }, { "epoch": 0.6558335599673647, "grad_norm": 0.48828125, "learning_rate": 6.387453128064296e-06, "loss": 0.7067, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4823 }, { "epoch": 0.6559695403861844, "grad_norm": 0.294921875, "learning_rate": 6.383026405179835e-06, "loss": 0.4677, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4824 }, { "epoch": 0.6561055208050041, "grad_norm": 0.380859375, "learning_rate": 6.378600497605443e-06, "loss": 0.5218, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4825 }, { "epoch": 0.6562415012238237, "grad_norm": 0.60546875, "learning_rate": 6.374175406338774e-06, "loss": 0.5454, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4826 }, { "epoch": 0.6563774816426434, "grad_norm": 0.75, "learning_rate": 6.369751132377298e-06, "loss": 0.7985, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4827 }, { "epoch": 0.6565134620614631, "grad_norm": 0.287109375, "learning_rate": 6.3653276767183e-06, "loss": 0.5529, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4828 }, { "epoch": 0.6566494424802828, "grad_norm": 0.54296875, "learning_rate": 6.360905040358882e-06, "loss": 0.6746, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4829 }, { "epoch": 0.6567854228991026, "grad_norm": 0.8515625, "learning_rate": 6.356483224295959e-06, "loss": 0.8034, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4830 }, { "epoch": 0.6569214033179223, "grad_norm": 0.259765625, "learning_rate": 6.352062229526266e-06, "loss": 0.5218, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4831 }, { "epoch": 0.6570573837367419, "grad_norm": 0.298828125, "learning_rate": 6.347642057046344e-06, "loss": 0.3908, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4832 }, { "epoch": 0.6571933641555616, "grad_norm": 1.59375, "learning_rate": 6.3432227078525564e-06, "loss": 0.8179, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4833 }, { "epoch": 0.6573293445743813, "grad_norm": 0.408203125, "learning_rate": 6.3388041829410785e-06, "loss": 0.7279, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4834 }, { "epoch": 0.657465324993201, "grad_norm": 0.859375, "learning_rate": 6.334386483307901e-06, "loss": 0.7983, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4835 }, { "epoch": 0.6576013054120207, "grad_norm": 0.453125, "learning_rate": 6.329969609948825e-06, "loss": 0.7651, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4836 }, { "epoch": 0.6577372858308403, "grad_norm": 0.31640625, "learning_rate": 6.32555356385947e-06, "loss": 0.4694, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4837 }, { "epoch": 0.65787326624966, "grad_norm": 0.353515625, "learning_rate": 6.321138346035265e-06, "loss": 0.5612, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4838 }, { "epoch": 0.6580092466684797, "grad_norm": 0.6953125, "learning_rate": 6.3167239574714556e-06, "loss": 0.7673, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4839 }, { "epoch": 0.6581452270872994, "grad_norm": 0.53515625, "learning_rate": 6.312310399163098e-06, "loss": 0.7388, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4840 }, { "epoch": 0.6582812075061191, "grad_norm": 0.640625, "learning_rate": 6.307897672105061e-06, "loss": 0.7087, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4841 }, { "epoch": 0.6584171879249389, "grad_norm": 0.412109375, "learning_rate": 6.303485777292031e-06, "loss": 0.5347, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4842 }, { "epoch": 0.6585531683437585, "grad_norm": 0.478515625, "learning_rate": 6.2990747157185e-06, "loss": 0.7664, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4843 }, { "epoch": 0.6586891487625782, "grad_norm": 0.58984375, "learning_rate": 6.294664488378777e-06, "loss": 0.589, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4844 }, { "epoch": 0.6588251291813979, "grad_norm": 0.32421875, "learning_rate": 6.2902550962669775e-06, "loss": 0.5741, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4845 }, { "epoch": 0.6589611096002176, "grad_norm": 0.326171875, "learning_rate": 6.285846540377036e-06, "loss": 0.6017, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4846 }, { "epoch": 0.6590970900190373, "grad_norm": 0.298828125, "learning_rate": 6.281438821702694e-06, "loss": 0.6062, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4847 }, { "epoch": 0.659233070437857, "grad_norm": 0.279296875, "learning_rate": 6.277031941237504e-06, "loss": 0.561, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4848 }, { "epoch": 0.6593690508566766, "grad_norm": 0.81640625, "learning_rate": 6.272625899974833e-06, "loss": 0.4759, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4849 }, { "epoch": 0.6595050312754963, "grad_norm": 0.357421875, "learning_rate": 6.268220698907854e-06, "loss": 0.6374, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4850 }, { "epoch": 0.659641011694316, "grad_norm": 0.5859375, "learning_rate": 6.263816339029556e-06, "loss": 0.8309, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4851 }, { "epoch": 0.6597769921131357, "grad_norm": 0.2734375, "learning_rate": 6.2594128213327336e-06, "loss": 0.5018, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4852 }, { "epoch": 0.6599129725319554, "grad_norm": 0.251953125, "learning_rate": 6.2550101468099935e-06, "loss": 0.4648, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4853 }, { "epoch": 0.660048952950775, "grad_norm": 0.5859375, "learning_rate": 6.250608316453752e-06, "loss": 0.7887, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4854 }, { "epoch": 0.6601849333695948, "grad_norm": 0.365234375, "learning_rate": 6.246207331256238e-06, "loss": 0.658, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4855 }, { "epoch": 0.6603209137884145, "grad_norm": 0.455078125, "learning_rate": 6.241807192209486e-06, "loss": 0.7819, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4856 }, { "epoch": 0.6604568942072342, "grad_norm": 0.353515625, "learning_rate": 6.237407900305334e-06, "loss": 0.6724, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4857 }, { "epoch": 0.6605928746260539, "grad_norm": 0.35546875, "learning_rate": 6.23300945653545e-06, "loss": 0.832, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4858 }, { "epoch": 0.6607288550448736, "grad_norm": 0.228515625, "learning_rate": 6.228611861891289e-06, "loss": 0.4567, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4859 }, { "epoch": 0.6608648354636932, "grad_norm": 0.322265625, "learning_rate": 6.224215117364124e-06, "loss": 0.6561, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4860 }, { "epoch": 0.6610008158825129, "grad_norm": 0.3515625, "learning_rate": 6.219819223945034e-06, "loss": 0.5793, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4861 }, { "epoch": 0.6611367963013326, "grad_norm": 0.296875, "learning_rate": 6.2154241826249075e-06, "loss": 0.5452, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4862 }, { "epoch": 0.6612727767201523, "grad_norm": 0.2490234375, "learning_rate": 6.211029994394442e-06, "loss": 0.4357, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4863 }, { "epoch": 0.661408757138972, "grad_norm": 0.5078125, "learning_rate": 6.206636660244138e-06, "loss": 0.5861, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4864 }, { "epoch": 0.6615447375577916, "grad_norm": 0.3203125, "learning_rate": 6.202244181164311e-06, "loss": 0.5617, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4865 }, { "epoch": 0.6616807179766113, "grad_norm": 0.859375, "learning_rate": 6.197852558145077e-06, "loss": 0.8835, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4866 }, { "epoch": 0.6618166983954311, "grad_norm": 0.5625, "learning_rate": 6.19346179217636e-06, "loss": 0.7407, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4867 }, { "epoch": 0.6619526788142508, "grad_norm": 0.2734375, "learning_rate": 6.189071884247895e-06, "loss": 0.3669, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4868 }, { "epoch": 0.6620886592330705, "grad_norm": 0.263671875, "learning_rate": 6.184682835349221e-06, "loss": 0.5249, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4869 }, { "epoch": 0.6622246396518902, "grad_norm": 0.193359375, "learning_rate": 6.18029464646968e-06, "loss": 0.3667, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4870 }, { "epoch": 0.6623606200707098, "grad_norm": 0.359375, "learning_rate": 6.175907318598427e-06, "loss": 0.508, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4871 }, { "epoch": 0.6624966004895295, "grad_norm": 0.39453125, "learning_rate": 6.171520852724419e-06, "loss": 0.7036, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4872 }, { "epoch": 0.6626325809083492, "grad_norm": 0.376953125, "learning_rate": 6.167135249836417e-06, "loss": 0.5392, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4873 }, { "epoch": 0.6627685613271689, "grad_norm": 0.54296875, "learning_rate": 6.1627505109229906e-06, "loss": 0.591, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4874 }, { "epoch": 0.6629045417459886, "grad_norm": 0.400390625, "learning_rate": 6.158366636972512e-06, "loss": 0.6525, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4875 }, { "epoch": 0.6630405221648082, "grad_norm": 0.515625, "learning_rate": 6.153983628973165e-06, "loss": 0.6003, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4876 }, { "epoch": 0.6631765025836279, "grad_norm": 0.6875, "learning_rate": 6.149601487912927e-06, "loss": 0.744, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4877 }, { "epoch": 0.6633124830024476, "grad_norm": 0.318359375, "learning_rate": 6.14522021477959e-06, "loss": 0.6126, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4878 }, { "epoch": 0.6634484634212673, "grad_norm": 0.34375, "learning_rate": 6.140839810560746e-06, "loss": 0.6164, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4879 }, { "epoch": 0.6635844438400871, "grad_norm": 0.373046875, "learning_rate": 6.1364602762437896e-06, "loss": 0.77, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4880 }, { "epoch": 0.6637204242589068, "grad_norm": 0.35546875, "learning_rate": 6.132081612815925e-06, "loss": 0.6188, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4881 }, { "epoch": 0.6638564046777264, "grad_norm": 0.392578125, "learning_rate": 6.127703821264152e-06, "loss": 0.7092, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4882 }, { "epoch": 0.6639923850965461, "grad_norm": 0.291015625, "learning_rate": 6.123326902575282e-06, "loss": 0.6528, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4883 }, { "epoch": 0.6641283655153658, "grad_norm": 2.046875, "learning_rate": 6.118950857735924e-06, "loss": 0.6543, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4884 }, { "epoch": 0.6642643459341855, "grad_norm": 0.734375, "learning_rate": 6.114575687732493e-06, "loss": 0.6035, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4885 }, { "epoch": 0.6644003263530052, "grad_norm": 0.34765625, "learning_rate": 6.1102013935512065e-06, "loss": 0.5524, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4886 }, { "epoch": 0.6645363067718248, "grad_norm": 0.578125, "learning_rate": 6.105827976178082e-06, "loss": 0.734, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4887 }, { "epoch": 0.6646722871906445, "grad_norm": 0.375, "learning_rate": 6.101455436598942e-06, "loss": 0.6056, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4888 }, { "epoch": 0.6648082676094642, "grad_norm": 0.29296875, "learning_rate": 6.097083775799411e-06, "loss": 0.4987, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4889 }, { "epoch": 0.6649442480282839, "grad_norm": 0.29296875, "learning_rate": 6.0927129947649136e-06, "loss": 0.5747, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4890 }, { "epoch": 0.6650802284471036, "grad_norm": 0.34375, "learning_rate": 6.088343094480677e-06, "loss": 0.591, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4891 }, { "epoch": 0.6652162088659233, "grad_norm": 0.73828125, "learning_rate": 6.0839740759317335e-06, "loss": 0.7253, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4892 }, { "epoch": 0.665352189284743, "grad_norm": 0.3046875, "learning_rate": 6.0796059401029096e-06, "loss": 0.5365, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4893 }, { "epoch": 0.6654881697035627, "grad_norm": 0.6796875, "learning_rate": 6.075238687978837e-06, "loss": 0.682, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4894 }, { "epoch": 0.6656241501223824, "grad_norm": 0.435546875, "learning_rate": 6.07087232054395e-06, "loss": 0.7404, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4895 }, { "epoch": 0.6657601305412021, "grad_norm": 0.52734375, "learning_rate": 6.06650683878248e-06, "loss": 0.8812, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4896 }, { "epoch": 0.6658961109600218, "grad_norm": 0.296875, "learning_rate": 6.062142243678459e-06, "loss": 0.605, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4897 }, { "epoch": 0.6660320913788415, "grad_norm": 0.484375, "learning_rate": 6.057778536215724e-06, "loss": 0.7204, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4898 }, { "epoch": 0.6661680717976611, "grad_norm": 0.5234375, "learning_rate": 6.053415717377902e-06, "loss": 0.6143, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4899 }, { "epoch": 0.6663040522164808, "grad_norm": 0.208984375, "learning_rate": 6.049053788148427e-06, "loss": 0.3325, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4900 }, { "epoch": 0.6664400326353005, "grad_norm": 0.25390625, "learning_rate": 6.0446927495105345e-06, "loss": 0.4701, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4901 }, { "epoch": 0.6665760130541202, "grad_norm": 0.45703125, "learning_rate": 6.0403326024472545e-06, "loss": 0.4627, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4902 }, { "epoch": 0.6667119934729399, "grad_norm": 0.333984375, "learning_rate": 6.0359733479414164e-06, "loss": 0.6356, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4903 }, { "epoch": 0.6668479738917595, "grad_norm": 0.33984375, "learning_rate": 6.031614986975649e-06, "loss": 0.6437, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4904 }, { "epoch": 0.6669839543105793, "grad_norm": 0.306640625, "learning_rate": 6.027257520532384e-06, "loss": 0.5228, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4905 }, { "epoch": 0.667119934729399, "grad_norm": 0.482421875, "learning_rate": 6.022900949593843e-06, "loss": 0.3003, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4906 }, { "epoch": 0.6672559151482187, "grad_norm": 0.369140625, "learning_rate": 6.018545275142053e-06, "loss": 0.7155, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4907 }, { "epoch": 0.6673918955670384, "grad_norm": 0.6953125, "learning_rate": 6.014190498158835e-06, "loss": 0.7048, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4908 }, { "epoch": 0.667527875985858, "grad_norm": 0.427734375, "learning_rate": 6.0098366196258095e-06, "loss": 0.7568, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4909 }, { "epoch": 0.6676638564046777, "grad_norm": 0.298828125, "learning_rate": 6.005483640524394e-06, "loss": 0.4826, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4910 }, { "epoch": 0.6677998368234974, "grad_norm": 0.421875, "learning_rate": 6.001131561835804e-06, "loss": 0.8397, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4911 }, { "epoch": 0.6679358172423171, "grad_norm": 0.5625, "learning_rate": 5.99678038454105e-06, "loss": 0.6014, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4912 }, { "epoch": 0.6680717976611368, "grad_norm": 0.49609375, "learning_rate": 5.9924301096209415e-06, "loss": 0.695, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4913 }, { "epoch": 0.6682077780799565, "grad_norm": 0.59765625, "learning_rate": 5.988080738056083e-06, "loss": 0.6475, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4914 }, { "epoch": 0.6683437584987761, "grad_norm": 0.52734375, "learning_rate": 5.983732270826876e-06, "loss": 0.6201, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4915 }, { "epoch": 0.6684797389175958, "grad_norm": 0.5703125, "learning_rate": 5.979384708913518e-06, "loss": 0.7826, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4916 }, { "epoch": 0.6686157193364155, "grad_norm": 0.421875, "learning_rate": 5.9750380532959985e-06, "loss": 0.6432, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4917 }, { "epoch": 0.6687516997552353, "grad_norm": 0.34375, "learning_rate": 5.970692304954115e-06, "loss": 0.7039, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4918 }, { "epoch": 0.668887680174055, "grad_norm": 0.400390625, "learning_rate": 5.96634746486745e-06, "loss": 0.79, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4919 }, { "epoch": 0.6690236605928747, "grad_norm": 0.345703125, "learning_rate": 5.962003534015382e-06, "loss": 0.7205, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4920 }, { "epoch": 0.6691596410116943, "grad_norm": 0.37890625, "learning_rate": 5.957660513377086e-06, "loss": 0.6447, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4921 }, { "epoch": 0.669295621430514, "grad_norm": 0.35546875, "learning_rate": 5.953318403931533e-06, "loss": 0.7425, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4922 }, { "epoch": 0.6694316018493337, "grad_norm": 0.302734375, "learning_rate": 5.9489772066574854e-06, "loss": 0.467, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4923 }, { "epoch": 0.6695675822681534, "grad_norm": 0.341796875, "learning_rate": 5.944636922533504e-06, "loss": 0.7695, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4924 }, { "epoch": 0.6697035626869731, "grad_norm": 0.2734375, "learning_rate": 5.940297552537942e-06, "loss": 0.4655, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4925 }, { "epoch": 0.6698395431057927, "grad_norm": 0.30859375, "learning_rate": 5.935959097648946e-06, "loss": 0.5815, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4926 }, { "epoch": 0.6699755235246124, "grad_norm": 0.474609375, "learning_rate": 5.931621558844456e-06, "loss": 0.5749, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4927 }, { "epoch": 0.6701115039434321, "grad_norm": 0.375, "learning_rate": 5.927284937102208e-06, "loss": 0.6913, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4928 }, { "epoch": 0.6702474843622518, "grad_norm": 0.51171875, "learning_rate": 5.922949233399727e-06, "loss": 0.4642, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4929 }, { "epoch": 0.6703834647810716, "grad_norm": 0.9140625, "learning_rate": 5.918614448714337e-06, "loss": 0.8211, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4930 }, { "epoch": 0.6705194451998913, "grad_norm": 0.7578125, "learning_rate": 5.914280584023149e-06, "loss": 0.673, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4931 }, { "epoch": 0.6706554256187109, "grad_norm": 0.486328125, "learning_rate": 5.909947640303068e-06, "loss": 0.7531, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4932 }, { "epoch": 0.6707914060375306, "grad_norm": 0.38671875, "learning_rate": 5.905615618530795e-06, "loss": 0.6486, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4933 }, { "epoch": 0.6709273864563503, "grad_norm": 0.31640625, "learning_rate": 5.90128451968282e-06, "loss": 0.5866, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4934 }, { "epoch": 0.67106336687517, "grad_norm": 0.412109375, "learning_rate": 5.896954344735426e-06, "loss": 0.6916, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4935 }, { "epoch": 0.6711993472939897, "grad_norm": 0.390625, "learning_rate": 5.892625094664688e-06, "loss": 0.5776, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4936 }, { "epoch": 0.6713353277128093, "grad_norm": 0.3984375, "learning_rate": 5.8882967704464685e-06, "loss": 0.6444, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4937 }, { "epoch": 0.671471308131629, "grad_norm": 0.40234375, "learning_rate": 5.883969373056428e-06, "loss": 0.6587, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4938 }, { "epoch": 0.6716072885504487, "grad_norm": 0.3828125, "learning_rate": 5.879642903470013e-06, "loss": 0.5809, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4939 }, { "epoch": 0.6717432689692684, "grad_norm": 0.6015625, "learning_rate": 5.875317362662465e-06, "loss": 0.7805, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4940 }, { "epoch": 0.6718792493880881, "grad_norm": 0.390625, "learning_rate": 5.870992751608811e-06, "loss": 0.741, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4941 }, { "epoch": 0.6720152298069078, "grad_norm": 0.302734375, "learning_rate": 5.8666690712838715e-06, "loss": 0.4857, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4942 }, { "epoch": 0.6721512102257275, "grad_norm": 0.412109375, "learning_rate": 5.862346322662259e-06, "loss": 0.7294, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4943 }, { "epoch": 0.6722871906445472, "grad_norm": 0.33984375, "learning_rate": 5.8580245067183714e-06, "loss": 0.765, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4944 }, { "epoch": 0.6724231710633669, "grad_norm": 1.3984375, "learning_rate": 5.8537036244264014e-06, "loss": 0.9166, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4945 }, { "epoch": 0.6725591514821866, "grad_norm": 0.30078125, "learning_rate": 5.849383676760327e-06, "loss": 0.6538, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4946 }, { "epoch": 0.6726951319010063, "grad_norm": 0.52734375, "learning_rate": 5.845064664693918e-06, "loss": 0.7096, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4947 }, { "epoch": 0.672831112319826, "grad_norm": 0.2333984375, "learning_rate": 5.840746589200732e-06, "loss": 0.4499, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4948 }, { "epoch": 0.6729670927386456, "grad_norm": 0.326171875, "learning_rate": 5.8364294512541174e-06, "loss": 0.5487, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4949 }, { "epoch": 0.6731030731574653, "grad_norm": 0.4453125, "learning_rate": 5.832113251827205e-06, "loss": 0.667, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4950 }, { "epoch": 0.673239053576285, "grad_norm": 0.48828125, "learning_rate": 5.827797991892927e-06, "loss": 0.8198, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4951 }, { "epoch": 0.6733750339951047, "grad_norm": 0.56640625, "learning_rate": 5.823483672423988e-06, "loss": 0.5827, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4952 }, { "epoch": 0.6735110144139244, "grad_norm": 0.291015625, "learning_rate": 5.819170294392894e-06, "loss": 0.6129, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4953 }, { "epoch": 0.673646994832744, "grad_norm": 0.72265625, "learning_rate": 5.814857858771928e-06, "loss": 0.8281, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4954 }, { "epoch": 0.6737829752515637, "grad_norm": 0.34765625, "learning_rate": 5.810546366533172e-06, "loss": 0.6632, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4955 }, { "epoch": 0.6739189556703835, "grad_norm": 0.4296875, "learning_rate": 5.806235818648481e-06, "loss": 0.7153, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4956 }, { "epoch": 0.6740549360892032, "grad_norm": 0.357421875, "learning_rate": 5.801926216089515e-06, "loss": 0.4519, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4957 }, { "epoch": 0.6741909165080229, "grad_norm": 0.462890625, "learning_rate": 5.797617559827702e-06, "loss": 0.7746, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4958 }, { "epoch": 0.6743268969268426, "grad_norm": 0.326171875, "learning_rate": 5.793309850834272e-06, "loss": 0.5197, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4959 }, { "epoch": 0.6744628773456622, "grad_norm": 0.44140625, "learning_rate": 5.78900309008023e-06, "loss": 0.6061, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4960 }, { "epoch": 0.6745988577644819, "grad_norm": 0.609375, "learning_rate": 5.78469727853638e-06, "loss": 0.8729, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4961 }, { "epoch": 0.6747348381833016, "grad_norm": 0.63671875, "learning_rate": 5.780392417173295e-06, "loss": 0.7661, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4962 }, { "epoch": 0.6748708186021213, "grad_norm": 0.4453125, "learning_rate": 5.776088506961353e-06, "loss": 0.6559, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4963 }, { "epoch": 0.675006799020941, "grad_norm": 0.400390625, "learning_rate": 5.771785548870698e-06, "loss": 0.7741, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4964 }, { "epoch": 0.6751427794397606, "grad_norm": 0.39453125, "learning_rate": 5.7674835438712805e-06, "loss": 0.7997, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4965 }, { "epoch": 0.6752787598585803, "grad_norm": 0.345703125, "learning_rate": 5.763182492932815e-06, "loss": 0.5941, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4966 }, { "epoch": 0.6754147402774, "grad_norm": 0.3046875, "learning_rate": 5.7588823970248185e-06, "loss": 0.5462, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4967 }, { "epoch": 0.6755507206962198, "grad_norm": 0.376953125, "learning_rate": 5.754583257116579e-06, "loss": 0.8013, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4968 }, { "epoch": 0.6756867011150395, "grad_norm": 0.251953125, "learning_rate": 5.750285074177182e-06, "loss": 0.5106, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4969 }, { "epoch": 0.6758226815338592, "grad_norm": 0.2890625, "learning_rate": 5.745987849175483e-06, "loss": 0.6151, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4970 }, { "epoch": 0.6759586619526788, "grad_norm": 0.40234375, "learning_rate": 5.741691583080135e-06, "loss": 0.6287, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4971 }, { "epoch": 0.6760946423714985, "grad_norm": 0.3515625, "learning_rate": 5.737396276859564e-06, "loss": 0.4798, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4972 }, { "epoch": 0.6762306227903182, "grad_norm": 0.76171875, "learning_rate": 5.73310193148199e-06, "loss": 0.5672, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4973 }, { "epoch": 0.6763666032091379, "grad_norm": 0.48828125, "learning_rate": 5.728808547915405e-06, "loss": 0.6768, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4974 }, { "epoch": 0.6765025836279576, "grad_norm": 0.40234375, "learning_rate": 5.724516127127598e-06, "loss": 0.7104, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4975 }, { "epoch": 0.6766385640467772, "grad_norm": 0.3671875, "learning_rate": 5.720224670086123e-06, "loss": 0.7147, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4976 }, { "epoch": 0.6767745444655969, "grad_norm": 0.330078125, "learning_rate": 5.715934177758333e-06, "loss": 0.6234, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4977 }, { "epoch": 0.6769105248844166, "grad_norm": 0.3984375, "learning_rate": 5.71164465111136e-06, "loss": 0.6725, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4978 }, { "epoch": 0.6770465053032363, "grad_norm": 0.3046875, "learning_rate": 5.707356091112108e-06, "loss": 0.4904, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4979 }, { "epoch": 0.677182485722056, "grad_norm": 0.35546875, "learning_rate": 5.70306849872728e-06, "loss": 0.6657, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4980 }, { "epoch": 0.6773184661408758, "grad_norm": 0.400390625, "learning_rate": 5.698781874923343e-06, "loss": 0.6348, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4981 }, { "epoch": 0.6774544465596954, "grad_norm": 0.60546875, "learning_rate": 5.694496220666563e-06, "loss": 0.6444, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4982 }, { "epoch": 0.6775904269785151, "grad_norm": 0.40234375, "learning_rate": 5.690211536922969e-06, "loss": 0.6125, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4983 }, { "epoch": 0.6777264073973348, "grad_norm": 0.322265625, "learning_rate": 5.685927824658391e-06, "loss": 0.5962, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4984 }, { "epoch": 0.6778623878161545, "grad_norm": 0.578125, "learning_rate": 5.681645084838422e-06, "loss": 0.6694, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4985 }, { "epoch": 0.6779983682349742, "grad_norm": 0.59375, "learning_rate": 5.677363318428451e-06, "loss": 0.4299, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4986 }, { "epoch": 0.6781343486537939, "grad_norm": 0.25, "learning_rate": 5.673082526393634e-06, "loss": 0.5016, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4987 }, { "epoch": 0.6782703290726135, "grad_norm": 0.4296875, "learning_rate": 5.668802709698919e-06, "loss": 0.6667, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4988 }, { "epoch": 0.6784063094914332, "grad_norm": 0.2373046875, "learning_rate": 5.664523869309023e-06, "loss": 0.5161, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4989 }, { "epoch": 0.6785422899102529, "grad_norm": 2.859375, "learning_rate": 5.660246006188458e-06, "loss": 0.8735, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4990 }, { "epoch": 0.6786782703290726, "grad_norm": 0.396484375, "learning_rate": 5.655969121301497e-06, "loss": 0.534, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4991 }, { "epoch": 0.6788142507478923, "grad_norm": 0.283203125, "learning_rate": 5.651693215612209e-06, "loss": 0.5768, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4992 }, { "epoch": 0.678950231166712, "grad_norm": 0.4765625, "learning_rate": 5.647418290084431e-06, "loss": 0.6632, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4993 }, { "epoch": 0.6790862115855317, "grad_norm": 0.3125, "learning_rate": 5.643144345681788e-06, "loss": 0.5312, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4994 }, { "epoch": 0.6792221920043514, "grad_norm": 0.390625, "learning_rate": 5.638871383367674e-06, "loss": 0.5418, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4995 }, { "epoch": 0.6793581724231711, "grad_norm": 0.578125, "learning_rate": 5.634599404105272e-06, "loss": 0.7209, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4996 }, { "epoch": 0.6794941528419908, "grad_norm": 0.283203125, "learning_rate": 5.6303284088575334e-06, "loss": 0.5552, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4997 }, { "epoch": 0.6796301332608105, "grad_norm": 0.2890625, "learning_rate": 5.6260583985872e-06, "loss": 0.4198, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4998 }, { "epoch": 0.6797661136796301, "grad_norm": 0.310546875, "learning_rate": 5.6217893742567745e-06, "loss": 0.5402, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 4999 }, { "epoch": 0.6799020940984498, "grad_norm": 0.38671875, "learning_rate": 5.617521336828556e-06, "loss": 0.7036, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5000 }, { "epoch": 0.6800380745172695, "grad_norm": 0.416015625, "learning_rate": 5.6132542872646055e-06, "loss": 0.7979, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5001 }, { "epoch": 0.6801740549360892, "grad_norm": 0.314453125, "learning_rate": 5.608988226526775e-06, "loss": 0.6007, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5002 }, { "epoch": 0.6803100353549089, "grad_norm": 0.58984375, "learning_rate": 5.604723155576679e-06, "loss": 0.8984, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5003 }, { "epoch": 0.6804460157737285, "grad_norm": 0.296875, "learning_rate": 5.600459075375726e-06, "loss": 0.4482, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5004 }, { "epoch": 0.6805819961925482, "grad_norm": 0.431640625, "learning_rate": 5.596195986885081e-06, "loss": 0.8146, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5005 }, { "epoch": 0.680717976611368, "grad_norm": 0.61328125, "learning_rate": 5.591933891065706e-06, "loss": 0.4639, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5006 }, { "epoch": 0.6808539570301877, "grad_norm": 0.5703125, "learning_rate": 5.587672788878322e-06, "loss": 0.5269, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5007 }, { "epoch": 0.6809899374490074, "grad_norm": 0.41015625, "learning_rate": 5.583412681283441e-06, "loss": 0.7795, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5008 }, { "epoch": 0.6811259178678271, "grad_norm": 1.5546875, "learning_rate": 5.579153569241335e-06, "loss": 0.8647, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5009 }, { "epoch": 0.6812618982866467, "grad_norm": 0.25390625, "learning_rate": 5.574895453712068e-06, "loss": 0.3966, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5010 }, { "epoch": 0.6813978787054664, "grad_norm": 0.24609375, "learning_rate": 5.570638335655462e-06, "loss": 0.3646, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5011 }, { "epoch": 0.6815338591242861, "grad_norm": 0.32421875, "learning_rate": 5.566382216031136e-06, "loss": 0.5368, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5012 }, { "epoch": 0.6816698395431058, "grad_norm": 0.36328125, "learning_rate": 5.5621270957984575e-06, "loss": 0.7231, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5013 }, { "epoch": 0.6818058199619255, "grad_norm": 0.283203125, "learning_rate": 5.5578729759165935e-06, "loss": 0.5565, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5014 }, { "epoch": 0.6819418003807451, "grad_norm": 0.27734375, "learning_rate": 5.553619857344468e-06, "loss": 0.5457, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5015 }, { "epoch": 0.6820777807995648, "grad_norm": 0.34375, "learning_rate": 5.549367741040792e-06, "loss": 0.5662, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5016 }, { "epoch": 0.6822137612183845, "grad_norm": 0.3515625, "learning_rate": 5.545116627964038e-06, "loss": 0.533, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5017 }, { "epoch": 0.6823497416372042, "grad_norm": 0.46875, "learning_rate": 5.540866519072463e-06, "loss": 0.5951, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5018 }, { "epoch": 0.682485722056024, "grad_norm": 0.40625, "learning_rate": 5.536617415324091e-06, "loss": 0.7493, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5019 }, { "epoch": 0.6826217024748437, "grad_norm": 0.2578125, "learning_rate": 5.532369317676726e-06, "loss": 0.4663, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5020 }, { "epoch": 0.6827576828936633, "grad_norm": 0.34375, "learning_rate": 5.528122227087935e-06, "loss": 0.5011, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5021 }, { "epoch": 0.682893663312483, "grad_norm": 0.58203125, "learning_rate": 5.52387614451507e-06, "loss": 0.7201, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5022 }, { "epoch": 0.6830296437313027, "grad_norm": 0.2216796875, "learning_rate": 5.519631070915245e-06, "loss": 0.5076, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5023 }, { "epoch": 0.6831656241501224, "grad_norm": 0.2138671875, "learning_rate": 5.515387007245357e-06, "loss": 0.3151, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5024 }, { "epoch": 0.6833016045689421, "grad_norm": 0.44140625, "learning_rate": 5.511143954462064e-06, "loss": 0.8215, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5025 }, { "epoch": 0.6834375849877617, "grad_norm": 0.703125, "learning_rate": 5.506901913521808e-06, "loss": 0.6452, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5026 }, { "epoch": 0.6835735654065814, "grad_norm": 0.43359375, "learning_rate": 5.502660885380791e-06, "loss": 0.5768, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5027 }, { "epoch": 0.6837095458254011, "grad_norm": 0.328125, "learning_rate": 5.498420870994997e-06, "loss": 0.7179, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5028 }, { "epoch": 0.6838455262442208, "grad_norm": 0.58984375, "learning_rate": 5.494181871320172e-06, "loss": 0.7562, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5029 }, { "epoch": 0.6839815066630405, "grad_norm": 0.431640625, "learning_rate": 5.489943887311846e-06, "loss": 0.7123, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5030 }, { "epoch": 0.6841174870818603, "grad_norm": 0.361328125, "learning_rate": 5.485706919925304e-06, "loss": 0.4725, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5031 }, { "epoch": 0.68425346750068, "grad_norm": 0.279296875, "learning_rate": 5.481470970115619e-06, "loss": 0.5244, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5032 }, { "epoch": 0.6843894479194996, "grad_norm": 0.640625, "learning_rate": 5.477236038837617e-06, "loss": 0.761, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5033 }, { "epoch": 0.6845254283383193, "grad_norm": 0.31640625, "learning_rate": 5.473002127045912e-06, "loss": 0.6497, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5034 }, { "epoch": 0.684661408757139, "grad_norm": 0.28515625, "learning_rate": 5.468769235694871e-06, "loss": 0.4712, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5035 }, { "epoch": 0.6847973891759587, "grad_norm": 0.40625, "learning_rate": 5.464537365738646e-06, "loss": 0.6014, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5036 }, { "epoch": 0.6849333695947784, "grad_norm": 0.4921875, "learning_rate": 5.460306518131146e-06, "loss": 0.7171, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5037 }, { "epoch": 0.685069350013598, "grad_norm": 0.294921875, "learning_rate": 5.45607669382606e-06, "loss": 0.5373, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5038 }, { "epoch": 0.6852053304324177, "grad_norm": 0.43359375, "learning_rate": 5.451847893776845e-06, "loss": 0.7936, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5039 }, { "epoch": 0.6853413108512374, "grad_norm": 0.33984375, "learning_rate": 5.447620118936718e-06, "loss": 0.7248, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5040 }, { "epoch": 0.6854772912700571, "grad_norm": 0.3671875, "learning_rate": 5.443393370258677e-06, "loss": 0.6062, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5041 }, { "epoch": 0.6856132716888768, "grad_norm": 0.4921875, "learning_rate": 5.439167648695476e-06, "loss": 0.8309, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5042 }, { "epoch": 0.6857492521076964, "grad_norm": 0.357421875, "learning_rate": 5.434942955199653e-06, "loss": 0.6497, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5043 }, { "epoch": 0.6858852325265162, "grad_norm": 0.427734375, "learning_rate": 5.430719290723495e-06, "loss": 0.8587, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5044 }, { "epoch": 0.6860212129453359, "grad_norm": 0.67578125, "learning_rate": 5.426496656219078e-06, "loss": 0.6419, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5045 }, { "epoch": 0.6861571933641556, "grad_norm": 1.453125, "learning_rate": 5.422275052638227e-06, "loss": 0.6668, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5046 }, { "epoch": 0.6862931737829753, "grad_norm": 0.50390625, "learning_rate": 5.418054480932551e-06, "loss": 0.8457, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5047 }, { "epoch": 0.686429154201795, "grad_norm": 0.228515625, "learning_rate": 5.4138349420534085e-06, "loss": 0.4325, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5048 }, { "epoch": 0.6865651346206146, "grad_norm": 0.4921875, "learning_rate": 5.409616436951948e-06, "loss": 0.714, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5049 }, { "epoch": 0.6867011150394343, "grad_norm": 0.37890625, "learning_rate": 5.405398966579059e-06, "loss": 0.5519, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5050 }, { "epoch": 0.686837095458254, "grad_norm": 0.337890625, "learning_rate": 5.401182531885421e-06, "loss": 0.6621, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5051 }, { "epoch": 0.6869730758770737, "grad_norm": 0.65234375, "learning_rate": 5.396967133821461e-06, "loss": 0.6069, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5052 }, { "epoch": 0.6871090562958934, "grad_norm": 0.26953125, "learning_rate": 5.392752773337391e-06, "loss": 0.453, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5053 }, { "epoch": 0.687245036714713, "grad_norm": 0.4765625, "learning_rate": 5.38853945138317e-06, "loss": 0.3957, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5054 }, { "epoch": 0.6873810171335327, "grad_norm": 0.41015625, "learning_rate": 5.384327168908541e-06, "loss": 0.6589, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5055 }, { "epoch": 0.6875169975523525, "grad_norm": 0.53125, "learning_rate": 5.380115926862997e-06, "loss": 0.5133, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5056 }, { "epoch": 0.6876529779711722, "grad_norm": 0.376953125, "learning_rate": 5.375905726195809e-06, "loss": 0.6024, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5057 }, { "epoch": 0.6877889583899919, "grad_norm": 0.326171875, "learning_rate": 5.371696567855999e-06, "loss": 0.7031, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5058 }, { "epoch": 0.6879249388088116, "grad_norm": 0.361328125, "learning_rate": 5.3674884527923755e-06, "loss": 0.5732, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5059 }, { "epoch": 0.6880609192276312, "grad_norm": 0.314453125, "learning_rate": 5.3632813819534864e-06, "loss": 0.5924, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5060 }, { "epoch": 0.6881968996464509, "grad_norm": 0.357421875, "learning_rate": 5.3590753562876685e-06, "loss": 0.624, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5061 }, { "epoch": 0.6883328800652706, "grad_norm": 0.369140625, "learning_rate": 5.354870376743e-06, "loss": 0.6702, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5062 }, { "epoch": 0.6884688604840903, "grad_norm": 0.359375, "learning_rate": 5.350666444267345e-06, "loss": 0.6434, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5063 }, { "epoch": 0.68860484090291, "grad_norm": 0.341796875, "learning_rate": 5.3464635598083126e-06, "loss": 0.7109, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5064 }, { "epoch": 0.6887408213217296, "grad_norm": 0.4375, "learning_rate": 5.342261724313292e-06, "loss": 0.6792, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5065 }, { "epoch": 0.6888768017405493, "grad_norm": 0.4375, "learning_rate": 5.338060938729422e-06, "loss": 0.7642, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5066 }, { "epoch": 0.689012782159369, "grad_norm": 0.236328125, "learning_rate": 5.333861204003616e-06, "loss": 0.4508, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5067 }, { "epoch": 0.6891487625781887, "grad_norm": 0.369140625, "learning_rate": 5.329662521082539e-06, "loss": 0.6659, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5068 }, { "epoch": 0.6892847429970085, "grad_norm": 0.41015625, "learning_rate": 5.325464890912635e-06, "loss": 0.6585, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5069 }, { "epoch": 0.6894207234158282, "grad_norm": 0.46484375, "learning_rate": 5.3212683144400915e-06, "loss": 0.8454, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5070 }, { "epoch": 0.6895567038346478, "grad_norm": 0.3515625, "learning_rate": 5.317072792610876e-06, "loss": 0.5775, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5071 }, { "epoch": 0.6896926842534675, "grad_norm": 0.353515625, "learning_rate": 5.312878326370703e-06, "loss": 0.7703, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5072 }, { "epoch": 0.6898286646722872, "grad_norm": 0.65234375, "learning_rate": 5.308684916665064e-06, "loss": 0.6899, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5073 }, { "epoch": 0.6899646450911069, "grad_norm": 0.322265625, "learning_rate": 5.304492564439198e-06, "loss": 0.4971, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5074 }, { "epoch": 0.6901006255099266, "grad_norm": 0.376953125, "learning_rate": 5.3003012706381195e-06, "loss": 0.6817, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5075 }, { "epoch": 0.6902366059287462, "grad_norm": 0.30078125, "learning_rate": 5.296111036206589e-06, "loss": 0.6484, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5076 }, { "epoch": 0.6903725863475659, "grad_norm": 0.412109375, "learning_rate": 5.291921862089144e-06, "loss": 0.6077, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5077 }, { "epoch": 0.6905085667663856, "grad_norm": 0.51953125, "learning_rate": 5.287733749230071e-06, "loss": 0.625, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5078 }, { "epoch": 0.6906445471852053, "grad_norm": 0.30859375, "learning_rate": 5.283546698573425e-06, "loss": 0.5571, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5079 }, { "epoch": 0.690780527604025, "grad_norm": 0.34375, "learning_rate": 5.279360711063015e-06, "loss": 0.5667, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5080 }, { "epoch": 0.6909165080228447, "grad_norm": 0.328125, "learning_rate": 5.275175787642419e-06, "loss": 0.624, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5081 }, { "epoch": 0.6910524884416644, "grad_norm": 0.376953125, "learning_rate": 5.270991929254963e-06, "loss": 0.5705, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5082 }, { "epoch": 0.6911884688604841, "grad_norm": 0.255859375, "learning_rate": 5.266809136843747e-06, "loss": 0.4517, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5083 }, { "epoch": 0.6913244492793038, "grad_norm": 0.55859375, "learning_rate": 5.262627411351615e-06, "loss": 0.793, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5084 }, { "epoch": 0.6914604296981235, "grad_norm": 0.431640625, "learning_rate": 5.258446753721189e-06, "loss": 0.6099, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5085 }, { "epoch": 0.6915964101169432, "grad_norm": 0.796875, "learning_rate": 5.254267164894831e-06, "loss": 0.5954, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5086 }, { "epoch": 0.6917323905357629, "grad_norm": 0.298828125, "learning_rate": 5.250088645814679e-06, "loss": 0.6553, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5087 }, { "epoch": 0.6918683709545825, "grad_norm": 0.36328125, "learning_rate": 5.245911197422616e-06, "loss": 0.5728, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5088 }, { "epoch": 0.6920043513734022, "grad_norm": 0.43359375, "learning_rate": 5.241734820660296e-06, "loss": 0.6689, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5089 }, { "epoch": 0.6921403317922219, "grad_norm": 0.61328125, "learning_rate": 5.23755951646912e-06, "loss": 0.4045, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5090 }, { "epoch": 0.6922763122110416, "grad_norm": 0.39453125, "learning_rate": 5.233385285790258e-06, "loss": 0.6198, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5091 }, { "epoch": 0.6924122926298613, "grad_norm": 0.3203125, "learning_rate": 5.229212129564625e-06, "loss": 0.6102, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5092 }, { "epoch": 0.6925482730486809, "grad_norm": 0.326171875, "learning_rate": 5.225040048732911e-06, "loss": 0.6859, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5093 }, { "epoch": 0.6926842534675007, "grad_norm": 0.1962890625, "learning_rate": 5.220869044235545e-06, "loss": 0.2832, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5094 }, { "epoch": 0.6928202338863204, "grad_norm": 0.390625, "learning_rate": 5.216699117012729e-06, "loss": 0.771, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5095 }, { "epoch": 0.6929562143051401, "grad_norm": 0.63671875, "learning_rate": 5.212530268004411e-06, "loss": 0.7248, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5096 }, { "epoch": 0.6930921947239598, "grad_norm": 0.69140625, "learning_rate": 5.208362498150305e-06, "loss": 0.6506, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5097 }, { "epoch": 0.6932281751427795, "grad_norm": 0.515625, "learning_rate": 5.2041958083898724e-06, "loss": 0.8501, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5098 }, { "epoch": 0.6933641555615991, "grad_norm": 0.30078125, "learning_rate": 5.200030199662338e-06, "loss": 0.5658, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5099 }, { "epoch": 0.6935001359804188, "grad_norm": 0.33984375, "learning_rate": 5.195865672906685e-06, "loss": 0.522, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5100 }, { "epoch": 0.6936361163992385, "grad_norm": 0.396484375, "learning_rate": 5.191702229061641e-06, "loss": 0.7595, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5101 }, { "epoch": 0.6937720968180582, "grad_norm": 0.28125, "learning_rate": 5.187539869065706e-06, "loss": 0.5093, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5102 }, { "epoch": 0.6939080772368779, "grad_norm": 0.7734375, "learning_rate": 5.183378593857117e-06, "loss": 0.7145, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5103 }, { "epoch": 0.6940440576556975, "grad_norm": 0.3203125, "learning_rate": 5.179218404373886e-06, "loss": 0.7018, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5104 }, { "epoch": 0.6941800380745172, "grad_norm": 0.7109375, "learning_rate": 5.175059301553762e-06, "loss": 0.7549, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5105 }, { "epoch": 0.6943160184933369, "grad_norm": 0.28515625, "learning_rate": 5.170901286334264e-06, "loss": 0.4492, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5106 }, { "epoch": 0.6944519989121567, "grad_norm": 0.3984375, "learning_rate": 5.1667443596526555e-06, "loss": 0.5754, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5107 }, { "epoch": 0.6945879793309764, "grad_norm": 0.375, "learning_rate": 5.162588522445961e-06, "loss": 0.5999, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5108 }, { "epoch": 0.6947239597497961, "grad_norm": 0.353515625, "learning_rate": 5.158433775650955e-06, "loss": 0.695, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5109 }, { "epoch": 0.6948599401686157, "grad_norm": 0.6953125, "learning_rate": 5.154280120204173e-06, "loss": 0.5628, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5110 }, { "epoch": 0.6949959205874354, "grad_norm": 0.37109375, "learning_rate": 5.150127557041891e-06, "loss": 0.5814, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5111 }, { "epoch": 0.6951319010062551, "grad_norm": 0.5078125, "learning_rate": 5.145976087100157e-06, "loss": 0.652, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5112 }, { "epoch": 0.6952678814250748, "grad_norm": 0.341796875, "learning_rate": 5.141825711314754e-06, "loss": 0.6099, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5113 }, { "epoch": 0.6954038618438945, "grad_norm": 0.451171875, "learning_rate": 5.137676430621237e-06, "loss": 0.7539, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5114 }, { "epoch": 0.6955398422627141, "grad_norm": 0.72265625, "learning_rate": 5.133528245954895e-06, "loss": 0.8115, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5115 }, { "epoch": 0.6956758226815338, "grad_norm": 0.369140625, "learning_rate": 5.129381158250789e-06, "loss": 0.5075, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5116 }, { "epoch": 0.6958118031003535, "grad_norm": 0.42578125, "learning_rate": 5.125235168443714e-06, "loss": 0.7528, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5117 }, { "epoch": 0.6959477835191732, "grad_norm": 0.388671875, "learning_rate": 5.121090277468235e-06, "loss": 0.7018, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5118 }, { "epoch": 0.696083763937993, "grad_norm": 0.439453125, "learning_rate": 5.116946486258654e-06, "loss": 0.4635, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5119 }, { "epoch": 0.6962197443568127, "grad_norm": 0.44140625, "learning_rate": 5.11280379574904e-06, "loss": 0.7318, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5120 }, { "epoch": 0.6963557247756323, "grad_norm": 0.443359375, "learning_rate": 5.108662206873196e-06, "loss": 0.6099, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5121 }, { "epoch": 0.696491705194452, "grad_norm": 0.259765625, "learning_rate": 5.104521720564698e-06, "loss": 0.4969, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5122 }, { "epoch": 0.6966276856132717, "grad_norm": 0.216796875, "learning_rate": 5.100382337756853e-06, "loss": 0.4049, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5123 }, { "epoch": 0.6967636660320914, "grad_norm": 0.349609375, "learning_rate": 5.0962440593827355e-06, "loss": 0.5866, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5124 }, { "epoch": 0.6968996464509111, "grad_norm": 0.30859375, "learning_rate": 5.0921068863751565e-06, "loss": 0.4772, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5125 }, { "epoch": 0.6970356268697308, "grad_norm": 0.58203125, "learning_rate": 5.087970819666693e-06, "loss": 0.5645, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5126 }, { "epoch": 0.6971716072885504, "grad_norm": 0.37890625, "learning_rate": 5.083835860189659e-06, "loss": 0.6305, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5127 }, { "epoch": 0.6973075877073701, "grad_norm": 0.333984375, "learning_rate": 5.0797020088761306e-06, "loss": 0.562, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5128 }, { "epoch": 0.6974435681261898, "grad_norm": 0.302734375, "learning_rate": 5.075569266657921e-06, "loss": 0.6147, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5129 }, { "epoch": 0.6975795485450095, "grad_norm": 0.33203125, "learning_rate": 5.0714376344666095e-06, "loss": 0.5589, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5130 }, { "epoch": 0.6977155289638292, "grad_norm": 0.439453125, "learning_rate": 5.06730711323351e-06, "loss": 0.6201, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5131 }, { "epoch": 0.697851509382649, "grad_norm": 0.427734375, "learning_rate": 5.0631777038896965e-06, "loss": 0.6859, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5132 }, { "epoch": 0.6979874898014686, "grad_norm": 0.6953125, "learning_rate": 5.059049407365984e-06, "loss": 0.6797, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5133 }, { "epoch": 0.6981234702202883, "grad_norm": 0.46484375, "learning_rate": 5.054922224592947e-06, "loss": 0.6143, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5134 }, { "epoch": 0.698259450639108, "grad_norm": 0.419921875, "learning_rate": 5.050796156500895e-06, "loss": 0.6375, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5135 }, { "epoch": 0.6983954310579277, "grad_norm": 0.66796875, "learning_rate": 5.0466712040199015e-06, "loss": 0.7388, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5136 }, { "epoch": 0.6985314114767474, "grad_norm": 0.6328125, "learning_rate": 5.042547368079776e-06, "loss": 0.4928, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5137 }, { "epoch": 0.698667391895567, "grad_norm": 0.451171875, "learning_rate": 5.038424649610087e-06, "loss": 0.9425, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5138 }, { "epoch": 0.6988033723143867, "grad_norm": 0.74609375, "learning_rate": 5.03430304954014e-06, "loss": 0.7562, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5139 }, { "epoch": 0.6989393527332064, "grad_norm": 0.54296875, "learning_rate": 5.030182568799e-06, "loss": 0.5664, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5140 }, { "epoch": 0.6990753331520261, "grad_norm": 0.37890625, "learning_rate": 5.026063208315467e-06, "loss": 0.6245, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5141 }, { "epoch": 0.6992113135708458, "grad_norm": 0.41015625, "learning_rate": 5.021944969018102e-06, "loss": 0.777, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5142 }, { "epoch": 0.6993472939896654, "grad_norm": 0.48046875, "learning_rate": 5.017827851835199e-06, "loss": 0.8659, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5143 }, { "epoch": 0.6994832744084851, "grad_norm": 0.33984375, "learning_rate": 5.0137118576948144e-06, "loss": 0.5138, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5144 }, { "epoch": 0.6996192548273049, "grad_norm": 0.31640625, "learning_rate": 5.0095969875247365e-06, "loss": 0.5037, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5145 }, { "epoch": 0.6997552352461246, "grad_norm": 0.69921875, "learning_rate": 5.005483242252514e-06, "loss": 0.7915, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5146 }, { "epoch": 0.6998912156649443, "grad_norm": 0.384765625, "learning_rate": 5.001370622805428e-06, "loss": 0.5338, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5147 }, { "epoch": 0.700027196083764, "grad_norm": 0.330078125, "learning_rate": 4.997259130110522e-06, "loss": 0.5308, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5148 }, { "epoch": 0.7001631765025836, "grad_norm": 0.2734375, "learning_rate": 4.993148765094567e-06, "loss": 0.4515, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5149 }, { "epoch": 0.7002991569214033, "grad_norm": 0.52734375, "learning_rate": 4.9890395286841e-06, "loss": 0.3709, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5150 }, { "epoch": 0.700435137340223, "grad_norm": 0.58984375, "learning_rate": 4.9849314218053825e-06, "loss": 0.7752, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5151 }, { "epoch": 0.7005711177590427, "grad_norm": 0.3125, "learning_rate": 4.9808244453844414e-06, "loss": 0.6435, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5152 }, { "epoch": 0.7007070981778624, "grad_norm": 0.30078125, "learning_rate": 4.976718600347031e-06, "loss": 0.5239, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5153 }, { "epoch": 0.700843078596682, "grad_norm": 0.435546875, "learning_rate": 4.972613887618667e-06, "loss": 0.5441, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5154 }, { "epoch": 0.7009790590155017, "grad_norm": 0.44921875, "learning_rate": 4.968510308124595e-06, "loss": 0.5904, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5155 }, { "epoch": 0.7011150394343214, "grad_norm": 0.419921875, "learning_rate": 4.964407862789817e-06, "loss": 0.786, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5156 }, { "epoch": 0.7012510198531412, "grad_norm": 0.5, "learning_rate": 4.96030655253907e-06, "loss": 0.755, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5157 }, { "epoch": 0.7013870002719609, "grad_norm": 0.5, "learning_rate": 4.95620637829684e-06, "loss": 0.7388, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5158 }, { "epoch": 0.7015229806907806, "grad_norm": 0.3203125, "learning_rate": 4.952107340987365e-06, "loss": 0.7178, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5159 }, { "epoch": 0.7016589611096002, "grad_norm": 1.296875, "learning_rate": 4.9480094415346046e-06, "loss": 0.7847, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5160 }, { "epoch": 0.7017949415284199, "grad_norm": 0.390625, "learning_rate": 4.943912680862288e-06, "loss": 0.603, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5161 }, { "epoch": 0.7019309219472396, "grad_norm": 0.69140625, "learning_rate": 4.939817059893864e-06, "loss": 0.9312, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5162 }, { "epoch": 0.7020669023660593, "grad_norm": 0.55078125, "learning_rate": 4.935722579552546e-06, "loss": 0.4702, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5163 }, { "epoch": 0.702202882784879, "grad_norm": 0.45703125, "learning_rate": 4.93162924076127e-06, "loss": 0.6466, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5164 }, { "epoch": 0.7023388632036986, "grad_norm": 0.287109375, "learning_rate": 4.927537044442733e-06, "loss": 0.6139, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5165 }, { "epoch": 0.7024748436225183, "grad_norm": 0.3828125, "learning_rate": 4.923445991519359e-06, "loss": 0.6888, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5166 }, { "epoch": 0.702610824041338, "grad_norm": 0.625, "learning_rate": 4.919356082913329e-06, "loss": 0.8398, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5167 }, { "epoch": 0.7027468044601577, "grad_norm": 0.349609375, "learning_rate": 4.9152673195465505e-06, "loss": 0.6501, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5168 }, { "epoch": 0.7028827848789774, "grad_norm": 0.3671875, "learning_rate": 4.9111797023406884e-06, "loss": 0.5659, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5169 }, { "epoch": 0.7030187652977972, "grad_norm": 0.17578125, "learning_rate": 4.907093232217133e-06, "loss": 0.2573, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5170 }, { "epoch": 0.7031547457166168, "grad_norm": 0.40234375, "learning_rate": 4.903007910097034e-06, "loss": 0.7516, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5171 }, { "epoch": 0.7032907261354365, "grad_norm": 0.322265625, "learning_rate": 4.898923736901265e-06, "loss": 0.422, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5172 }, { "epoch": 0.7034267065542562, "grad_norm": 0.447265625, "learning_rate": 4.894840713550454e-06, "loss": 0.5671, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5173 }, { "epoch": 0.7035626869730759, "grad_norm": 0.3984375, "learning_rate": 4.89075884096496e-06, "loss": 0.4658, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5174 }, { "epoch": 0.7036986673918956, "grad_norm": 0.37109375, "learning_rate": 4.886678120064892e-06, "loss": 0.7269, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5175 }, { "epoch": 0.7038346478107153, "grad_norm": 0.41796875, "learning_rate": 4.882598551770088e-06, "loss": 0.6862, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5176 }, { "epoch": 0.7039706282295349, "grad_norm": 0.3203125, "learning_rate": 4.87852013700014e-06, "loss": 0.6364, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5177 }, { "epoch": 0.7041066086483546, "grad_norm": 0.41796875, "learning_rate": 4.874442876674366e-06, "loss": 0.641, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5178 }, { "epoch": 0.7042425890671743, "grad_norm": 0.36328125, "learning_rate": 4.870366771711835e-06, "loss": 0.5745, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5179 }, { "epoch": 0.704378569485994, "grad_norm": 0.302734375, "learning_rate": 4.866291823031346e-06, "loss": 0.5724, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5180 }, { "epoch": 0.7045145499048137, "grad_norm": 0.4140625, "learning_rate": 4.86221803155145e-06, "loss": 0.5892, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5181 }, { "epoch": 0.7046505303236335, "grad_norm": 0.404296875, "learning_rate": 4.8581453981904205e-06, "loss": 0.6937, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5182 }, { "epoch": 0.7047865107424531, "grad_norm": 0.41796875, "learning_rate": 4.854073923866286e-06, "loss": 0.7376, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5183 }, { "epoch": 0.7049224911612728, "grad_norm": 0.310546875, "learning_rate": 4.850003609496799e-06, "loss": 0.556, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5184 }, { "epoch": 0.7050584715800925, "grad_norm": 0.275390625, "learning_rate": 4.845934455999468e-06, "loss": 0.4772, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5185 }, { "epoch": 0.7051944519989122, "grad_norm": 0.359375, "learning_rate": 4.8418664642915204e-06, "loss": 0.5563, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5186 }, { "epoch": 0.7053304324177319, "grad_norm": 0.466796875, "learning_rate": 4.8377996352899395e-06, "loss": 0.7257, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5187 }, { "epoch": 0.7054664128365515, "grad_norm": 0.306640625, "learning_rate": 4.83373396991143e-06, "loss": 0.7137, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5188 }, { "epoch": 0.7056023932553712, "grad_norm": 0.412109375, "learning_rate": 4.829669469072451e-06, "loss": 0.6331, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5189 }, { "epoch": 0.7057383736741909, "grad_norm": 0.404296875, "learning_rate": 4.8256061336891826e-06, "loss": 0.7585, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5190 }, { "epoch": 0.7058743540930106, "grad_norm": 0.6328125, "learning_rate": 4.821543964677559e-06, "loss": 0.756, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5191 }, { "epoch": 0.7060103345118303, "grad_norm": 0.3984375, "learning_rate": 4.817482962953233e-06, "loss": 0.6143, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5192 }, { "epoch": 0.7061463149306499, "grad_norm": 0.421875, "learning_rate": 4.813423129431612e-06, "loss": 0.5776, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5193 }, { "epoch": 0.7062822953494696, "grad_norm": 0.2216796875, "learning_rate": 4.809364465027827e-06, "loss": 0.4152, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5194 }, { "epoch": 0.7064182757682894, "grad_norm": 0.3125, "learning_rate": 4.8053069706567555e-06, "loss": 0.5508, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5195 }, { "epoch": 0.7065542561871091, "grad_norm": 0.208984375, "learning_rate": 4.801250647233001e-06, "loss": 0.4479, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5196 }, { "epoch": 0.7066902366059288, "grad_norm": 0.70703125, "learning_rate": 4.7971954956709135e-06, "loss": 0.5257, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5197 }, { "epoch": 0.7068262170247485, "grad_norm": 0.7890625, "learning_rate": 4.793141516884568e-06, "loss": 0.6929, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5198 }, { "epoch": 0.7069621974435681, "grad_norm": 0.5390625, "learning_rate": 4.789088711787787e-06, "loss": 0.8446, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5199 }, { "epoch": 0.7070981778623878, "grad_norm": 0.66015625, "learning_rate": 4.785037081294117e-06, "loss": 0.569, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5200 }, { "epoch": 0.7072341582812075, "grad_norm": 0.380859375, "learning_rate": 4.7809866263168505e-06, "loss": 0.6652, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5201 }, { "epoch": 0.7073701387000272, "grad_norm": 0.58984375, "learning_rate": 4.776937347769003e-06, "loss": 0.5265, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5202 }, { "epoch": 0.7075061191188469, "grad_norm": 0.349609375, "learning_rate": 4.772889246563338e-06, "loss": 0.6831, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5203 }, { "epoch": 0.7076420995376665, "grad_norm": 0.2890625, "learning_rate": 4.768842323612341e-06, "loss": 0.3706, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5204 }, { "epoch": 0.7077780799564862, "grad_norm": 0.58203125, "learning_rate": 4.764796579828243e-06, "loss": 0.527, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5205 }, { "epoch": 0.7079140603753059, "grad_norm": 0.2490234375, "learning_rate": 4.760752016122999e-06, "loss": 0.4281, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5206 }, { "epoch": 0.7080500407941257, "grad_norm": 0.359375, "learning_rate": 4.7567086334083105e-06, "loss": 0.598, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5207 }, { "epoch": 0.7081860212129454, "grad_norm": 0.58984375, "learning_rate": 4.752666432595596e-06, "loss": 0.6916, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5208 }, { "epoch": 0.7083220016317651, "grad_norm": 0.31640625, "learning_rate": 4.748625414596026e-06, "loss": 0.5736, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5209 }, { "epoch": 0.7084579820505847, "grad_norm": 0.5, "learning_rate": 4.7445855803204885e-06, "loss": 0.6287, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5210 }, { "epoch": 0.7085939624694044, "grad_norm": 0.41015625, "learning_rate": 4.740546930679617e-06, "loss": 0.4574, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5211 }, { "epoch": 0.7087299428882241, "grad_norm": 0.2431640625, "learning_rate": 4.736509466583767e-06, "loss": 0.5228, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5212 }, { "epoch": 0.7088659233070438, "grad_norm": 0.3203125, "learning_rate": 4.732473188943039e-06, "loss": 0.5985, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5213 }, { "epoch": 0.7090019037258635, "grad_norm": 1.2890625, "learning_rate": 4.7284380986672505e-06, "loss": 0.9307, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5214 }, { "epoch": 0.7091378841446832, "grad_norm": 0.3515625, "learning_rate": 4.724404196665969e-06, "loss": 0.7979, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5215 }, { "epoch": 0.7092738645635028, "grad_norm": 0.5234375, "learning_rate": 4.720371483848482e-06, "loss": 0.6182, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5216 }, { "epoch": 0.7094098449823225, "grad_norm": 0.6875, "learning_rate": 4.716339961123807e-06, "loss": 0.9754, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5217 }, { "epoch": 0.7095458254011422, "grad_norm": 0.392578125, "learning_rate": 4.712309629400704e-06, "loss": 0.7925, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5218 }, { "epoch": 0.7096818058199619, "grad_norm": 0.62890625, "learning_rate": 4.708280489587659e-06, "loss": 0.557, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5219 }, { "epoch": 0.7098177862387817, "grad_norm": 0.419921875, "learning_rate": 4.704252542592891e-06, "loss": 0.8234, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5220 }, { "epoch": 0.7099537666576013, "grad_norm": 0.62890625, "learning_rate": 4.700225789324343e-06, "loss": 0.5928, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5221 }, { "epoch": 0.710089747076421, "grad_norm": 0.2578125, "learning_rate": 4.696200230689701e-06, "loss": 0.3905, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5222 }, { "epoch": 0.7102257274952407, "grad_norm": 0.404296875, "learning_rate": 4.692175867596368e-06, "loss": 0.8926, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5223 }, { "epoch": 0.7103617079140604, "grad_norm": 0.33203125, "learning_rate": 4.688152700951493e-06, "loss": 0.5231, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5224 }, { "epoch": 0.7104976883328801, "grad_norm": 0.390625, "learning_rate": 4.684130731661938e-06, "loss": 0.6123, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5225 }, { "epoch": 0.7106336687516998, "grad_norm": 0.53515625, "learning_rate": 4.680109960634312e-06, "loss": 0.6784, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5226 }, { "epoch": 0.7107696491705194, "grad_norm": 0.408203125, "learning_rate": 4.676090388774937e-06, "loss": 0.7253, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5227 }, { "epoch": 0.7109056295893391, "grad_norm": 0.5546875, "learning_rate": 4.672072016989885e-06, "loss": 0.4137, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5228 }, { "epoch": 0.7110416100081588, "grad_norm": 0.2890625, "learning_rate": 4.668054846184935e-06, "loss": 0.4157, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5229 }, { "epoch": 0.7111775904269785, "grad_norm": 0.48046875, "learning_rate": 4.664038877265615e-06, "loss": 0.7458, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5230 }, { "epoch": 0.7113135708457982, "grad_norm": 0.43359375, "learning_rate": 4.660024111137167e-06, "loss": 0.4987, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5231 }, { "epoch": 0.7114495512646178, "grad_norm": 0.392578125, "learning_rate": 4.656010548704574e-06, "loss": 0.653, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5232 }, { "epoch": 0.7115855316834376, "grad_norm": 0.625, "learning_rate": 4.651998190872537e-06, "loss": 0.5653, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5233 }, { "epoch": 0.7117215121022573, "grad_norm": 0.310546875, "learning_rate": 4.647987038545496e-06, "loss": 0.6663, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5234 }, { "epoch": 0.711857492521077, "grad_norm": 0.3984375, "learning_rate": 4.643977092627608e-06, "loss": 0.7498, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5235 }, { "epoch": 0.7119934729398967, "grad_norm": 0.353515625, "learning_rate": 4.639968354022768e-06, "loss": 0.6367, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5236 }, { "epoch": 0.7121294533587164, "grad_norm": 0.388671875, "learning_rate": 4.635960823634591e-06, "loss": 0.7674, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5237 }, { "epoch": 0.712265433777536, "grad_norm": 0.640625, "learning_rate": 4.631954502366428e-06, "loss": 0.6532, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5238 }, { "epoch": 0.7124014141963557, "grad_norm": 0.380859375, "learning_rate": 4.627949391121348e-06, "loss": 0.6772, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5239 }, { "epoch": 0.7125373946151754, "grad_norm": 0.34765625, "learning_rate": 4.6239454908021575e-06, "loss": 0.6605, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5240 }, { "epoch": 0.7126733750339951, "grad_norm": 0.35546875, "learning_rate": 4.619942802311378e-06, "loss": 0.6347, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5241 }, { "epoch": 0.7128093554528148, "grad_norm": 0.431640625, "learning_rate": 4.6159413265512706e-06, "loss": 0.6299, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5242 }, { "epoch": 0.7129453358716344, "grad_norm": 0.69140625, "learning_rate": 4.611941064423812e-06, "loss": 0.7446, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5243 }, { "epoch": 0.7130813162904541, "grad_norm": 0.337890625, "learning_rate": 4.607942016830714e-06, "loss": 0.619, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5244 }, { "epoch": 0.7132172967092739, "grad_norm": 0.37890625, "learning_rate": 4.603944184673407e-06, "loss": 0.7594, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5245 }, { "epoch": 0.7133532771280936, "grad_norm": 0.74609375, "learning_rate": 4.5999475688530574e-06, "loss": 0.7212, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5246 }, { "epoch": 0.7134892575469133, "grad_norm": 0.251953125, "learning_rate": 4.595952170270542e-06, "loss": 0.4507, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5247 }, { "epoch": 0.713625237965733, "grad_norm": 0.376953125, "learning_rate": 4.5919579898264824e-06, "loss": 0.7181, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5248 }, { "epoch": 0.7137612183845526, "grad_norm": 0.474609375, "learning_rate": 4.587965028421207e-06, "loss": 0.423, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5249 }, { "epoch": 0.7138971988033723, "grad_norm": 0.423828125, "learning_rate": 4.583973286954786e-06, "loss": 0.6488, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5250 }, { "epoch": 0.714033179222192, "grad_norm": 0.53515625, "learning_rate": 4.579982766326999e-06, "loss": 0.7887, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5251 }, { "epoch": 0.7141691596410117, "grad_norm": 0.40234375, "learning_rate": 4.575993467437366e-06, "loss": 0.6325, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5252 }, { "epoch": 0.7143051400598314, "grad_norm": 0.392578125, "learning_rate": 4.572005391185117e-06, "loss": 0.7729, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5253 }, { "epoch": 0.714441120478651, "grad_norm": 0.64453125, "learning_rate": 4.568018538469219e-06, "loss": 0.6729, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5254 }, { "epoch": 0.7145771008974707, "grad_norm": 0.4765625, "learning_rate": 4.56403291018835e-06, "loss": 0.6719, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5255 }, { "epoch": 0.7147130813162904, "grad_norm": 0.609375, "learning_rate": 4.560048507240927e-06, "loss": 0.699, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5256 }, { "epoch": 0.7148490617351101, "grad_norm": 0.4140625, "learning_rate": 4.556065330525077e-06, "loss": 0.6289, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5257 }, { "epoch": 0.7149850421539299, "grad_norm": 0.625, "learning_rate": 4.552083380938662e-06, "loss": 0.9701, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5258 }, { "epoch": 0.7151210225727496, "grad_norm": 0.310546875, "learning_rate": 4.5481026593792574e-06, "loss": 0.619, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5259 }, { "epoch": 0.7152570029915692, "grad_norm": 0.435546875, "learning_rate": 4.5441231667441724e-06, "loss": 0.7148, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5260 }, { "epoch": 0.7153929834103889, "grad_norm": 0.345703125, "learning_rate": 4.540144903930425e-06, "loss": 0.6442, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5261 }, { "epoch": 0.7155289638292086, "grad_norm": 0.33984375, "learning_rate": 4.536167871834772e-06, "loss": 0.6252, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5262 }, { "epoch": 0.7156649442480283, "grad_norm": 0.4140625, "learning_rate": 4.53219207135368e-06, "loss": 0.6502, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5263 }, { "epoch": 0.715800924666848, "grad_norm": 0.29296875, "learning_rate": 4.5282175033833465e-06, "loss": 0.4564, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5264 }, { "epoch": 0.7159369050856677, "grad_norm": 0.330078125, "learning_rate": 4.524244168819684e-06, "loss": 0.616, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5265 }, { "epoch": 0.7160728855044873, "grad_norm": 0.30078125, "learning_rate": 4.520272068558337e-06, "loss": 0.5177, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5266 }, { "epoch": 0.716208865923307, "grad_norm": 0.392578125, "learning_rate": 4.516301203494656e-06, "loss": 0.6444, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5267 }, { "epoch": 0.7163448463421267, "grad_norm": 0.2890625, "learning_rate": 4.512331574523733e-06, "loss": 0.5422, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5268 }, { "epoch": 0.7164808267609464, "grad_norm": 0.73828125, "learning_rate": 4.508363182540361e-06, "loss": 0.7051, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5269 }, { "epoch": 0.7166168071797662, "grad_norm": 0.39453125, "learning_rate": 4.504396028439073e-06, "loss": 0.7228, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5270 }, { "epoch": 0.7167527875985858, "grad_norm": 0.341796875, "learning_rate": 4.50043011311411e-06, "loss": 0.6126, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5271 }, { "epoch": 0.7168887680174055, "grad_norm": 0.490234375, "learning_rate": 4.496465437459434e-06, "loss": 0.6735, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5272 }, { "epoch": 0.7170247484362252, "grad_norm": 0.35546875, "learning_rate": 4.492502002368738e-06, "loss": 0.6628, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5273 }, { "epoch": 0.7171607288550449, "grad_norm": 0.451171875, "learning_rate": 4.4885398087354235e-06, "loss": 0.8301, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5274 }, { "epoch": 0.7172967092738646, "grad_norm": 0.234375, "learning_rate": 4.484578857452623e-06, "loss": 0.4027, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5275 }, { "epoch": 0.7174326896926843, "grad_norm": 0.28125, "learning_rate": 4.480619149413176e-06, "loss": 0.5674, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5276 }, { "epoch": 0.7175686701115039, "grad_norm": 0.26171875, "learning_rate": 4.476660685509656e-06, "loss": 0.4883, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5277 }, { "epoch": 0.7177046505303236, "grad_norm": 0.59375, "learning_rate": 4.47270346663434e-06, "loss": 0.7471, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5278 }, { "epoch": 0.7178406309491433, "grad_norm": 0.341796875, "learning_rate": 4.468747493679246e-06, "loss": 0.7183, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5279 }, { "epoch": 0.717976611367963, "grad_norm": 0.369140625, "learning_rate": 4.46479276753609e-06, "loss": 0.6341, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5280 }, { "epoch": 0.7181125917867827, "grad_norm": 0.361328125, "learning_rate": 4.4608392890963214e-06, "loss": 0.7144, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5281 }, { "epoch": 0.7182485722056023, "grad_norm": 0.416015625, "learning_rate": 4.456887059251096e-06, "loss": 0.7407, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5282 }, { "epoch": 0.7183845526244221, "grad_norm": 0.38671875, "learning_rate": 4.452936078891301e-06, "loss": 0.7352, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5283 }, { "epoch": 0.7185205330432418, "grad_norm": 0.345703125, "learning_rate": 4.448986348907529e-06, "loss": 0.6074, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5284 }, { "epoch": 0.7186565134620615, "grad_norm": 0.37109375, "learning_rate": 4.445037870190106e-06, "loss": 0.7324, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5285 }, { "epoch": 0.7187924938808812, "grad_norm": 0.29296875, "learning_rate": 4.441090643629057e-06, "loss": 0.59, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5286 }, { "epoch": 0.7189284742997009, "grad_norm": 0.4140625, "learning_rate": 4.437144670114145e-06, "loss": 0.7249, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5287 }, { "epoch": 0.7190644547185205, "grad_norm": 0.328125, "learning_rate": 4.433199950534832e-06, "loss": 0.7008, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5288 }, { "epoch": 0.7192004351373402, "grad_norm": 0.271484375, "learning_rate": 4.429256485780314e-06, "loss": 0.5081, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5289 }, { "epoch": 0.7193364155561599, "grad_norm": 0.53125, "learning_rate": 4.42531427673949e-06, "loss": 0.7943, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5290 }, { "epoch": 0.7194723959749796, "grad_norm": 0.33203125, "learning_rate": 4.421373324300988e-06, "loss": 0.5036, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5291 }, { "epoch": 0.7196083763937993, "grad_norm": 0.51171875, "learning_rate": 4.417433629353139e-06, "loss": 0.6842, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5292 }, { "epoch": 0.719744356812619, "grad_norm": 0.392578125, "learning_rate": 4.4134951927840074e-06, "loss": 0.6481, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5293 }, { "epoch": 0.7198803372314386, "grad_norm": 0.291015625, "learning_rate": 4.409558015481355e-06, "loss": 0.5246, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5294 }, { "epoch": 0.7200163176502583, "grad_norm": 0.462890625, "learning_rate": 4.40562209833268e-06, "loss": 0.5299, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5295 }, { "epoch": 0.7201522980690781, "grad_norm": 0.5078125, "learning_rate": 4.401687442225179e-06, "loss": 0.7679, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5296 }, { "epoch": 0.7202882784878978, "grad_norm": 0.70703125, "learning_rate": 4.397754048045776e-06, "loss": 0.6572, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5297 }, { "epoch": 0.7204242589067175, "grad_norm": 0.47265625, "learning_rate": 4.3938219166811005e-06, "loss": 0.7358, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5298 }, { "epoch": 0.7205602393255371, "grad_norm": 0.546875, "learning_rate": 4.389891049017511e-06, "loss": 0.8188, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5299 }, { "epoch": 0.7206962197443568, "grad_norm": 0.466796875, "learning_rate": 4.385961445941064e-06, "loss": 0.8861, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5300 }, { "epoch": 0.7208322001631765, "grad_norm": 0.3125, "learning_rate": 4.3820331083375485e-06, "loss": 0.6405, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5301 }, { "epoch": 0.7209681805819962, "grad_norm": 0.267578125, "learning_rate": 4.378106037092452e-06, "loss": 0.377, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5302 }, { "epoch": 0.7211041610008159, "grad_norm": 0.61328125, "learning_rate": 4.3741802330909935e-06, "loss": 0.7637, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5303 }, { "epoch": 0.7212401414196355, "grad_norm": 0.431640625, "learning_rate": 4.370255697218087e-06, "loss": 0.6582, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5304 }, { "epoch": 0.7213761218384552, "grad_norm": 0.4765625, "learning_rate": 4.366332430358382e-06, "loss": 0.5967, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5305 }, { "epoch": 0.7215121022572749, "grad_norm": 0.57421875, "learning_rate": 4.362410433396219e-06, "loss": 0.6159, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5306 }, { "epoch": 0.7216480826760946, "grad_norm": 0.68359375, "learning_rate": 4.358489707215675e-06, "loss": 0.7979, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5307 }, { "epoch": 0.7217840630949144, "grad_norm": 0.412109375, "learning_rate": 4.35457025270052e-06, "loss": 0.6572, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5308 }, { "epoch": 0.7219200435137341, "grad_norm": 0.369140625, "learning_rate": 4.350652070734256e-06, "loss": 0.7539, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5309 }, { "epoch": 0.7220560239325537, "grad_norm": 0.423828125, "learning_rate": 4.34673516220008e-06, "loss": 0.5902, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5310 }, { "epoch": 0.7221920043513734, "grad_norm": 0.35546875, "learning_rate": 4.34281952798092e-06, "loss": 0.6377, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5311 }, { "epoch": 0.7223279847701931, "grad_norm": 0.44140625, "learning_rate": 4.3389051689594e-06, "loss": 0.4766, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5312 }, { "epoch": 0.7224639651890128, "grad_norm": 0.546875, "learning_rate": 4.334992086017871e-06, "loss": 0.6086, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5313 }, { "epoch": 0.7225999456078325, "grad_norm": 0.33984375, "learning_rate": 4.331080280038384e-06, "loss": 0.7114, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5314 }, { "epoch": 0.7227359260266522, "grad_norm": 0.375, "learning_rate": 4.3271697519027126e-06, "loss": 0.7528, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5315 }, { "epoch": 0.7228719064454718, "grad_norm": 0.57421875, "learning_rate": 4.323260502492333e-06, "loss": 0.5316, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5316 }, { "epoch": 0.7230078868642915, "grad_norm": 0.412109375, "learning_rate": 4.319352532688444e-06, "loss": 0.6211, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5317 }, { "epoch": 0.7231438672831112, "grad_norm": 0.197265625, "learning_rate": 4.315445843371941e-06, "loss": 0.3877, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5318 }, { "epoch": 0.7232798477019309, "grad_norm": 0.412109375, "learning_rate": 4.31154043542345e-06, "loss": 0.6192, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5319 }, { "epoch": 0.7234158281207506, "grad_norm": 0.4140625, "learning_rate": 4.307636309723289e-06, "loss": 0.7134, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5320 }, { "epoch": 0.7235518085395704, "grad_norm": 0.373046875, "learning_rate": 4.303733467151503e-06, "loss": 0.6086, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5321 }, { "epoch": 0.72368778895839, "grad_norm": 0.46484375, "learning_rate": 4.2998319085878316e-06, "loss": 0.6836, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5322 }, { "epoch": 0.7238237693772097, "grad_norm": 0.2734375, "learning_rate": 4.295931634911743e-06, "loss": 0.5868, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5323 }, { "epoch": 0.7239597497960294, "grad_norm": 0.57421875, "learning_rate": 4.292032647002402e-06, "loss": 0.7301, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5324 }, { "epoch": 0.7240957302148491, "grad_norm": 0.58984375, "learning_rate": 4.288134945738684e-06, "loss": 0.7196, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5325 }, { "epoch": 0.7242317106336688, "grad_norm": 0.3671875, "learning_rate": 4.2842385319991886e-06, "loss": 0.418, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5326 }, { "epoch": 0.7243676910524884, "grad_norm": 0.2275390625, "learning_rate": 4.280343406662205e-06, "loss": 0.4302, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5327 }, { "epoch": 0.7245036714713081, "grad_norm": 0.2890625, "learning_rate": 4.27644957060575e-06, "loss": 0.5356, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5328 }, { "epoch": 0.7246396518901278, "grad_norm": 0.3515625, "learning_rate": 4.272557024707535e-06, "loss": 0.7111, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5329 }, { "epoch": 0.7247756323089475, "grad_norm": 0.296875, "learning_rate": 4.2686657698449926e-06, "loss": 0.5716, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5330 }, { "epoch": 0.7249116127277672, "grad_norm": 0.390625, "learning_rate": 4.264775806895256e-06, "loss": 0.7386, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5331 }, { "epoch": 0.7250475931465868, "grad_norm": 0.458984375, "learning_rate": 4.260887136735175e-06, "loss": 0.6528, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5332 }, { "epoch": 0.7251835735654066, "grad_norm": 0.283203125, "learning_rate": 4.256999760241296e-06, "loss": 0.4813, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5333 }, { "epoch": 0.7253195539842263, "grad_norm": 0.482421875, "learning_rate": 4.253113678289889e-06, "loss": 0.6444, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5334 }, { "epoch": 0.725455534403046, "grad_norm": 0.470703125, "learning_rate": 4.249228891756918e-06, "loss": 0.8034, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5335 }, { "epoch": 0.7255915148218657, "grad_norm": 0.369140625, "learning_rate": 4.245345401518067e-06, "loss": 0.6937, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5336 }, { "epoch": 0.7257274952406854, "grad_norm": 0.494140625, "learning_rate": 4.241463208448716e-06, "loss": 0.6136, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5337 }, { "epoch": 0.725863475659505, "grad_norm": 0.376953125, "learning_rate": 4.2375823134239624e-06, "loss": 0.6089, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5338 }, { "epoch": 0.7259994560783247, "grad_norm": 0.279296875, "learning_rate": 4.2337027173186074e-06, "loss": 0.5793, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5339 }, { "epoch": 0.7261354364971444, "grad_norm": 0.6171875, "learning_rate": 4.229824421007162e-06, "loss": 0.7464, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5340 }, { "epoch": 0.7262714169159641, "grad_norm": 0.796875, "learning_rate": 4.225947425363835e-06, "loss": 0.666, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5341 }, { "epoch": 0.7264073973347838, "grad_norm": 0.33984375, "learning_rate": 4.222071731262557e-06, "loss": 0.6889, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5342 }, { "epoch": 0.7265433777536034, "grad_norm": 0.5390625, "learning_rate": 4.218197339576948e-06, "loss": 0.8545, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5343 }, { "epoch": 0.7266793581724231, "grad_norm": 0.359375, "learning_rate": 4.214324251180351e-06, "loss": 0.784, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5344 }, { "epoch": 0.7268153385912428, "grad_norm": 0.2431640625, "learning_rate": 4.2104524669458e-06, "loss": 0.4022, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5345 }, { "epoch": 0.7269513190100626, "grad_norm": 0.359375, "learning_rate": 4.20658198774605e-06, "loss": 0.7036, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5346 }, { "epoch": 0.7270872994288823, "grad_norm": 0.58984375, "learning_rate": 4.202712814453548e-06, "loss": 0.6644, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5347 }, { "epoch": 0.727223279847702, "grad_norm": 0.265625, "learning_rate": 4.198844947940459e-06, "loss": 0.5073, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5348 }, { "epoch": 0.7273592602665216, "grad_norm": 0.625, "learning_rate": 4.19497838907864e-06, "loss": 0.8436, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5349 }, { "epoch": 0.7274952406853413, "grad_norm": 0.31640625, "learning_rate": 4.1911131387396686e-06, "loss": 0.4425, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5350 }, { "epoch": 0.727631221104161, "grad_norm": 0.33984375, "learning_rate": 4.1872491977948125e-06, "loss": 0.6043, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5351 }, { "epoch": 0.7277672015229807, "grad_norm": 0.35546875, "learning_rate": 4.183386567115057e-06, "loss": 0.5301, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5352 }, { "epoch": 0.7279031819418004, "grad_norm": 0.45703125, "learning_rate": 4.179525247571082e-06, "loss": 0.7186, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5353 }, { "epoch": 0.72803916236062, "grad_norm": 0.404296875, "learning_rate": 4.175665240033279e-06, "loss": 0.6721, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5354 }, { "epoch": 0.7281751427794397, "grad_norm": 0.29296875, "learning_rate": 4.171806545371737e-06, "loss": 0.4909, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5355 }, { "epoch": 0.7283111231982594, "grad_norm": 0.486328125, "learning_rate": 4.16794916445626e-06, "loss": 0.4673, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5356 }, { "epoch": 0.7284471036170791, "grad_norm": 0.333984375, "learning_rate": 4.16409309815634e-06, "loss": 0.6413, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5357 }, { "epoch": 0.7285830840358988, "grad_norm": 0.296875, "learning_rate": 4.160238347341189e-06, "loss": 0.6235, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5358 }, { "epoch": 0.7287190644547186, "grad_norm": 0.341796875, "learning_rate": 4.15638491287971e-06, "loss": 0.6385, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5359 }, { "epoch": 0.7288550448735382, "grad_norm": 0.412109375, "learning_rate": 4.152532795640518e-06, "loss": 0.6286, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5360 }, { "epoch": 0.7289910252923579, "grad_norm": 0.33203125, "learning_rate": 4.148681996491923e-06, "loss": 0.5851, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5361 }, { "epoch": 0.7291270057111776, "grad_norm": 0.38671875, "learning_rate": 4.144832516301947e-06, "loss": 0.7114, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5362 }, { "epoch": 0.7292629861299973, "grad_norm": 0.349609375, "learning_rate": 4.140984355938306e-06, "loss": 0.6696, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5363 }, { "epoch": 0.729398966548817, "grad_norm": 0.400390625, "learning_rate": 4.1371375162684255e-06, "loss": 0.7324, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5364 }, { "epoch": 0.7295349469676367, "grad_norm": 0.369140625, "learning_rate": 4.133291998159427e-06, "loss": 0.6517, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5365 }, { "epoch": 0.7296709273864563, "grad_norm": 0.38671875, "learning_rate": 4.129447802478142e-06, "loss": 0.6829, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5366 }, { "epoch": 0.729806907805276, "grad_norm": 0.2392578125, "learning_rate": 4.1256049300910914e-06, "loss": 0.361, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5367 }, { "epoch": 0.7299428882240957, "grad_norm": 0.33984375, "learning_rate": 4.121763381864514e-06, "loss": 0.5924, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5368 }, { "epoch": 0.7300788686429154, "grad_norm": 0.353515625, "learning_rate": 4.117923158664335e-06, "loss": 0.7096, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5369 }, { "epoch": 0.7302148490617351, "grad_norm": 0.263671875, "learning_rate": 4.114084261356195e-06, "loss": 0.5617, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5370 }, { "epoch": 0.7303508294805549, "grad_norm": 0.380859375, "learning_rate": 4.1102466908054206e-06, "loss": 0.7288, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5371 }, { "epoch": 0.7304868098993745, "grad_norm": 0.328125, "learning_rate": 4.106410447877054e-06, "loss": 0.7127, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5372 }, { "epoch": 0.7306227903181942, "grad_norm": 0.640625, "learning_rate": 4.102575533435824e-06, "loss": 0.6089, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5373 }, { "epoch": 0.7307587707370139, "grad_norm": 0.60546875, "learning_rate": 4.098741948346175e-06, "loss": 0.4271, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5374 }, { "epoch": 0.7308947511558336, "grad_norm": 0.423828125, "learning_rate": 4.094909693472237e-06, "loss": 0.6429, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5375 }, { "epoch": 0.7310307315746533, "grad_norm": 0.71484375, "learning_rate": 4.091078769677852e-06, "loss": 0.7116, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5376 }, { "epoch": 0.7311667119934729, "grad_norm": 0.5546875, "learning_rate": 4.087249177826553e-06, "loss": 0.742, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5377 }, { "epoch": 0.7313026924122926, "grad_norm": 0.36328125, "learning_rate": 4.083420918781583e-06, "loss": 0.6488, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5378 }, { "epoch": 0.7314386728311123, "grad_norm": 0.349609375, "learning_rate": 4.079593993405874e-06, "loss": 0.6315, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5379 }, { "epoch": 0.731574653249932, "grad_norm": 0.375, "learning_rate": 4.075768402562059e-06, "loss": 0.6126, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5380 }, { "epoch": 0.7317106336687517, "grad_norm": 0.255859375, "learning_rate": 4.0719441471124784e-06, "loss": 0.4806, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5381 }, { "epoch": 0.7318466140875713, "grad_norm": 0.62109375, "learning_rate": 4.068121227919162e-06, "loss": 0.8241, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5382 }, { "epoch": 0.731982594506391, "grad_norm": 0.4296875, "learning_rate": 4.064299645843848e-06, "loss": 0.7689, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5383 }, { "epoch": 0.7321185749252108, "grad_norm": 1.2734375, "learning_rate": 4.06047940174796e-06, "loss": 0.8557, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5384 }, { "epoch": 0.7322545553440305, "grad_norm": 0.345703125, "learning_rate": 4.056660496492635e-06, "loss": 0.7357, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5385 }, { "epoch": 0.7323905357628502, "grad_norm": 0.404296875, "learning_rate": 4.0528429309386955e-06, "loss": 0.4222, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5386 }, { "epoch": 0.7325265161816699, "grad_norm": 0.490234375, "learning_rate": 4.0490267059466714e-06, "loss": 0.8638, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5387 }, { "epoch": 0.7326624966004895, "grad_norm": 0.67578125, "learning_rate": 4.045211822376784e-06, "loss": 0.7264, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5388 }, { "epoch": 0.7327984770193092, "grad_norm": 0.314453125, "learning_rate": 4.041398281088956e-06, "loss": 0.6037, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5389 }, { "epoch": 0.7329344574381289, "grad_norm": 0.5546875, "learning_rate": 4.037586082942805e-06, "loss": 0.7954, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5390 }, { "epoch": 0.7330704378569486, "grad_norm": 0.44140625, "learning_rate": 4.03377522879765e-06, "loss": 0.6485, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5391 }, { "epoch": 0.7332064182757683, "grad_norm": 0.6796875, "learning_rate": 4.029965719512498e-06, "loss": 0.8582, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5392 }, { "epoch": 0.733342398694588, "grad_norm": 0.2578125, "learning_rate": 4.026157555946067e-06, "loss": 0.5422, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5393 }, { "epoch": 0.7334783791134076, "grad_norm": 0.3515625, "learning_rate": 4.022350738956756e-06, "loss": 0.6264, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5394 }, { "epoch": 0.7336143595322273, "grad_norm": 0.333984375, "learning_rate": 4.0185452694026764e-06, "loss": 0.4697, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5395 }, { "epoch": 0.7337503399510471, "grad_norm": 0.416015625, "learning_rate": 4.014741148141618e-06, "loss": 0.7368, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5396 }, { "epoch": 0.7338863203698668, "grad_norm": 0.3359375, "learning_rate": 4.010938376031085e-06, "loss": 0.6084, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5397 }, { "epoch": 0.7340223007886865, "grad_norm": 0.4296875, "learning_rate": 4.0071369539282565e-06, "loss": 0.6891, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5398 }, { "epoch": 0.7341582812075061, "grad_norm": 0.76171875, "learning_rate": 4.003336882690036e-06, "loss": 0.5326, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5399 }, { "epoch": 0.7342942616263258, "grad_norm": 0.4375, "learning_rate": 3.999538163172995e-06, "loss": 0.6893, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5400 }, { "epoch": 0.7344302420451455, "grad_norm": 0.328125, "learning_rate": 3.995740796233416e-06, "loss": 0.6805, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5401 }, { "epoch": 0.7345662224639652, "grad_norm": 0.54296875, "learning_rate": 3.991944782727268e-06, "loss": 0.5806, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5402 }, { "epoch": 0.7347022028827849, "grad_norm": 0.296875, "learning_rate": 3.988150123510224e-06, "loss": 0.5369, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5403 }, { "epoch": 0.7348381833016046, "grad_norm": 0.408203125, "learning_rate": 3.98435681943764e-06, "loss": 0.7695, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5404 }, { "epoch": 0.7349741637204242, "grad_norm": 0.5703125, "learning_rate": 3.98056487136458e-06, "loss": 0.6998, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5405 }, { "epoch": 0.7351101441392439, "grad_norm": 0.29296875, "learning_rate": 3.976774280145788e-06, "loss": 0.5653, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5406 }, { "epoch": 0.7352461245580636, "grad_norm": 0.5546875, "learning_rate": 3.972985046635718e-06, "loss": 0.6774, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5407 }, { "epoch": 0.7353821049768833, "grad_norm": 0.76171875, "learning_rate": 3.9691971716885015e-06, "loss": 0.855, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5408 }, { "epoch": 0.7355180853957031, "grad_norm": 0.57421875, "learning_rate": 3.96541065615798e-06, "loss": 0.5515, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5409 }, { "epoch": 0.7356540658145228, "grad_norm": 0.376953125, "learning_rate": 3.961625500897671e-06, "loss": 0.5907, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5410 }, { "epoch": 0.7357900462333424, "grad_norm": 1.84375, "learning_rate": 3.957841706760804e-06, "loss": 0.8464, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5411 }, { "epoch": 0.7359260266521621, "grad_norm": 0.443359375, "learning_rate": 3.954059274600285e-06, "loss": 0.7702, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5412 }, { "epoch": 0.7360620070709818, "grad_norm": 0.64453125, "learning_rate": 3.950278205268726e-06, "loss": 0.5182, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5413 }, { "epoch": 0.7361979874898015, "grad_norm": 0.310546875, "learning_rate": 3.9464984996184204e-06, "loss": 0.6452, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5414 }, { "epoch": 0.7363339679086212, "grad_norm": 0.474609375, "learning_rate": 3.942720158501369e-06, "loss": 0.7687, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5415 }, { "epoch": 0.7364699483274408, "grad_norm": 1.1953125, "learning_rate": 3.9389431827692455e-06, "loss": 0.7656, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5416 }, { "epoch": 0.7366059287462605, "grad_norm": 0.52734375, "learning_rate": 3.935167573273436e-06, "loss": 0.5579, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5417 }, { "epoch": 0.7367419091650802, "grad_norm": 0.68359375, "learning_rate": 3.931393330865e-06, "loss": 0.9152, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5418 }, { "epoch": 0.7368778895838999, "grad_norm": 0.34375, "learning_rate": 3.927620456394707e-06, "loss": 0.5529, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5419 }, { "epoch": 0.7370138700027196, "grad_norm": 0.400390625, "learning_rate": 3.9238489507130005e-06, "loss": 0.8148, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5420 }, { "epoch": 0.7371498504215392, "grad_norm": 0.310546875, "learning_rate": 3.920078814670032e-06, "loss": 0.5422, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5421 }, { "epoch": 0.737285830840359, "grad_norm": 0.333984375, "learning_rate": 3.916310049115628e-06, "loss": 0.5899, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5422 }, { "epoch": 0.7374218112591787, "grad_norm": 0.337890625, "learning_rate": 3.912542654899323e-06, "loss": 0.5975, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5423 }, { "epoch": 0.7375577916779984, "grad_norm": 0.3359375, "learning_rate": 3.908776632870324e-06, "loss": 0.5941, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5424 }, { "epoch": 0.7376937720968181, "grad_norm": 0.72265625, "learning_rate": 3.9050119838775505e-06, "loss": 0.8716, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5425 }, { "epoch": 0.7378297525156378, "grad_norm": 0.333984375, "learning_rate": 3.901248708769587e-06, "loss": 0.6296, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5426 }, { "epoch": 0.7379657329344574, "grad_norm": 0.306640625, "learning_rate": 3.897486808394733e-06, "loss": 0.5251, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5427 }, { "epoch": 0.7381017133532771, "grad_norm": 0.38671875, "learning_rate": 3.893726283600959e-06, "loss": 0.6629, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5428 }, { "epoch": 0.7382376937720968, "grad_norm": 0.56640625, "learning_rate": 3.88996713523594e-06, "loss": 0.6063, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5429 }, { "epoch": 0.7383736741909165, "grad_norm": 0.55859375, "learning_rate": 3.886209364147027e-06, "loss": 0.8604, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5430 }, { "epoch": 0.7385096546097362, "grad_norm": 0.2177734375, "learning_rate": 3.882452971181274e-06, "loss": 0.3597, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5431 }, { "epoch": 0.7386456350285558, "grad_norm": 0.51953125, "learning_rate": 3.8786979571854154e-06, "loss": 0.8896, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5432 }, { "epoch": 0.7387816154473755, "grad_norm": 0.265625, "learning_rate": 3.874944323005873e-06, "loss": 0.4513, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5433 }, { "epoch": 0.7389175958661953, "grad_norm": 0.47265625, "learning_rate": 3.87119206948877e-06, "loss": 0.7697, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5434 }, { "epoch": 0.739053576285015, "grad_norm": 0.330078125, "learning_rate": 3.867441197479901e-06, "loss": 0.577, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5435 }, { "epoch": 0.7391895567038347, "grad_norm": 0.35546875, "learning_rate": 3.8636917078247675e-06, "loss": 0.5163, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5436 }, { "epoch": 0.7393255371226544, "grad_norm": 0.39453125, "learning_rate": 3.859943601368542e-06, "loss": 0.7633, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5437 }, { "epoch": 0.739461517541474, "grad_norm": 0.326171875, "learning_rate": 3.856196878956101e-06, "loss": 0.716, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5438 }, { "epoch": 0.7395974979602937, "grad_norm": 0.33203125, "learning_rate": 3.8524515414319955e-06, "loss": 0.2051, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5439 }, { "epoch": 0.7397334783791134, "grad_norm": 0.33203125, "learning_rate": 3.848707589640476e-06, "loss": 0.5706, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5440 }, { "epoch": 0.7398694587979331, "grad_norm": 0.365234375, "learning_rate": 3.844965024425468e-06, "loss": 0.5975, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5441 }, { "epoch": 0.7400054392167528, "grad_norm": 0.396484375, "learning_rate": 3.841223846630599e-06, "loss": 0.7401, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5442 }, { "epoch": 0.7401414196355725, "grad_norm": 0.62890625, "learning_rate": 3.83748405709917e-06, "loss": 0.6221, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5443 }, { "epoch": 0.7402774000543921, "grad_norm": 0.283203125, "learning_rate": 3.833745656674181e-06, "loss": 0.5179, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5444 }, { "epoch": 0.7404133804732118, "grad_norm": 0.349609375, "learning_rate": 3.8300086461983065e-06, "loss": 0.5918, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5445 }, { "epoch": 0.7405493608920315, "grad_norm": 0.4921875, "learning_rate": 3.8262730265139215e-06, "loss": 0.8346, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5446 }, { "epoch": 0.7406853413108513, "grad_norm": 0.310546875, "learning_rate": 3.8225387984630735e-06, "loss": 0.6553, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5447 }, { "epoch": 0.740821321729671, "grad_norm": 0.79296875, "learning_rate": 3.818805962887509e-06, "loss": 0.8037, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5448 }, { "epoch": 0.7409573021484906, "grad_norm": 0.39453125, "learning_rate": 3.81507452062865e-06, "loss": 0.6489, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5449 }, { "epoch": 0.7410932825673103, "grad_norm": 0.380859375, "learning_rate": 3.811344472527615e-06, "loss": 0.7347, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5450 }, { "epoch": 0.74122926298613, "grad_norm": 0.3203125, "learning_rate": 3.8076158194251945e-06, "loss": 0.5809, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5451 }, { "epoch": 0.7413652434049497, "grad_norm": 0.4921875, "learning_rate": 3.80388856216188e-06, "loss": 0.6299, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5452 }, { "epoch": 0.7415012238237694, "grad_norm": 0.57421875, "learning_rate": 3.8001627015778355e-06, "loss": 0.5461, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5453 }, { "epoch": 0.741637204242589, "grad_norm": 0.357421875, "learning_rate": 3.7964382385129206e-06, "loss": 0.6929, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5454 }, { "epoch": 0.7417731846614087, "grad_norm": 0.326171875, "learning_rate": 3.7927151738066693e-06, "loss": 0.6032, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5455 }, { "epoch": 0.7419091650802284, "grad_norm": 0.318359375, "learning_rate": 3.788993508298311e-06, "loss": 0.6536, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5456 }, { "epoch": 0.7420451454990481, "grad_norm": 0.2451171875, "learning_rate": 3.785273242826749e-06, "loss": 0.4406, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5457 }, { "epoch": 0.7421811259178678, "grad_norm": 0.5, "learning_rate": 3.781554378230584e-06, "loss": 0.5356, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5458 }, { "epoch": 0.7423171063366876, "grad_norm": 0.375, "learning_rate": 3.777836915348081e-06, "loss": 0.7279, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5459 }, { "epoch": 0.7424530867555073, "grad_norm": 0.39453125, "learning_rate": 3.774120855017218e-06, "loss": 0.6598, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5460 }, { "epoch": 0.7425890671743269, "grad_norm": 1.0859375, "learning_rate": 3.7704061980756292e-06, "loss": 0.7542, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5461 }, { "epoch": 0.7427250475931466, "grad_norm": 0.5, "learning_rate": 3.7666929453606493e-06, "loss": 0.8577, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5462 }, { "epoch": 0.7428610280119663, "grad_norm": 0.3046875, "learning_rate": 3.7629810977092852e-06, "loss": 0.6107, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5463 }, { "epoch": 0.742997008430786, "grad_norm": 0.32421875, "learning_rate": 3.7592706559582404e-06, "loss": 0.4645, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5464 }, { "epoch": 0.7431329888496057, "grad_norm": 0.259765625, "learning_rate": 3.755561620943885e-06, "loss": 0.584, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5465 }, { "epoch": 0.7432689692684253, "grad_norm": 0.41015625, "learning_rate": 3.751853993502288e-06, "loss": 0.6927, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5466 }, { "epoch": 0.743404949687245, "grad_norm": 0.42578125, "learning_rate": 3.7481477744691885e-06, "loss": 0.681, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5467 }, { "epoch": 0.7435409301060647, "grad_norm": 0.388671875, "learning_rate": 3.7444429646800206e-06, "loss": 0.7829, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5468 }, { "epoch": 0.7436769105248844, "grad_norm": 0.494140625, "learning_rate": 3.7407395649698853e-06, "loss": 0.7357, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5469 }, { "epoch": 0.7438128909437041, "grad_norm": 0.68359375, "learning_rate": 3.737037576173581e-06, "loss": 0.7357, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5470 }, { "epoch": 0.7439488713625237, "grad_norm": 0.4296875, "learning_rate": 3.7333369991255764e-06, "loss": 0.5083, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5471 }, { "epoch": 0.7440848517813435, "grad_norm": 0.341796875, "learning_rate": 3.7296378346600314e-06, "loss": 0.6305, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5472 }, { "epoch": 0.7442208322001632, "grad_norm": 0.376953125, "learning_rate": 3.7259400836107773e-06, "loss": 0.7152, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5473 }, { "epoch": 0.7443568126189829, "grad_norm": 0.7734375, "learning_rate": 3.7222437468113383e-06, "loss": 0.5312, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5474 }, { "epoch": 0.7444927930378026, "grad_norm": 0.38671875, "learning_rate": 3.718548825094909e-06, "loss": 0.6437, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5475 }, { "epoch": 0.7446287734566223, "grad_norm": 0.36328125, "learning_rate": 3.7148553192943736e-06, "loss": 0.6021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5476 }, { "epoch": 0.7447647538754419, "grad_norm": 0.64453125, "learning_rate": 3.7111632302422908e-06, "loss": 0.707, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5477 }, { "epoch": 0.7449007342942616, "grad_norm": 0.482421875, "learning_rate": 3.707472558770905e-06, "loss": 0.7726, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5478 }, { "epoch": 0.7450367147130813, "grad_norm": 0.40234375, "learning_rate": 3.7037833057121352e-06, "loss": 0.7607, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5479 }, { "epoch": 0.745172695131901, "grad_norm": 0.384765625, "learning_rate": 3.700095471897589e-06, "loss": 0.5781, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5480 }, { "epoch": 0.7453086755507207, "grad_norm": 2.46875, "learning_rate": 3.696409058158544e-06, "loss": 0.5653, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5481 }, { "epoch": 0.7454446559695403, "grad_norm": 0.373046875, "learning_rate": 3.692724065325969e-06, "loss": 0.6263, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5482 }, { "epoch": 0.74558063638836, "grad_norm": 0.353515625, "learning_rate": 3.689040494230499e-06, "loss": 0.7371, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5483 }, { "epoch": 0.7457166168071797, "grad_norm": 0.314453125, "learning_rate": 3.6853583457024634e-06, "loss": 0.6194, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5484 }, { "epoch": 0.7458525972259995, "grad_norm": 0.31640625, "learning_rate": 3.6816776205718565e-06, "loss": 0.6673, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5485 }, { "epoch": 0.7459885776448192, "grad_norm": 0.4453125, "learning_rate": 3.6779983196683655e-06, "loss": 0.7835, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5486 }, { "epoch": 0.7461245580636389, "grad_norm": 0.47265625, "learning_rate": 3.6743204438213465e-06, "loss": 0.7171, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5487 }, { "epoch": 0.7462605384824585, "grad_norm": 0.36328125, "learning_rate": 3.670643993859835e-06, "loss": 0.6942, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5488 }, { "epoch": 0.7463965189012782, "grad_norm": 0.376953125, "learning_rate": 3.666968970612553e-06, "loss": 0.6797, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5489 }, { "epoch": 0.7465324993200979, "grad_norm": 0.29296875, "learning_rate": 3.6632953749078902e-06, "loss": 0.5819, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5490 }, { "epoch": 0.7466684797389176, "grad_norm": 0.396484375, "learning_rate": 3.6596232075739267e-06, "loss": 0.3796, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5491 }, { "epoch": 0.7468044601577373, "grad_norm": 0.32421875, "learning_rate": 3.655952469438406e-06, "loss": 0.5365, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5492 }, { "epoch": 0.746940440576557, "grad_norm": 0.375, "learning_rate": 3.6522831613287656e-06, "loss": 0.6333, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5493 }, { "epoch": 0.7470764209953766, "grad_norm": 0.400390625, "learning_rate": 3.6486152840721046e-06, "loss": 0.7819, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5494 }, { "epoch": 0.7472124014141963, "grad_norm": 0.208984375, "learning_rate": 3.644948838495215e-06, "loss": 0.3569, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5495 }, { "epoch": 0.747348381833016, "grad_norm": 0.451171875, "learning_rate": 3.6412838254245497e-06, "loss": 0.7992, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5496 }, { "epoch": 0.7474843622518358, "grad_norm": 0.353515625, "learning_rate": 3.6376202456862574e-06, "loss": 0.716, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5497 }, { "epoch": 0.7476203426706555, "grad_norm": 0.328125, "learning_rate": 3.633958100106144e-06, "loss": 0.4699, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5498 }, { "epoch": 0.7477563230894752, "grad_norm": 0.267578125, "learning_rate": 3.6302973895097118e-06, "loss": 0.5552, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5499 }, { "epoch": 0.7478923035082948, "grad_norm": 0.357421875, "learning_rate": 3.62663811472212e-06, "loss": 0.7726, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5500 }, { "epoch": 0.7480282839271145, "grad_norm": 0.44140625, "learning_rate": 3.622980276568221e-06, "loss": 0.5396, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5501 }, { "epoch": 0.7481642643459342, "grad_norm": 0.376953125, "learning_rate": 3.6193238758725313e-06, "loss": 0.6915, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5502 }, { "epoch": 0.7483002447647539, "grad_norm": 0.4609375, "learning_rate": 3.6156689134592525e-06, "loss": 0.6755, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5503 }, { "epoch": 0.7484362251835736, "grad_norm": 0.38671875, "learning_rate": 3.6120153901522546e-06, "loss": 0.6652, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5504 }, { "epoch": 0.7485722056023932, "grad_norm": 0.3515625, "learning_rate": 3.60836330677509e-06, "loss": 0.5819, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5505 }, { "epoch": 0.7487081860212129, "grad_norm": 0.37890625, "learning_rate": 3.604712664150978e-06, "loss": 0.8797, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5506 }, { "epoch": 0.7488441664400326, "grad_norm": 0.33984375, "learning_rate": 3.601063463102823e-06, "loss": 0.6618, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5507 }, { "epoch": 0.7489801468588523, "grad_norm": 0.421875, "learning_rate": 3.5974157044531955e-06, "loss": 0.6213, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5508 }, { "epoch": 0.749116127277672, "grad_norm": 0.91015625, "learning_rate": 3.5937693890243484e-06, "loss": 0.9089, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5509 }, { "epoch": 0.7492521076964918, "grad_norm": 0.33203125, "learning_rate": 3.5901245176382017e-06, "loss": 0.6948, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5510 }, { "epoch": 0.7493880881153114, "grad_norm": 0.287109375, "learning_rate": 3.5864810911163595e-06, "loss": 0.6688, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5511 }, { "epoch": 0.7495240685341311, "grad_norm": 0.515625, "learning_rate": 3.5828391102800884e-06, "loss": 0.7594, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5512 }, { "epoch": 0.7496600489529508, "grad_norm": 0.435546875, "learning_rate": 3.579198575950342e-06, "loss": 0.6461, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5513 }, { "epoch": 0.7497960293717705, "grad_norm": 0.306640625, "learning_rate": 3.5755594889477343e-06, "loss": 0.4653, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5514 }, { "epoch": 0.7499320097905902, "grad_norm": 0.361328125, "learning_rate": 3.5719218500925666e-06, "loss": 0.5422, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5515 }, { "epoch": 0.7500679902094098, "grad_norm": 0.447265625, "learning_rate": 3.568285660204801e-06, "loss": 0.5425, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5516 }, { "epoch": 0.7502039706282295, "grad_norm": 0.314453125, "learning_rate": 3.5646509201040857e-06, "loss": 0.5446, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5517 }, { "epoch": 0.7503399510470492, "grad_norm": 0.54296875, "learning_rate": 3.5610176306097287e-06, "loss": 0.7201, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5518 }, { "epoch": 0.7504759314658689, "grad_norm": 0.298828125, "learning_rate": 3.557385792540722e-06, "loss": 0.5544, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5519 }, { "epoch": 0.7506119118846886, "grad_norm": 0.51171875, "learning_rate": 3.553755406715724e-06, "loss": 0.6252, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5520 }, { "epoch": 0.7507478923035082, "grad_norm": 0.90625, "learning_rate": 3.5501264739530737e-06, "loss": 0.7417, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5521 }, { "epoch": 0.750883872722328, "grad_norm": 0.41796875, "learning_rate": 3.5464989950707697e-06, "loss": 0.7619, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5522 }, { "epoch": 0.7510198531411477, "grad_norm": 0.4765625, "learning_rate": 3.542872970886496e-06, "loss": 0.8555, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5523 }, { "epoch": 0.7511558335599674, "grad_norm": 0.388671875, "learning_rate": 3.5392484022175976e-06, "loss": 0.7041, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5524 }, { "epoch": 0.7512918139787871, "grad_norm": 0.373046875, "learning_rate": 3.5356252898811017e-06, "loss": 0.6478, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5525 }, { "epoch": 0.7514277943976068, "grad_norm": 0.408203125, "learning_rate": 3.532003634693697e-06, "loss": 0.639, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5526 }, { "epoch": 0.7515637748164264, "grad_norm": 0.412109375, "learning_rate": 3.528383437471753e-06, "loss": 0.8182, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5527 }, { "epoch": 0.7516997552352461, "grad_norm": 0.5234375, "learning_rate": 3.524764699031302e-06, "loss": 0.7868, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5528 }, { "epoch": 0.7518357356540658, "grad_norm": 0.466796875, "learning_rate": 3.5211474201880592e-06, "loss": 0.7897, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5529 }, { "epoch": 0.7519717160728855, "grad_norm": 0.47265625, "learning_rate": 3.5175316017573935e-06, "loss": 0.8553, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5530 }, { "epoch": 0.7521076964917052, "grad_norm": 0.3359375, "learning_rate": 3.5139172445543647e-06, "loss": 0.6426, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5531 }, { "epoch": 0.7522436769105248, "grad_norm": 0.38671875, "learning_rate": 3.510304349393685e-06, "loss": 0.6909, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5532 }, { "epoch": 0.7523796573293445, "grad_norm": 0.333984375, "learning_rate": 3.506692917089751e-06, "loss": 0.7199, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5533 }, { "epoch": 0.7525156377481642, "grad_norm": 0.408203125, "learning_rate": 3.5030829484566185e-06, "loss": 0.7902, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5534 }, { "epoch": 0.752651618166984, "grad_norm": 0.5078125, "learning_rate": 3.499474444308024e-06, "loss": 0.8104, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5535 }, { "epoch": 0.7527875985858037, "grad_norm": 0.384765625, "learning_rate": 3.4958674054573627e-06, "loss": 0.7474, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5536 }, { "epoch": 0.7529235790046234, "grad_norm": 0.453125, "learning_rate": 3.4922618327177114e-06, "loss": 0.7002, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5537 }, { "epoch": 0.753059559423443, "grad_norm": 0.373046875, "learning_rate": 3.488657726901804e-06, "loss": 0.5827, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5538 }, { "epoch": 0.7531955398422627, "grad_norm": 0.400390625, "learning_rate": 3.4850550888220557e-06, "loss": 0.8133, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5539 }, { "epoch": 0.7533315202610824, "grad_norm": 0.330078125, "learning_rate": 3.481453919290544e-06, "loss": 0.7036, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5540 }, { "epoch": 0.7534675006799021, "grad_norm": 0.318359375, "learning_rate": 3.4778542191190113e-06, "loss": 0.6055, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5541 }, { "epoch": 0.7536034810987218, "grad_norm": 0.5234375, "learning_rate": 3.474255989118881e-06, "loss": 0.7645, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5542 }, { "epoch": 0.7537394615175415, "grad_norm": 0.5, "learning_rate": 3.470659230101231e-06, "loss": 0.4549, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5543 }, { "epoch": 0.7538754419363611, "grad_norm": 0.37890625, "learning_rate": 3.4670639428768236e-06, "loss": 0.7559, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5544 }, { "epoch": 0.7540114223551808, "grad_norm": 0.474609375, "learning_rate": 3.463470128256071e-06, "loss": 0.8524, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5545 }, { "epoch": 0.7541474027740005, "grad_norm": 1.1328125, "learning_rate": 3.4598777870490717e-06, "loss": 0.8836, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5546 }, { "epoch": 0.7542833831928203, "grad_norm": 0.3203125, "learning_rate": 3.456286920065576e-06, "loss": 0.7334, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5547 }, { "epoch": 0.75441936361164, "grad_norm": 0.3359375, "learning_rate": 3.4526975281150156e-06, "loss": 0.5895, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5548 }, { "epoch": 0.7545553440304597, "grad_norm": 0.396484375, "learning_rate": 3.449109612006477e-06, "loss": 0.6916, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5549 }, { "epoch": 0.7546913244492793, "grad_norm": 0.392578125, "learning_rate": 3.445523172548727e-06, "loss": 0.7757, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5550 }, { "epoch": 0.754827304868099, "grad_norm": 0.365234375, "learning_rate": 3.441938210550185e-06, "loss": 0.6138, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5551 }, { "epoch": 0.7549632852869187, "grad_norm": 0.36328125, "learning_rate": 3.4383547268189545e-06, "loss": 0.7617, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5552 }, { "epoch": 0.7550992657057384, "grad_norm": 0.53515625, "learning_rate": 3.434772722162788e-06, "loss": 0.5605, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5553 }, { "epoch": 0.7552352461245581, "grad_norm": 0.3125, "learning_rate": 3.4311921973891196e-06, "loss": 0.5798, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5554 }, { "epoch": 0.7553712265433777, "grad_norm": 0.83984375, "learning_rate": 3.4276131533050384e-06, "loss": 0.7681, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5555 }, { "epoch": 0.7555072069621974, "grad_norm": 0.72265625, "learning_rate": 3.4240355907173093e-06, "loss": 0.6242, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5556 }, { "epoch": 0.7556431873810171, "grad_norm": 0.3359375, "learning_rate": 3.4204595104323536e-06, "loss": 0.5791, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5557 }, { "epoch": 0.7557791677998368, "grad_norm": 0.87890625, "learning_rate": 3.41688491325627e-06, "loss": 0.6359, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5558 }, { "epoch": 0.7559151482186565, "grad_norm": 0.29296875, "learning_rate": 3.4133117999948086e-06, "loss": 0.5604, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5559 }, { "epoch": 0.7560511286374763, "grad_norm": 0.4921875, "learning_rate": 3.4097401714533997e-06, "loss": 0.7552, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5560 }, { "epoch": 0.7561871090562959, "grad_norm": 0.5078125, "learning_rate": 3.406170028437127e-06, "loss": 0.6213, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5561 }, { "epoch": 0.7563230894751156, "grad_norm": 1.296875, "learning_rate": 3.402601371750749e-06, "loss": 0.8143, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5562 }, { "epoch": 0.7564590698939353, "grad_norm": 0.51171875, "learning_rate": 3.399034202198679e-06, "loss": 0.5695, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5563 }, { "epoch": 0.756595050312755, "grad_norm": 0.28515625, "learning_rate": 3.3954685205850068e-06, "loss": 0.5363, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5564 }, { "epoch": 0.7567310307315747, "grad_norm": 0.376953125, "learning_rate": 3.3919043277134755e-06, "loss": 0.6007, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5565 }, { "epoch": 0.7568670111503943, "grad_norm": 0.33203125, "learning_rate": 3.3883416243875024e-06, "loss": 0.5163, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5566 }, { "epoch": 0.757002991569214, "grad_norm": 0.625, "learning_rate": 3.384780411410158e-06, "loss": 0.6991, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5567 }, { "epoch": 0.7571389719880337, "grad_norm": 0.498046875, "learning_rate": 3.3812206895841913e-06, "loss": 0.7601, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5568 }, { "epoch": 0.7572749524068534, "grad_norm": 0.32421875, "learning_rate": 3.377662459711999e-06, "loss": 0.4448, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5569 }, { "epoch": 0.7574109328256731, "grad_norm": 0.322265625, "learning_rate": 3.3741057225956575e-06, "loss": 0.5671, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5570 }, { "epoch": 0.7575469132444927, "grad_norm": 0.369140625, "learning_rate": 3.3705504790368904e-06, "loss": 0.4701, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5571 }, { "epoch": 0.7576828936633124, "grad_norm": 0.41015625, "learning_rate": 3.366996729837102e-06, "loss": 0.6362, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5572 }, { "epoch": 0.7578188740821322, "grad_norm": 0.421875, "learning_rate": 3.3634444757973417e-06, "loss": 0.769, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5573 }, { "epoch": 0.7579548545009519, "grad_norm": 0.318359375, "learning_rate": 3.35989371771834e-06, "loss": 0.5356, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5574 }, { "epoch": 0.7580908349197716, "grad_norm": 0.42578125, "learning_rate": 3.3563444564004732e-06, "loss": 0.8384, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5575 }, { "epoch": 0.7582268153385913, "grad_norm": 0.3203125, "learning_rate": 3.3527966926437948e-06, "loss": 0.5674, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5576 }, { "epoch": 0.758362795757411, "grad_norm": 0.578125, "learning_rate": 3.349250427248009e-06, "loss": 0.5675, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5577 }, { "epoch": 0.7584987761762306, "grad_norm": 0.55078125, "learning_rate": 3.3457056610124905e-06, "loss": 0.7074, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5578 }, { "epoch": 0.7586347565950503, "grad_norm": 0.498046875, "learning_rate": 3.342162394736267e-06, "loss": 0.5355, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5579 }, { "epoch": 0.75877073701387, "grad_norm": 0.373046875, "learning_rate": 3.3386206292180456e-06, "loss": 0.638, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5580 }, { "epoch": 0.7589067174326897, "grad_norm": 0.345703125, "learning_rate": 3.3350803652561736e-06, "loss": 0.589, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5581 }, { "epoch": 0.7590426978515094, "grad_norm": 0.330078125, "learning_rate": 3.3315416036486756e-06, "loss": 0.5972, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5582 }, { "epoch": 0.759178678270329, "grad_norm": 0.376953125, "learning_rate": 3.3280043451932266e-06, "loss": 0.7703, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5583 }, { "epoch": 0.7593146586891487, "grad_norm": 0.46875, "learning_rate": 3.324468590687173e-06, "loss": 0.4474, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5584 }, { "epoch": 0.7594506391079685, "grad_norm": 0.376953125, "learning_rate": 3.320934340927513e-06, "loss": 0.6444, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5585 }, { "epoch": 0.7595866195267882, "grad_norm": 0.33984375, "learning_rate": 3.317401596710913e-06, "loss": 0.556, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5586 }, { "epoch": 0.7597225999456079, "grad_norm": 0.58984375, "learning_rate": 3.313870358833693e-06, "loss": 0.735, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5587 }, { "epoch": 0.7598585803644275, "grad_norm": 0.55078125, "learning_rate": 3.3103406280918415e-06, "loss": 0.6017, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5588 }, { "epoch": 0.7599945607832472, "grad_norm": 0.373046875, "learning_rate": 3.306812405280998e-06, "loss": 0.645, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5589 }, { "epoch": 0.7601305412020669, "grad_norm": 0.421875, "learning_rate": 3.3032856911964728e-06, "loss": 0.5542, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5590 }, { "epoch": 0.7602665216208866, "grad_norm": 0.4375, "learning_rate": 3.2997604866332246e-06, "loss": 0.783, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5591 }, { "epoch": 0.7604025020397063, "grad_norm": 0.3203125, "learning_rate": 3.296236792385884e-06, "loss": 0.4398, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5592 }, { "epoch": 0.760538482458526, "grad_norm": 0.51953125, "learning_rate": 3.292714609248727e-06, "loss": 0.7453, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5593 }, { "epoch": 0.7606744628773456, "grad_norm": 0.392578125, "learning_rate": 3.2891939380157044e-06, "loss": 0.599, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5594 }, { "epoch": 0.7608104432961653, "grad_norm": 0.28515625, "learning_rate": 3.2856747794804145e-06, "loss": 0.5477, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5595 }, { "epoch": 0.760946423714985, "grad_norm": 0.34375, "learning_rate": 3.2821571344361168e-06, "loss": 0.5496, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5596 }, { "epoch": 0.7610824041338047, "grad_norm": 0.37890625, "learning_rate": 3.2786410036757366e-06, "loss": 0.7559, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5597 }, { "epoch": 0.7612183845526245, "grad_norm": 0.37109375, "learning_rate": 3.275126387991847e-06, "loss": 0.6372, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5598 }, { "epoch": 0.7613543649714442, "grad_norm": 0.86328125, "learning_rate": 3.2716132881766916e-06, "loss": 0.7466, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5599 }, { "epoch": 0.7614903453902638, "grad_norm": 0.283203125, "learning_rate": 3.26810170502216e-06, "loss": 0.501, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5600 }, { "epoch": 0.7616263258090835, "grad_norm": 0.5234375, "learning_rate": 3.2645916393198107e-06, "loss": 0.77, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5601 }, { "epoch": 0.7617623062279032, "grad_norm": 0.330078125, "learning_rate": 3.2610830918608516e-06, "loss": 0.7095, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5602 }, { "epoch": 0.7618982866467229, "grad_norm": 0.283203125, "learning_rate": 3.2575760634361576e-06, "loss": 0.472, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5603 }, { "epoch": 0.7620342670655426, "grad_norm": 0.251953125, "learning_rate": 3.2540705548362485e-06, "loss": 0.4849, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5604 }, { "epoch": 0.7621702474843622, "grad_norm": 0.28515625, "learning_rate": 3.2505665668513154e-06, "loss": 0.5511, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5605 }, { "epoch": 0.7623062279031819, "grad_norm": 0.6484375, "learning_rate": 3.2470641002711945e-06, "loss": 0.7886, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5606 }, { "epoch": 0.7624422083220016, "grad_norm": 0.5234375, "learning_rate": 3.243563155885391e-06, "loss": 0.7405, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5607 }, { "epoch": 0.7625781887408213, "grad_norm": 0.67578125, "learning_rate": 3.240063734483052e-06, "loss": 0.6784, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5608 }, { "epoch": 0.762714169159641, "grad_norm": 0.326171875, "learning_rate": 3.2365658368529997e-06, "loss": 0.621, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5609 }, { "epoch": 0.7628501495784608, "grad_norm": 0.408203125, "learning_rate": 3.2330694637836947e-06, "loss": 0.5835, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5610 }, { "epoch": 0.7629861299972804, "grad_norm": 0.4765625, "learning_rate": 3.229574616063268e-06, "loss": 0.7427, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5611 }, { "epoch": 0.7631221104161001, "grad_norm": 0.2392578125, "learning_rate": 3.226081294479496e-06, "loss": 0.4502, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5612 }, { "epoch": 0.7632580908349198, "grad_norm": 0.49609375, "learning_rate": 3.2225894998198216e-06, "loss": 0.6979, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5613 }, { "epoch": 0.7633940712537395, "grad_norm": 0.412109375, "learning_rate": 3.219099232871332e-06, "loss": 0.799, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5614 }, { "epoch": 0.7635300516725592, "grad_norm": 0.546875, "learning_rate": 3.215610494420782e-06, "loss": 0.6315, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5615 }, { "epoch": 0.7636660320913788, "grad_norm": 0.734375, "learning_rate": 3.2121232852545705e-06, "loss": 0.7673, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5616 }, { "epoch": 0.7638020125101985, "grad_norm": 0.44140625, "learning_rate": 3.2086376061587623e-06, "loss": 0.668, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5617 }, { "epoch": 0.7639379929290182, "grad_norm": 0.484375, "learning_rate": 3.205153457919067e-06, "loss": 0.8623, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5618 }, { "epoch": 0.7640739733478379, "grad_norm": 0.396484375, "learning_rate": 3.2016708413208584e-06, "loss": 0.7611, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5619 }, { "epoch": 0.7642099537666576, "grad_norm": 0.373046875, "learning_rate": 3.198189757149157e-06, "loss": 0.4425, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5620 }, { "epoch": 0.7643459341854772, "grad_norm": 0.390625, "learning_rate": 3.194710206188647e-06, "loss": 0.6629, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5621 }, { "epoch": 0.7644819146042969, "grad_norm": 0.41796875, "learning_rate": 3.1912321892236555e-06, "loss": 0.721, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5622 }, { "epoch": 0.7646178950231167, "grad_norm": 0.5234375, "learning_rate": 3.1877557070381747e-06, "loss": 0.8664, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5623 }, { "epoch": 0.7647538754419364, "grad_norm": 0.47265625, "learning_rate": 3.184280760415843e-06, "loss": 0.757, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5624 }, { "epoch": 0.7648898558607561, "grad_norm": 0.359375, "learning_rate": 3.1808073501399606e-06, "loss": 0.7043, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5625 }, { "epoch": 0.7650258362795758, "grad_norm": 0.45703125, "learning_rate": 3.1773354769934693e-06, "loss": 0.6453, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5626 }, { "epoch": 0.7651618166983954, "grad_norm": 0.423828125, "learning_rate": 3.1738651417589795e-06, "loss": 0.6989, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5627 }, { "epoch": 0.7652977971172151, "grad_norm": 0.50390625, "learning_rate": 3.1703963452187415e-06, "loss": 0.6926, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5628 }, { "epoch": 0.7654337775360348, "grad_norm": 0.384765625, "learning_rate": 3.166929088154669e-06, "loss": 0.708, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5629 }, { "epoch": 0.7655697579548545, "grad_norm": 0.51953125, "learning_rate": 3.1634633713483174e-06, "loss": 0.4666, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5630 }, { "epoch": 0.7657057383736742, "grad_norm": 0.41796875, "learning_rate": 3.1599991955809104e-06, "loss": 0.8374, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5631 }, { "epoch": 0.7658417187924939, "grad_norm": 0.318359375, "learning_rate": 3.1565365616333076e-06, "loss": 0.5594, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5632 }, { "epoch": 0.7659776992113135, "grad_norm": 0.66015625, "learning_rate": 3.1530754702860355e-06, "loss": 0.7629, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5633 }, { "epoch": 0.7661136796301332, "grad_norm": 0.330078125, "learning_rate": 3.14961592231926e-06, "loss": 0.6076, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5634 }, { "epoch": 0.7662496600489529, "grad_norm": 0.5, "learning_rate": 3.1461579185128133e-06, "loss": 0.7892, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5635 }, { "epoch": 0.7663856404677727, "grad_norm": 0.427734375, "learning_rate": 3.1427014596461636e-06, "loss": 0.6882, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5636 }, { "epoch": 0.7665216208865924, "grad_norm": 0.40234375, "learning_rate": 3.1392465464984455e-06, "loss": 0.687, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5637 }, { "epoch": 0.766657601305412, "grad_norm": 0.3125, "learning_rate": 3.135793179848433e-06, "loss": 0.7096, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5638 }, { "epoch": 0.7667935817242317, "grad_norm": 0.388671875, "learning_rate": 3.13234136047456e-06, "loss": 0.5545, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5639 }, { "epoch": 0.7669295621430514, "grad_norm": 0.490234375, "learning_rate": 3.1288910891549096e-06, "loss": 0.8138, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5640 }, { "epoch": 0.7670655425618711, "grad_norm": 0.259765625, "learning_rate": 3.1254423666672175e-06, "loss": 0.4468, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5641 }, { "epoch": 0.7672015229806908, "grad_norm": 0.330078125, "learning_rate": 3.1219951937888606e-06, "loss": 0.6715, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5642 }, { "epoch": 0.7673375033995105, "grad_norm": 0.30859375, "learning_rate": 3.1185495712968827e-06, "loss": 0.6476, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5643 }, { "epoch": 0.7674734838183301, "grad_norm": 0.890625, "learning_rate": 3.11510549996796e-06, "loss": 0.6099, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5644 }, { "epoch": 0.7676094642371498, "grad_norm": 0.2490234375, "learning_rate": 3.1116629805784373e-06, "loss": 0.5335, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5645 }, { "epoch": 0.7677454446559695, "grad_norm": 0.58203125, "learning_rate": 3.108222013904292e-06, "loss": 0.7458, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5646 }, { "epoch": 0.7678814250747892, "grad_norm": 0.2412109375, "learning_rate": 3.104782600721168e-06, "loss": 0.4963, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5647 }, { "epoch": 0.768017405493609, "grad_norm": 0.37109375, "learning_rate": 3.101344741804346e-06, "loss": 0.6055, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5648 }, { "epoch": 0.7681533859124287, "grad_norm": 0.48828125, "learning_rate": 3.0979084379287615e-06, "loss": 0.7975, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5649 }, { "epoch": 0.7682893663312483, "grad_norm": 0.46875, "learning_rate": 3.094473689869002e-06, "loss": 0.7384, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5650 }, { "epoch": 0.768425346750068, "grad_norm": 0.6171875, "learning_rate": 3.091040498399298e-06, "loss": 0.8911, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5651 }, { "epoch": 0.7685613271688877, "grad_norm": 0.353515625, "learning_rate": 3.0876088642935387e-06, "loss": 0.7293, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5652 }, { "epoch": 0.7686973075877074, "grad_norm": 0.515625, "learning_rate": 3.0841787883252494e-06, "loss": 0.5881, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5653 }, { "epoch": 0.7688332880065271, "grad_norm": 0.400390625, "learning_rate": 3.0807502712676174e-06, "loss": 0.521, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5654 }, { "epoch": 0.7689692684253467, "grad_norm": 0.396484375, "learning_rate": 3.077323313893467e-06, "loss": 0.7279, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5655 }, { "epoch": 0.7691052488441664, "grad_norm": 0.62890625, "learning_rate": 3.0738979169752813e-06, "loss": 0.6927, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5656 }, { "epoch": 0.7692412292629861, "grad_norm": 0.53515625, "learning_rate": 3.0704740812851807e-06, "loss": 0.8006, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5657 }, { "epoch": 0.7693772096818058, "grad_norm": 0.6328125, "learning_rate": 3.0670518075949475e-06, "loss": 0.7456, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5658 }, { "epoch": 0.7695131901006255, "grad_norm": 0.66015625, "learning_rate": 3.063631096675996e-06, "loss": 0.7454, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5659 }, { "epoch": 0.7696491705194451, "grad_norm": 0.4765625, "learning_rate": 3.0602119492994033e-06, "loss": 0.8564, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5660 }, { "epoch": 0.7697851509382649, "grad_norm": 0.287109375, "learning_rate": 3.0567943662358803e-06, "loss": 0.5897, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5661 }, { "epoch": 0.7699211313570846, "grad_norm": 0.259765625, "learning_rate": 3.0533783482557977e-06, "loss": 0.5666, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5662 }, { "epoch": 0.7700571117759043, "grad_norm": 0.4921875, "learning_rate": 3.0499638961291623e-06, "loss": 0.6424, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5663 }, { "epoch": 0.770193092194724, "grad_norm": 0.58984375, "learning_rate": 3.0465510106256403e-06, "loss": 0.4779, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5664 }, { "epoch": 0.7703290726135437, "grad_norm": 0.64453125, "learning_rate": 3.0431396925145307e-06, "loss": 0.4636, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5665 }, { "epoch": 0.7704650530323633, "grad_norm": 0.6484375, "learning_rate": 3.039729942564793e-06, "loss": 0.5614, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5666 }, { "epoch": 0.770601033451183, "grad_norm": 0.58203125, "learning_rate": 3.0363217615450192e-06, "loss": 0.8421, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5667 }, { "epoch": 0.7707370138700027, "grad_norm": 0.337890625, "learning_rate": 3.032915150223462e-06, "loss": 0.7935, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5668 }, { "epoch": 0.7708729942888224, "grad_norm": 0.443359375, "learning_rate": 3.0295101093680067e-06, "loss": 0.8989, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5669 }, { "epoch": 0.7710089747076421, "grad_norm": 0.546875, "learning_rate": 3.026106639746199e-06, "loss": 0.6514, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5670 }, { "epoch": 0.7711449551264618, "grad_norm": 0.37890625, "learning_rate": 3.022704742125213e-06, "loss": 0.6491, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5671 }, { "epoch": 0.7712809355452814, "grad_norm": 0.37109375, "learning_rate": 3.0193044172718877e-06, "loss": 0.5848, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5672 }, { "epoch": 0.7714169159641012, "grad_norm": 0.359375, "learning_rate": 3.0159056659526896e-06, "loss": 0.4455, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5673 }, { "epoch": 0.7715528963829209, "grad_norm": 0.29296875, "learning_rate": 3.0125084889337465e-06, "loss": 0.5575, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5674 }, { "epoch": 0.7716888768017406, "grad_norm": 0.314453125, "learning_rate": 3.009112886980815e-06, "loss": 0.5791, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5675 }, { "epoch": 0.7718248572205603, "grad_norm": 0.6640625, "learning_rate": 3.0057188608593146e-06, "loss": 0.5031, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5676 }, { "epoch": 0.77196083763938, "grad_norm": 0.58984375, "learning_rate": 3.002326411334291e-06, "loss": 0.7231, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5677 }, { "epoch": 0.7720968180581996, "grad_norm": 0.298828125, "learning_rate": 2.9989355391704523e-06, "loss": 0.4603, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5678 }, { "epoch": 0.7722327984770193, "grad_norm": 0.62890625, "learning_rate": 2.995546245132136e-06, "loss": 0.7827, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5679 }, { "epoch": 0.772368778895839, "grad_norm": 0.36328125, "learning_rate": 2.9921585299833355e-06, "loss": 0.7913, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5680 }, { "epoch": 0.7725047593146587, "grad_norm": 0.4296875, "learning_rate": 2.9887723944876768e-06, "loss": 0.8836, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5681 }, { "epoch": 0.7726407397334784, "grad_norm": 0.33203125, "learning_rate": 2.9853878394084447e-06, "loss": 0.5866, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5682 }, { "epoch": 0.772776720152298, "grad_norm": 0.390625, "learning_rate": 2.982004865508551e-06, "loss": 0.7806, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5683 }, { "epoch": 0.7729127005711177, "grad_norm": 1.109375, "learning_rate": 2.978623473550567e-06, "loss": 0.7609, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5684 }, { "epoch": 0.7730486809899374, "grad_norm": 0.62109375, "learning_rate": 2.975243664296692e-06, "loss": 0.7811, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5685 }, { "epoch": 0.7731846614087572, "grad_norm": 0.478515625, "learning_rate": 2.971865438508784e-06, "loss": 0.7495, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5686 }, { "epoch": 0.7733206418275769, "grad_norm": 0.28125, "learning_rate": 2.968488796948329e-06, "loss": 0.5267, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5687 }, { "epoch": 0.7734566222463966, "grad_norm": 0.51171875, "learning_rate": 2.9651137403764706e-06, "loss": 0.4517, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5688 }, { "epoch": 0.7735926026652162, "grad_norm": 0.4296875, "learning_rate": 2.961740269553981e-06, "loss": 0.5954, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5689 }, { "epoch": 0.7737285830840359, "grad_norm": 0.375, "learning_rate": 2.9583683852412882e-06, "loss": 0.7729, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5690 }, { "epoch": 0.7738645635028556, "grad_norm": 0.5, "learning_rate": 2.9549980881984496e-06, "loss": 0.4966, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5691 }, { "epoch": 0.7740005439216753, "grad_norm": 0.7109375, "learning_rate": 2.951629379185178e-06, "loss": 0.7479, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5692 }, { "epoch": 0.774136524340495, "grad_norm": 0.341796875, "learning_rate": 2.9482622589608143e-06, "loss": 0.5169, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5693 }, { "epoch": 0.7742725047593146, "grad_norm": 0.46484375, "learning_rate": 2.944896728284358e-06, "loss": 0.7803, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5694 }, { "epoch": 0.7744084851781343, "grad_norm": 0.37890625, "learning_rate": 2.9415327879144294e-06, "loss": 0.5474, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5695 }, { "epoch": 0.774544465596954, "grad_norm": 0.59765625, "learning_rate": 2.9381704386093125e-06, "loss": 0.5592, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5696 }, { "epoch": 0.7746804460157737, "grad_norm": 0.3515625, "learning_rate": 2.934809681126913e-06, "loss": 0.7074, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5697 }, { "epoch": 0.7748164264345934, "grad_norm": 0.33203125, "learning_rate": 2.9314505162247952e-06, "loss": 0.6159, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5698 }, { "epoch": 0.7749524068534132, "grad_norm": 0.5859375, "learning_rate": 2.928092944660146e-06, "loss": 0.6733, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5699 }, { "epoch": 0.7750883872722328, "grad_norm": 0.62890625, "learning_rate": 2.9247369671898142e-06, "loss": 0.6551, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5700 }, { "epoch": 0.7752243676910525, "grad_norm": 0.353515625, "learning_rate": 2.921382584570269e-06, "loss": 0.6885, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5701 }, { "epoch": 0.7753603481098722, "grad_norm": 0.388671875, "learning_rate": 2.9180297975576368e-06, "loss": 0.6873, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5702 }, { "epoch": 0.7754963285286919, "grad_norm": 0.458984375, "learning_rate": 2.914678606907673e-06, "loss": 0.6178, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5703 }, { "epoch": 0.7756323089475116, "grad_norm": 0.365234375, "learning_rate": 2.911329013375773e-06, "loss": 0.6527, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5704 }, { "epoch": 0.7757682893663312, "grad_norm": 0.79296875, "learning_rate": 2.9079810177169833e-06, "loss": 0.887, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5705 }, { "epoch": 0.7759042697851509, "grad_norm": 0.30859375, "learning_rate": 2.9046346206859776e-06, "loss": 0.6868, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5706 }, { "epoch": 0.7760402502039706, "grad_norm": 0.65625, "learning_rate": 2.901289823037079e-06, "loss": 0.6898, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5707 }, { "epoch": 0.7761762306227903, "grad_norm": 0.2353515625, "learning_rate": 2.8979466255242417e-06, "loss": 0.4202, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5708 }, { "epoch": 0.77631221104161, "grad_norm": 0.78125, "learning_rate": 2.8946050289010675e-06, "loss": 0.6745, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5709 }, { "epoch": 0.7764481914604296, "grad_norm": 0.37890625, "learning_rate": 2.891265033920788e-06, "loss": 0.6532, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5710 }, { "epoch": 0.7765841718792494, "grad_norm": 0.40625, "learning_rate": 2.887926641336285e-06, "loss": 0.7323, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5711 }, { "epoch": 0.7767201522980691, "grad_norm": 1.125, "learning_rate": 2.884589851900067e-06, "loss": 0.9619, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5712 }, { "epoch": 0.7768561327168888, "grad_norm": 0.49609375, "learning_rate": 2.8812546663642927e-06, "loss": 0.7539, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5713 }, { "epoch": 0.7769921131357085, "grad_norm": 0.59375, "learning_rate": 2.8779210854807473e-06, "loss": 0.7052, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5714 }, { "epoch": 0.7771280935545282, "grad_norm": 0.7734375, "learning_rate": 2.8745891100008683e-06, "loss": 0.8506, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5715 }, { "epoch": 0.7772640739733478, "grad_norm": 0.421875, "learning_rate": 2.8712587406757165e-06, "loss": 0.5358, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5716 }, { "epoch": 0.7774000543921675, "grad_norm": 0.2294921875, "learning_rate": 2.8679299782560045e-06, "loss": 0.4092, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5717 }, { "epoch": 0.7775360348109872, "grad_norm": 0.40234375, "learning_rate": 2.864602823492069e-06, "loss": 0.6388, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5718 }, { "epoch": 0.7776720152298069, "grad_norm": 0.380859375, "learning_rate": 2.861277277133898e-06, "loss": 0.6688, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5719 }, { "epoch": 0.7778079956486266, "grad_norm": 0.33984375, "learning_rate": 2.857953339931104e-06, "loss": 0.5885, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5720 }, { "epoch": 0.7779439760674463, "grad_norm": 0.369140625, "learning_rate": 2.85463101263295e-06, "loss": 0.6547, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5721 }, { "epoch": 0.7780799564862659, "grad_norm": 0.38671875, "learning_rate": 2.851310295988323e-06, "loss": 0.6851, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5722 }, { "epoch": 0.7782159369050856, "grad_norm": 0.59765625, "learning_rate": 2.847991190745758e-06, "loss": 0.7274, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5723 }, { "epoch": 0.7783519173239054, "grad_norm": 0.30859375, "learning_rate": 2.844673697653417e-06, "loss": 0.5384, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5724 }, { "epoch": 0.7784878977427251, "grad_norm": 0.333984375, "learning_rate": 2.8413578174591094e-06, "loss": 0.6493, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5725 }, { "epoch": 0.7786238781615448, "grad_norm": 0.482421875, "learning_rate": 2.838043550910269e-06, "loss": 0.693, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5726 }, { "epoch": 0.7787598585803645, "grad_norm": 0.341796875, "learning_rate": 2.834730898753977e-06, "loss": 0.6326, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5727 }, { "epoch": 0.7788958389991841, "grad_norm": 0.375, "learning_rate": 2.8314198617369403e-06, "loss": 0.7244, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5728 }, { "epoch": 0.7790318194180038, "grad_norm": 0.28515625, "learning_rate": 2.828110440605515e-06, "loss": 0.5659, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5729 }, { "epoch": 0.7791677998368235, "grad_norm": 0.34765625, "learning_rate": 2.8248026361056757e-06, "loss": 0.5742, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5730 }, { "epoch": 0.7793037802556432, "grad_norm": 0.298828125, "learning_rate": 2.821496448983051e-06, "loss": 0.5609, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5731 }, { "epoch": 0.7794397606744629, "grad_norm": 0.330078125, "learning_rate": 2.818191879982888e-06, "loss": 0.7085, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5732 }, { "epoch": 0.7795757410932825, "grad_norm": 0.62109375, "learning_rate": 2.814888929850084e-06, "loss": 0.9023, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5733 }, { "epoch": 0.7797117215121022, "grad_norm": 0.5234375, "learning_rate": 2.8115875993291573e-06, "loss": 0.6768, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5734 }, { "epoch": 0.7798477019309219, "grad_norm": 0.3359375, "learning_rate": 2.8082878891642764e-06, "loss": 0.5812, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5735 }, { "epoch": 0.7799836823497417, "grad_norm": 0.396484375, "learning_rate": 2.804989800099227e-06, "loss": 0.5773, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5736 }, { "epoch": 0.7801196627685614, "grad_norm": 0.2578125, "learning_rate": 2.8016933328774475e-06, "loss": 0.5871, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5737 }, { "epoch": 0.780255643187381, "grad_norm": 0.39453125, "learning_rate": 2.7983984882419945e-06, "loss": 0.7782, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5738 }, { "epoch": 0.7803916236062007, "grad_norm": 0.5234375, "learning_rate": 2.795105266935573e-06, "loss": 0.6737, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5739 }, { "epoch": 0.7805276040250204, "grad_norm": 0.27734375, "learning_rate": 2.791813669700508e-06, "loss": 0.4237, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5740 }, { "epoch": 0.7806635844438401, "grad_norm": 0.453125, "learning_rate": 2.7885236972787733e-06, "loss": 0.8577, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5741 }, { "epoch": 0.7807995648626598, "grad_norm": 0.5390625, "learning_rate": 2.7852353504119622e-06, "loss": 0.8326, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5742 }, { "epoch": 0.7809355452814795, "grad_norm": 0.357421875, "learning_rate": 2.7819486298413147e-06, "loss": 0.5654, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5743 }, { "epoch": 0.7810715257002991, "grad_norm": 0.294921875, "learning_rate": 2.7786635363076907e-06, "loss": 0.4793, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5744 }, { "epoch": 0.7812075061191188, "grad_norm": 0.373046875, "learning_rate": 2.7753800705515977e-06, "loss": 0.5404, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5745 }, { "epoch": 0.7813434865379385, "grad_norm": 0.373046875, "learning_rate": 2.772098233313163e-06, "loss": 0.4742, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5746 }, { "epoch": 0.7814794669567582, "grad_norm": 0.74609375, "learning_rate": 2.768818025332158e-06, "loss": 0.6803, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5747 }, { "epoch": 0.7816154473755779, "grad_norm": 0.6171875, "learning_rate": 2.7655394473479756e-06, "loss": 0.887, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5748 }, { "epoch": 0.7817514277943977, "grad_norm": 0.41015625, "learning_rate": 2.762262500099655e-06, "loss": 0.5636, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5749 }, { "epoch": 0.7818874082132173, "grad_norm": 0.375, "learning_rate": 2.758987184325852e-06, "loss": 0.6116, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5750 }, { "epoch": 0.782023388632037, "grad_norm": 0.33203125, "learning_rate": 2.7557135007648707e-06, "loss": 0.5794, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5751 }, { "epoch": 0.7821593690508567, "grad_norm": 0.486328125, "learning_rate": 2.7524414501546325e-06, "loss": 0.8675, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5752 }, { "epoch": 0.7822953494696764, "grad_norm": 0.421875, "learning_rate": 2.7491710332327025e-06, "loss": 0.7399, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5753 }, { "epoch": 0.7824313298884961, "grad_norm": 0.35546875, "learning_rate": 2.7459022507362687e-06, "loss": 0.6979, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5754 }, { "epoch": 0.7825673103073157, "grad_norm": 0.671875, "learning_rate": 2.7426351034021604e-06, "loss": 0.7197, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5755 }, { "epoch": 0.7827032907261354, "grad_norm": 0.26171875, "learning_rate": 2.739369591966825e-06, "loss": 0.5097, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5756 }, { "epoch": 0.7828392711449551, "grad_norm": 0.41796875, "learning_rate": 2.7361057171663554e-06, "loss": 0.6567, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5757 }, { "epoch": 0.7829752515637748, "grad_norm": 0.37890625, "learning_rate": 2.732843479736463e-06, "loss": 0.7012, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5758 }, { "epoch": 0.7831112319825945, "grad_norm": 0.67578125, "learning_rate": 2.7295828804125e-06, "loss": 0.8392, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5759 }, { "epoch": 0.7832472124014142, "grad_norm": 0.423828125, "learning_rate": 2.7263239199294477e-06, "loss": 0.7299, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5760 }, { "epoch": 0.7833831928202338, "grad_norm": 0.3828125, "learning_rate": 2.7230665990219084e-06, "loss": 0.7113, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5761 }, { "epoch": 0.7835191732390536, "grad_norm": 0.609375, "learning_rate": 2.7198109184241295e-06, "loss": 0.6267, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5762 }, { "epoch": 0.7836551536578733, "grad_norm": 0.4375, "learning_rate": 2.716556878869975e-06, "loss": 0.5907, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5763 }, { "epoch": 0.783791134076693, "grad_norm": 0.455078125, "learning_rate": 2.713304481092951e-06, "loss": 0.673, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5764 }, { "epoch": 0.7839271144955127, "grad_norm": 0.30859375, "learning_rate": 2.7100537258261805e-06, "loss": 0.6273, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5765 }, { "epoch": 0.7840630949143323, "grad_norm": 0.33984375, "learning_rate": 2.7068046138024318e-06, "loss": 0.6011, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5766 }, { "epoch": 0.784199075333152, "grad_norm": 0.77734375, "learning_rate": 2.7035571457540865e-06, "loss": 0.7391, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5767 }, { "epoch": 0.7843350557519717, "grad_norm": 0.400390625, "learning_rate": 2.7003113224131717e-06, "loss": 0.9141, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5768 }, { "epoch": 0.7844710361707914, "grad_norm": 0.478515625, "learning_rate": 2.6970671445113273e-06, "loss": 0.4375, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5769 }, { "epoch": 0.7846070165896111, "grad_norm": 0.40234375, "learning_rate": 2.6938246127798385e-06, "loss": 0.8101, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5770 }, { "epoch": 0.7847429970084308, "grad_norm": 0.32421875, "learning_rate": 2.690583727949604e-06, "loss": 0.6125, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5771 }, { "epoch": 0.7848789774272504, "grad_norm": 0.35546875, "learning_rate": 2.687344490751167e-06, "loss": 0.8044, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5772 }, { "epoch": 0.7850149578460701, "grad_norm": 0.388671875, "learning_rate": 2.6841069019146827e-06, "loss": 0.5695, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5773 }, { "epoch": 0.7851509382648899, "grad_norm": 0.486328125, "learning_rate": 2.680870962169951e-06, "loss": 0.6038, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5774 }, { "epoch": 0.7852869186837096, "grad_norm": 0.41796875, "learning_rate": 2.6776366722463854e-06, "loss": 0.5231, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5775 }, { "epoch": 0.7854228991025293, "grad_norm": 0.3046875, "learning_rate": 2.6744040328730402e-06, "loss": 0.6335, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5776 }, { "epoch": 0.785558879521349, "grad_norm": 0.279296875, "learning_rate": 2.6711730447785876e-06, "loss": 0.5734, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5777 }, { "epoch": 0.7856948599401686, "grad_norm": 0.62109375, "learning_rate": 2.6679437086913352e-06, "loss": 0.7122, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5778 }, { "epoch": 0.7858308403589883, "grad_norm": 0.73046875, "learning_rate": 2.6647160253392114e-06, "loss": 0.7899, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5779 }, { "epoch": 0.785966820777808, "grad_norm": 0.345703125, "learning_rate": 2.6614899954497797e-06, "loss": 0.6258, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5780 }, { "epoch": 0.7861028011966277, "grad_norm": 0.36328125, "learning_rate": 2.658265619750221e-06, "loss": 0.5921, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5781 }, { "epoch": 0.7862387816154474, "grad_norm": 0.291015625, "learning_rate": 2.6550428989673548e-06, "loss": 0.5231, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5782 }, { "epoch": 0.786374762034267, "grad_norm": 0.458984375, "learning_rate": 2.651821833827617e-06, "loss": 0.7909, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5783 }, { "epoch": 0.7865107424530867, "grad_norm": 0.427734375, "learning_rate": 2.648602425057081e-06, "loss": 0.765, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5784 }, { "epoch": 0.7866467228719064, "grad_norm": 0.28125, "learning_rate": 2.6453846733814324e-06, "loss": 0.4886, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5785 }, { "epoch": 0.7867827032907261, "grad_norm": 0.40234375, "learning_rate": 2.642168579526001e-06, "loss": 0.5892, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5786 }, { "epoch": 0.7869186837095459, "grad_norm": 0.408203125, "learning_rate": 2.638954144215725e-06, "loss": 0.7201, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5787 }, { "epoch": 0.7870546641283656, "grad_norm": 0.3671875, "learning_rate": 2.635741368175185e-06, "loss": 0.6816, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5788 }, { "epoch": 0.7871906445471852, "grad_norm": 0.255859375, "learning_rate": 2.6325302521285744e-06, "loss": 0.4399, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5789 }, { "epoch": 0.7873266249660049, "grad_norm": 0.62890625, "learning_rate": 2.6293207967997224e-06, "loss": 0.5002, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5790 }, { "epoch": 0.7874626053848246, "grad_norm": 0.341796875, "learning_rate": 2.626113002912075e-06, "loss": 0.6584, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5791 }, { "epoch": 0.7875985858036443, "grad_norm": 0.369140625, "learning_rate": 2.622906871188713e-06, "loss": 0.5721, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5792 }, { "epoch": 0.787734566222464, "grad_norm": 0.828125, "learning_rate": 2.619702402352332e-06, "loss": 0.7355, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5793 }, { "epoch": 0.7878705466412836, "grad_norm": 0.33203125, "learning_rate": 2.6164995971252647e-06, "loss": 0.6012, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5794 }, { "epoch": 0.7880065270601033, "grad_norm": 0.34375, "learning_rate": 2.6132984562294574e-06, "loss": 0.6584, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5795 }, { "epoch": 0.788142507478923, "grad_norm": 0.58203125, "learning_rate": 2.6100989803864896e-06, "loss": 0.5927, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5796 }, { "epoch": 0.7882784878977427, "grad_norm": 0.478515625, "learning_rate": 2.6069011703175596e-06, "loss": 0.5866, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5797 }, { "epoch": 0.7884144683165624, "grad_norm": 0.376953125, "learning_rate": 2.603705026743497e-06, "loss": 0.6228, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5798 }, { "epoch": 0.7885504487353822, "grad_norm": 0.296875, "learning_rate": 2.600510550384746e-06, "loss": 0.6549, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5799 }, { "epoch": 0.7886864291542018, "grad_norm": 0.33984375, "learning_rate": 2.597317741961387e-06, "loss": 0.6392, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5800 }, { "epoch": 0.7888224095730215, "grad_norm": 0.28515625, "learning_rate": 2.5941266021931112e-06, "loss": 0.5981, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5801 }, { "epoch": 0.7889583899918412, "grad_norm": 0.44921875, "learning_rate": 2.590937131799248e-06, "loss": 0.5882, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5802 }, { "epoch": 0.7890943704106609, "grad_norm": 0.3125, "learning_rate": 2.5877493314987357e-06, "loss": 0.5002, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5803 }, { "epoch": 0.7892303508294806, "grad_norm": 0.384765625, "learning_rate": 2.5845632020101495e-06, "loss": 0.744, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5804 }, { "epoch": 0.7893663312483002, "grad_norm": 0.3671875, "learning_rate": 2.5813787440516757e-06, "loss": 0.6147, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5805 }, { "epoch": 0.7895023116671199, "grad_norm": 0.4921875, "learning_rate": 2.5781959583411375e-06, "loss": 0.7775, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5806 }, { "epoch": 0.7896382920859396, "grad_norm": 0.482421875, "learning_rate": 2.5750148455959667e-06, "loss": 0.7931, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5807 }, { "epoch": 0.7897742725047593, "grad_norm": 0.61328125, "learning_rate": 2.571835406533232e-06, "loss": 0.5267, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5808 }, { "epoch": 0.789910252923579, "grad_norm": 0.5703125, "learning_rate": 2.56865764186961e-06, "loss": 0.6857, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5809 }, { "epoch": 0.7900462333423987, "grad_norm": 0.333984375, "learning_rate": 2.565481552321415e-06, "loss": 0.6605, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5810 }, { "epoch": 0.7901822137612183, "grad_norm": 0.36328125, "learning_rate": 2.5623071386045696e-06, "loss": 0.6657, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5811 }, { "epoch": 0.7903181941800381, "grad_norm": 0.34765625, "learning_rate": 2.559134401434632e-06, "loss": 0.658, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5812 }, { "epoch": 0.7904541745988578, "grad_norm": 0.39453125, "learning_rate": 2.5559633415267703e-06, "loss": 0.7061, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5813 }, { "epoch": 0.7905901550176775, "grad_norm": 0.51171875, "learning_rate": 2.552793959595786e-06, "loss": 0.7173, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5814 }, { "epoch": 0.7907261354364972, "grad_norm": 0.40625, "learning_rate": 2.54962625635609e-06, "loss": 0.7572, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5815 }, { "epoch": 0.7908621158553168, "grad_norm": 0.62109375, "learning_rate": 2.546460232521728e-06, "loss": 0.8021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5816 }, { "epoch": 0.7909980962741365, "grad_norm": 0.435546875, "learning_rate": 2.543295888806354e-06, "loss": 0.418, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5817 }, { "epoch": 0.7911340766929562, "grad_norm": 0.3359375, "learning_rate": 2.540133225923256e-06, "loss": 0.5835, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5818 }, { "epoch": 0.7912700571117759, "grad_norm": 0.314453125, "learning_rate": 2.536972244585331e-06, "loss": 0.5531, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5819 }, { "epoch": 0.7914060375305956, "grad_norm": 0.36328125, "learning_rate": 2.5338129455051053e-06, "loss": 0.6273, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5820 }, { "epoch": 0.7915420179494153, "grad_norm": 0.3828125, "learning_rate": 2.5306553293947265e-06, "loss": 0.7109, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5821 }, { "epoch": 0.7916779983682349, "grad_norm": 0.35546875, "learning_rate": 2.5274993969659554e-06, "loss": 0.7505, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5822 }, { "epoch": 0.7918139787870546, "grad_norm": 0.490234375, "learning_rate": 2.524345148930182e-06, "loss": 0.7378, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5823 }, { "epoch": 0.7919499592058743, "grad_norm": 0.3984375, "learning_rate": 2.5211925859984064e-06, "loss": 0.7106, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5824 }, { "epoch": 0.7920859396246941, "grad_norm": 0.33203125, "learning_rate": 2.5180417088812624e-06, "loss": 0.6551, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5825 }, { "epoch": 0.7922219200435138, "grad_norm": 0.56640625, "learning_rate": 2.514892518288988e-06, "loss": 0.613, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5826 }, { "epoch": 0.7923579004623335, "grad_norm": 0.48046875, "learning_rate": 2.511745014931458e-06, "loss": 0.8293, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5827 }, { "epoch": 0.7924938808811531, "grad_norm": 0.41796875, "learning_rate": 2.50859919951815e-06, "loss": 0.9686, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5828 }, { "epoch": 0.7926298612999728, "grad_norm": 0.328125, "learning_rate": 2.5054550727581763e-06, "loss": 0.6842, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5829 }, { "epoch": 0.7927658417187925, "grad_norm": 0.376953125, "learning_rate": 2.5023126353602545e-06, "loss": 0.4359, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5830 }, { "epoch": 0.7929018221376122, "grad_norm": 0.75390625, "learning_rate": 2.4991718880327342e-06, "loss": 0.9902, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5831 }, { "epoch": 0.7930378025564319, "grad_norm": 0.220703125, "learning_rate": 2.4960328314835746e-06, "loss": 0.4409, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5832 }, { "epoch": 0.7931737829752515, "grad_norm": 0.2578125, "learning_rate": 2.492895466420361e-06, "loss": 0.4007, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5833 }, { "epoch": 0.7933097633940712, "grad_norm": 0.3125, "learning_rate": 2.4897597935502894e-06, "loss": 0.6154, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5834 }, { "epoch": 0.7934457438128909, "grad_norm": 0.427734375, "learning_rate": 2.4866258135801837e-06, "loss": 0.6945, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5835 }, { "epoch": 0.7935817242317106, "grad_norm": 0.41796875, "learning_rate": 2.483493527216475e-06, "loss": 0.8447, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5836 }, { "epoch": 0.7937177046505304, "grad_norm": 0.30859375, "learning_rate": 2.480362935165227e-06, "loss": 0.6231, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5837 }, { "epoch": 0.7938536850693501, "grad_norm": 0.578125, "learning_rate": 2.4772340381321047e-06, "loss": 0.741, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5838 }, { "epoch": 0.7939896654881697, "grad_norm": 0.80078125, "learning_rate": 2.4741068368224084e-06, "loss": 0.7708, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5839 }, { "epoch": 0.7941256459069894, "grad_norm": 0.365234375, "learning_rate": 2.4709813319410403e-06, "loss": 0.554, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5840 }, { "epoch": 0.7942616263258091, "grad_norm": 0.4609375, "learning_rate": 2.467857524192533e-06, "loss": 0.8358, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5841 }, { "epoch": 0.7943976067446288, "grad_norm": 0.283203125, "learning_rate": 2.464735414281024e-06, "loss": 0.5163, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5842 }, { "epoch": 0.7945335871634485, "grad_norm": 0.5703125, "learning_rate": 2.4616150029102837e-06, "loss": 0.5275, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5843 }, { "epoch": 0.7946695675822681, "grad_norm": 0.373046875, "learning_rate": 2.458496290783684e-06, "loss": 0.6806, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5844 }, { "epoch": 0.7948055480010878, "grad_norm": 0.546875, "learning_rate": 2.455379278604226e-06, "loss": 0.8851, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5845 }, { "epoch": 0.7949415284199075, "grad_norm": 0.34765625, "learning_rate": 2.4522639670745165e-06, "loss": 0.6654, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5846 }, { "epoch": 0.7950775088387272, "grad_norm": 0.5546875, "learning_rate": 2.449150356896791e-06, "loss": 0.8477, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5847 }, { "epoch": 0.7952134892575469, "grad_norm": 0.384765625, "learning_rate": 2.4460384487728893e-06, "loss": 0.7126, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5848 }, { "epoch": 0.7953494696763665, "grad_norm": 0.419921875, "learning_rate": 2.44292824340428e-06, "loss": 0.6502, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5849 }, { "epoch": 0.7954854500951863, "grad_norm": 0.314453125, "learning_rate": 2.4398197414920345e-06, "loss": 0.5675, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5850 }, { "epoch": 0.795621430514006, "grad_norm": 0.38671875, "learning_rate": 2.4367129437368542e-06, "loss": 0.6852, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5851 }, { "epoch": 0.7957574109328257, "grad_norm": 0.2734375, "learning_rate": 2.4336078508390426e-06, "loss": 0.4997, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5852 }, { "epoch": 0.7958933913516454, "grad_norm": 0.734375, "learning_rate": 2.430504463498531e-06, "loss": 0.8504, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5853 }, { "epoch": 0.7960293717704651, "grad_norm": 0.6875, "learning_rate": 2.427402782414856e-06, "loss": 0.7287, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5854 }, { "epoch": 0.7961653521892847, "grad_norm": 0.396484375, "learning_rate": 2.4243028082871813e-06, "loss": 0.6497, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5855 }, { "epoch": 0.7963013326081044, "grad_norm": 0.578125, "learning_rate": 2.4212045418142706e-06, "loss": 0.7166, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5856 }, { "epoch": 0.7964373130269241, "grad_norm": 0.345703125, "learning_rate": 2.4181079836945175e-06, "loss": 0.6538, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5857 }, { "epoch": 0.7965732934457438, "grad_norm": 0.53515625, "learning_rate": 2.41501313462592e-06, "loss": 0.54, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5858 }, { "epoch": 0.7967092738645635, "grad_norm": 0.255859375, "learning_rate": 2.4119199953060977e-06, "loss": 0.394, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5859 }, { "epoch": 0.7968452542833832, "grad_norm": 0.5546875, "learning_rate": 2.4088285664322797e-06, "loss": 0.6484, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5860 }, { "epoch": 0.7969812347022028, "grad_norm": 0.392578125, "learning_rate": 2.4057388487013157e-06, "loss": 0.5418, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5861 }, { "epoch": 0.7971172151210226, "grad_norm": 0.494140625, "learning_rate": 2.4026508428096594e-06, "loss": 0.731, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5862 }, { "epoch": 0.7972531955398423, "grad_norm": 0.369140625, "learning_rate": 2.3995645494533925e-06, "loss": 0.7104, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5863 }, { "epoch": 0.797389175958662, "grad_norm": 0.578125, "learning_rate": 2.3964799693281972e-06, "loss": 0.8467, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5864 }, { "epoch": 0.7975251563774817, "grad_norm": 0.28125, "learning_rate": 2.39339710312938e-06, "loss": 0.4917, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5865 }, { "epoch": 0.7976611367963014, "grad_norm": 0.5390625, "learning_rate": 2.3903159515518527e-06, "loss": 0.6852, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5866 }, { "epoch": 0.797797117215121, "grad_norm": 0.5390625, "learning_rate": 2.387236515290149e-06, "loss": 0.5415, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5867 }, { "epoch": 0.7979330976339407, "grad_norm": 0.65625, "learning_rate": 2.384158795038407e-06, "loss": 0.705, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5868 }, { "epoch": 0.7980690780527604, "grad_norm": 0.396484375, "learning_rate": 2.3810827914903867e-06, "loss": 0.6644, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5869 }, { "epoch": 0.7982050584715801, "grad_norm": 0.345703125, "learning_rate": 2.3780085053394508e-06, "loss": 0.5859, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5870 }, { "epoch": 0.7983410388903998, "grad_norm": 0.32421875, "learning_rate": 2.3749359372785884e-06, "loss": 0.6206, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5871 }, { "epoch": 0.7984770193092194, "grad_norm": 0.38671875, "learning_rate": 2.371865088000388e-06, "loss": 0.7207, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5872 }, { "epoch": 0.7986129997280391, "grad_norm": 0.451171875, "learning_rate": 2.3687959581970623e-06, "loss": 0.6995, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5873 }, { "epoch": 0.7987489801468588, "grad_norm": 0.451171875, "learning_rate": 2.365728548560423e-06, "loss": 0.7366, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5874 }, { "epoch": 0.7988849605656786, "grad_norm": 0.384765625, "learning_rate": 2.36266285978191e-06, "loss": 0.7116, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5875 }, { "epoch": 0.7990209409844983, "grad_norm": 0.333984375, "learning_rate": 2.3595988925525594e-06, "loss": 0.6362, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5876 }, { "epoch": 0.799156921403318, "grad_norm": 0.259765625, "learning_rate": 2.3565366475630345e-06, "loss": 0.464, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5877 }, { "epoch": 0.7992929018221376, "grad_norm": 0.4140625, "learning_rate": 2.3534761255035955e-06, "loss": 0.6117, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5878 }, { "epoch": 0.7994288822409573, "grad_norm": 0.7265625, "learning_rate": 2.3504173270641274e-06, "loss": 0.6022, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5879 }, { "epoch": 0.799564862659777, "grad_norm": 0.99609375, "learning_rate": 2.3473602529341155e-06, "loss": 0.7179, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5880 }, { "epoch": 0.7997008430785967, "grad_norm": 0.2080078125, "learning_rate": 2.3443049038026643e-06, "loss": 0.3089, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5881 }, { "epoch": 0.7998368234974164, "grad_norm": 0.361328125, "learning_rate": 2.3412512803584896e-06, "loss": 0.5708, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5882 }, { "epoch": 0.799972803916236, "grad_norm": 0.248046875, "learning_rate": 2.3381993832899096e-06, "loss": 0.4769, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5883 }, { "epoch": 0.8001087843350557, "grad_norm": 0.89453125, "learning_rate": 2.3351492132848665e-06, "loss": 0.7033, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5884 }, { "epoch": 0.8002447647538754, "grad_norm": 0.330078125, "learning_rate": 2.3321007710308985e-06, "loss": 0.5767, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5885 }, { "epoch": 0.8003807451726951, "grad_norm": 0.46875, "learning_rate": 2.3290540572151675e-06, "loss": 0.6701, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5886 }, { "epoch": 0.8005167255915149, "grad_norm": 0.498046875, "learning_rate": 2.3260090725244342e-06, "loss": 0.7882, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5887 }, { "epoch": 0.8006527060103346, "grad_norm": 0.41796875, "learning_rate": 2.322965817645083e-06, "loss": 0.6587, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5888 }, { "epoch": 0.8007886864291542, "grad_norm": 0.251953125, "learning_rate": 2.3199242932630926e-06, "loss": 0.5171, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5889 }, { "epoch": 0.8009246668479739, "grad_norm": 0.25390625, "learning_rate": 2.3168845000640673e-06, "loss": 0.5448, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5890 }, { "epoch": 0.8010606472667936, "grad_norm": 0.3125, "learning_rate": 2.3138464387332073e-06, "loss": 0.6501, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5891 }, { "epoch": 0.8011966276856133, "grad_norm": 0.64453125, "learning_rate": 2.310810109955334e-06, "loss": 0.6164, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5892 }, { "epoch": 0.801332608104433, "grad_norm": 0.396484375, "learning_rate": 2.307775514414867e-06, "loss": 0.66, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5893 }, { "epoch": 0.8014685885232526, "grad_norm": 0.255859375, "learning_rate": 2.3047426527958483e-06, "loss": 0.4328, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5894 }, { "epoch": 0.8016045689420723, "grad_norm": 0.3125, "learning_rate": 2.3017115257819157e-06, "loss": 0.5511, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5895 }, { "epoch": 0.801740549360892, "grad_norm": 0.271484375, "learning_rate": 2.298682134056328e-06, "loss": 0.5664, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5896 }, { "epoch": 0.8018765297797117, "grad_norm": 0.65234375, "learning_rate": 2.295654478301942e-06, "loss": 0.7529, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5897 }, { "epoch": 0.8020125101985314, "grad_norm": 0.357421875, "learning_rate": 2.292628559201232e-06, "loss": 0.7025, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5898 }, { "epoch": 0.802148490617351, "grad_norm": 0.2431640625, "learning_rate": 2.289604377436273e-06, "loss": 0.4697, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5899 }, { "epoch": 0.8022844710361708, "grad_norm": 0.36328125, "learning_rate": 2.2865819336887596e-06, "loss": 0.6056, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5900 }, { "epoch": 0.8024204514549905, "grad_norm": 0.3828125, "learning_rate": 2.2835612286399787e-06, "loss": 0.7371, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5901 }, { "epoch": 0.8025564318738102, "grad_norm": 0.279296875, "learning_rate": 2.2805422629708427e-06, "loss": 0.6336, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5902 }, { "epoch": 0.8026924122926299, "grad_norm": 0.341796875, "learning_rate": 2.2775250373618553e-06, "loss": 0.4852, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5903 }, { "epoch": 0.8028283927114496, "grad_norm": 0.408203125, "learning_rate": 2.2745095524931438e-06, "loss": 0.7183, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5904 }, { "epoch": 0.8029643731302692, "grad_norm": 0.55078125, "learning_rate": 2.2714958090444283e-06, "loss": 0.5269, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5905 }, { "epoch": 0.8031003535490889, "grad_norm": 1.0, "learning_rate": 2.26848380769505e-06, "loss": 0.6221, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5906 }, { "epoch": 0.8032363339679086, "grad_norm": 0.318359375, "learning_rate": 2.2654735491239445e-06, "loss": 0.57, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5907 }, { "epoch": 0.8033723143867283, "grad_norm": 0.3515625, "learning_rate": 2.2624650340096676e-06, "loss": 0.5439, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5908 }, { "epoch": 0.803508294805548, "grad_norm": 0.3984375, "learning_rate": 2.2594582630303674e-06, "loss": 0.6237, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5909 }, { "epoch": 0.8036442752243677, "grad_norm": 0.3515625, "learning_rate": 2.256453236863815e-06, "loss": 0.709, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5910 }, { "epoch": 0.8037802556431873, "grad_norm": 0.39453125, "learning_rate": 2.2534499561873737e-06, "loss": 0.6475, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5911 }, { "epoch": 0.803916236062007, "grad_norm": 0.328125, "learning_rate": 2.2504484216780243e-06, "loss": 0.6567, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5912 }, { "epoch": 0.8040522164808268, "grad_norm": 0.62890625, "learning_rate": 2.2474486340123445e-06, "loss": 0.7521, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5913 }, { "epoch": 0.8041881968996465, "grad_norm": 0.369140625, "learning_rate": 2.2444505938665296e-06, "loss": 0.8517, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5914 }, { "epoch": 0.8043241773184662, "grad_norm": 0.359375, "learning_rate": 2.241454301916367e-06, "loss": 0.6787, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5915 }, { "epoch": 0.8044601577372859, "grad_norm": 0.275390625, "learning_rate": 2.2384597588372647e-06, "loss": 0.4331, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5916 }, { "epoch": 0.8045961381561055, "grad_norm": 0.39453125, "learning_rate": 2.235466965304223e-06, "loss": 0.7809, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5917 }, { "epoch": 0.8047321185749252, "grad_norm": 0.90234375, "learning_rate": 2.232475921991859e-06, "loss": 0.598, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5918 }, { "epoch": 0.8048680989937449, "grad_norm": 0.384765625, "learning_rate": 2.2294866295743867e-06, "loss": 0.6779, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5919 }, { "epoch": 0.8050040794125646, "grad_norm": 0.298828125, "learning_rate": 2.2264990887256343e-06, "loss": 0.6219, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5920 }, { "epoch": 0.8051400598313843, "grad_norm": 0.6015625, "learning_rate": 2.2235133001190233e-06, "loss": 0.6605, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5921 }, { "epoch": 0.8052760402502039, "grad_norm": 0.341796875, "learning_rate": 2.2205292644275933e-06, "loss": 0.5368, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5922 }, { "epoch": 0.8054120206690236, "grad_norm": 0.56640625, "learning_rate": 2.217546982323977e-06, "loss": 0.8218, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5923 }, { "epoch": 0.8055480010878433, "grad_norm": 0.283203125, "learning_rate": 2.2145664544804223e-06, "loss": 0.5099, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5924 }, { "epoch": 0.8056839815066631, "grad_norm": 0.4609375, "learning_rate": 2.211587681568771e-06, "loss": 0.658, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5925 }, { "epoch": 0.8058199619254828, "grad_norm": 0.75390625, "learning_rate": 2.208610664260481e-06, "loss": 0.7166, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5926 }, { "epoch": 0.8059559423443025, "grad_norm": 0.345703125, "learning_rate": 2.2056354032266024e-06, "loss": 0.6313, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5927 }, { "epoch": 0.8060919227631221, "grad_norm": 0.265625, "learning_rate": 2.2026618991378026e-06, "loss": 0.4274, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5928 }, { "epoch": 0.8062279031819418, "grad_norm": 0.392578125, "learning_rate": 2.1996901526643366e-06, "loss": 0.7489, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5929 }, { "epoch": 0.8063638836007615, "grad_norm": 0.5859375, "learning_rate": 2.1967201644760816e-06, "loss": 0.7243, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5930 }, { "epoch": 0.8064998640195812, "grad_norm": 1.2265625, "learning_rate": 2.193751935242502e-06, "loss": 0.8335, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5931 }, { "epoch": 0.8066358444384009, "grad_norm": 0.4296875, "learning_rate": 2.190785465632679e-06, "loss": 0.755, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5932 }, { "epoch": 0.8067718248572205, "grad_norm": 0.349609375, "learning_rate": 2.1878207563152855e-06, "loss": 0.6014, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5933 }, { "epoch": 0.8069078052760402, "grad_norm": 0.359375, "learning_rate": 2.184857807958608e-06, "loss": 0.6994, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5934 }, { "epoch": 0.8070437856948599, "grad_norm": 0.369140625, "learning_rate": 2.181896621230527e-06, "loss": 0.6413, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5935 }, { "epoch": 0.8071797661136796, "grad_norm": 0.48828125, "learning_rate": 2.178937196798534e-06, "loss": 0.7024, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5936 }, { "epoch": 0.8073157465324993, "grad_norm": 0.373046875, "learning_rate": 2.1759795353297165e-06, "loss": 0.6174, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5937 }, { "epoch": 0.8074517269513191, "grad_norm": 0.828125, "learning_rate": 2.1730236374907708e-06, "loss": 0.9087, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5938 }, { "epoch": 0.8075877073701387, "grad_norm": 0.419921875, "learning_rate": 2.170069503947988e-06, "loss": 0.4826, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5939 }, { "epoch": 0.8077236877889584, "grad_norm": 0.361328125, "learning_rate": 2.167117135367267e-06, "loss": 0.742, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5940 }, { "epoch": 0.8078596682077781, "grad_norm": 0.26953125, "learning_rate": 2.1641665324141137e-06, "loss": 0.5509, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5941 }, { "epoch": 0.8079956486265978, "grad_norm": 0.46875, "learning_rate": 2.1612176957536222e-06, "loss": 0.6292, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5942 }, { "epoch": 0.8081316290454175, "grad_norm": 0.345703125, "learning_rate": 2.1582706260505015e-06, "loss": 0.6209, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5943 }, { "epoch": 0.8082676094642371, "grad_norm": 0.314453125, "learning_rate": 2.155325323969054e-06, "loss": 0.4469, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5944 }, { "epoch": 0.8084035898830568, "grad_norm": 0.34375, "learning_rate": 2.1523817901731904e-06, "loss": 0.7903, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5945 }, { "epoch": 0.8085395703018765, "grad_norm": 0.9140625, "learning_rate": 2.1494400253264134e-06, "loss": 0.8229, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5946 }, { "epoch": 0.8086755507206962, "grad_norm": 0.41796875, "learning_rate": 2.146500030091839e-06, "loss": 0.6043, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5947 }, { "epoch": 0.8088115311395159, "grad_norm": 0.25390625, "learning_rate": 2.143561805132174e-06, "loss": 0.4823, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5948 }, { "epoch": 0.8089475115583356, "grad_norm": 0.361328125, "learning_rate": 2.140625351109733e-06, "loss": 0.6935, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5949 }, { "epoch": 0.8090834919771553, "grad_norm": 0.40234375, "learning_rate": 2.1376906686864252e-06, "loss": 0.6837, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5950 }, { "epoch": 0.809219472395975, "grad_norm": 0.53125, "learning_rate": 2.1347577585237676e-06, "loss": 0.7075, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5951 }, { "epoch": 0.8093554528147947, "grad_norm": 0.365234375, "learning_rate": 2.131826621282871e-06, "loss": 0.536, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5952 }, { "epoch": 0.8094914332336144, "grad_norm": 0.357421875, "learning_rate": 2.1288972576244528e-06, "loss": 0.5506, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5953 }, { "epoch": 0.8096274136524341, "grad_norm": 0.55078125, "learning_rate": 2.1259696682088226e-06, "loss": 0.6755, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5954 }, { "epoch": 0.8097633940712538, "grad_norm": 0.5546875, "learning_rate": 2.1230438536959007e-06, "loss": 0.7616, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5955 }, { "epoch": 0.8098993744900734, "grad_norm": 0.66015625, "learning_rate": 2.120119814745196e-06, "loss": 0.6602, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5956 }, { "epoch": 0.8100353549088931, "grad_norm": 0.6015625, "learning_rate": 2.117197552015827e-06, "loss": 0.7995, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5957 }, { "epoch": 0.8101713353277128, "grad_norm": 0.53125, "learning_rate": 2.1142770661665037e-06, "loss": 0.665, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5958 }, { "epoch": 0.8103073157465325, "grad_norm": 0.427734375, "learning_rate": 2.111358357855544e-06, "loss": 0.797, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5959 }, { "epoch": 0.8104432961653522, "grad_norm": 0.671875, "learning_rate": 2.108441427740854e-06, "loss": 0.842, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5960 }, { "epoch": 0.8105792765841718, "grad_norm": 0.63671875, "learning_rate": 2.105526276479952e-06, "loss": 0.8405, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5961 }, { "epoch": 0.8107152570029915, "grad_norm": 0.337890625, "learning_rate": 2.1026129047299436e-06, "loss": 0.4661, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5962 }, { "epoch": 0.8108512374218113, "grad_norm": 0.380859375, "learning_rate": 2.099701313147543e-06, "loss": 0.7139, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5963 }, { "epoch": 0.810987217840631, "grad_norm": 0.400390625, "learning_rate": 2.0967915023890527e-06, "loss": 0.6792, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5964 }, { "epoch": 0.8111231982594507, "grad_norm": 0.64453125, "learning_rate": 2.0938834731103862e-06, "loss": 0.7899, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5965 }, { "epoch": 0.8112591786782704, "grad_norm": 0.74609375, "learning_rate": 2.0909772259670434e-06, "loss": 0.4966, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5966 }, { "epoch": 0.81139515909709, "grad_norm": 0.5703125, "learning_rate": 2.088072761614133e-06, "loss": 0.7435, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5967 }, { "epoch": 0.8115311395159097, "grad_norm": 0.412109375, "learning_rate": 2.0851700807063504e-06, "loss": 0.7158, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5968 }, { "epoch": 0.8116671199347294, "grad_norm": 0.44140625, "learning_rate": 2.0822691838980015e-06, "loss": 0.6654, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5969 }, { "epoch": 0.8118031003535491, "grad_norm": 0.271484375, "learning_rate": 2.07937007184298e-06, "loss": 0.6343, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5970 }, { "epoch": 0.8119390807723688, "grad_norm": 0.431640625, "learning_rate": 2.076472745194784e-06, "loss": 0.7241, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5971 }, { "epoch": 0.8120750611911884, "grad_norm": 0.294921875, "learning_rate": 2.073577204606503e-06, "loss": 0.6642, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5972 }, { "epoch": 0.8122110416100081, "grad_norm": 0.578125, "learning_rate": 2.0706834507308317e-06, "loss": 0.8485, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5973 }, { "epoch": 0.8123470220288278, "grad_norm": 0.62109375, "learning_rate": 2.067791484220052e-06, "loss": 0.585, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5974 }, { "epoch": 0.8124830024476475, "grad_norm": 0.357421875, "learning_rate": 2.064901305726055e-06, "loss": 0.7581, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5975 }, { "epoch": 0.8126189828664673, "grad_norm": 0.33203125, "learning_rate": 2.0620129159003154e-06, "loss": 0.6169, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5976 }, { "epoch": 0.812754963285287, "grad_norm": 0.251953125, "learning_rate": 2.059126315393918e-06, "loss": 0.5176, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5977 }, { "epoch": 0.8128909437041066, "grad_norm": 0.50390625, "learning_rate": 2.056241504857531e-06, "loss": 0.7471, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5978 }, { "epoch": 0.8130269241229263, "grad_norm": 0.671875, "learning_rate": 2.0533584849414346e-06, "loss": 0.5571, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5979 }, { "epoch": 0.813162904541746, "grad_norm": 0.333984375, "learning_rate": 2.0504772562954876e-06, "loss": 0.5448, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5980 }, { "epoch": 0.8132988849605657, "grad_norm": 0.421875, "learning_rate": 2.0475978195691624e-06, "loss": 0.6589, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5981 }, { "epoch": 0.8134348653793854, "grad_norm": 0.486328125, "learning_rate": 2.044720175411512e-06, "loss": 0.6509, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5982 }, { "epoch": 0.813570845798205, "grad_norm": 0.375, "learning_rate": 2.0418443244711993e-06, "loss": 0.728, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5983 }, { "epoch": 0.8137068262170247, "grad_norm": 0.470703125, "learning_rate": 2.0389702673964696e-06, "loss": 0.8385, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5984 }, { "epoch": 0.8138428066358444, "grad_norm": 0.3515625, "learning_rate": 2.0360980048351764e-06, "loss": 0.6211, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5985 }, { "epoch": 0.8139787870546641, "grad_norm": 0.314453125, "learning_rate": 2.0332275374347576e-06, "loss": 0.6514, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5986 }, { "epoch": 0.8141147674734838, "grad_norm": 0.4140625, "learning_rate": 2.0303588658422568e-06, "loss": 0.6746, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5987 }, { "epoch": 0.8142507478923036, "grad_norm": 0.306640625, "learning_rate": 2.0274919907043033e-06, "loss": 0.5485, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5988 }, { "epoch": 0.8143867283111232, "grad_norm": 0.326171875, "learning_rate": 2.02462691266713e-06, "loss": 0.6549, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5989 }, { "epoch": 0.8145227087299429, "grad_norm": 0.3203125, "learning_rate": 2.0217636323765553e-06, "loss": 0.5671, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5990 }, { "epoch": 0.8146586891487626, "grad_norm": 0.326171875, "learning_rate": 2.0189021504780025e-06, "loss": 0.4639, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5991 }, { "epoch": 0.8147946695675823, "grad_norm": 0.419921875, "learning_rate": 2.01604246761648e-06, "loss": 0.651, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5992 }, { "epoch": 0.814930649986402, "grad_norm": 0.349609375, "learning_rate": 2.0131845844366015e-06, "loss": 0.6084, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5993 }, { "epoch": 0.8150666304052216, "grad_norm": 0.4765625, "learning_rate": 2.0103285015825613e-06, "loss": 0.5679, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5994 }, { "epoch": 0.8152026108240413, "grad_norm": 0.357421875, "learning_rate": 2.0074742196981623e-06, "loss": 0.6599, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5995 }, { "epoch": 0.815338591242861, "grad_norm": 0.2392578125, "learning_rate": 2.0046217394267886e-06, "loss": 0.471, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5996 }, { "epoch": 0.8154745716616807, "grad_norm": 0.345703125, "learning_rate": 2.0017710614114306e-06, "loss": 0.5257, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5997 }, { "epoch": 0.8156105520805004, "grad_norm": 0.439453125, "learning_rate": 1.9989221862946585e-06, "loss": 0.6528, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5998 }, { "epoch": 0.81574653249932, "grad_norm": 0.69921875, "learning_rate": 1.9960751147186507e-06, "loss": 0.6434, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 5999 }, { "epoch": 0.8158825129181397, "grad_norm": 0.31640625, "learning_rate": 1.9932298473251656e-06, "loss": 0.6647, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6000 }, { "epoch": 0.8160184933369595, "grad_norm": 0.46875, "learning_rate": 1.990386384755565e-06, "loss": 0.8304, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6001 }, { "epoch": 0.8161544737557792, "grad_norm": 0.380859375, "learning_rate": 1.987544727650801e-06, "loss": 0.5522, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6002 }, { "epoch": 0.8162904541745989, "grad_norm": 0.27734375, "learning_rate": 1.9847048766514142e-06, "loss": 0.4897, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6003 }, { "epoch": 0.8164264345934186, "grad_norm": 0.3828125, "learning_rate": 1.9818668323975464e-06, "loss": 0.6361, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6004 }, { "epoch": 0.8165624150122383, "grad_norm": 0.384765625, "learning_rate": 1.9790305955289223e-06, "loss": 0.7635, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6005 }, { "epoch": 0.8166983954310579, "grad_norm": 0.474609375, "learning_rate": 1.9761961666848707e-06, "loss": 0.7002, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6006 }, { "epoch": 0.8168343758498776, "grad_norm": 0.3984375, "learning_rate": 1.973363546504299e-06, "loss": 0.799, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6007 }, { "epoch": 0.8169703562686973, "grad_norm": 0.640625, "learning_rate": 1.970532735625721e-06, "loss": 0.7585, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6008 }, { "epoch": 0.817106336687517, "grad_norm": 0.48828125, "learning_rate": 1.967703734687231e-06, "loss": 0.7404, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6009 }, { "epoch": 0.8172423171063367, "grad_norm": 0.361328125, "learning_rate": 1.964876544326525e-06, "loss": 0.688, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6010 }, { "epoch": 0.8173782975251563, "grad_norm": 0.423828125, "learning_rate": 1.9620511651808814e-06, "loss": 0.5493, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6011 }, { "epoch": 0.817514277943976, "grad_norm": 0.498046875, "learning_rate": 1.95922759788718e-06, "loss": 0.6248, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6012 }, { "epoch": 0.8176502583627958, "grad_norm": 0.328125, "learning_rate": 1.9564058430818832e-06, "loss": 0.5425, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6013 }, { "epoch": 0.8177862387816155, "grad_norm": 0.26953125, "learning_rate": 1.9535859014010525e-06, "loss": 0.4997, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6014 }, { "epoch": 0.8179222192004352, "grad_norm": 0.349609375, "learning_rate": 1.9507677734803345e-06, "loss": 0.626, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6015 }, { "epoch": 0.8180581996192549, "grad_norm": 0.275390625, "learning_rate": 1.9479514599549733e-06, "loss": 0.6587, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6016 }, { "epoch": 0.8181941800380745, "grad_norm": 0.34765625, "learning_rate": 1.945136961459795e-06, "loss": 0.5815, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6017 }, { "epoch": 0.8183301604568942, "grad_norm": 0.412109375, "learning_rate": 1.9423242786292275e-06, "loss": 0.7941, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6018 }, { "epoch": 0.8184661408757139, "grad_norm": 0.2314453125, "learning_rate": 1.9395134120972794e-06, "loss": 0.3796, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6019 }, { "epoch": 0.8186021212945336, "grad_norm": 0.57421875, "learning_rate": 1.9367043624975577e-06, "loss": 0.8599, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6020 }, { "epoch": 0.8187381017133533, "grad_norm": 0.546875, "learning_rate": 1.9338971304632535e-06, "loss": 0.6247, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6021 }, { "epoch": 0.8188740821321729, "grad_norm": 0.640625, "learning_rate": 1.931091716627156e-06, "loss": 0.4657, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6022 }, { "epoch": 0.8190100625509926, "grad_norm": 0.373046875, "learning_rate": 1.928288121621633e-06, "loss": 0.6331, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6023 }, { "epoch": 0.8191460429698123, "grad_norm": 0.5625, "learning_rate": 1.9254863460786553e-06, "loss": 0.5778, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6024 }, { "epoch": 0.819282023388632, "grad_norm": 1.4140625, "learning_rate": 1.9226863906297734e-06, "loss": 0.9604, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6025 }, { "epoch": 0.8194180038074518, "grad_norm": 0.84765625, "learning_rate": 1.919888255906135e-06, "loss": 0.8306, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6026 }, { "epoch": 0.8195539842262715, "grad_norm": 0.625, "learning_rate": 1.9170919425384695e-06, "loss": 0.7569, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6027 }, { "epoch": 0.8196899646450911, "grad_norm": 0.59765625, "learning_rate": 1.9142974511571045e-06, "loss": 0.6746, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6028 }, { "epoch": 0.8198259450639108, "grad_norm": 0.220703125, "learning_rate": 1.9115047823919476e-06, "loss": 0.4834, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6029 }, { "epoch": 0.8199619254827305, "grad_norm": 0.6796875, "learning_rate": 1.908713936872506e-06, "loss": 0.6911, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6030 }, { "epoch": 0.8200979059015502, "grad_norm": 0.3828125, "learning_rate": 1.9059249152278657e-06, "loss": 0.592, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6031 }, { "epoch": 0.8202338863203699, "grad_norm": 0.57421875, "learning_rate": 1.9031377180867106e-06, "loss": 0.7534, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6032 }, { "epoch": 0.8203698667391895, "grad_norm": 0.81640625, "learning_rate": 1.9003523460773044e-06, "loss": 0.8426, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6033 }, { "epoch": 0.8205058471580092, "grad_norm": 1.21875, "learning_rate": 1.8975687998275093e-06, "loss": 0.7344, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6034 }, { "epoch": 0.8206418275768289, "grad_norm": 0.30859375, "learning_rate": 1.8947870799647638e-06, "loss": 0.6517, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6035 }, { "epoch": 0.8207778079956486, "grad_norm": 0.447265625, "learning_rate": 1.8920071871161093e-06, "loss": 0.9225, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6036 }, { "epoch": 0.8209137884144683, "grad_norm": 0.353515625, "learning_rate": 1.8892291219081605e-06, "loss": 0.6087, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6037 }, { "epoch": 0.821049768833288, "grad_norm": 0.3359375, "learning_rate": 1.8864528849671338e-06, "loss": 0.5166, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6038 }, { "epoch": 0.8211857492521077, "grad_norm": 0.345703125, "learning_rate": 1.883678476918821e-06, "loss": 0.5173, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6039 }, { "epoch": 0.8213217296709274, "grad_norm": 0.248046875, "learning_rate": 1.880905898388612e-06, "loss": 0.4884, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6040 }, { "epoch": 0.8214577100897471, "grad_norm": 0.435546875, "learning_rate": 1.8781351500014767e-06, "loss": 0.8859, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6041 }, { "epoch": 0.8215936905085668, "grad_norm": 0.33984375, "learning_rate": 1.8753662323819788e-06, "loss": 0.5829, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6042 }, { "epoch": 0.8217296709273865, "grad_norm": 0.314453125, "learning_rate": 1.8725991461542624e-06, "loss": 0.474, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6043 }, { "epoch": 0.8218656513462061, "grad_norm": 0.3671875, "learning_rate": 1.8698338919420667e-06, "loss": 0.5422, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6044 }, { "epoch": 0.8220016317650258, "grad_norm": 0.28515625, "learning_rate": 1.8670704703687104e-06, "loss": 0.631, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6045 }, { "epoch": 0.8221376121838455, "grad_norm": 0.36328125, "learning_rate": 1.8643088820571054e-06, "loss": 0.5537, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6046 }, { "epoch": 0.8222735926026652, "grad_norm": 0.361328125, "learning_rate": 1.8615491276297437e-06, "loss": 0.7686, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6047 }, { "epoch": 0.8224095730214849, "grad_norm": 1.078125, "learning_rate": 1.8587912077087134e-06, "loss": 0.8219, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6048 }, { "epoch": 0.8225455534403046, "grad_norm": 0.66015625, "learning_rate": 1.856035122915677e-06, "loss": 0.6667, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6049 }, { "epoch": 0.8226815338591242, "grad_norm": 0.58203125, "learning_rate": 1.8532808738718943e-06, "loss": 0.7224, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6050 }, { "epoch": 0.822817514277944, "grad_norm": 0.283203125, "learning_rate": 1.8505284611982033e-06, "loss": 0.6017, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6051 }, { "epoch": 0.8229534946967637, "grad_norm": 0.5234375, "learning_rate": 1.8477778855150353e-06, "loss": 0.6848, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6052 }, { "epoch": 0.8230894751155834, "grad_norm": 0.328125, "learning_rate": 1.8450291474423999e-06, "loss": 0.6355, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6053 }, { "epoch": 0.8232254555344031, "grad_norm": 0.37109375, "learning_rate": 1.8422822475999003e-06, "loss": 0.5618, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6054 }, { "epoch": 0.8233614359532228, "grad_norm": 0.76953125, "learning_rate": 1.8395371866067159e-06, "loss": 0.8423, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6055 }, { "epoch": 0.8234974163720424, "grad_norm": 0.1904296875, "learning_rate": 1.8367939650816224e-06, "loss": 0.3128, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6056 }, { "epoch": 0.8236333967908621, "grad_norm": 0.255859375, "learning_rate": 1.8340525836429713e-06, "loss": 0.5293, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6057 }, { "epoch": 0.8237693772096818, "grad_norm": 0.4296875, "learning_rate": 1.8313130429087078e-06, "loss": 0.6473, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6058 }, { "epoch": 0.8239053576285015, "grad_norm": 0.3515625, "learning_rate": 1.8285753434963539e-06, "loss": 0.5841, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6059 }, { "epoch": 0.8240413380473212, "grad_norm": 0.28515625, "learning_rate": 1.82583948602302e-06, "loss": 0.6571, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6060 }, { "epoch": 0.8241773184661408, "grad_norm": 0.609375, "learning_rate": 1.8231054711054075e-06, "loss": 0.6893, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6061 }, { "epoch": 0.8243132988849605, "grad_norm": 0.208984375, "learning_rate": 1.8203732993597912e-06, "loss": 0.4012, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6062 }, { "epoch": 0.8244492793037802, "grad_norm": 0.359375, "learning_rate": 1.8176429714020394e-06, "loss": 0.6729, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6063 }, { "epoch": 0.8245852597226, "grad_norm": 0.53515625, "learning_rate": 1.8149144878475989e-06, "loss": 0.6268, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6064 }, { "epoch": 0.8247212401414197, "grad_norm": 0.2412109375, "learning_rate": 1.8121878493115053e-06, "loss": 0.4741, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6065 }, { "epoch": 0.8248572205602394, "grad_norm": 0.419921875, "learning_rate": 1.8094630564083737e-06, "loss": 0.5539, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6066 }, { "epoch": 0.824993200979059, "grad_norm": 0.421875, "learning_rate": 1.8067401097524095e-06, "loss": 0.5433, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6067 }, { "epoch": 0.8251291813978787, "grad_norm": 0.369140625, "learning_rate": 1.804019009957394e-06, "loss": 0.6001, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6068 }, { "epoch": 0.8252651618166984, "grad_norm": 0.59375, "learning_rate": 1.8012997576367008e-06, "loss": 0.8298, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6069 }, { "epoch": 0.8254011422355181, "grad_norm": 0.326171875, "learning_rate": 1.7985823534032775e-06, "loss": 0.6254, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6070 }, { "epoch": 0.8255371226543378, "grad_norm": 0.390625, "learning_rate": 1.7958667978696654e-06, "loss": 0.6611, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6071 }, { "epoch": 0.8256731030731574, "grad_norm": 0.76953125, "learning_rate": 1.793153091647979e-06, "loss": 0.7155, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6072 }, { "epoch": 0.8258090834919771, "grad_norm": 0.470703125, "learning_rate": 1.7904412353499256e-06, "loss": 0.5961, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6073 }, { "epoch": 0.8259450639107968, "grad_norm": 0.77734375, "learning_rate": 1.7877312295867854e-06, "loss": 0.5871, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6074 }, { "epoch": 0.8260810443296165, "grad_norm": 0.380859375, "learning_rate": 1.7850230749694331e-06, "loss": 0.7272, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6075 }, { "epoch": 0.8262170247484363, "grad_norm": 0.77734375, "learning_rate": 1.7823167721083145e-06, "loss": 0.8006, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6076 }, { "epoch": 0.826353005167256, "grad_norm": 0.3515625, "learning_rate": 1.7796123216134676e-06, "loss": 0.5994, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6077 }, { "epoch": 0.8264889855860756, "grad_norm": 0.486328125, "learning_rate": 1.7769097240945043e-06, "loss": 0.828, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6078 }, { "epoch": 0.8266249660048953, "grad_norm": 0.376953125, "learning_rate": 1.7742089801606278e-06, "loss": 0.7899, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6079 }, { "epoch": 0.826760946423715, "grad_norm": 0.349609375, "learning_rate": 1.771510090420613e-06, "loss": 0.7406, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6080 }, { "epoch": 0.8268969268425347, "grad_norm": 0.37109375, "learning_rate": 1.7688130554828297e-06, "loss": 0.6947, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6081 }, { "epoch": 0.8270329072613544, "grad_norm": 0.546875, "learning_rate": 1.7661178759552156e-06, "loss": 0.625, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6082 }, { "epoch": 0.827168887680174, "grad_norm": 0.255859375, "learning_rate": 1.763424552445303e-06, "loss": 0.5521, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6083 }, { "epoch": 0.8273048680989937, "grad_norm": 0.66015625, "learning_rate": 1.7607330855601945e-06, "loss": 0.7264, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6084 }, { "epoch": 0.8274408485178134, "grad_norm": 0.455078125, "learning_rate": 1.7580434759065857e-06, "loss": 0.4012, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6085 }, { "epoch": 0.8275768289366331, "grad_norm": 0.353515625, "learning_rate": 1.7553557240907414e-06, "loss": 0.7262, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6086 }, { "epoch": 0.8277128093554528, "grad_norm": 0.408203125, "learning_rate": 1.7526698307185186e-06, "loss": 0.8055, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6087 }, { "epoch": 0.8278487897742725, "grad_norm": 0.419921875, "learning_rate": 1.749985796395347e-06, "loss": 0.6789, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6088 }, { "epoch": 0.8279847701930922, "grad_norm": 0.5234375, "learning_rate": 1.747303621726244e-06, "loss": 0.5895, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6089 }, { "epoch": 0.8281207506119119, "grad_norm": 0.32421875, "learning_rate": 1.7446233073158003e-06, "loss": 0.4627, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6090 }, { "epoch": 0.8282567310307316, "grad_norm": 0.29296875, "learning_rate": 1.741944853768196e-06, "loss": 0.5332, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6091 }, { "epoch": 0.8283927114495513, "grad_norm": 0.3828125, "learning_rate": 1.7392682616871836e-06, "loss": 0.6322, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6092 }, { "epoch": 0.828528691868371, "grad_norm": 0.3203125, "learning_rate": 1.736593531676104e-06, "loss": 0.5063, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6093 }, { "epoch": 0.8286646722871907, "grad_norm": 0.328125, "learning_rate": 1.7339206643378692e-06, "loss": 0.5324, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6094 }, { "epoch": 0.8288006527060103, "grad_norm": 0.349609375, "learning_rate": 1.7312496602749795e-06, "loss": 0.5697, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6095 }, { "epoch": 0.82893663312483, "grad_norm": 0.427734375, "learning_rate": 1.7285805200895088e-06, "loss": 0.8109, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6096 }, { "epoch": 0.8290726135436497, "grad_norm": 0.3984375, "learning_rate": 1.7259132443831194e-06, "loss": 0.6464, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6097 }, { "epoch": 0.8292085939624694, "grad_norm": 0.279296875, "learning_rate": 1.7232478337570413e-06, "loss": 0.5197, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6098 }, { "epoch": 0.8293445743812891, "grad_norm": 0.6171875, "learning_rate": 1.720584288812096e-06, "loss": 0.62, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6099 }, { "epoch": 0.8294805548001087, "grad_norm": 0.3359375, "learning_rate": 1.7179226101486746e-06, "loss": 0.4813, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6100 }, { "epoch": 0.8296165352189284, "grad_norm": 0.28515625, "learning_rate": 1.7152627983667558e-06, "loss": 0.5682, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6101 }, { "epoch": 0.8297525156377482, "grad_norm": 0.51953125, "learning_rate": 1.7126048540658913e-06, "loss": 0.5181, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6102 }, { "epoch": 0.8298884960565679, "grad_norm": 0.40234375, "learning_rate": 1.7099487778452162e-06, "loss": 0.5786, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6103 }, { "epoch": 0.8300244764753876, "grad_norm": 0.4765625, "learning_rate": 1.7072945703034393e-06, "loss": 0.7336, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6104 }, { "epoch": 0.8301604568942073, "grad_norm": 0.2275390625, "learning_rate": 1.7046422320388556e-06, "loss": 0.4385, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6105 }, { "epoch": 0.8302964373130269, "grad_norm": 0.404296875, "learning_rate": 1.701991763649331e-06, "loss": 0.8257, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6106 }, { "epoch": 0.8304324177318466, "grad_norm": 0.55859375, "learning_rate": 1.6993431657323167e-06, "loss": 0.8289, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6107 }, { "epoch": 0.8305683981506663, "grad_norm": 0.484375, "learning_rate": 1.696696438884835e-06, "loss": 0.3883, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6108 }, { "epoch": 0.830704378569486, "grad_norm": 0.2734375, "learning_rate": 1.6940515837034954e-06, "loss": 0.5664, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6109 }, { "epoch": 0.8308403589883057, "grad_norm": 0.67578125, "learning_rate": 1.6914086007844754e-06, "loss": 0.6776, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6110 }, { "epoch": 0.8309763394071253, "grad_norm": 0.49609375, "learning_rate": 1.6887674907235407e-06, "loss": 0.7702, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6111 }, { "epoch": 0.831112319825945, "grad_norm": 0.1865234375, "learning_rate": 1.6861282541160251e-06, "loss": 0.3193, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6112 }, { "epoch": 0.8312483002447647, "grad_norm": 0.30859375, "learning_rate": 1.6834908915568494e-06, "loss": 0.5352, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6113 }, { "epoch": 0.8313842806635845, "grad_norm": 0.5390625, "learning_rate": 1.6808554036405024e-06, "loss": 0.7896, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6114 }, { "epoch": 0.8315202610824042, "grad_norm": 0.37109375, "learning_rate": 1.6782217909610609e-06, "loss": 0.6965, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6115 }, { "epoch": 0.8316562415012239, "grad_norm": 0.6171875, "learning_rate": 1.6755900541121673e-06, "loss": 0.6302, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6116 }, { "epoch": 0.8317922219200435, "grad_norm": 0.34765625, "learning_rate": 1.6729601936870532e-06, "loss": 0.6938, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6117 }, { "epoch": 0.8319282023388632, "grad_norm": 0.39453125, "learning_rate": 1.6703322102785168e-06, "loss": 0.8035, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6118 }, { "epoch": 0.8320641827576829, "grad_norm": 0.4140625, "learning_rate": 1.667706104478941e-06, "loss": 0.8099, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6119 }, { "epoch": 0.8322001631765026, "grad_norm": 1.1484375, "learning_rate": 1.665081876880278e-06, "loss": 0.6377, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6120 }, { "epoch": 0.8323361435953223, "grad_norm": 0.328125, "learning_rate": 1.662459528074064e-06, "loss": 0.5768, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6121 }, { "epoch": 0.832472124014142, "grad_norm": 0.6875, "learning_rate": 1.659839058651409e-06, "loss": 0.8504, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6122 }, { "epoch": 0.8326081044329616, "grad_norm": 0.419921875, "learning_rate": 1.657220469202997e-06, "loss": 0.63, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6123 }, { "epoch": 0.8327440848517813, "grad_norm": 0.49609375, "learning_rate": 1.6546037603190922e-06, "loss": 0.7985, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6124 }, { "epoch": 0.832880065270601, "grad_norm": 0.4140625, "learning_rate": 1.651988932589529e-06, "loss": 0.6035, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6125 }, { "epoch": 0.8330160456894207, "grad_norm": 0.298828125, "learning_rate": 1.6493759866037263e-06, "loss": 0.6226, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6126 }, { "epoch": 0.8331520261082405, "grad_norm": 0.361328125, "learning_rate": 1.6467649229506688e-06, "loss": 0.6287, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6127 }, { "epoch": 0.8332880065270601, "grad_norm": 0.53125, "learning_rate": 1.644155742218927e-06, "loss": 0.5755, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6128 }, { "epoch": 0.8334239869458798, "grad_norm": 0.361328125, "learning_rate": 1.641548444996639e-06, "loss": 0.6646, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6129 }, { "epoch": 0.8335599673646995, "grad_norm": 0.578125, "learning_rate": 1.6389430318715239e-06, "loss": 0.5446, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6130 }, { "epoch": 0.8336959477835192, "grad_norm": 0.765625, "learning_rate": 1.6363395034308704e-06, "loss": 0.9019, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6131 }, { "epoch": 0.8338319282023389, "grad_norm": 0.30078125, "learning_rate": 1.6337378602615505e-06, "loss": 0.6268, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6132 }, { "epoch": 0.8339679086211585, "grad_norm": 0.3515625, "learning_rate": 1.6311381029500007e-06, "loss": 0.7847, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6133 }, { "epoch": 0.8341038890399782, "grad_norm": 0.412109375, "learning_rate": 1.628540232082243e-06, "loss": 0.6143, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6134 }, { "epoch": 0.8342398694587979, "grad_norm": 0.439453125, "learning_rate": 1.6259442482438648e-06, "loss": 0.5942, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6135 }, { "epoch": 0.8343758498776176, "grad_norm": 0.330078125, "learning_rate": 1.6233501520200378e-06, "loss": 0.5578, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6136 }, { "epoch": 0.8345118302964373, "grad_norm": 0.287109375, "learning_rate": 1.6207579439954969e-06, "loss": 0.555, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6137 }, { "epoch": 0.834647810715257, "grad_norm": 0.3203125, "learning_rate": 1.618167624754563e-06, "loss": 0.6126, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6138 }, { "epoch": 0.8347837911340767, "grad_norm": 0.365234375, "learning_rate": 1.6155791948811217e-06, "loss": 0.637, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6139 }, { "epoch": 0.8349197715528964, "grad_norm": 0.71484375, "learning_rate": 1.6129926549586417e-06, "loss": 0.6037, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6140 }, { "epoch": 0.8350557519717161, "grad_norm": 0.36328125, "learning_rate": 1.6104080055701531e-06, "loss": 0.4766, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6141 }, { "epoch": 0.8351917323905358, "grad_norm": 0.388671875, "learning_rate": 1.6078252472982758e-06, "loss": 0.8029, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6142 }, { "epoch": 0.8353277128093555, "grad_norm": 0.74609375, "learning_rate": 1.6052443807251883e-06, "loss": 0.6532, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6143 }, { "epoch": 0.8354636932281752, "grad_norm": 0.44921875, "learning_rate": 1.6026654064326553e-06, "loss": 0.4847, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6144 }, { "epoch": 0.8355996736469948, "grad_norm": 0.390625, "learning_rate": 1.6000883250020026e-06, "loss": 0.5566, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6145 }, { "epoch": 0.8357356540658145, "grad_norm": 0.283203125, "learning_rate": 1.5975131370141428e-06, "loss": 0.4728, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6146 }, { "epoch": 0.8358716344846342, "grad_norm": 0.365234375, "learning_rate": 1.5949398430495478e-06, "loss": 0.658, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6147 }, { "epoch": 0.8360076149034539, "grad_norm": 0.55078125, "learning_rate": 1.592368443688277e-06, "loss": 0.763, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6148 }, { "epoch": 0.8361435953222736, "grad_norm": 0.8203125, "learning_rate": 1.5897989395099478e-06, "loss": 0.8394, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6149 }, { "epoch": 0.8362795757410932, "grad_norm": 0.76171875, "learning_rate": 1.5872313310937647e-06, "loss": 0.7567, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6150 }, { "epoch": 0.8364155561599129, "grad_norm": 0.44140625, "learning_rate": 1.5846656190184917e-06, "loss": 0.7301, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6151 }, { "epoch": 0.8365515365787327, "grad_norm": 0.404296875, "learning_rate": 1.5821018038624768e-06, "loss": 0.5689, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6152 }, { "epoch": 0.8366875169975524, "grad_norm": 1.03125, "learning_rate": 1.5795398862036315e-06, "loss": 0.6006, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6153 }, { "epoch": 0.8368234974163721, "grad_norm": 0.494140625, "learning_rate": 1.576979866619448e-06, "loss": 0.5496, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6154 }, { "epoch": 0.8369594778351918, "grad_norm": 0.2421875, "learning_rate": 1.5744217456869803e-06, "loss": 0.494, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6155 }, { "epoch": 0.8370954582540114, "grad_norm": 0.345703125, "learning_rate": 1.571865523982865e-06, "loss": 0.6626, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6156 }, { "epoch": 0.8372314386728311, "grad_norm": 0.58984375, "learning_rate": 1.5693112020833012e-06, "loss": 0.6183, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6157 }, { "epoch": 0.8373674190916508, "grad_norm": 0.296875, "learning_rate": 1.5667587805640682e-06, "loss": 0.6257, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6158 }, { "epoch": 0.8375033995104705, "grad_norm": 0.5390625, "learning_rate": 1.5642082600005104e-06, "loss": 0.7176, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6159 }, { "epoch": 0.8376393799292902, "grad_norm": 0.59375, "learning_rate": 1.56165964096755e-06, "loss": 0.6727, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6160 }, { "epoch": 0.8377753603481098, "grad_norm": 0.455078125, "learning_rate": 1.5591129240396717e-06, "loss": 0.473, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6161 }, { "epoch": 0.8379113407669295, "grad_norm": 0.330078125, "learning_rate": 1.5565681097909413e-06, "loss": 0.3494, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6162 }, { "epoch": 0.8380473211857492, "grad_norm": 0.337890625, "learning_rate": 1.5540251987949872e-06, "loss": 0.6593, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6163 }, { "epoch": 0.8381833016045689, "grad_norm": 0.470703125, "learning_rate": 1.5514841916250157e-06, "loss": 0.7886, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6164 }, { "epoch": 0.8383192820233887, "grad_norm": 0.5546875, "learning_rate": 1.5489450888537982e-06, "loss": 0.7267, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6165 }, { "epoch": 0.8384552624422084, "grad_norm": 0.46484375, "learning_rate": 1.546407891053684e-06, "loss": 0.6509, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6166 }, { "epoch": 0.838591242861028, "grad_norm": 0.64453125, "learning_rate": 1.5438725987965818e-06, "loss": 0.5929, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6167 }, { "epoch": 0.8387272232798477, "grad_norm": 0.484375, "learning_rate": 1.5413392126539839e-06, "loss": 0.5283, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6168 }, { "epoch": 0.8388632036986674, "grad_norm": 0.314453125, "learning_rate": 1.5388077331969409e-06, "loss": 0.6007, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6169 }, { "epoch": 0.8389991841174871, "grad_norm": 0.38671875, "learning_rate": 1.5362781609960853e-06, "loss": 0.6912, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6170 }, { "epoch": 0.8391351645363068, "grad_norm": 0.828125, "learning_rate": 1.5337504966216077e-06, "loss": 0.7435, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6171 }, { "epoch": 0.8392711449551264, "grad_norm": 0.5859375, "learning_rate": 1.531224740643279e-06, "loss": 0.4271, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6172 }, { "epoch": 0.8394071253739461, "grad_norm": 0.578125, "learning_rate": 1.5287008936304314e-06, "loss": 0.6475, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6173 }, { "epoch": 0.8395431057927658, "grad_norm": 0.470703125, "learning_rate": 1.5261789561519768e-06, "loss": 0.828, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6174 }, { "epoch": 0.8396790862115855, "grad_norm": 0.5625, "learning_rate": 1.5236589287763836e-06, "loss": 0.5889, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6175 }, { "epoch": 0.8398150666304052, "grad_norm": 0.451171875, "learning_rate": 1.521140812071703e-06, "loss": 0.7739, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6176 }, { "epoch": 0.839951047049225, "grad_norm": 1.0, "learning_rate": 1.5186246066055465e-06, "loss": 0.7912, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6177 }, { "epoch": 0.8400870274680446, "grad_norm": 0.306640625, "learning_rate": 1.516110312945095e-06, "loss": 0.6709, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6178 }, { "epoch": 0.8402230078868643, "grad_norm": 0.41015625, "learning_rate": 1.513597931657106e-06, "loss": 0.6545, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6179 }, { "epoch": 0.840358988305684, "grad_norm": 0.57421875, "learning_rate": 1.5110874633078943e-06, "loss": 0.6322, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6180 }, { "epoch": 0.8404949687245037, "grad_norm": 0.29296875, "learning_rate": 1.5085789084633596e-06, "loss": 0.5518, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6181 }, { "epoch": 0.8406309491433234, "grad_norm": 0.369140625, "learning_rate": 1.5060722676889516e-06, "loss": 0.7837, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6182 }, { "epoch": 0.840766929562143, "grad_norm": 0.42578125, "learning_rate": 1.5035675415497064e-06, "loss": 0.745, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6183 }, { "epoch": 0.8409029099809627, "grad_norm": 0.5859375, "learning_rate": 1.5010647306102121e-06, "loss": 0.84, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6184 }, { "epoch": 0.8410388903997824, "grad_norm": 0.30078125, "learning_rate": 1.4985638354346377e-06, "loss": 0.7352, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6185 }, { "epoch": 0.8411748708186021, "grad_norm": 0.5390625, "learning_rate": 1.496064856586713e-06, "loss": 0.7764, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6186 }, { "epoch": 0.8413108512374218, "grad_norm": 0.33984375, "learning_rate": 1.493567794629741e-06, "loss": 0.7004, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6187 }, { "epoch": 0.8414468316562415, "grad_norm": 0.44921875, "learning_rate": 1.491072650126587e-06, "loss": 0.4823, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6188 }, { "epoch": 0.8415828120750611, "grad_norm": 0.349609375, "learning_rate": 1.48857942363969e-06, "loss": 0.7192, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6189 }, { "epoch": 0.8417187924938809, "grad_norm": 0.369140625, "learning_rate": 1.4860881157310502e-06, "loss": 0.6852, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6190 }, { "epoch": 0.8418547729127006, "grad_norm": 0.5390625, "learning_rate": 1.483598726962243e-06, "loss": 0.8665, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6191 }, { "epoch": 0.8419907533315203, "grad_norm": 0.439453125, "learning_rate": 1.481111257894402e-06, "loss": 0.7516, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6192 }, { "epoch": 0.84212673375034, "grad_norm": 0.361328125, "learning_rate": 1.4786257090882393e-06, "loss": 0.627, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6193 }, { "epoch": 0.8422627141691597, "grad_norm": 0.484375, "learning_rate": 1.4761420811040229e-06, "loss": 0.7306, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6194 }, { "epoch": 0.8423986945879793, "grad_norm": 0.326171875, "learning_rate": 1.4736603745015964e-06, "loss": 0.5382, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6195 }, { "epoch": 0.842534675006799, "grad_norm": 0.30859375, "learning_rate": 1.471180589840363e-06, "loss": 0.6465, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6196 }, { "epoch": 0.8426706554256187, "grad_norm": 0.349609375, "learning_rate": 1.4687027276793008e-06, "loss": 0.4896, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6197 }, { "epoch": 0.8428066358444384, "grad_norm": 0.69140625, "learning_rate": 1.4662267885769455e-06, "loss": 0.7409, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6198 }, { "epoch": 0.8429426162632581, "grad_norm": 1.0546875, "learning_rate": 1.4637527730914092e-06, "loss": 0.6186, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6199 }, { "epoch": 0.8430785966820777, "grad_norm": 0.287109375, "learning_rate": 1.4612806817803604e-06, "loss": 0.5526, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6200 }, { "epoch": 0.8432145771008974, "grad_norm": 0.333984375, "learning_rate": 1.4588105152010435e-06, "loss": 0.6068, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6201 }, { "epoch": 0.8433505575197172, "grad_norm": 0.439453125, "learning_rate": 1.4563422739102596e-06, "loss": 0.7653, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6202 }, { "epoch": 0.8434865379385369, "grad_norm": 0.333984375, "learning_rate": 1.4538759584643847e-06, "loss": 0.498, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6203 }, { "epoch": 0.8436225183573566, "grad_norm": 0.392578125, "learning_rate": 1.451411569419352e-06, "loss": 0.7033, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6204 }, { "epoch": 0.8437584987761763, "grad_norm": 0.279296875, "learning_rate": 1.4489491073306693e-06, "loss": 0.5431, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6205 }, { "epoch": 0.8438944791949959, "grad_norm": 0.388671875, "learning_rate": 1.4464885727534018e-06, "loss": 0.5775, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6206 }, { "epoch": 0.8440304596138156, "grad_norm": 0.431640625, "learning_rate": 1.4440299662421887e-06, "loss": 0.652, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6207 }, { "epoch": 0.8441664400326353, "grad_norm": 0.609375, "learning_rate": 1.4415732883512235e-06, "loss": 0.6873, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6208 }, { "epoch": 0.844302420451455, "grad_norm": 0.427734375, "learning_rate": 1.4391185396342787e-06, "loss": 0.7572, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6209 }, { "epoch": 0.8444384008702747, "grad_norm": 0.318359375, "learning_rate": 1.4366657206446777e-06, "loss": 0.533, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6210 }, { "epoch": 0.8445743812890943, "grad_norm": 0.62109375, "learning_rate": 1.4342148319353222e-06, "loss": 0.7599, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6211 }, { "epoch": 0.844710361707914, "grad_norm": 0.443359375, "learning_rate": 1.431765874058667e-06, "loss": 0.7847, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6212 }, { "epoch": 0.8448463421267337, "grad_norm": 1.0390625, "learning_rate": 1.4293188475667418e-06, "loss": 0.7811, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6213 }, { "epoch": 0.8449823225455534, "grad_norm": 0.2734375, "learning_rate": 1.4268737530111332e-06, "loss": 0.4862, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6214 }, { "epoch": 0.8451183029643732, "grad_norm": 1.484375, "learning_rate": 1.4244305909429968e-06, "loss": 0.7189, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6215 }, { "epoch": 0.8452542833831929, "grad_norm": 0.380859375, "learning_rate": 1.4219893619130498e-06, "loss": 0.7246, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6216 }, { "epoch": 0.8453902638020125, "grad_norm": 0.330078125, "learning_rate": 1.4195500664715778e-06, "loss": 0.618, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6217 }, { "epoch": 0.8455262442208322, "grad_norm": 0.26953125, "learning_rate": 1.4171127051684242e-06, "loss": 0.5355, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6218 }, { "epoch": 0.8456622246396519, "grad_norm": 0.26953125, "learning_rate": 1.4146772785530038e-06, "loss": 0.4595, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6219 }, { "epoch": 0.8457982050584716, "grad_norm": 0.357421875, "learning_rate": 1.4122437871742877e-06, "loss": 0.7438, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6220 }, { "epoch": 0.8459341854772913, "grad_norm": 0.306640625, "learning_rate": 1.4098122315808194e-06, "loss": 0.4723, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6221 }, { "epoch": 0.846070165896111, "grad_norm": 0.359375, "learning_rate": 1.4073826123206946e-06, "loss": 0.7096, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6222 }, { "epoch": 0.8462061463149306, "grad_norm": 0.5234375, "learning_rate": 1.4049549299415866e-06, "loss": 0.5817, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6223 }, { "epoch": 0.8463421267337503, "grad_norm": 0.625, "learning_rate": 1.4025291849907186e-06, "loss": 0.5838, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6224 }, { "epoch": 0.84647810715257, "grad_norm": 0.26953125, "learning_rate": 1.4001053780148876e-06, "loss": 0.575, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6225 }, { "epoch": 0.8466140875713897, "grad_norm": 0.365234375, "learning_rate": 1.3976835095604446e-06, "loss": 0.6719, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6226 }, { "epoch": 0.8467500679902095, "grad_norm": 0.59765625, "learning_rate": 1.3952635801733138e-06, "loss": 0.6147, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6227 }, { "epoch": 0.8468860484090291, "grad_norm": 0.380859375, "learning_rate": 1.3928455903989724e-06, "loss": 0.7093, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6228 }, { "epoch": 0.8470220288278488, "grad_norm": 0.294921875, "learning_rate": 1.3904295407824686e-06, "loss": 0.5487, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6229 }, { "epoch": 0.8471580092466685, "grad_norm": 0.2177734375, "learning_rate": 1.388015431868408e-06, "loss": 0.3693, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6230 }, { "epoch": 0.8472939896654882, "grad_norm": 0.337890625, "learning_rate": 1.3856032642009576e-06, "loss": 0.598, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6231 }, { "epoch": 0.8474299700843079, "grad_norm": 0.30859375, "learning_rate": 1.3831930383238534e-06, "loss": 0.4945, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6232 }, { "epoch": 0.8475659505031276, "grad_norm": 0.4296875, "learning_rate": 1.3807847547803866e-06, "loss": 0.6198, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6233 }, { "epoch": 0.8477019309219472, "grad_norm": 0.375, "learning_rate": 1.3783784141134171e-06, "loss": 0.7093, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6234 }, { "epoch": 0.8478379113407669, "grad_norm": 0.423828125, "learning_rate": 1.375974016865359e-06, "loss": 0.5086, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6235 }, { "epoch": 0.8479738917595866, "grad_norm": 0.24609375, "learning_rate": 1.3735715635781988e-06, "loss": 0.3799, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6236 }, { "epoch": 0.8481098721784063, "grad_norm": 0.314453125, "learning_rate": 1.371171054793473e-06, "loss": 0.5573, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6237 }, { "epoch": 0.848245852597226, "grad_norm": 0.68359375, "learning_rate": 1.3687724910522903e-06, "loss": 0.4863, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6238 }, { "epoch": 0.8483818330160456, "grad_norm": 0.462890625, "learning_rate": 1.3663758728953114e-06, "loss": 0.6906, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6239 }, { "epoch": 0.8485178134348654, "grad_norm": 0.33203125, "learning_rate": 1.3639812008627685e-06, "loss": 0.612, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6240 }, { "epoch": 0.8486537938536851, "grad_norm": 0.47265625, "learning_rate": 1.361588475494444e-06, "loss": 0.6073, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6241 }, { "epoch": 0.8487897742725048, "grad_norm": 0.357421875, "learning_rate": 1.359197697329695e-06, "loss": 0.6045, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6242 }, { "epoch": 0.8489257546913245, "grad_norm": 0.58203125, "learning_rate": 1.3568088669074254e-06, "loss": 0.6816, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6243 }, { "epoch": 0.8490617351101442, "grad_norm": 0.380859375, "learning_rate": 1.354421984766112e-06, "loss": 0.5245, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6244 }, { "epoch": 0.8491977155289638, "grad_norm": 0.24609375, "learning_rate": 1.352037051443783e-06, "loss": 0.451, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6245 }, { "epoch": 0.8493336959477835, "grad_norm": 0.4296875, "learning_rate": 1.3496540674780356e-06, "loss": 0.516, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6246 }, { "epoch": 0.8494696763666032, "grad_norm": 0.486328125, "learning_rate": 1.3472730334060192e-06, "loss": 0.744, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6247 }, { "epoch": 0.8496056567854229, "grad_norm": 0.6640625, "learning_rate": 1.3448939497644508e-06, "loss": 0.764, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6248 }, { "epoch": 0.8497416372042426, "grad_norm": 0.419921875, "learning_rate": 1.342516817089603e-06, "loss": 0.5986, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6249 }, { "epoch": 0.8498776176230622, "grad_norm": 0.412109375, "learning_rate": 1.3401416359173136e-06, "loss": 0.6278, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6250 }, { "epoch": 0.8500135980418819, "grad_norm": 0.25390625, "learning_rate": 1.3377684067829745e-06, "loss": 0.423, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6251 }, { "epoch": 0.8501495784607016, "grad_norm": 0.302734375, "learning_rate": 1.3353971302215418e-06, "loss": 0.6159, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6252 }, { "epoch": 0.8502855588795214, "grad_norm": 0.294921875, "learning_rate": 1.3330278067675294e-06, "loss": 0.6457, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6253 }, { "epoch": 0.8504215392983411, "grad_norm": 0.365234375, "learning_rate": 1.3306604369550147e-06, "loss": 0.585, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6254 }, { "epoch": 0.8505575197171608, "grad_norm": 0.443359375, "learning_rate": 1.328295021317626e-06, "loss": 0.7257, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6255 }, { "epoch": 0.8506935001359804, "grad_norm": 0.6015625, "learning_rate": 1.3259315603885637e-06, "loss": 0.9342, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6256 }, { "epoch": 0.8508294805548001, "grad_norm": 0.41015625, "learning_rate": 1.3235700547005758e-06, "loss": 0.6188, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6257 }, { "epoch": 0.8509654609736198, "grad_norm": 0.39453125, "learning_rate": 1.3212105047859768e-06, "loss": 0.5247, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6258 }, { "epoch": 0.8511014413924395, "grad_norm": 0.3203125, "learning_rate": 1.3188529111766368e-06, "loss": 0.5659, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6259 }, { "epoch": 0.8512374218112592, "grad_norm": 0.2333984375, "learning_rate": 1.3164972744039895e-06, "loss": 0.4569, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6260 }, { "epoch": 0.8513734022300788, "grad_norm": 0.421875, "learning_rate": 1.3141435949990188e-06, "loss": 0.5309, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6261 }, { "epoch": 0.8515093826488985, "grad_norm": 0.5546875, "learning_rate": 1.3117918734922775e-06, "loss": 0.6729, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6262 }, { "epoch": 0.8516453630677182, "grad_norm": 0.400390625, "learning_rate": 1.3094421104138688e-06, "loss": 0.6943, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6263 }, { "epoch": 0.8517813434865379, "grad_norm": 0.515625, "learning_rate": 1.3070943062934627e-06, "loss": 0.784, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6264 }, { "epoch": 0.8519173239053577, "grad_norm": 0.333984375, "learning_rate": 1.304748461660278e-06, "loss": 0.5809, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6265 }, { "epoch": 0.8520533043241774, "grad_norm": 0.50390625, "learning_rate": 1.3024045770431004e-06, "loss": 0.8079, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6266 }, { "epoch": 0.852189284742997, "grad_norm": 0.361328125, "learning_rate": 1.3000626529702664e-06, "loss": 0.689, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6267 }, { "epoch": 0.8523252651618167, "grad_norm": 0.6328125, "learning_rate": 1.2977226899696781e-06, "loss": 0.8451, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6268 }, { "epoch": 0.8524612455806364, "grad_norm": 0.66015625, "learning_rate": 1.2953846885687893e-06, "loss": 0.682, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6269 }, { "epoch": 0.8525972259994561, "grad_norm": 0.51171875, "learning_rate": 1.293048649294617e-06, "loss": 0.6854, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6270 }, { "epoch": 0.8527332064182758, "grad_norm": 0.31640625, "learning_rate": 1.290714572673728e-06, "loss": 0.5073, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6271 }, { "epoch": 0.8528691868370954, "grad_norm": 0.341796875, "learning_rate": 1.288382459232258e-06, "loss": 0.6434, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6272 }, { "epoch": 0.8530051672559151, "grad_norm": 0.421875, "learning_rate": 1.2860523094958876e-06, "loss": 0.8908, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6273 }, { "epoch": 0.8531411476747348, "grad_norm": 0.287109375, "learning_rate": 1.2837241239898669e-06, "loss": 0.4588, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6274 }, { "epoch": 0.8532771280935545, "grad_norm": 0.6328125, "learning_rate": 1.2813979032389923e-06, "loss": 0.6112, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6275 }, { "epoch": 0.8534131085123742, "grad_norm": 0.6484375, "learning_rate": 1.2790736477676257e-06, "loss": 0.6396, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6276 }, { "epoch": 0.8535490889311939, "grad_norm": 0.5625, "learning_rate": 1.2767513580996805e-06, "loss": 0.6636, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6277 }, { "epoch": 0.8536850693500136, "grad_norm": 0.578125, "learning_rate": 1.2744310347586332e-06, "loss": 0.7764, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6278 }, { "epoch": 0.8538210497688333, "grad_norm": 0.5234375, "learning_rate": 1.2721126782675065e-06, "loss": 0.5841, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6279 }, { "epoch": 0.853957030187653, "grad_norm": 0.32421875, "learning_rate": 1.2697962891488924e-06, "loss": 0.4661, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6280 }, { "epoch": 0.8540930106064727, "grad_norm": 0.25, "learning_rate": 1.267481867924929e-06, "loss": 0.388, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6281 }, { "epoch": 0.8542289910252924, "grad_norm": 0.44921875, "learning_rate": 1.2651694151173199e-06, "loss": 0.6984, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6282 }, { "epoch": 0.854364971444112, "grad_norm": 0.48046875, "learning_rate": 1.262858931247314e-06, "loss": 0.3963, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6283 }, { "epoch": 0.8545009518629317, "grad_norm": 0.328125, "learning_rate": 1.2605504168357286e-06, "loss": 0.5905, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6284 }, { "epoch": 0.8546369322817514, "grad_norm": 0.51953125, "learning_rate": 1.258243872402929e-06, "loss": 0.626, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6285 }, { "epoch": 0.8547729127005711, "grad_norm": 0.3046875, "learning_rate": 1.2559392984688357e-06, "loss": 0.4603, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6286 }, { "epoch": 0.8549088931193908, "grad_norm": 0.515625, "learning_rate": 1.253636695552931e-06, "loss": 0.6652, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6287 }, { "epoch": 0.8550448735382105, "grad_norm": 0.396484375, "learning_rate": 1.2513360641742477e-06, "loss": 0.7576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6288 }, { "epoch": 0.8551808539570301, "grad_norm": 0.43359375, "learning_rate": 1.2490374048513799e-06, "loss": 0.7227, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6289 }, { "epoch": 0.8553168343758499, "grad_norm": 0.3671875, "learning_rate": 1.2467407181024683e-06, "loss": 0.7466, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6290 }, { "epoch": 0.8554528147946696, "grad_norm": 0.388671875, "learning_rate": 1.2444460044452188e-06, "loss": 0.5451, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6291 }, { "epoch": 0.8555887952134893, "grad_norm": 0.78125, "learning_rate": 1.2421532643968847e-06, "loss": 0.9622, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6292 }, { "epoch": 0.855724775632309, "grad_norm": 0.318359375, "learning_rate": 1.2398624984742801e-06, "loss": 0.3983, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6293 }, { "epoch": 0.8558607560511287, "grad_norm": 0.54296875, "learning_rate": 1.2375737071937698e-06, "loss": 0.7711, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6294 }, { "epoch": 0.8559967364699483, "grad_norm": 0.58203125, "learning_rate": 1.2352868910712768e-06, "loss": 0.6139, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6295 }, { "epoch": 0.856132716888768, "grad_norm": 0.81640625, "learning_rate": 1.233002050622275e-06, "loss": 0.6727, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6296 }, { "epoch": 0.8562686973075877, "grad_norm": 0.60546875, "learning_rate": 1.2307191863617985e-06, "loss": 0.754, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6297 }, { "epoch": 0.8564046777264074, "grad_norm": 0.384765625, "learning_rate": 1.2284382988044297e-06, "loss": 0.708, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6298 }, { "epoch": 0.8565406581452271, "grad_norm": 0.302734375, "learning_rate": 1.226159388464313e-06, "loss": 0.6602, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6299 }, { "epoch": 0.8566766385640467, "grad_norm": 0.55859375, "learning_rate": 1.2238824558551365e-06, "loss": 0.5532, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6300 }, { "epoch": 0.8568126189828664, "grad_norm": 0.5234375, "learning_rate": 1.2216075014901529e-06, "loss": 0.568, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6301 }, { "epoch": 0.8569485994016861, "grad_norm": 0.55859375, "learning_rate": 1.2193345258821631e-06, "loss": 0.646, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6302 }, { "epoch": 0.8570845798205059, "grad_norm": 0.259765625, "learning_rate": 1.2170635295435263e-06, "loss": 0.5226, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6303 }, { "epoch": 0.8572205602393256, "grad_norm": 0.4765625, "learning_rate": 1.2147945129861494e-06, "loss": 0.5701, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6304 }, { "epoch": 0.8573565406581453, "grad_norm": 0.734375, "learning_rate": 1.2125274767215e-06, "loss": 0.5944, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6305 }, { "epoch": 0.8574925210769649, "grad_norm": 0.42578125, "learning_rate": 1.2102624212605908e-06, "loss": 0.8024, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6306 }, { "epoch": 0.8576285014957846, "grad_norm": 0.390625, "learning_rate": 1.2079993471139983e-06, "loss": 0.5143, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6307 }, { "epoch": 0.8577644819146043, "grad_norm": 0.484375, "learning_rate": 1.2057382547918428e-06, "loss": 0.7666, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6308 }, { "epoch": 0.857900462333424, "grad_norm": 0.46875, "learning_rate": 1.2034791448038063e-06, "loss": 0.5586, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6309 }, { "epoch": 0.8580364427522437, "grad_norm": 0.4453125, "learning_rate": 1.201222017659115e-06, "loss": 0.6574, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6310 }, { "epoch": 0.8581724231710633, "grad_norm": 0.34375, "learning_rate": 1.1989668738665582e-06, "loss": 0.6896, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6311 }, { "epoch": 0.858308403589883, "grad_norm": 0.67578125, "learning_rate": 1.1967137139344675e-06, "loss": 0.8574, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6312 }, { "epoch": 0.8584443840087027, "grad_norm": 0.412109375, "learning_rate": 1.1944625383707377e-06, "loss": 0.7106, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6313 }, { "epoch": 0.8585803644275224, "grad_norm": 0.306640625, "learning_rate": 1.1922133476828058e-06, "loss": 0.5368, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6314 }, { "epoch": 0.8587163448463421, "grad_norm": 0.3828125, "learning_rate": 1.1899661423776732e-06, "loss": 0.6864, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6315 }, { "epoch": 0.8588523252651619, "grad_norm": 0.435546875, "learning_rate": 1.1877209229618825e-06, "loss": 0.6187, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6316 }, { "epoch": 0.8589883056839815, "grad_norm": 0.412109375, "learning_rate": 1.1854776899415366e-06, "loss": 0.8335, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6317 }, { "epoch": 0.8591242861028012, "grad_norm": 0.62109375, "learning_rate": 1.1832364438222855e-06, "loss": 0.744, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6318 }, { "epoch": 0.8592602665216209, "grad_norm": 0.294921875, "learning_rate": 1.1809971851093349e-06, "loss": 0.5716, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6319 }, { "epoch": 0.8593962469404406, "grad_norm": 0.443359375, "learning_rate": 1.178759914307439e-06, "loss": 0.7664, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6320 }, { "epoch": 0.8595322273592603, "grad_norm": 0.34765625, "learning_rate": 1.1765246319209111e-06, "loss": 0.6208, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6321 }, { "epoch": 0.85966820777808, "grad_norm": 0.412109375, "learning_rate": 1.1742913384536036e-06, "loss": 0.7773, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6322 }, { "epoch": 0.8598041881968996, "grad_norm": 0.455078125, "learning_rate": 1.1720600344089361e-06, "loss": 0.6089, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6323 }, { "epoch": 0.8599401686157193, "grad_norm": 0.28125, "learning_rate": 1.1698307202898651e-06, "loss": 0.5256, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6324 }, { "epoch": 0.860076149034539, "grad_norm": 0.392578125, "learning_rate": 1.1676033965989098e-06, "loss": 0.7031, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6325 }, { "epoch": 0.8602121294533587, "grad_norm": 0.34375, "learning_rate": 1.165378063838133e-06, "loss": 0.4371, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6326 }, { "epoch": 0.8603481098721784, "grad_norm": 0.451171875, "learning_rate": 1.1631547225091545e-06, "loss": 0.7064, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6327 }, { "epoch": 0.8604840902909981, "grad_norm": 0.37109375, "learning_rate": 1.1609333731131401e-06, "loss": 0.6099, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6328 }, { "epoch": 0.8606200707098178, "grad_norm": 0.328125, "learning_rate": 1.1587140161508126e-06, "loss": 0.5697, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6329 }, { "epoch": 0.8607560511286375, "grad_norm": 0.2412109375, "learning_rate": 1.1564966521224386e-06, "loss": 0.4137, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6330 }, { "epoch": 0.8608920315474572, "grad_norm": 0.357421875, "learning_rate": 1.1542812815278414e-06, "loss": 0.5441, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6331 }, { "epoch": 0.8610280119662769, "grad_norm": 0.240234375, "learning_rate": 1.1520679048663908e-06, "loss": 0.467, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6332 }, { "epoch": 0.8611639923850966, "grad_norm": 0.29296875, "learning_rate": 1.1498565226370118e-06, "loss": 0.519, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6333 }, { "epoch": 0.8612999728039162, "grad_norm": 0.5078125, "learning_rate": 1.1476471353381735e-06, "loss": 0.7173, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6334 }, { "epoch": 0.8614359532227359, "grad_norm": 0.318359375, "learning_rate": 1.1454397434679022e-06, "loss": 0.7349, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6335 }, { "epoch": 0.8615719336415556, "grad_norm": 0.453125, "learning_rate": 1.143234347523766e-06, "loss": 0.6773, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6336 }, { "epoch": 0.8617079140603753, "grad_norm": 0.26953125, "learning_rate": 1.1410309480028947e-06, "loss": 0.4331, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6337 }, { "epoch": 0.861843894479195, "grad_norm": 0.3671875, "learning_rate": 1.138829545401956e-06, "loss": 0.6681, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6338 }, { "epoch": 0.8619798748980146, "grad_norm": 0.71875, "learning_rate": 1.1366301402171775e-06, "loss": 0.8851, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6339 }, { "epoch": 0.8621158553168343, "grad_norm": 0.341796875, "learning_rate": 1.1344327329443293e-06, "loss": 0.4831, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6340 }, { "epoch": 0.8622518357356541, "grad_norm": 0.291015625, "learning_rate": 1.1322373240787321e-06, "loss": 0.6061, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6341 }, { "epoch": 0.8623878161544738, "grad_norm": 0.287109375, "learning_rate": 1.130043914115262e-06, "loss": 0.603, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6342 }, { "epoch": 0.8625237965732935, "grad_norm": 0.37109375, "learning_rate": 1.1278525035483367e-06, "loss": 0.5591, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6343 }, { "epoch": 0.8626597769921132, "grad_norm": 0.3125, "learning_rate": 1.1256630928719302e-06, "loss": 0.6956, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6344 }, { "epoch": 0.8627957574109328, "grad_norm": 0.859375, "learning_rate": 1.12347568257956e-06, "loss": 0.8364, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6345 }, { "epoch": 0.8629317378297525, "grad_norm": 0.404296875, "learning_rate": 1.1212902731642972e-06, "loss": 0.6657, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6346 }, { "epoch": 0.8630677182485722, "grad_norm": 0.318359375, "learning_rate": 1.1191068651187564e-06, "loss": 0.6702, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6347 }, { "epoch": 0.8632036986673919, "grad_norm": 0.427734375, "learning_rate": 1.1169254589351096e-06, "loss": 0.5622, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6348 }, { "epoch": 0.8633396790862116, "grad_norm": 0.46484375, "learning_rate": 1.114746055105067e-06, "loss": 0.6904, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6349 }, { "epoch": 0.8634756595050312, "grad_norm": 0.427734375, "learning_rate": 1.1125686541198966e-06, "loss": 0.6689, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6350 }, { "epoch": 0.8636116399238509, "grad_norm": 0.296875, "learning_rate": 1.110393256470409e-06, "loss": 0.6103, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6351 }, { "epoch": 0.8637476203426706, "grad_norm": 0.41796875, "learning_rate": 1.1082198626469687e-06, "loss": 0.8338, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6352 }, { "epoch": 0.8638836007614904, "grad_norm": 0.376953125, "learning_rate": 1.1060484731394805e-06, "loss": 0.6452, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6353 }, { "epoch": 0.8640195811803101, "grad_norm": 0.49609375, "learning_rate": 1.1038790884374062e-06, "loss": 0.4474, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6354 }, { "epoch": 0.8641555615991298, "grad_norm": 0.36328125, "learning_rate": 1.101711709029748e-06, "loss": 0.5776, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6355 }, { "epoch": 0.8642915420179494, "grad_norm": 0.4296875, "learning_rate": 1.099546335405065e-06, "loss": 0.7514, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6356 }, { "epoch": 0.8644275224367691, "grad_norm": 0.640625, "learning_rate": 1.0973829680514525e-06, "loss": 0.5153, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6357 }, { "epoch": 0.8645635028555888, "grad_norm": 0.80078125, "learning_rate": 1.0952216074565658e-06, "loss": 0.6471, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6358 }, { "epoch": 0.8646994832744085, "grad_norm": 0.2578125, "learning_rate": 1.0930622541075963e-06, "loss": 0.5361, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6359 }, { "epoch": 0.8648354636932282, "grad_norm": 1.140625, "learning_rate": 1.0909049084912948e-06, "loss": 0.7389, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6360 }, { "epoch": 0.8649714441120478, "grad_norm": 0.1845703125, "learning_rate": 1.0887495710939455e-06, "loss": 0.3566, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6361 }, { "epoch": 0.8651074245308675, "grad_norm": 0.26171875, "learning_rate": 1.0865962424013975e-06, "loss": 0.4657, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6362 }, { "epoch": 0.8652434049496872, "grad_norm": 0.498046875, "learning_rate": 1.0844449228990294e-06, "loss": 0.8089, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6363 }, { "epoch": 0.8653793853685069, "grad_norm": 0.4609375, "learning_rate": 1.08229561307178e-06, "loss": 0.7152, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6364 }, { "epoch": 0.8655153657873266, "grad_norm": 0.3125, "learning_rate": 1.080148313404127e-06, "loss": 0.6457, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6365 }, { "epoch": 0.8656513462061464, "grad_norm": 0.328125, "learning_rate": 1.0780030243801e-06, "loss": 0.5814, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6366 }, { "epoch": 0.865787326624966, "grad_norm": 0.306640625, "learning_rate": 1.0758597464832709e-06, "loss": 0.5444, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6367 }, { "epoch": 0.8659233070437857, "grad_norm": 0.380859375, "learning_rate": 1.0737184801967637e-06, "loss": 0.7056, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6368 }, { "epoch": 0.8660592874626054, "grad_norm": 0.369140625, "learning_rate": 1.0715792260032431e-06, "loss": 0.7274, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6369 }, { "epoch": 0.8661952678814251, "grad_norm": 0.482421875, "learning_rate": 1.0694419843849258e-06, "loss": 0.6997, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6370 }, { "epoch": 0.8663312483002448, "grad_norm": 0.41796875, "learning_rate": 1.0673067558235695e-06, "loss": 0.6799, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6371 }, { "epoch": 0.8664672287190645, "grad_norm": 0.435546875, "learning_rate": 1.0651735408004838e-06, "loss": 0.7671, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6372 }, { "epoch": 0.8666032091378841, "grad_norm": 0.640625, "learning_rate": 1.0630423397965184e-06, "loss": 0.616, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6373 }, { "epoch": 0.8667391895567038, "grad_norm": 0.3125, "learning_rate": 1.0609131532920746e-06, "loss": 0.6572, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6374 }, { "epoch": 0.8668751699755235, "grad_norm": 0.453125, "learning_rate": 1.0587859817670953e-06, "loss": 0.7669, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6375 }, { "epoch": 0.8670111503943432, "grad_norm": 0.357421875, "learning_rate": 1.0566608257010736e-06, "loss": 0.5745, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6376 }, { "epoch": 0.8671471308131629, "grad_norm": 0.28515625, "learning_rate": 1.0545376855730405e-06, "loss": 0.5376, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6377 }, { "epoch": 0.8672831112319825, "grad_norm": 0.486328125, "learning_rate": 1.0524165618615845e-06, "loss": 0.341, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6378 }, { "epoch": 0.8674190916508023, "grad_norm": 0.310546875, "learning_rate": 1.0502974550448264e-06, "loss": 0.7002, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6379 }, { "epoch": 0.867555072069622, "grad_norm": 0.435546875, "learning_rate": 1.048180365600444e-06, "loss": 0.5278, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6380 }, { "epoch": 0.8676910524884417, "grad_norm": 0.34765625, "learning_rate": 1.0460652940056505e-06, "loss": 0.5405, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6381 }, { "epoch": 0.8678270329072614, "grad_norm": 1.7109375, "learning_rate": 1.0439522407372137e-06, "loss": 0.8293, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6382 }, { "epoch": 0.8679630133260811, "grad_norm": 0.32421875, "learning_rate": 1.0418412062714367e-06, "loss": 0.5688, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6383 }, { "epoch": 0.8680989937449007, "grad_norm": 0.2373046875, "learning_rate": 1.0397321910841772e-06, "loss": 0.5054, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6384 }, { "epoch": 0.8682349741637204, "grad_norm": 0.392578125, "learning_rate": 1.0376251956508299e-06, "loss": 0.6279, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6385 }, { "epoch": 0.8683709545825401, "grad_norm": 0.498046875, "learning_rate": 1.0355202204463389e-06, "loss": 0.8089, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6386 }, { "epoch": 0.8685069350013598, "grad_norm": 0.48046875, "learning_rate": 1.033417265945189e-06, "loss": 0.7227, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6387 }, { "epoch": 0.8686429154201795, "grad_norm": 0.2890625, "learning_rate": 1.0313163326214149e-06, "loss": 0.5348, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6388 }, { "epoch": 0.8687788958389991, "grad_norm": 0.384765625, "learning_rate": 1.0292174209485906e-06, "loss": 0.6668, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6389 }, { "epoch": 0.8689148762578188, "grad_norm": 0.59765625, "learning_rate": 1.0271205313998378e-06, "loss": 0.6047, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6390 }, { "epoch": 0.8690508566766386, "grad_norm": 0.5078125, "learning_rate": 1.0250256644478196e-06, "loss": 0.7101, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6391 }, { "epoch": 0.8691868370954583, "grad_norm": 0.314453125, "learning_rate": 1.022932820564746e-06, "loss": 0.5591, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6392 }, { "epoch": 0.869322817514278, "grad_norm": 0.345703125, "learning_rate": 1.0208420002223696e-06, "loss": 0.5579, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6393 }, { "epoch": 0.8694587979330977, "grad_norm": 0.53125, "learning_rate": 1.0187532038919844e-06, "loss": 0.6669, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6394 }, { "epoch": 0.8695947783519173, "grad_norm": 0.388671875, "learning_rate": 1.0166664320444342e-06, "loss": 0.6511, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6395 }, { "epoch": 0.869730758770737, "grad_norm": 0.400390625, "learning_rate": 1.0145816851501001e-06, "loss": 0.5897, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6396 }, { "epoch": 0.8698667391895567, "grad_norm": 0.384765625, "learning_rate": 1.0124989636789117e-06, "loss": 0.4982, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6397 }, { "epoch": 0.8700027196083764, "grad_norm": 0.326171875, "learning_rate": 1.010418268100337e-06, "loss": 0.6051, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6398 }, { "epoch": 0.8701387000271961, "grad_norm": 0.3515625, "learning_rate": 1.0083395988833944e-06, "loss": 0.4961, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6399 }, { "epoch": 0.8702746804460157, "grad_norm": 0.28515625, "learning_rate": 1.0062629564966365e-06, "loss": 0.6541, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6400 }, { "epoch": 0.8704106608648354, "grad_norm": 0.55078125, "learning_rate": 1.0041883414081676e-06, "loss": 0.6585, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6401 }, { "epoch": 0.8705466412836551, "grad_norm": 0.251953125, "learning_rate": 1.0021157540856296e-06, "loss": 0.5163, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6402 }, { "epoch": 0.8706826217024748, "grad_norm": 0.259765625, "learning_rate": 1.000045194996211e-06, "loss": 0.5469, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6403 }, { "epoch": 0.8708186021212946, "grad_norm": 0.361328125, "learning_rate": 9.979766646066368e-07, "loss": 0.6309, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6404 }, { "epoch": 0.8709545825401143, "grad_norm": 0.48828125, "learning_rate": 9.959101633831848e-07, "loss": 0.6328, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6405 }, { "epoch": 0.871090562958934, "grad_norm": 0.326171875, "learning_rate": 9.938456917916638e-07, "loss": 0.6201, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6406 }, { "epoch": 0.8712265433777536, "grad_norm": 0.298828125, "learning_rate": 9.917832502974367e-07, "loss": 0.5957, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6407 }, { "epoch": 0.8713625237965733, "grad_norm": 0.37109375, "learning_rate": 9.89722839365398e-07, "loss": 0.589, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6408 }, { "epoch": 0.871498504215393, "grad_norm": 0.34765625, "learning_rate": 9.876644594599927e-07, "loss": 0.6836, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6409 }, { "epoch": 0.8716344846342127, "grad_norm": 0.419921875, "learning_rate": 9.856081110452032e-07, "loss": 0.6019, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6410 }, { "epoch": 0.8717704650530324, "grad_norm": 0.2490234375, "learning_rate": 9.835537945845574e-07, "loss": 0.5866, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6411 }, { "epoch": 0.871906445471852, "grad_norm": 0.283203125, "learning_rate": 9.815015105411196e-07, "loss": 0.6379, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6412 }, { "epoch": 0.8720424258906717, "grad_norm": 0.353515625, "learning_rate": 9.794512593775052e-07, "loss": 0.5579, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6413 }, { "epoch": 0.8721784063094914, "grad_norm": 0.35546875, "learning_rate": 9.77403041555861e-07, "loss": 0.7823, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6414 }, { "epoch": 0.8723143867283111, "grad_norm": 0.2236328125, "learning_rate": 9.753568575378836e-07, "loss": 0.4347, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6415 }, { "epoch": 0.8724503671471309, "grad_norm": 0.349609375, "learning_rate": 9.733127077848047e-07, "loss": 0.688, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6416 }, { "epoch": 0.8725863475659505, "grad_norm": 0.3671875, "learning_rate": 9.71270592757404e-07, "loss": 0.5841, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6417 }, { "epoch": 0.8727223279847702, "grad_norm": 0.421875, "learning_rate": 9.692305129159962e-07, "loss": 0.612, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6418 }, { "epoch": 0.8728583084035899, "grad_norm": 0.291015625, "learning_rate": 9.671924687204437e-07, "loss": 0.5428, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6419 }, { "epoch": 0.8729942888224096, "grad_norm": 0.55078125, "learning_rate": 9.651564606301422e-07, "loss": 0.6032, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6420 }, { "epoch": 0.8731302692412293, "grad_norm": 0.408203125, "learning_rate": 9.631224891040348e-07, "loss": 0.6322, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6421 }, { "epoch": 0.873266249660049, "grad_norm": 0.55859375, "learning_rate": 9.610905546006055e-07, "loss": 0.641, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6422 }, { "epoch": 0.8734022300788686, "grad_norm": 0.6171875, "learning_rate": 9.590606575778761e-07, "loss": 0.8172, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6423 }, { "epoch": 0.8735382104976883, "grad_norm": 0.376953125, "learning_rate": 9.57032798493408e-07, "loss": 0.6631, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6424 }, { "epoch": 0.873674190916508, "grad_norm": 0.76171875, "learning_rate": 9.550069778043103e-07, "loss": 0.6882, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6425 }, { "epoch": 0.8738101713353277, "grad_norm": 0.39453125, "learning_rate": 9.529831959672209e-07, "loss": 0.7307, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6426 }, { "epoch": 0.8739461517541474, "grad_norm": 0.4140625, "learning_rate": 9.509614534383315e-07, "loss": 0.6909, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6427 }, { "epoch": 0.874082132172967, "grad_norm": 0.71484375, "learning_rate": 9.489417506733633e-07, "loss": 0.5623, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6428 }, { "epoch": 0.8742181125917868, "grad_norm": 0.6171875, "learning_rate": 9.469240881275842e-07, "loss": 0.7581, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6429 }, { "epoch": 0.8743540930106065, "grad_norm": 0.404296875, "learning_rate": 9.449084662557984e-07, "loss": 0.7238, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6430 }, { "epoch": 0.8744900734294262, "grad_norm": 0.65625, "learning_rate": 9.428948855123543e-07, "loss": 0.596, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6431 }, { "epoch": 0.8746260538482459, "grad_norm": 0.2314453125, "learning_rate": 9.408833463511335e-07, "loss": 0.4661, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6432 }, { "epoch": 0.8747620342670656, "grad_norm": 0.38671875, "learning_rate": 9.388738492255667e-07, "loss": 0.5924, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6433 }, { "epoch": 0.8748980146858852, "grad_norm": 0.296875, "learning_rate": 9.368663945886136e-07, "loss": 0.5933, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6434 }, { "epoch": 0.8750339951047049, "grad_norm": 0.30859375, "learning_rate": 9.348609828927835e-07, "loss": 0.6898, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6435 }, { "epoch": 0.8751699755235246, "grad_norm": 0.515625, "learning_rate": 9.32857614590118e-07, "loss": 0.6414, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6436 }, { "epoch": 0.8753059559423443, "grad_norm": 0.36328125, "learning_rate": 9.308562901322027e-07, "loss": 0.7266, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6437 }, { "epoch": 0.875441936361164, "grad_norm": 0.51171875, "learning_rate": 9.288570099701588e-07, "loss": 0.8396, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6438 }, { "epoch": 0.8755779167799836, "grad_norm": 0.291015625, "learning_rate": 9.268597745546504e-07, "loss": 0.5653, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6439 }, { "epoch": 0.8757138971988033, "grad_norm": 0.439453125, "learning_rate": 9.248645843358761e-07, "loss": 0.6792, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6440 }, { "epoch": 0.875849877617623, "grad_norm": 0.34375, "learning_rate": 9.228714397635796e-07, "loss": 0.6427, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6441 }, { "epoch": 0.8759858580364428, "grad_norm": 0.322265625, "learning_rate": 9.208803412870371e-07, "loss": 0.6711, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6442 }, { "epoch": 0.8761218384552625, "grad_norm": 0.3359375, "learning_rate": 9.188912893550694e-07, "loss": 0.6089, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6443 }, { "epoch": 0.8762578188740822, "grad_norm": 0.88671875, "learning_rate": 9.169042844160292e-07, "loss": 0.7713, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6444 }, { "epoch": 0.8763937992929018, "grad_norm": 0.46875, "learning_rate": 9.149193269178158e-07, "loss": 0.6034, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6445 }, { "epoch": 0.8765297797117215, "grad_norm": 0.21484375, "learning_rate": 9.129364173078593e-07, "loss": 0.4155, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6446 }, { "epoch": 0.8766657601305412, "grad_norm": 0.30859375, "learning_rate": 9.109555560331352e-07, "loss": 0.5202, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6447 }, { "epoch": 0.8768017405493609, "grad_norm": 0.357421875, "learning_rate": 9.08976743540152e-07, "loss": 0.5946, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6448 }, { "epoch": 0.8769377209681806, "grad_norm": 0.302734375, "learning_rate": 9.069999802749563e-07, "loss": 0.5798, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6449 }, { "epoch": 0.8770737013870002, "grad_norm": 0.298828125, "learning_rate": 9.050252666831394e-07, "loss": 0.5526, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6450 }, { "epoch": 0.8772096818058199, "grad_norm": 0.322265625, "learning_rate": 9.030526032098208e-07, "loss": 0.5551, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6451 }, { "epoch": 0.8773456622246396, "grad_norm": 0.36328125, "learning_rate": 9.010819902996681e-07, "loss": 0.5117, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6452 }, { "epoch": 0.8774816426434593, "grad_norm": 0.2333984375, "learning_rate": 8.991134283968761e-07, "loss": 0.4784, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6453 }, { "epoch": 0.8776176230622791, "grad_norm": 0.34765625, "learning_rate": 8.971469179451886e-07, "loss": 0.5387, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6454 }, { "epoch": 0.8777536034810988, "grad_norm": 0.58984375, "learning_rate": 8.951824593878744e-07, "loss": 0.5085, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6455 }, { "epoch": 0.8778895838999184, "grad_norm": 0.37109375, "learning_rate": 8.932200531677537e-07, "loss": 0.5903, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6456 }, { "epoch": 0.8780255643187381, "grad_norm": 0.32421875, "learning_rate": 8.912596997271706e-07, "loss": 0.5674, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6457 }, { "epoch": 0.8781615447375578, "grad_norm": 0.32421875, "learning_rate": 8.893013995080169e-07, "loss": 0.5786, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6458 }, { "epoch": 0.8782975251563775, "grad_norm": 0.71484375, "learning_rate": 8.873451529517141e-07, "loss": 0.8223, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6459 }, { "epoch": 0.8784335055751972, "grad_norm": 0.3671875, "learning_rate": 8.853909604992284e-07, "loss": 0.487, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6460 }, { "epoch": 0.8785694859940169, "grad_norm": 0.455078125, "learning_rate": 8.834388225910528e-07, "loss": 0.8341, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6461 }, { "epoch": 0.8787054664128365, "grad_norm": 0.294921875, "learning_rate": 8.814887396672301e-07, "loss": 0.6137, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6462 }, { "epoch": 0.8788414468316562, "grad_norm": 0.60546875, "learning_rate": 8.795407121673261e-07, "loss": 0.765, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6463 }, { "epoch": 0.8789774272504759, "grad_norm": 0.734375, "learning_rate": 8.775947405304553e-07, "loss": 0.6405, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6464 }, { "epoch": 0.8791134076692956, "grad_norm": 0.39453125, "learning_rate": 8.756508251952589e-07, "loss": 0.8468, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6465 }, { "epoch": 0.8792493880881153, "grad_norm": 0.6171875, "learning_rate": 8.737089665999243e-07, "loss": 0.4619, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6466 }, { "epoch": 0.879385368506935, "grad_norm": 0.279296875, "learning_rate": 8.717691651821647e-07, "loss": 0.4056, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6467 }, { "epoch": 0.8795213489257547, "grad_norm": 0.451171875, "learning_rate": 8.698314213792402e-07, "loss": 0.7489, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6468 }, { "epoch": 0.8796573293445744, "grad_norm": 0.388671875, "learning_rate": 8.678957356279372e-07, "loss": 0.7773, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6469 }, { "epoch": 0.8797933097633941, "grad_norm": 0.31640625, "learning_rate": 8.659621083645875e-07, "loss": 0.613, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6470 }, { "epoch": 0.8799292901822138, "grad_norm": 0.287109375, "learning_rate": 8.640305400250492e-07, "loss": 0.6097, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6471 }, { "epoch": 0.8800652706010335, "grad_norm": 0.52734375, "learning_rate": 8.621010310447276e-07, "loss": 0.5123, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6472 }, { "epoch": 0.8802012510198531, "grad_norm": 0.357421875, "learning_rate": 8.60173581858551e-07, "loss": 0.7135, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6473 }, { "epoch": 0.8803372314386728, "grad_norm": 0.46484375, "learning_rate": 8.582481929009956e-07, "loss": 0.6868, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6474 }, { "epoch": 0.8804732118574925, "grad_norm": 0.28515625, "learning_rate": 8.56324864606064e-07, "loss": 0.4907, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6475 }, { "epoch": 0.8806091922763122, "grad_norm": 0.302734375, "learning_rate": 8.544035974073006e-07, "loss": 0.542, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6476 }, { "epoch": 0.8807451726951319, "grad_norm": 0.423828125, "learning_rate": 8.524843917377801e-07, "loss": 0.5659, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6477 }, { "epoch": 0.8808811531139515, "grad_norm": 0.80859375, "learning_rate": 8.505672480301164e-07, "loss": 0.7273, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6478 }, { "epoch": 0.8810171335327713, "grad_norm": 0.451171875, "learning_rate": 8.48652166716456e-07, "loss": 0.7965, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6479 }, { "epoch": 0.881153113951591, "grad_norm": 0.478515625, "learning_rate": 8.467391482284836e-07, "loss": 0.7038, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6480 }, { "epoch": 0.8812890943704107, "grad_norm": 0.625, "learning_rate": 8.448281929974122e-07, "loss": 0.729, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6481 }, { "epoch": 0.8814250747892304, "grad_norm": 0.4296875, "learning_rate": 8.429193014540015e-07, "loss": 0.8701, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6482 }, { "epoch": 0.8815610552080501, "grad_norm": 0.341796875, "learning_rate": 8.410124740285341e-07, "loss": 0.568, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6483 }, { "epoch": 0.8816970356268697, "grad_norm": 0.392578125, "learning_rate": 8.39107711150835e-07, "loss": 0.7746, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6484 }, { "epoch": 0.8818330160456894, "grad_norm": 0.416015625, "learning_rate": 8.372050132502573e-07, "loss": 0.5785, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6485 }, { "epoch": 0.8819689964645091, "grad_norm": 0.51171875, "learning_rate": 8.35304380755697e-07, "loss": 0.8867, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6486 }, { "epoch": 0.8821049768833288, "grad_norm": 0.578125, "learning_rate": 8.334058140955759e-07, "loss": 0.799, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6487 }, { "epoch": 0.8822409573021485, "grad_norm": 0.294921875, "learning_rate": 8.315093136978569e-07, "loss": 0.5908, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6488 }, { "epoch": 0.8823769377209681, "grad_norm": 0.453125, "learning_rate": 8.296148799900328e-07, "loss": 0.3218, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6489 }, { "epoch": 0.8825129181397878, "grad_norm": 0.279296875, "learning_rate": 8.277225133991329e-07, "loss": 0.5257, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6490 }, { "epoch": 0.8826488985586075, "grad_norm": 0.330078125, "learning_rate": 8.258322143517183e-07, "loss": 0.5216, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6491 }, { "epoch": 0.8827848789774273, "grad_norm": 0.39453125, "learning_rate": 8.239439832738893e-07, "loss": 0.646, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6492 }, { "epoch": 0.882920859396247, "grad_norm": 0.51953125, "learning_rate": 8.220578205912711e-07, "loss": 0.7891, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6493 }, { "epoch": 0.8830568398150667, "grad_norm": 0.37109375, "learning_rate": 8.201737267290321e-07, "loss": 0.6981, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6494 }, { "epoch": 0.8831928202338863, "grad_norm": 0.33203125, "learning_rate": 8.182917021118664e-07, "loss": 0.5876, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6495 }, { "epoch": 0.883328800652706, "grad_norm": 0.494140625, "learning_rate": 8.164117471640098e-07, "loss": 0.7142, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6496 }, { "epoch": 0.8834647810715257, "grad_norm": 0.375, "learning_rate": 8.145338623092225e-07, "loss": 0.7745, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6497 }, { "epoch": 0.8836007614903454, "grad_norm": 0.4453125, "learning_rate": 8.126580479708068e-07, "loss": 0.7193, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6498 }, { "epoch": 0.8837367419091651, "grad_norm": 0.31640625, "learning_rate": 8.107843045715902e-07, "loss": 0.5921, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6499 }, { "epoch": 0.8838727223279848, "grad_norm": 0.361328125, "learning_rate": 8.089126325339414e-07, "loss": 0.7376, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6500 }, { "epoch": 0.8840087027468044, "grad_norm": 0.314453125, "learning_rate": 8.070430322797562e-07, "loss": 0.5763, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6501 }, { "epoch": 0.8841446831656241, "grad_norm": 0.447265625, "learning_rate": 8.051755042304643e-07, "loss": 0.7699, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6502 }, { "epoch": 0.8842806635844438, "grad_norm": 0.41796875, "learning_rate": 8.033100488070311e-07, "loss": 0.7401, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6503 }, { "epoch": 0.8844166440032635, "grad_norm": 0.5625, "learning_rate": 8.014466664299514e-07, "loss": 0.6825, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6504 }, { "epoch": 0.8845526244220833, "grad_norm": 0.41015625, "learning_rate": 7.995853575192558e-07, "loss": 0.8213, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6505 }, { "epoch": 0.884688604840903, "grad_norm": 0.275390625, "learning_rate": 7.977261224945055e-07, "loss": 0.5088, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6506 }, { "epoch": 0.8848245852597226, "grad_norm": 0.431640625, "learning_rate": 7.95868961774795e-07, "loss": 0.8187, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6507 }, { "epoch": 0.8849605656785423, "grad_norm": 0.33203125, "learning_rate": 7.940138757787507e-07, "loss": 0.5951, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6508 }, { "epoch": 0.885096546097362, "grad_norm": 0.2294921875, "learning_rate": 7.921608649245327e-07, "loss": 0.4312, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6509 }, { "epoch": 0.8852325265161817, "grad_norm": 0.33203125, "learning_rate": 7.9030992962983e-07, "loss": 0.5851, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6510 }, { "epoch": 0.8853685069350014, "grad_norm": 0.48828125, "learning_rate": 7.8846107031187e-07, "loss": 0.7643, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6511 }, { "epoch": 0.885504487353821, "grad_norm": 0.2197265625, "learning_rate": 7.866142873874028e-07, "loss": 0.4206, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6512 }, { "epoch": 0.8856404677726407, "grad_norm": 0.28515625, "learning_rate": 7.847695812727218e-07, "loss": 0.6047, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6513 }, { "epoch": 0.8857764481914604, "grad_norm": 0.6015625, "learning_rate": 7.829269523836413e-07, "loss": 0.695, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6514 }, { "epoch": 0.8859124286102801, "grad_norm": 0.291015625, "learning_rate": 7.810864011355157e-07, "loss": 0.5413, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6515 }, { "epoch": 0.8860484090290998, "grad_norm": 0.310546875, "learning_rate": 7.792479279432241e-07, "loss": 0.6924, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6516 }, { "epoch": 0.8861843894479196, "grad_norm": 0.28515625, "learning_rate": 7.774115332211862e-07, "loss": 0.5807, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6517 }, { "epoch": 0.8863203698667392, "grad_norm": 0.427734375, "learning_rate": 7.75577217383342e-07, "loss": 0.5654, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6518 }, { "epoch": 0.8864563502855589, "grad_norm": 0.44921875, "learning_rate": 7.737449808431741e-07, "loss": 0.7606, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6519 }, { "epoch": 0.8865923307043786, "grad_norm": 0.4296875, "learning_rate": 7.719148240136864e-07, "loss": 0.7786, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6520 }, { "epoch": 0.8867283111231983, "grad_norm": 0.58984375, "learning_rate": 7.700867473074225e-07, "loss": 0.6675, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6521 }, { "epoch": 0.886864291542018, "grad_norm": 0.50390625, "learning_rate": 7.682607511364492e-07, "loss": 0.8175, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6522 }, { "epoch": 0.8870002719608376, "grad_norm": 0.333984375, "learning_rate": 7.66436835912373e-07, "loss": 0.6632, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6523 }, { "epoch": 0.8871362523796573, "grad_norm": 0.341796875, "learning_rate": 7.646150020463228e-07, "loss": 0.659, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6524 }, { "epoch": 0.887272232798477, "grad_norm": 0.376953125, "learning_rate": 7.627952499489644e-07, "loss": 0.5677, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6525 }, { "epoch": 0.8874082132172967, "grad_norm": 0.365234375, "learning_rate": 7.609775800304908e-07, "loss": 0.5978, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6526 }, { "epoch": 0.8875441936361164, "grad_norm": 0.55078125, "learning_rate": 7.591619927006289e-07, "loss": 0.5555, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6527 }, { "epoch": 0.887680174054936, "grad_norm": 0.408203125, "learning_rate": 7.57348488368631e-07, "loss": 0.7122, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6528 }, { "epoch": 0.8878161544737557, "grad_norm": 0.41015625, "learning_rate": 7.555370674432872e-07, "loss": 0.5527, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6529 }, { "epoch": 0.8879521348925755, "grad_norm": 0.275390625, "learning_rate": 7.537277303329105e-07, "loss": 0.498, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6530 }, { "epoch": 0.8880881153113952, "grad_norm": 0.578125, "learning_rate": 7.519204774453503e-07, "loss": 0.5949, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6531 }, { "epoch": 0.8882240957302149, "grad_norm": 0.30078125, "learning_rate": 7.501153091879809e-07, "loss": 0.5575, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6532 }, { "epoch": 0.8883600761490346, "grad_norm": 0.484375, "learning_rate": 7.483122259677111e-07, "loss": 0.6444, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6533 }, { "epoch": 0.8884960565678542, "grad_norm": 0.341796875, "learning_rate": 7.46511228190977e-07, "loss": 0.6642, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6534 }, { "epoch": 0.8886320369866739, "grad_norm": 0.466796875, "learning_rate": 7.447123162637482e-07, "loss": 0.8478, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6535 }, { "epoch": 0.8887680174054936, "grad_norm": 0.298828125, "learning_rate": 7.429154905915159e-07, "loss": 0.6243, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6536 }, { "epoch": 0.8889039978243133, "grad_norm": 0.2890625, "learning_rate": 7.411207515793128e-07, "loss": 0.5081, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6537 }, { "epoch": 0.889039978243133, "grad_norm": 0.2041015625, "learning_rate": 7.393280996316899e-07, "loss": 0.3722, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6538 }, { "epoch": 0.8891759586619526, "grad_norm": 0.349609375, "learning_rate": 7.37537535152737e-07, "loss": 0.5845, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6539 }, { "epoch": 0.8893119390807723, "grad_norm": 0.3125, "learning_rate": 7.357490585460658e-07, "loss": 0.5985, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6540 }, { "epoch": 0.889447919499592, "grad_norm": 0.55078125, "learning_rate": 7.339626702148217e-07, "loss": 0.5037, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6541 }, { "epoch": 0.8895838999184118, "grad_norm": 0.33203125, "learning_rate": 7.321783705616791e-07, "loss": 0.6083, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6542 }, { "epoch": 0.8897198803372315, "grad_norm": 0.345703125, "learning_rate": 7.303961599888432e-07, "loss": 0.5941, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6543 }, { "epoch": 0.8898558607560512, "grad_norm": 0.365234375, "learning_rate": 7.286160388980412e-07, "loss": 0.6676, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6544 }, { "epoch": 0.8899918411748708, "grad_norm": 0.234375, "learning_rate": 7.26838007690539e-07, "loss": 0.4528, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6545 }, { "epoch": 0.8901278215936905, "grad_norm": 0.5859375, "learning_rate": 7.250620667671227e-07, "loss": 0.6081, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6546 }, { "epoch": 0.8902638020125102, "grad_norm": 0.314453125, "learning_rate": 7.232882165281141e-07, "loss": 0.6265, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6547 }, { "epoch": 0.8903997824313299, "grad_norm": 0.478515625, "learning_rate": 7.215164573733569e-07, "loss": 0.7483, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6548 }, { "epoch": 0.8905357628501496, "grad_norm": 0.384765625, "learning_rate": 7.197467897022325e-07, "loss": 0.6336, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6549 }, { "epoch": 0.8906717432689693, "grad_norm": 0.318359375, "learning_rate": 7.17979213913641e-07, "loss": 0.5975, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6550 }, { "epoch": 0.8908077236877889, "grad_norm": 0.28125, "learning_rate": 7.162137304060179e-07, "loss": 0.5807, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6551 }, { "epoch": 0.8909437041066086, "grad_norm": 0.5390625, "learning_rate": 7.14450339577324e-07, "loss": 0.6182, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6552 }, { "epoch": 0.8910796845254283, "grad_norm": 0.451171875, "learning_rate": 7.12689041825051e-07, "loss": 0.5911, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6553 }, { "epoch": 0.891215664944248, "grad_norm": 0.419921875, "learning_rate": 7.109298375462125e-07, "loss": 0.6812, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6554 }, { "epoch": 0.8913516453630678, "grad_norm": 0.390625, "learning_rate": 7.091727271373606e-07, "loss": 0.6847, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6555 }, { "epoch": 0.8914876257818874, "grad_norm": 0.328125, "learning_rate": 7.074177109945657e-07, "loss": 0.5529, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6556 }, { "epoch": 0.8916236062007071, "grad_norm": 0.404296875, "learning_rate": 7.056647895134294e-07, "loss": 0.7469, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6557 }, { "epoch": 0.8917595866195268, "grad_norm": 0.56640625, "learning_rate": 7.039139630890834e-07, "loss": 0.6528, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6558 }, { "epoch": 0.8918955670383465, "grad_norm": 0.392578125, "learning_rate": 7.021652321161842e-07, "loss": 0.6855, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6559 }, { "epoch": 0.8920315474571662, "grad_norm": 0.75, "learning_rate": 7.004185969889188e-07, "loss": 0.7334, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6560 }, { "epoch": 0.8921675278759859, "grad_norm": 0.50390625, "learning_rate": 6.986740581009977e-07, "loss": 0.6462, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6561 }, { "epoch": 0.8923035082948055, "grad_norm": 0.57421875, "learning_rate": 6.96931615845664e-07, "loss": 0.5313, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6562 }, { "epoch": 0.8924394887136252, "grad_norm": 0.75390625, "learning_rate": 6.951912706156838e-07, "loss": 0.7546, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6563 }, { "epoch": 0.8925754691324449, "grad_norm": 0.2470703125, "learning_rate": 6.934530228033531e-07, "loss": 0.4891, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6564 }, { "epoch": 0.8927114495512646, "grad_norm": 0.2734375, "learning_rate": 6.91716872800492e-07, "loss": 0.5351, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6565 }, { "epoch": 0.8928474299700843, "grad_norm": 0.431640625, "learning_rate": 6.899828209984527e-07, "loss": 0.7454, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6566 }, { "epoch": 0.892983410388904, "grad_norm": 0.49609375, "learning_rate": 6.882508677881094e-07, "loss": 0.4898, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6567 }, { "epoch": 0.8931193908077237, "grad_norm": 0.2353515625, "learning_rate": 6.865210135598688e-07, "loss": 0.5146, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6568 }, { "epoch": 0.8932553712265434, "grad_norm": 0.470703125, "learning_rate": 6.847932587036565e-07, "loss": 0.6774, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6569 }, { "epoch": 0.8933913516453631, "grad_norm": 0.5234375, "learning_rate": 6.830676036089345e-07, "loss": 0.6001, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6570 }, { "epoch": 0.8935273320641828, "grad_norm": 0.294921875, "learning_rate": 6.813440486646827e-07, "loss": 0.5542, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6571 }, { "epoch": 0.8936633124830025, "grad_norm": 0.65234375, "learning_rate": 6.796225942594148e-07, "loss": 0.785, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6572 }, { "epoch": 0.8937992929018221, "grad_norm": 0.62109375, "learning_rate": 6.779032407811636e-07, "loss": 0.8075, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6573 }, { "epoch": 0.8939352733206418, "grad_norm": 0.380859375, "learning_rate": 6.76185988617496e-07, "loss": 0.6182, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6574 }, { "epoch": 0.8940712537394615, "grad_norm": 0.349609375, "learning_rate": 6.744708381554988e-07, "loss": 0.6252, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6575 }, { "epoch": 0.8942072341582812, "grad_norm": 0.453125, "learning_rate": 6.727577897817916e-07, "loss": 0.748, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6576 }, { "epoch": 0.8943432145771009, "grad_norm": 0.43359375, "learning_rate": 6.710468438825124e-07, "loss": 0.5687, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6577 }, { "epoch": 0.8944791949959205, "grad_norm": 0.2490234375, "learning_rate": 6.693380008433326e-07, "loss": 0.5037, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6578 }, { "epoch": 0.8946151754147402, "grad_norm": 0.3828125, "learning_rate": 6.676312610494429e-07, "loss": 0.3695, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6579 }, { "epoch": 0.89475115583356, "grad_norm": 0.1767578125, "learning_rate": 6.659266248855667e-07, "loss": 0.3066, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6580 }, { "epoch": 0.8948871362523797, "grad_norm": 0.396484375, "learning_rate": 6.642240927359478e-07, "loss": 0.7882, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6581 }, { "epoch": 0.8950231166711994, "grad_norm": 0.310546875, "learning_rate": 6.625236649843592e-07, "loss": 0.5846, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6582 }, { "epoch": 0.8951590970900191, "grad_norm": 0.5078125, "learning_rate": 6.608253420140953e-07, "loss": 0.7298, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6583 }, { "epoch": 0.8952950775088387, "grad_norm": 0.388671875, "learning_rate": 6.591291242079822e-07, "loss": 0.6989, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6584 }, { "epoch": 0.8954310579276584, "grad_norm": 0.33984375, "learning_rate": 6.574350119483664e-07, "loss": 0.5871, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6585 }, { "epoch": 0.8955670383464781, "grad_norm": 0.28125, "learning_rate": 6.557430056171221e-07, "loss": 0.5231, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6586 }, { "epoch": 0.8957030187652978, "grad_norm": 0.2392578125, "learning_rate": 6.540531055956478e-07, "loss": 0.4191, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6587 }, { "epoch": 0.8958389991841175, "grad_norm": 0.44921875, "learning_rate": 6.523653122648687e-07, "loss": 0.7065, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6588 }, { "epoch": 0.8959749796029371, "grad_norm": 0.3671875, "learning_rate": 6.506796260052306e-07, "loss": 0.666, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6589 }, { "epoch": 0.8961109600217568, "grad_norm": 0.298828125, "learning_rate": 6.489960471967139e-07, "loss": 0.502, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6590 }, { "epoch": 0.8962469404405765, "grad_norm": 0.9453125, "learning_rate": 6.473145762188116e-07, "loss": 0.6768, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6591 }, { "epoch": 0.8963829208593962, "grad_norm": 0.5625, "learning_rate": 6.456352134505517e-07, "loss": 0.7152, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6592 }, { "epoch": 0.896518901278216, "grad_norm": 0.326171875, "learning_rate": 6.439579592704803e-07, "loss": 0.5771, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6593 }, { "epoch": 0.8966548816970357, "grad_norm": 0.32421875, "learning_rate": 6.422828140566751e-07, "loss": 0.492, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6594 }, { "epoch": 0.8967908621158553, "grad_norm": 0.263671875, "learning_rate": 6.406097781867293e-07, "loss": 0.5679, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6595 }, { "epoch": 0.896926842534675, "grad_norm": 0.341796875, "learning_rate": 6.389388520377704e-07, "loss": 0.6556, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6596 }, { "epoch": 0.8970628229534947, "grad_norm": 0.40625, "learning_rate": 6.372700359864414e-07, "loss": 0.8026, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6597 }, { "epoch": 0.8971988033723144, "grad_norm": 0.486328125, "learning_rate": 6.356033304089171e-07, "loss": 0.6859, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6598 }, { "epoch": 0.8973347837911341, "grad_norm": 0.376953125, "learning_rate": 6.339387356808912e-07, "loss": 0.8835, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6599 }, { "epoch": 0.8974707642099538, "grad_norm": 0.6015625, "learning_rate": 6.322762521775849e-07, "loss": 0.7393, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6600 }, { "epoch": 0.8976067446287734, "grad_norm": 0.37890625, "learning_rate": 6.306158802737416e-07, "loss": 0.6455, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6601 }, { "epoch": 0.8977427250475931, "grad_norm": 0.6796875, "learning_rate": 6.289576203436287e-07, "loss": 0.5939, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6602 }, { "epoch": 0.8978787054664128, "grad_norm": 0.451171875, "learning_rate": 6.273014727610405e-07, "loss": 0.6229, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6603 }, { "epoch": 0.8980146858852325, "grad_norm": 0.5625, "learning_rate": 6.256474378992916e-07, "loss": 0.5846, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6604 }, { "epoch": 0.8981506663040523, "grad_norm": 0.333984375, "learning_rate": 6.239955161312216e-07, "loss": 0.548, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6605 }, { "epoch": 0.898286646722872, "grad_norm": 0.4453125, "learning_rate": 6.22345707829195e-07, "loss": 0.6799, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6606 }, { "epoch": 0.8984226271416916, "grad_norm": 0.412109375, "learning_rate": 6.206980133650964e-07, "loss": 0.6172, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6607 }, { "epoch": 0.8985586075605113, "grad_norm": 0.306640625, "learning_rate": 6.190524331103398e-07, "loss": 0.5488, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6608 }, { "epoch": 0.898694587979331, "grad_norm": 0.232421875, "learning_rate": 6.174089674358563e-07, "loss": 0.3825, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6609 }, { "epoch": 0.8988305683981507, "grad_norm": 0.33984375, "learning_rate": 6.157676167121018e-07, "loss": 0.6644, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6610 }, { "epoch": 0.8989665488169704, "grad_norm": 0.28125, "learning_rate": 6.141283813090615e-07, "loss": 0.5094, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6611 }, { "epoch": 0.89910252923579, "grad_norm": 0.369140625, "learning_rate": 6.124912615962341e-07, "loss": 0.6281, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6612 }, { "epoch": 0.8992385096546097, "grad_norm": 0.453125, "learning_rate": 6.108562579426502e-07, "loss": 0.7875, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6613 }, { "epoch": 0.8993744900734294, "grad_norm": 0.640625, "learning_rate": 6.092233707168571e-07, "loss": 0.6992, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6614 }, { "epoch": 0.8995104704922491, "grad_norm": 0.447265625, "learning_rate": 6.075926002869304e-07, "loss": 0.6797, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6615 }, { "epoch": 0.8996464509110688, "grad_norm": 0.380859375, "learning_rate": 6.059639470204626e-07, "loss": 0.7591, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6616 }, { "epoch": 0.8997824313298884, "grad_norm": 0.359375, "learning_rate": 6.043374112845735e-07, "loss": 0.6312, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6617 }, { "epoch": 0.8999184117487082, "grad_norm": 0.37109375, "learning_rate": 6.027129934459042e-07, "loss": 0.6554, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6618 }, { "epoch": 0.9000543921675279, "grad_norm": 0.369140625, "learning_rate": 6.010906938706184e-07, "loss": 0.6867, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6619 }, { "epoch": 0.9001903725863476, "grad_norm": 0.4296875, "learning_rate": 5.994705129244016e-07, "loss": 0.5752, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6620 }, { "epoch": 0.9003263530051673, "grad_norm": 0.42578125, "learning_rate": 5.978524509724637e-07, "loss": 0.4784, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6621 }, { "epoch": 0.900462333423987, "grad_norm": 0.32421875, "learning_rate": 5.96236508379533e-07, "loss": 0.5459, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6622 }, { "epoch": 0.9005983138428066, "grad_norm": 0.55859375, "learning_rate": 5.946226855098658e-07, "loss": 0.6551, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6623 }, { "epoch": 0.9007342942616263, "grad_norm": 0.3984375, "learning_rate": 5.930109827272357e-07, "loss": 0.5892, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6624 }, { "epoch": 0.900870274680446, "grad_norm": 0.42578125, "learning_rate": 5.914014003949408e-07, "loss": 0.8079, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6625 }, { "epoch": 0.9010062550992657, "grad_norm": 0.447265625, "learning_rate": 5.897939388758001e-07, "loss": 0.7288, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6626 }, { "epoch": 0.9011422355180854, "grad_norm": 0.3828125, "learning_rate": 5.881885985321567e-07, "loss": 0.8478, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6627 }, { "epoch": 0.901278215936905, "grad_norm": 0.546875, "learning_rate": 5.865853797258714e-07, "loss": 0.5524, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6628 }, { "epoch": 0.9014141963557247, "grad_norm": 0.2890625, "learning_rate": 5.849842828183316e-07, "loss": 0.5814, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6629 }, { "epoch": 0.9015501767745445, "grad_norm": 0.26953125, "learning_rate": 5.833853081704422e-07, "loss": 0.4783, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6630 }, { "epoch": 0.9016861571933642, "grad_norm": 0.55078125, "learning_rate": 5.817884561426346e-07, "loss": 0.5203, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6631 }, { "epoch": 0.9018221376121839, "grad_norm": 0.306640625, "learning_rate": 5.801937270948543e-07, "loss": 0.5448, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6632 }, { "epoch": 0.9019581180310036, "grad_norm": 0.6953125, "learning_rate": 5.786011213865772e-07, "loss": 0.73, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6633 }, { "epoch": 0.9020940984498232, "grad_norm": 0.59375, "learning_rate": 5.770106393767926e-07, "loss": 0.7913, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6634 }, { "epoch": 0.9022300788686429, "grad_norm": 0.515625, "learning_rate": 5.754222814240174e-07, "loss": 0.7686, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6635 }, { "epoch": 0.9023660592874626, "grad_norm": 0.33984375, "learning_rate": 5.738360478862848e-07, "loss": 0.5285, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6636 }, { "epoch": 0.9025020397062823, "grad_norm": 0.3203125, "learning_rate": 5.722519391211523e-07, "loss": 0.5588, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6637 }, { "epoch": 0.902638020125102, "grad_norm": 0.275390625, "learning_rate": 5.706699554856965e-07, "loss": 0.4796, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6638 }, { "epoch": 0.9027740005439217, "grad_norm": 0.44921875, "learning_rate": 5.690900973365177e-07, "loss": 0.7401, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6639 }, { "epoch": 0.9029099809627413, "grad_norm": 0.408203125, "learning_rate": 5.675123650297332e-07, "loss": 0.7985, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6640 }, { "epoch": 0.903045961381561, "grad_norm": 0.6171875, "learning_rate": 5.659367589209841e-07, "loss": 0.6401, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6641 }, { "epoch": 0.9031819418003807, "grad_norm": 0.33203125, "learning_rate": 5.643632793654308e-07, "loss": 0.5269, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6642 }, { "epoch": 0.9033179222192005, "grad_norm": 0.50390625, "learning_rate": 5.627919267177562e-07, "loss": 0.741, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6643 }, { "epoch": 0.9034539026380202, "grad_norm": 0.384765625, "learning_rate": 5.612227013321603e-07, "loss": 0.8535, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6644 }, { "epoch": 0.9035898830568398, "grad_norm": 0.609375, "learning_rate": 5.596556035623679e-07, "loss": 0.715, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6645 }, { "epoch": 0.9037258634756595, "grad_norm": 0.5078125, "learning_rate": 5.580906337616198e-07, "loss": 0.6323, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6646 }, { "epoch": 0.9038618438944792, "grad_norm": 0.32421875, "learning_rate": 5.565277922826806e-07, "loss": 0.6506, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6647 }, { "epoch": 0.9039978243132989, "grad_norm": 0.498046875, "learning_rate": 5.549670794778339e-07, "loss": 0.6198, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6648 }, { "epoch": 0.9041338047321186, "grad_norm": 0.55078125, "learning_rate": 5.534084956988839e-07, "loss": 0.6496, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6649 }, { "epoch": 0.9042697851509383, "grad_norm": 0.51171875, "learning_rate": 5.518520412971517e-07, "loss": 0.6693, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6650 }, { "epoch": 0.9044057655697579, "grad_norm": 0.2734375, "learning_rate": 5.502977166234857e-07, "loss": 0.5431, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6651 }, { "epoch": 0.9045417459885776, "grad_norm": 0.5234375, "learning_rate": 5.487455220282445e-07, "loss": 0.7413, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6652 }, { "epoch": 0.9046777264073973, "grad_norm": 0.37890625, "learning_rate": 5.47195457861317e-07, "loss": 0.7171, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6653 }, { "epoch": 0.904813706826217, "grad_norm": 0.453125, "learning_rate": 5.456475244721016e-07, "loss": 0.8162, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6654 }, { "epoch": 0.9049496872450367, "grad_norm": 0.31640625, "learning_rate": 5.441017222095268e-07, "loss": 0.6029, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6655 }, { "epoch": 0.9050856676638565, "grad_norm": 0.36328125, "learning_rate": 5.425580514220297e-07, "loss": 0.6496, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6656 }, { "epoch": 0.9052216480826761, "grad_norm": 0.77734375, "learning_rate": 5.410165124575772e-07, "loss": 0.7017, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6657 }, { "epoch": 0.9053576285014958, "grad_norm": 0.31640625, "learning_rate": 5.394771056636472e-07, "loss": 0.5247, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6658 }, { "epoch": 0.9054936089203155, "grad_norm": 0.435546875, "learning_rate": 5.379398313872441e-07, "loss": 0.8184, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6659 }, { "epoch": 0.9056295893391352, "grad_norm": 0.73828125, "learning_rate": 5.364046899748865e-07, "loss": 0.7783, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6660 }, { "epoch": 0.9057655697579549, "grad_norm": 0.376953125, "learning_rate": 5.34871681772614e-07, "loss": 0.7803, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6661 }, { "epoch": 0.9059015501767745, "grad_norm": 0.6796875, "learning_rate": 5.333408071259849e-07, "loss": 0.8407, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6662 }, { "epoch": 0.9060375305955942, "grad_norm": 0.400390625, "learning_rate": 5.318120663800796e-07, "loss": 0.5251, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6663 }, { "epoch": 0.9061735110144139, "grad_norm": 0.263671875, "learning_rate": 5.302854598794938e-07, "loss": 0.4285, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6664 }, { "epoch": 0.9063094914332336, "grad_norm": 0.380859375, "learning_rate": 5.28760987968342e-07, "loss": 0.6833, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6665 }, { "epoch": 0.9064454718520533, "grad_norm": 0.310546875, "learning_rate": 5.272386509902605e-07, "loss": 0.5934, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6666 }, { "epoch": 0.906581452270873, "grad_norm": 0.431640625, "learning_rate": 5.257184492884004e-07, "loss": 0.8154, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6667 }, { "epoch": 0.9067174326896927, "grad_norm": 0.482421875, "learning_rate": 5.242003832054365e-07, "loss": 0.7008, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6668 }, { "epoch": 0.9068534131085124, "grad_norm": 0.53515625, "learning_rate": 5.226844530835562e-07, "loss": 0.5366, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6669 }, { "epoch": 0.9069893935273321, "grad_norm": 0.2275390625, "learning_rate": 5.211706592644728e-07, "loss": 0.4471, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6670 }, { "epoch": 0.9071253739461518, "grad_norm": 0.66015625, "learning_rate": 5.196590020894098e-07, "loss": 0.7007, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6671 }, { "epoch": 0.9072613543649715, "grad_norm": 0.2890625, "learning_rate": 5.181494818991173e-07, "loss": 0.5208, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6672 }, { "epoch": 0.9073973347837911, "grad_norm": 0.578125, "learning_rate": 5.166420990338549e-07, "loss": 0.8296, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6673 }, { "epoch": 0.9075333152026108, "grad_norm": 0.359375, "learning_rate": 5.1513685383341e-07, "loss": 0.5254, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6674 }, { "epoch": 0.9076692956214305, "grad_norm": 0.46875, "learning_rate": 5.136337466370788e-07, "loss": 0.5959, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6675 }, { "epoch": 0.9078052760402502, "grad_norm": 0.2734375, "learning_rate": 5.121327777836849e-07, "loss": 0.5033, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6676 }, { "epoch": 0.9079412564590699, "grad_norm": 0.28515625, "learning_rate": 5.106339476115596e-07, "loss": 0.5993, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6677 }, { "epoch": 0.9080772368778895, "grad_norm": 0.359375, "learning_rate": 5.091372564585617e-07, "loss": 0.6851, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6678 }, { "epoch": 0.9082132172967092, "grad_norm": 0.66796875, "learning_rate": 5.076427046620613e-07, "loss": 0.5576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6679 }, { "epoch": 0.9083491977155289, "grad_norm": 0.3984375, "learning_rate": 5.061502925589512e-07, "loss": 0.6714, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6680 }, { "epoch": 0.9084851781343487, "grad_norm": 0.263671875, "learning_rate": 5.046600204856356e-07, "loss": 0.5042, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6681 }, { "epoch": 0.9086211585531684, "grad_norm": 0.283203125, "learning_rate": 5.031718887780445e-07, "loss": 0.4622, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6682 }, { "epoch": 0.9087571389719881, "grad_norm": 0.3359375, "learning_rate": 5.016858977716166e-07, "loss": 0.5394, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6683 }, { "epoch": 0.9088931193908077, "grad_norm": 0.40625, "learning_rate": 5.002020478013158e-07, "loss": 0.7362, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6684 }, { "epoch": 0.9090290998096274, "grad_norm": 0.48046875, "learning_rate": 4.987203392016183e-07, "loss": 0.7021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6685 }, { "epoch": 0.9091650802284471, "grad_norm": 0.51171875, "learning_rate": 4.972407723065198e-07, "loss": 0.4089, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6686 }, { "epoch": 0.9093010606472668, "grad_norm": 0.6796875, "learning_rate": 4.957633474495316e-07, "loss": 0.861, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6687 }, { "epoch": 0.9094370410660865, "grad_norm": 0.34375, "learning_rate": 4.942880649636861e-07, "loss": 0.6047, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6688 }, { "epoch": 0.9095730214849062, "grad_norm": 0.73828125, "learning_rate": 4.928149251815273e-07, "loss": 0.6532, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6689 }, { "epoch": 0.9097090019037258, "grad_norm": 0.423828125, "learning_rate": 4.913439284351207e-07, "loss": 0.6948, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6690 }, { "epoch": 0.9098449823225455, "grad_norm": 0.31640625, "learning_rate": 4.898750750560455e-07, "loss": 0.6917, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6691 }, { "epoch": 0.9099809627413652, "grad_norm": 0.384765625, "learning_rate": 4.884083653754013e-07, "loss": 0.673, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6692 }, { "epoch": 0.910116943160185, "grad_norm": 0.64453125, "learning_rate": 4.869437997237991e-07, "loss": 0.6831, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6693 }, { "epoch": 0.9102529235790047, "grad_norm": 0.224609375, "learning_rate": 4.854813784313728e-07, "loss": 0.4476, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6694 }, { "epoch": 0.9103889039978244, "grad_norm": 0.494140625, "learning_rate": 4.840211018277685e-07, "loss": 0.877, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6695 }, { "epoch": 0.910524884416644, "grad_norm": 0.3671875, "learning_rate": 4.82562970242152e-07, "loss": 0.6673, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6696 }, { "epoch": 0.9106608648354637, "grad_norm": 0.39453125, "learning_rate": 4.811069840032023e-07, "loss": 0.7585, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6697 }, { "epoch": 0.9107968452542834, "grad_norm": 0.279296875, "learning_rate": 4.796531434391171e-07, "loss": 0.4884, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6698 }, { "epoch": 0.9109328256731031, "grad_norm": 0.2890625, "learning_rate": 4.782014488776099e-07, "loss": 0.5296, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6699 }, { "epoch": 0.9110688060919228, "grad_norm": 0.2490234375, "learning_rate": 4.7675190064591094e-07, "loss": 0.4813, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6700 }, { "epoch": 0.9112047865107424, "grad_norm": 0.3671875, "learning_rate": 4.7530449907076446e-07, "loss": 0.7147, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6701 }, { "epoch": 0.9113407669295621, "grad_norm": 0.41796875, "learning_rate": 4.738592444784351e-07, "loss": 0.6709, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6702 }, { "epoch": 0.9114767473483818, "grad_norm": 0.39453125, "learning_rate": 4.7241613719469784e-07, "loss": 0.7194, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6703 }, { "epoch": 0.9116127277672015, "grad_norm": 0.3984375, "learning_rate": 4.709751775448501e-07, "loss": 0.5939, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6704 }, { "epoch": 0.9117487081860212, "grad_norm": 0.359375, "learning_rate": 4.695363658536978e-07, "loss": 0.6548, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6705 }, { "epoch": 0.911884688604841, "grad_norm": 0.44921875, "learning_rate": 4.6809970244557024e-07, "loss": 0.7629, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6706 }, { "epoch": 0.9120206690236606, "grad_norm": 0.40234375, "learning_rate": 4.6666518764430736e-07, "loss": 0.7643, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6707 }, { "epoch": 0.9121566494424803, "grad_norm": 0.32421875, "learning_rate": 4.6523282177326713e-07, "loss": 0.6688, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6708 }, { "epoch": 0.9122926298613, "grad_norm": 0.404296875, "learning_rate": 4.6380260515532015e-07, "loss": 0.7946, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6709 }, { "epoch": 0.9124286102801197, "grad_norm": 0.43359375, "learning_rate": 4.623745381128575e-07, "loss": 0.6359, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6710 }, { "epoch": 0.9125645906989394, "grad_norm": 0.6171875, "learning_rate": 4.609486209677805e-07, "loss": 0.7899, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6711 }, { "epoch": 0.912700571117759, "grad_norm": 0.51171875, "learning_rate": 4.595248540415109e-07, "loss": 0.6543, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6712 }, { "epoch": 0.9128365515365787, "grad_norm": 0.373046875, "learning_rate": 4.5810323765497987e-07, "loss": 0.6484, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6713 }, { "epoch": 0.9129725319553984, "grad_norm": 0.3984375, "learning_rate": 4.56683772128641e-07, "loss": 0.7397, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6714 }, { "epoch": 0.9131085123742181, "grad_norm": 0.462890625, "learning_rate": 4.5526645778245615e-07, "loss": 0.5799, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6715 }, { "epoch": 0.9132444927930378, "grad_norm": 0.3671875, "learning_rate": 4.538512949359075e-07, "loss": 0.5581, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6716 }, { "epoch": 0.9133804732118574, "grad_norm": 0.384765625, "learning_rate": 4.5243828390798773e-07, "loss": 0.627, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6717 }, { "epoch": 0.9135164536306771, "grad_norm": 0.78515625, "learning_rate": 4.5102742501720976e-07, "loss": 0.8825, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6718 }, { "epoch": 0.9136524340494969, "grad_norm": 0.60546875, "learning_rate": 4.4961871858159477e-07, "loss": 0.4889, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6719 }, { "epoch": 0.9137884144683166, "grad_norm": 0.80859375, "learning_rate": 4.4821216491868545e-07, "loss": 0.6566, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6720 }, { "epoch": 0.9139243948871363, "grad_norm": 0.322265625, "learning_rate": 4.4680776434553375e-07, "loss": 0.6045, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6721 }, { "epoch": 0.914060375305956, "grad_norm": 0.51953125, "learning_rate": 4.4540551717871083e-07, "loss": 0.7588, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6722 }, { "epoch": 0.9141963557247756, "grad_norm": 0.30078125, "learning_rate": 4.4400542373429946e-07, "loss": 0.4552, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6723 }, { "epoch": 0.9143323361435953, "grad_norm": 0.265625, "learning_rate": 4.4260748432789826e-07, "loss": 0.4927, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6724 }, { "epoch": 0.914468316562415, "grad_norm": 0.474609375, "learning_rate": 4.412116992746196e-07, "loss": 0.6771, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6725 }, { "epoch": 0.9146042969812347, "grad_norm": 0.287109375, "learning_rate": 4.398180688890896e-07, "loss": 0.4904, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6726 }, { "epoch": 0.9147402774000544, "grad_norm": 0.6875, "learning_rate": 4.384265934854526e-07, "loss": 0.6587, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6727 }, { "epoch": 0.914876257818874, "grad_norm": 0.828125, "learning_rate": 4.370372733773598e-07, "loss": 0.8351, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6728 }, { "epoch": 0.9150122382376937, "grad_norm": 0.2890625, "learning_rate": 4.356501088779841e-07, "loss": 0.4793, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6729 }, { "epoch": 0.9151482186565134, "grad_norm": 0.271484375, "learning_rate": 4.342651003000076e-07, "loss": 0.5465, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6730 }, { "epoch": 0.9152841990753332, "grad_norm": 0.55859375, "learning_rate": 4.328822479556294e-07, "loss": 0.4238, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6731 }, { "epoch": 0.9154201794941529, "grad_norm": 0.291015625, "learning_rate": 4.315015521565602e-07, "loss": 0.6592, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6732 }, { "epoch": 0.9155561599129726, "grad_norm": 0.375, "learning_rate": 4.301230132140277e-07, "loss": 0.5977, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6733 }, { "epoch": 0.9156921403317922, "grad_norm": 1.078125, "learning_rate": 4.287466314387689e-07, "loss": 0.8211, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6734 }, { "epoch": 0.9158281207506119, "grad_norm": 0.220703125, "learning_rate": 4.2737240714104014e-07, "loss": 0.3625, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6735 }, { "epoch": 0.9159641011694316, "grad_norm": 0.63671875, "learning_rate": 4.260003406306046e-07, "loss": 0.5654, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6736 }, { "epoch": 0.9161000815882513, "grad_norm": 0.330078125, "learning_rate": 4.2463043221674603e-07, "loss": 0.6004, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6737 }, { "epoch": 0.916236062007071, "grad_norm": 0.64453125, "learning_rate": 4.2326268220825526e-07, "loss": 0.6478, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6738 }, { "epoch": 0.9163720424258907, "grad_norm": 0.291015625, "learning_rate": 4.2189709091344453e-07, "loss": 0.5075, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6739 }, { "epoch": 0.9165080228447103, "grad_norm": 0.26953125, "learning_rate": 4.2053365864012985e-07, "loss": 0.4961, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6740 }, { "epoch": 0.91664400326353, "grad_norm": 0.44921875, "learning_rate": 4.1917238569564977e-07, "loss": 0.5884, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6741 }, { "epoch": 0.9167799836823497, "grad_norm": 0.244140625, "learning_rate": 4.1781327238684775e-07, "loss": 0.5358, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6742 }, { "epoch": 0.9169159641011694, "grad_norm": 0.5234375, "learning_rate": 4.1645631902008876e-07, "loss": 0.4272, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6743 }, { "epoch": 0.9170519445199892, "grad_norm": 0.404296875, "learning_rate": 4.151015259012436e-07, "loss": 0.798, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6744 }, { "epoch": 0.9171879249388089, "grad_norm": 0.369140625, "learning_rate": 4.1374889333570035e-07, "loss": 0.6893, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6745 }, { "epoch": 0.9173239053576285, "grad_norm": 0.5234375, "learning_rate": 4.123984216283583e-07, "loss": 0.5495, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6746 }, { "epoch": 0.9174598857764482, "grad_norm": 0.455078125, "learning_rate": 4.110501110836329e-07, "loss": 0.5024, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6747 }, { "epoch": 0.9175958661952679, "grad_norm": 0.263671875, "learning_rate": 4.0970396200544546e-07, "loss": 0.4842, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6748 }, { "epoch": 0.9177318466140876, "grad_norm": 0.65625, "learning_rate": 4.0835997469723776e-07, "loss": 0.585, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6749 }, { "epoch": 0.9178678270329073, "grad_norm": 0.3515625, "learning_rate": 4.070181494619596e-07, "loss": 0.6805, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6750 }, { "epoch": 0.9180038074517269, "grad_norm": 0.400390625, "learning_rate": 4.0567848660207576e-07, "loss": 0.8441, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6751 }, { "epoch": 0.9181397878705466, "grad_norm": 0.59765625, "learning_rate": 4.043409864195602e-07, "loss": 0.7503, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6752 }, { "epoch": 0.9182757682893663, "grad_norm": 0.59765625, "learning_rate": 4.0300564921590515e-07, "loss": 0.4741, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6753 }, { "epoch": 0.918411748708186, "grad_norm": 0.3671875, "learning_rate": 4.0167247529210974e-07, "loss": 0.5643, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6754 }, { "epoch": 0.9185477291270057, "grad_norm": 0.37109375, "learning_rate": 4.003414649486892e-07, "loss": 0.6657, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6755 }, { "epoch": 0.9186837095458255, "grad_norm": 0.56640625, "learning_rate": 3.9901261848566687e-07, "loss": 0.8073, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6756 }, { "epoch": 0.9188196899646451, "grad_norm": 0.302734375, "learning_rate": 3.9768593620258533e-07, "loss": 0.596, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6757 }, { "epoch": 0.9189556703834648, "grad_norm": 0.30859375, "learning_rate": 3.9636141839848987e-07, "loss": 0.5449, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6758 }, { "epoch": 0.9190916508022845, "grad_norm": 0.3125, "learning_rate": 3.950390653719471e-07, "loss": 0.5545, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6759 }, { "epoch": 0.9192276312211042, "grad_norm": 0.421875, "learning_rate": 3.9371887742102986e-07, "loss": 0.6292, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6760 }, { "epoch": 0.9193636116399239, "grad_norm": 0.345703125, "learning_rate": 3.9240085484332445e-07, "loss": 0.6925, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6761 }, { "epoch": 0.9194995920587435, "grad_norm": 0.458984375, "learning_rate": 3.910849979359288e-07, "loss": 0.6439, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6762 }, { "epoch": 0.9196355724775632, "grad_norm": 0.421875, "learning_rate": 3.897713069954556e-07, "loss": 0.6143, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6763 }, { "epoch": 0.9197715528963829, "grad_norm": 0.341796875, "learning_rate": 3.8845978231802474e-07, "loss": 0.7651, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6764 }, { "epoch": 0.9199075333152026, "grad_norm": 0.400390625, "learning_rate": 3.871504241992707e-07, "loss": 0.5392, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6765 }, { "epoch": 0.9200435137340223, "grad_norm": 0.359375, "learning_rate": 3.858432329343376e-07, "loss": 0.6174, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6766 }, { "epoch": 0.920179494152842, "grad_norm": 0.71875, "learning_rate": 3.8453820881788394e-07, "loss": 0.9759, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6767 }, { "epoch": 0.9203154745716616, "grad_norm": 0.26171875, "learning_rate": 3.832353521440768e-07, "loss": 0.3973, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6768 }, { "epoch": 0.9204514549904814, "grad_norm": 0.439453125, "learning_rate": 3.819346632065968e-07, "loss": 0.6771, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6769 }, { "epoch": 0.9205874354093011, "grad_norm": 0.80859375, "learning_rate": 3.806361422986349e-07, "loss": 0.7038, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6770 }, { "epoch": 0.9207234158281208, "grad_norm": 0.58203125, "learning_rate": 3.793397897128948e-07, "loss": 0.6271, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6771 }, { "epoch": 0.9208593962469405, "grad_norm": 0.380859375, "learning_rate": 3.780456057415871e-07, "loss": 0.5913, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6772 }, { "epoch": 0.9209953766657601, "grad_norm": 0.376953125, "learning_rate": 3.767535906764397e-07, "loss": 0.7765, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6773 }, { "epoch": 0.9211313570845798, "grad_norm": 0.4921875, "learning_rate": 3.754637448086873e-07, "loss": 0.7321, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6774 }, { "epoch": 0.9212673375033995, "grad_norm": 0.3828125, "learning_rate": 3.7417606842907847e-07, "loss": 0.5703, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6775 }, { "epoch": 0.9214033179222192, "grad_norm": 0.30078125, "learning_rate": 3.7289056182786774e-07, "loss": 0.5381, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6776 }, { "epoch": 0.9215392983410389, "grad_norm": 0.455078125, "learning_rate": 3.7160722529482773e-07, "loss": 0.4682, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6777 }, { "epoch": 0.9216752787598586, "grad_norm": 0.435546875, "learning_rate": 3.7032605911923593e-07, "loss": 0.5815, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6778 }, { "epoch": 0.9218112591786782, "grad_norm": 0.388671875, "learning_rate": 3.690470635898835e-07, "loss": 0.6649, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6779 }, { "epoch": 0.9219472395974979, "grad_norm": 0.462890625, "learning_rate": 3.6777023899507216e-07, "loss": 0.8081, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6780 }, { "epoch": 0.9220832200163176, "grad_norm": 0.73828125, "learning_rate": 3.6649558562261377e-07, "loss": 0.6405, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6781 }, { "epoch": 0.9222192004351374, "grad_norm": 0.67578125, "learning_rate": 3.652231037598286e-07, "loss": 0.8717, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6782 }, { "epoch": 0.9223551808539571, "grad_norm": 0.314453125, "learning_rate": 3.6395279369355274e-07, "loss": 0.7306, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6783 }, { "epoch": 0.9224911612727767, "grad_norm": 0.45703125, "learning_rate": 3.6268465571012824e-07, "loss": 0.4967, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6784 }, { "epoch": 0.9226271416915964, "grad_norm": 0.388671875, "learning_rate": 3.6141869009540866e-07, "loss": 0.4352, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6785 }, { "epoch": 0.9227631221104161, "grad_norm": 0.408203125, "learning_rate": 3.6015489713475906e-07, "loss": 0.5957, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6786 }, { "epoch": 0.9228991025292358, "grad_norm": 0.291015625, "learning_rate": 3.5889327711305156e-07, "loss": 0.515, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6787 }, { "epoch": 0.9230350829480555, "grad_norm": 0.310546875, "learning_rate": 3.5763383031467425e-07, "loss": 0.5345, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6788 }, { "epoch": 0.9231710633668752, "grad_norm": 0.431640625, "learning_rate": 3.563765570235178e-07, "loss": 0.8464, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6789 }, { "epoch": 0.9233070437856948, "grad_norm": 1.125, "learning_rate": 3.551214575229911e-07, "loss": 0.9591, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6790 }, { "epoch": 0.9234430242045145, "grad_norm": 0.365234375, "learning_rate": 3.5386853209600557e-07, "loss": 0.5098, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6791 }, { "epoch": 0.9235790046233342, "grad_norm": 0.3359375, "learning_rate": 3.526177810249887e-07, "loss": 0.646, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6792 }, { "epoch": 0.9237149850421539, "grad_norm": 0.302734375, "learning_rate": 3.513692045918726e-07, "loss": 0.6146, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6793 }, { "epoch": 0.9238509654609737, "grad_norm": 0.5703125, "learning_rate": 3.5012280307810344e-07, "loss": 0.739, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6794 }, { "epoch": 0.9239869458797934, "grad_norm": 0.39453125, "learning_rate": 3.488785767646341e-07, "loss": 0.6955, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6795 }, { "epoch": 0.924122926298613, "grad_norm": 0.8984375, "learning_rate": 3.476365259319292e-07, "loss": 0.6956, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6796 }, { "epoch": 0.9242589067174327, "grad_norm": 0.3046875, "learning_rate": 3.463966508599614e-07, "loss": 0.6087, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6797 }, { "epoch": 0.9243948871362524, "grad_norm": 0.3203125, "learning_rate": 3.45158951828215e-07, "loss": 0.7153, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6798 }, { "epoch": 0.9245308675550721, "grad_norm": 0.59765625, "learning_rate": 3.439234291156801e-07, "loss": 0.4855, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6799 }, { "epoch": 0.9246668479738918, "grad_norm": 0.53515625, "learning_rate": 3.426900830008628e-07, "loss": 0.7345, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6800 }, { "epoch": 0.9248028283927114, "grad_norm": 0.353515625, "learning_rate": 3.414589137617697e-07, "loss": 0.7212, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6801 }, { "epoch": 0.9249388088115311, "grad_norm": 0.482421875, "learning_rate": 3.402299216759242e-07, "loss": 0.5355, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6802 }, { "epoch": 0.9250747892303508, "grad_norm": 0.373046875, "learning_rate": 3.390031070203548e-07, "loss": 0.2656, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6803 }, { "epoch": 0.9252107696491705, "grad_norm": 0.28515625, "learning_rate": 3.3777847007160245e-07, "loss": 0.4932, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6804 }, { "epoch": 0.9253467500679902, "grad_norm": 0.263671875, "learning_rate": 3.365560111057131e-07, "loss": 0.3849, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6805 }, { "epoch": 0.9254827304868098, "grad_norm": 0.498046875, "learning_rate": 3.353357303982463e-07, "loss": 0.665, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6806 }, { "epoch": 0.9256187109056296, "grad_norm": 0.5546875, "learning_rate": 3.341176282242653e-07, "loss": 0.6314, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6807 }, { "epoch": 0.9257546913244493, "grad_norm": 0.345703125, "learning_rate": 3.3290170485834715e-07, "loss": 0.5746, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6808 }, { "epoch": 0.925890671743269, "grad_norm": 0.357421875, "learning_rate": 3.31687960574576e-07, "loss": 0.5602, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6809 }, { "epoch": 0.9260266521620887, "grad_norm": 0.40234375, "learning_rate": 3.30476395646544e-07, "loss": 0.7596, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6810 }, { "epoch": 0.9261626325809084, "grad_norm": 0.251953125, "learning_rate": 3.2926701034735163e-07, "loss": 0.4993, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6811 }, { "epoch": 0.926298612999728, "grad_norm": 0.5703125, "learning_rate": 3.28059804949612e-07, "loss": 0.4497, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6812 }, { "epoch": 0.9264345934185477, "grad_norm": 0.2421875, "learning_rate": 3.2685477972544064e-07, "loss": 0.4777, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6813 }, { "epoch": 0.9265705738373674, "grad_norm": 0.59765625, "learning_rate": 3.256519349464671e-07, "loss": 0.7295, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6814 }, { "epoch": 0.9267065542561871, "grad_norm": 0.26171875, "learning_rate": 3.2445127088382655e-07, "loss": 0.5042, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6815 }, { "epoch": 0.9268425346750068, "grad_norm": 0.51953125, "learning_rate": 3.232527878081648e-07, "loss": 0.5396, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6816 }, { "epoch": 0.9269785150938264, "grad_norm": 0.34375, "learning_rate": 3.2205648598963245e-07, "loss": 0.6468, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6817 }, { "epoch": 0.9271144955126461, "grad_norm": 3.8125, "learning_rate": 3.2086236569789155e-07, "loss": 0.7173, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6818 }, { "epoch": 0.9272504759314659, "grad_norm": 0.294921875, "learning_rate": 3.196704272021112e-07, "loss": 0.418, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6819 }, { "epoch": 0.9273864563502856, "grad_norm": 0.271484375, "learning_rate": 3.184806707709698e-07, "loss": 0.4998, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6820 }, { "epoch": 0.9275224367691053, "grad_norm": 0.345703125, "learning_rate": 3.1729309667265063e-07, "loss": 0.5574, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6821 }, { "epoch": 0.927658417187925, "grad_norm": 0.361328125, "learning_rate": 3.1610770517485066e-07, "loss": 0.6178, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6822 }, { "epoch": 0.9277943976067446, "grad_norm": 0.3515625, "learning_rate": 3.149244965447684e-07, "loss": 0.7627, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6823 }, { "epoch": 0.9279303780255643, "grad_norm": 0.380859375, "learning_rate": 3.1374347104911716e-07, "loss": 0.6903, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6824 }, { "epoch": 0.928066358444384, "grad_norm": 0.41796875, "learning_rate": 3.1256462895411066e-07, "loss": 0.6635, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6825 }, { "epoch": 0.9282023388632037, "grad_norm": 0.32421875, "learning_rate": 3.113879705254774e-07, "loss": 0.6706, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6826 }, { "epoch": 0.9283383192820234, "grad_norm": 0.43359375, "learning_rate": 3.1021349602844755e-07, "loss": 0.7665, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6827 }, { "epoch": 0.928474299700843, "grad_norm": 0.390625, "learning_rate": 3.0904120572776583e-07, "loss": 0.6875, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6828 }, { "epoch": 0.9286102801196627, "grad_norm": 0.39453125, "learning_rate": 3.078710998876777e-07, "loss": 0.8224, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6829 }, { "epoch": 0.9287462605384824, "grad_norm": 0.5390625, "learning_rate": 3.06703178771941e-07, "loss": 0.5381, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6830 }, { "epoch": 0.9288822409573021, "grad_norm": 0.5234375, "learning_rate": 3.055374426438185e-07, "loss": 0.6224, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6831 }, { "epoch": 0.9290182213761219, "grad_norm": 0.37890625, "learning_rate": 3.043738917660821e-07, "loss": 0.6584, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6832 }, { "epoch": 0.9291542017949416, "grad_norm": 0.3515625, "learning_rate": 3.0321252640100883e-07, "loss": 0.6308, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6833 }, { "epoch": 0.9292901822137613, "grad_norm": 0.376953125, "learning_rate": 3.02053346810387e-07, "loss": 0.6764, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6834 }, { "epoch": 0.9294261626325809, "grad_norm": 0.455078125, "learning_rate": 3.008963532555076e-07, "loss": 0.8034, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6835 }, { "epoch": 0.9295621430514006, "grad_norm": 1.0625, "learning_rate": 2.997415459971731e-07, "loss": 0.9098, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6836 }, { "epoch": 0.9296981234702203, "grad_norm": 0.3125, "learning_rate": 2.985889252956897e-07, "loss": 0.6838, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6837 }, { "epoch": 0.92983410388904, "grad_norm": 0.54296875, "learning_rate": 2.97438491410873e-07, "loss": 0.7303, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6838 }, { "epoch": 0.9299700843078597, "grad_norm": 0.59765625, "learning_rate": 2.9629024460204434e-07, "loss": 0.6795, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6839 }, { "epoch": 0.9301060647266793, "grad_norm": 0.380859375, "learning_rate": 2.9514418512803345e-07, "loss": 0.6403, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6840 }, { "epoch": 0.930242045145499, "grad_norm": 0.310546875, "learning_rate": 2.9400031324717473e-07, "loss": 0.5684, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6841 }, { "epoch": 0.9303780255643187, "grad_norm": 0.375, "learning_rate": 2.928586292173119e-07, "loss": 0.6196, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6842 }, { "epoch": 0.9305140059831384, "grad_norm": 0.365234375, "learning_rate": 2.917191332957958e-07, "loss": 0.6774, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6843 }, { "epoch": 0.9306499864019581, "grad_norm": 0.22265625, "learning_rate": 2.905818257394799e-07, "loss": 0.4183, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6844 }, { "epoch": 0.9307859668207779, "grad_norm": 0.40234375, "learning_rate": 2.894467068047291e-07, "loss": 0.7749, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6845 }, { "epoch": 0.9309219472395975, "grad_norm": 0.53125, "learning_rate": 2.8831377674741203e-07, "loss": 0.6089, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6846 }, { "epoch": 0.9310579276584172, "grad_norm": 0.380859375, "learning_rate": 2.8718303582290665e-07, "loss": 0.7057, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6847 }, { "epoch": 0.9311939080772369, "grad_norm": 0.48046875, "learning_rate": 2.860544842860924e-07, "loss": 0.6484, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6848 }, { "epoch": 0.9313298884960566, "grad_norm": 0.53515625, "learning_rate": 2.849281223913636e-07, "loss": 0.7813, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6849 }, { "epoch": 0.9314658689148763, "grad_norm": 0.3203125, "learning_rate": 2.8380395039261155e-07, "loss": 0.5119, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6850 }, { "epoch": 0.9316018493336959, "grad_norm": 0.53515625, "learning_rate": 2.826819685432414e-07, "loss": 0.7531, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6851 }, { "epoch": 0.9317378297525156, "grad_norm": 0.3046875, "learning_rate": 2.815621770961585e-07, "loss": 0.5293, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6852 }, { "epoch": 0.9318738101713353, "grad_norm": 0.609375, "learning_rate": 2.804445763037811e-07, "loss": 0.5591, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6853 }, { "epoch": 0.932009790590155, "grad_norm": 0.4453125, "learning_rate": 2.793291664180264e-07, "loss": 0.6396, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6854 }, { "epoch": 0.9321457710089747, "grad_norm": 0.361328125, "learning_rate": 2.782159476903246e-07, "loss": 0.6878, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6855 }, { "epoch": 0.9322817514277943, "grad_norm": 0.427734375, "learning_rate": 2.77104920371607e-07, "loss": 0.5461, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6856 }, { "epoch": 0.9324177318466141, "grad_norm": 0.3515625, "learning_rate": 2.7599608471231333e-07, "loss": 0.7122, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6857 }, { "epoch": 0.9325537122654338, "grad_norm": 0.375, "learning_rate": 2.748894409623881e-07, "loss": 0.7946, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6858 }, { "epoch": 0.9326896926842535, "grad_norm": 0.89453125, "learning_rate": 2.737849893712841e-07, "loss": 0.6182, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6859 }, { "epoch": 0.9328256731030732, "grad_norm": 0.75390625, "learning_rate": 2.726827301879553e-07, "loss": 0.7819, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6860 }, { "epoch": 0.9329616535218929, "grad_norm": 0.37890625, "learning_rate": 2.715826636608676e-07, "loss": 0.6854, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6861 }, { "epoch": 0.9330976339407125, "grad_norm": 0.34765625, "learning_rate": 2.70484790037987e-07, "loss": 0.6226, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6862 }, { "epoch": 0.9332336143595322, "grad_norm": 0.404296875, "learning_rate": 2.6938910956679e-07, "loss": 0.606, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6863 }, { "epoch": 0.9333695947783519, "grad_norm": 0.451171875, "learning_rate": 2.6829562249425234e-07, "loss": 0.7562, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6864 }, { "epoch": 0.9335055751971716, "grad_norm": 0.47265625, "learning_rate": 2.6720432906686467e-07, "loss": 0.7599, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6865 }, { "epoch": 0.9336415556159913, "grad_norm": 0.2431640625, "learning_rate": 2.6611522953061353e-07, "loss": 0.4733, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6866 }, { "epoch": 0.933777536034811, "grad_norm": 0.490234375, "learning_rate": 2.65028324130997e-07, "loss": 0.6641, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6867 }, { "epoch": 0.9339135164536306, "grad_norm": 0.435546875, "learning_rate": 2.6394361311301685e-07, "loss": 0.5837, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6868 }, { "epoch": 0.9340494968724503, "grad_norm": 0.41796875, "learning_rate": 2.6286109672118086e-07, "loss": 0.5625, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6869 }, { "epoch": 0.9341854772912701, "grad_norm": 0.828125, "learning_rate": 2.617807751994994e-07, "loss": 0.6877, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6870 }, { "epoch": 0.9343214577100898, "grad_norm": 0.6484375, "learning_rate": 2.6070264879149323e-07, "loss": 0.9129, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6871 }, { "epoch": 0.9344574381289095, "grad_norm": 0.361328125, "learning_rate": 2.5962671774018234e-07, "loss": 0.6271, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6872 }, { "epoch": 0.9345934185477291, "grad_norm": 0.56640625, "learning_rate": 2.585529822880961e-07, "loss": 0.7729, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6873 }, { "epoch": 0.9347293989665488, "grad_norm": 0.5078125, "learning_rate": 2.5748144267726646e-07, "loss": 0.8302, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6874 }, { "epoch": 0.9348653793853685, "grad_norm": 0.5390625, "learning_rate": 2.564120991492336e-07, "loss": 0.6074, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6875 }, { "epoch": 0.9350013598041882, "grad_norm": 0.39453125, "learning_rate": 2.553449519450391e-07, "loss": 0.6216, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6876 }, { "epoch": 0.9351373402230079, "grad_norm": 0.427734375, "learning_rate": 2.542800013052316e-07, "loss": 0.5523, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6877 }, { "epoch": 0.9352733206418276, "grad_norm": 0.625, "learning_rate": 2.532172474698624e-07, "loss": 0.8509, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6878 }, { "epoch": 0.9354093010606472, "grad_norm": 0.578125, "learning_rate": 2.5215669067849225e-07, "loss": 0.805, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6879 }, { "epoch": 0.9355452814794669, "grad_norm": 0.375, "learning_rate": 2.510983311701809e-07, "loss": 0.542, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6880 }, { "epoch": 0.9356812618982866, "grad_norm": 0.419921875, "learning_rate": 2.500421691834975e-07, "loss": 0.6833, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6881 }, { "epoch": 0.9358172423171064, "grad_norm": 0.94140625, "learning_rate": 2.4898820495651175e-07, "loss": 0.6864, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6882 }, { "epoch": 0.9359532227359261, "grad_norm": 0.384765625, "learning_rate": 2.479364387268024e-07, "loss": 0.6918, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6883 }, { "epoch": 0.9360892031547458, "grad_norm": 0.373046875, "learning_rate": 2.4688687073144867e-07, "loss": 0.5794, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6884 }, { "epoch": 0.9362251835735654, "grad_norm": 0.5546875, "learning_rate": 2.458395012070369e-07, "loss": 0.6424, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6885 }, { "epoch": 0.9363611639923851, "grad_norm": 0.44140625, "learning_rate": 2.4479433038965606e-07, "loss": 0.7687, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6886 }, { "epoch": 0.9364971444112048, "grad_norm": 0.3046875, "learning_rate": 2.4375135851490206e-07, "loss": 0.5688, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6887 }, { "epoch": 0.9366331248300245, "grad_norm": 0.333984375, "learning_rate": 2.4271058581787245e-07, "loss": 0.5208, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6888 }, { "epoch": 0.9367691052488442, "grad_norm": 0.5, "learning_rate": 2.416720125331695e-07, "loss": 0.8267, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6889 }, { "epoch": 0.9369050856676638, "grad_norm": 0.28515625, "learning_rate": 2.406356388949005e-07, "loss": 0.5288, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6890 }, { "epoch": 0.9370410660864835, "grad_norm": 0.37890625, "learning_rate": 2.396014651366785e-07, "loss": 0.6414, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6891 }, { "epoch": 0.9371770465053032, "grad_norm": 0.318359375, "learning_rate": 2.3856949149161703e-07, "loss": 0.641, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6892 }, { "epoch": 0.9373130269241229, "grad_norm": 0.30859375, "learning_rate": 2.3753971819233668e-07, "loss": 0.61, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6893 }, { "epoch": 0.9374490073429426, "grad_norm": 0.9375, "learning_rate": 2.3651214547095957e-07, "loss": 0.6105, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6894 }, { "epoch": 0.9375849877617624, "grad_norm": 0.412109375, "learning_rate": 2.3548677355911599e-07, "loss": 0.5708, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6895 }, { "epoch": 0.937720968180582, "grad_norm": 0.38671875, "learning_rate": 2.3446360268793323e-07, "loss": 0.7077, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6896 }, { "epoch": 0.9378569485994017, "grad_norm": 0.380859375, "learning_rate": 2.3344263308805016e-07, "loss": 0.7332, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6897 }, { "epoch": 0.9379929290182214, "grad_norm": 0.49609375, "learning_rate": 2.3242386498960267e-07, "loss": 0.5915, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6898 }, { "epoch": 0.9381289094370411, "grad_norm": 0.47265625, "learning_rate": 2.3140729862223599e-07, "loss": 0.6257, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6899 }, { "epoch": 0.9382648898558608, "grad_norm": 0.41015625, "learning_rate": 2.3039293421509567e-07, "loss": 0.6139, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6900 }, { "epoch": 0.9384008702746804, "grad_norm": 0.52734375, "learning_rate": 2.293807719968333e-07, "loss": 0.7741, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6901 }, { "epoch": 0.9385368506935001, "grad_norm": 0.7578125, "learning_rate": 2.2837081219559963e-07, "loss": 0.7879, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6902 }, { "epoch": 0.9386728311123198, "grad_norm": 0.63671875, "learning_rate": 2.2736305503905376e-07, "loss": 0.8618, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6903 }, { "epoch": 0.9388088115311395, "grad_norm": 0.2353515625, "learning_rate": 2.2635750075435725e-07, "loss": 0.5103, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6904 }, { "epoch": 0.9389447919499592, "grad_norm": 0.333984375, "learning_rate": 2.2535414956817213e-07, "loss": 0.6053, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6905 }, { "epoch": 0.9390807723687788, "grad_norm": 0.44140625, "learning_rate": 2.2435300170666863e-07, "loss": 0.415, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6906 }, { "epoch": 0.9392167527875986, "grad_norm": 0.2392578125, "learning_rate": 2.2335405739551507e-07, "loss": 0.4313, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6907 }, { "epoch": 0.9393527332064183, "grad_norm": 0.36328125, "learning_rate": 2.2235731685988804e-07, "loss": 0.579, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6908 }, { "epoch": 0.939488713625238, "grad_norm": 0.248046875, "learning_rate": 2.2136278032446335e-07, "loss": 0.5008, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6909 }, { "epoch": 0.9396246940440577, "grad_norm": 0.232421875, "learning_rate": 2.203704480134228e-07, "loss": 0.4043, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6910 }, { "epoch": 0.9397606744628774, "grad_norm": 0.291015625, "learning_rate": 2.1938032015044964e-07, "loss": 0.5308, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6911 }, { "epoch": 0.939896654881697, "grad_norm": 0.68359375, "learning_rate": 2.1839239695873205e-07, "loss": 0.7614, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6912 }, { "epoch": 0.9400326353005167, "grad_norm": 0.34765625, "learning_rate": 2.1740667866095744e-07, "loss": 0.451, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6913 }, { "epoch": 0.9401686157193364, "grad_norm": 0.59765625, "learning_rate": 2.164231654793203e-07, "loss": 0.5835, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6914 }, { "epoch": 0.9403045961381561, "grad_norm": 0.421875, "learning_rate": 2.154418576355166e-07, "loss": 0.8828, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6915 }, { "epoch": 0.9404405765569758, "grad_norm": 0.365234375, "learning_rate": 2.1446275535074612e-07, "loss": 0.6857, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6916 }, { "epoch": 0.9405765569757955, "grad_norm": 0.4609375, "learning_rate": 2.1348585884570781e-07, "loss": 0.8372, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6917 }, { "epoch": 0.9407125373946151, "grad_norm": 0.5, "learning_rate": 2.1251116834060893e-07, "loss": 0.583, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6918 }, { "epoch": 0.9408485178134348, "grad_norm": 0.333984375, "learning_rate": 2.1153868405515364e-07, "loss": 0.6331, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6919 }, { "epoch": 0.9409844982322546, "grad_norm": 0.470703125, "learning_rate": 2.1056840620855555e-07, "loss": 0.4736, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6920 }, { "epoch": 0.9411204786510743, "grad_norm": 0.39453125, "learning_rate": 2.0960033501952305e-07, "loss": 0.61, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6921 }, { "epoch": 0.941256459069894, "grad_norm": 0.27734375, "learning_rate": 2.0863447070627484e-07, "loss": 0.529, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6922 }, { "epoch": 0.9413924394887137, "grad_norm": 0.46484375, "learning_rate": 2.0767081348652574e-07, "loss": 0.7189, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6923 }, { "epoch": 0.9415284199075333, "grad_norm": 2.328125, "learning_rate": 2.067093635774975e-07, "loss": 0.6426, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6924 }, { "epoch": 0.941664400326353, "grad_norm": 0.486328125, "learning_rate": 2.057501211959123e-07, "loss": 0.7865, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6925 }, { "epoch": 0.9418003807451727, "grad_norm": 0.31640625, "learning_rate": 2.0479308655799613e-07, "loss": 0.6502, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6926 }, { "epoch": 0.9419363611639924, "grad_norm": 0.31640625, "learning_rate": 2.0383825987947303e-07, "loss": 0.5623, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6927 }, { "epoch": 0.9420723415828121, "grad_norm": 0.609375, "learning_rate": 2.0288564137557642e-07, "loss": 0.6729, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6928 }, { "epoch": 0.9422083220016317, "grad_norm": 0.30078125, "learning_rate": 2.0193523126103565e-07, "loss": 0.6432, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6929 }, { "epoch": 0.9423443024204514, "grad_norm": 0.416015625, "learning_rate": 2.009870297500871e-07, "loss": 0.7463, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6930 }, { "epoch": 0.9424802828392711, "grad_norm": 0.34375, "learning_rate": 2.0004103705646426e-07, "loss": 0.5472, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6931 }, { "epoch": 0.9426162632580908, "grad_norm": 0.45703125, "learning_rate": 1.990972533934077e-07, "loss": 0.6556, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6932 }, { "epoch": 0.9427522436769106, "grad_norm": 0.423828125, "learning_rate": 1.9815567897365608e-07, "loss": 0.7738, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6933 }, { "epoch": 0.9428882240957303, "grad_norm": 0.341796875, "learning_rate": 1.9721631400945295e-07, "loss": 0.7443, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6934 }, { "epoch": 0.9430242045145499, "grad_norm": 0.408203125, "learning_rate": 1.962791587125412e-07, "loss": 0.5549, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6935 }, { "epoch": 0.9431601849333696, "grad_norm": 0.328125, "learning_rate": 1.9534421329416963e-07, "loss": 0.6118, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6936 }, { "epoch": 0.9432961653521893, "grad_norm": 0.38671875, "learning_rate": 1.9441147796508408e-07, "loss": 0.6895, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6937 }, { "epoch": 0.943432145771009, "grad_norm": 0.609375, "learning_rate": 1.9348095293553526e-07, "loss": 0.4818, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6938 }, { "epoch": 0.9435681261898287, "grad_norm": 0.35546875, "learning_rate": 1.9255263841527537e-07, "loss": 0.6744, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6939 }, { "epoch": 0.9437041066086483, "grad_norm": 0.341796875, "learning_rate": 1.9162653461355708e-07, "loss": 0.5866, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6940 }, { "epoch": 0.943840087027468, "grad_norm": 0.8125, "learning_rate": 1.9070264173913554e-07, "loss": 0.7401, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6941 }, { "epoch": 0.9439760674462877, "grad_norm": 0.484375, "learning_rate": 1.8978096000026868e-07, "loss": 0.7576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6942 }, { "epoch": 0.9441120478651074, "grad_norm": 0.380859375, "learning_rate": 1.8886148960471362e-07, "loss": 0.4884, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6943 }, { "epoch": 0.9442480282839271, "grad_norm": 0.3515625, "learning_rate": 1.8794423075973013e-07, "loss": 0.5721, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6944 }, { "epoch": 0.9443840087027469, "grad_norm": 0.373046875, "learning_rate": 1.8702918367208057e-07, "loss": 0.6681, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6945 }, { "epoch": 0.9445199891215665, "grad_norm": 0.30078125, "learning_rate": 1.8611634854802663e-07, "loss": 0.4727, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6946 }, { "epoch": 0.9446559695403862, "grad_norm": 0.8203125, "learning_rate": 1.8520572559333372e-07, "loss": 0.9168, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6947 }, { "epoch": 0.9447919499592059, "grad_norm": 0.34765625, "learning_rate": 1.8429731501326652e-07, "loss": 0.7495, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6948 }, { "epoch": 0.9449279303780256, "grad_norm": 0.330078125, "learning_rate": 1.8339111701259126e-07, "loss": 0.4784, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6949 }, { "epoch": 0.9450639107968453, "grad_norm": 0.484375, "learning_rate": 1.8248713179557788e-07, "loss": 0.8898, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6950 }, { "epoch": 0.945199891215665, "grad_norm": 0.3515625, "learning_rate": 1.8158535956599332e-07, "loss": 0.569, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6951 }, { "epoch": 0.9453358716344846, "grad_norm": 0.43359375, "learning_rate": 1.8068580052711172e-07, "loss": 0.526, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6952 }, { "epoch": 0.9454718520533043, "grad_norm": 0.380859375, "learning_rate": 1.797884548816997e-07, "loss": 0.7731, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6953 }, { "epoch": 0.945607832472124, "grad_norm": 0.77734375, "learning_rate": 1.7889332283203442e-07, "loss": 0.752, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6954 }, { "epoch": 0.9457438128909437, "grad_norm": 0.349609375, "learning_rate": 1.7800040457988777e-07, "loss": 0.6082, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6955 }, { "epoch": 0.9458797933097634, "grad_norm": 0.40625, "learning_rate": 1.771097003265343e-07, "loss": 0.7525, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6956 }, { "epoch": 0.946015773728583, "grad_norm": 0.490234375, "learning_rate": 1.7622121027274897e-07, "loss": 0.6268, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6957 }, { "epoch": 0.9461517541474028, "grad_norm": 0.37109375, "learning_rate": 1.7533493461881045e-07, "loss": 0.832, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6958 }, { "epoch": 0.9462877345662225, "grad_norm": 0.671875, "learning_rate": 1.7445087356449343e-07, "loss": 0.8389, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6959 }, { "epoch": 0.9464237149850422, "grad_norm": 0.333984375, "learning_rate": 1.7356902730907843e-07, "loss": 0.6484, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6960 }, { "epoch": 0.9465596954038619, "grad_norm": 0.68359375, "learning_rate": 1.7268939605134317e-07, "loss": 0.5396, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6961 }, { "epoch": 0.9466956758226815, "grad_norm": 0.328125, "learning_rate": 1.718119799895668e-07, "loss": 0.6548, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6962 }, { "epoch": 0.9468316562415012, "grad_norm": 0.255859375, "learning_rate": 1.7093677932153218e-07, "loss": 0.5524, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6963 }, { "epoch": 0.9469676366603209, "grad_norm": 0.478515625, "learning_rate": 1.7006379424451602e-07, "loss": 0.7222, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6964 }, { "epoch": 0.9471036170791406, "grad_norm": 1.6796875, "learning_rate": 1.691930249553042e-07, "loss": 0.8695, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6965 }, { "epoch": 0.9472395974979603, "grad_norm": 0.6015625, "learning_rate": 1.683244716501764e-07, "loss": 0.6372, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6966 }, { "epoch": 0.94737557791678, "grad_norm": 0.294921875, "learning_rate": 1.674581345249149e-07, "loss": 0.7409, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6967 }, { "epoch": 0.9475115583355996, "grad_norm": 0.45703125, "learning_rate": 1.6659401377480344e-07, "loss": 0.6475, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6968 }, { "epoch": 0.9476475387544193, "grad_norm": 0.345703125, "learning_rate": 1.6573210959462628e-07, "loss": 0.7743, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6969 }, { "epoch": 0.9477835191732391, "grad_norm": 0.21875, "learning_rate": 1.6487242217866462e-07, "loss": 0.4255, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6970 }, { "epoch": 0.9479194995920588, "grad_norm": 0.443359375, "learning_rate": 1.640149517207057e-07, "loss": 0.7546, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6971 }, { "epoch": 0.9480554800108785, "grad_norm": 0.23828125, "learning_rate": 1.6315969841403045e-07, "loss": 0.4214, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6972 }, { "epoch": 0.9481914604296982, "grad_norm": 0.55078125, "learning_rate": 1.623066624514258e-07, "loss": 0.861, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6973 }, { "epoch": 0.9483274408485178, "grad_norm": 0.419921875, "learning_rate": 1.6145584402517568e-07, "loss": 0.5876, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6974 }, { "epoch": 0.9484634212673375, "grad_norm": 0.51171875, "learning_rate": 1.6060724332706667e-07, "loss": 0.8271, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6975 }, { "epoch": 0.9485994016861572, "grad_norm": 0.6171875, "learning_rate": 1.5976086054838024e-07, "loss": 0.6273, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6976 }, { "epoch": 0.9487353821049769, "grad_norm": 0.34375, "learning_rate": 1.589166958799049e-07, "loss": 0.5422, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6977 }, { "epoch": 0.9488713625237966, "grad_norm": 0.443359375, "learning_rate": 1.5807474951192282e-07, "loss": 0.793, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6978 }, { "epoch": 0.9490073429426162, "grad_norm": 0.345703125, "learning_rate": 1.572350216342211e-07, "loss": 0.592, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6979 }, { "epoch": 0.9491433233614359, "grad_norm": 0.375, "learning_rate": 1.5639751243608393e-07, "loss": 0.6519, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6980 }, { "epoch": 0.9492793037802556, "grad_norm": 0.68359375, "learning_rate": 1.555622221062969e-07, "loss": 0.6283, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6981 }, { "epoch": 0.9494152841990753, "grad_norm": 0.40625, "learning_rate": 1.5472915083314277e-07, "loss": 0.7985, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6982 }, { "epoch": 0.9495512646178951, "grad_norm": 0.37109375, "learning_rate": 1.5389829880440688e-07, "loss": 0.652, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6983 }, { "epoch": 0.9496872450367148, "grad_norm": 0.40234375, "learning_rate": 1.5306966620737385e-07, "loss": 0.7985, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6984 }, { "epoch": 0.9498232254555344, "grad_norm": 0.259765625, "learning_rate": 1.5224325322882872e-07, "loss": 0.4233, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6985 }, { "epoch": 0.9499592058743541, "grad_norm": 0.30859375, "learning_rate": 1.5141906005505248e-07, "loss": 0.527, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6986 }, { "epoch": 0.9500951862931738, "grad_norm": 0.478515625, "learning_rate": 1.5059708687182984e-07, "loss": 0.8356, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6987 }, { "epoch": 0.9502311667119935, "grad_norm": 0.28515625, "learning_rate": 1.497773338644437e-07, "loss": 0.584, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6988 }, { "epoch": 0.9503671471308132, "grad_norm": 0.322265625, "learning_rate": 1.4895980121767627e-07, "loss": 0.641, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6989 }, { "epoch": 0.9505031275496328, "grad_norm": 0.318359375, "learning_rate": 1.481444891158079e-07, "loss": 0.665, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6990 }, { "epoch": 0.9506391079684525, "grad_norm": 0.28125, "learning_rate": 1.4733139774262274e-07, "loss": 0.5047, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6991 }, { "epoch": 0.9507750883872722, "grad_norm": 0.3828125, "learning_rate": 1.4652052728139964e-07, "loss": 0.752, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6992 }, { "epoch": 0.9509110688060919, "grad_norm": 0.369140625, "learning_rate": 1.4571187791491915e-07, "loss": 0.6255, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6993 }, { "epoch": 0.9510470492249116, "grad_norm": 0.359375, "learning_rate": 1.4490544982545984e-07, "loss": 0.7511, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6994 }, { "epoch": 0.9511830296437312, "grad_norm": 0.326171875, "learning_rate": 1.44101243194803e-07, "loss": 0.6489, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6995 }, { "epoch": 0.951319010062551, "grad_norm": 0.404296875, "learning_rate": 1.4329925820422364e-07, "loss": 0.7347, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6996 }, { "epoch": 0.9514549904813707, "grad_norm": 0.306640625, "learning_rate": 1.4249949503450045e-07, "loss": 0.5942, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6997 }, { "epoch": 0.9515909709001904, "grad_norm": 0.43359375, "learning_rate": 1.4170195386590923e-07, "loss": 0.5054, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6998 }, { "epoch": 0.9517269513190101, "grad_norm": 0.6796875, "learning_rate": 1.4090663487822732e-07, "loss": 0.5685, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 6999 }, { "epoch": 0.9518629317378298, "grad_norm": 0.390625, "learning_rate": 1.4011353825072683e-07, "loss": 0.741, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7000 }, { "epoch": 0.9519989121566494, "grad_norm": 0.52734375, "learning_rate": 1.3932266416218366e-07, "loss": 0.696, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7001 }, { "epoch": 0.9521348925754691, "grad_norm": 0.57421875, "learning_rate": 1.3853401279086853e-07, "loss": 0.8178, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7002 }, { "epoch": 0.9522708729942888, "grad_norm": 0.5078125, "learning_rate": 1.377475843145537e-07, "loss": 0.4191, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7003 }, { "epoch": 0.9524068534131085, "grad_norm": 0.9453125, "learning_rate": 1.369633789105107e-07, "loss": 0.5112, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7004 }, { "epoch": 0.9525428338319282, "grad_norm": 0.259765625, "learning_rate": 1.3618139675550813e-07, "loss": 0.4824, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7005 }, { "epoch": 0.9526788142507479, "grad_norm": 0.57421875, "learning_rate": 1.3540163802581496e-07, "loss": 0.6286, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7006 }, { "epoch": 0.9528147946695675, "grad_norm": 0.45703125, "learning_rate": 1.3462410289719728e-07, "loss": 0.5938, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7007 }, { "epoch": 0.9529507750883873, "grad_norm": 0.408203125, "learning_rate": 1.3384879154492158e-07, "loss": 0.6125, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7008 }, { "epoch": 0.953086755507207, "grad_norm": 0.55078125, "learning_rate": 1.330757041437536e-07, "loss": 0.6279, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7009 }, { "epoch": 0.9532227359260267, "grad_norm": 1.921875, "learning_rate": 1.3230484086795503e-07, "loss": 0.7609, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7010 }, { "epoch": 0.9533587163448464, "grad_norm": 0.462890625, "learning_rate": 1.3153620189128913e-07, "loss": 0.835, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7011 }, { "epoch": 0.953494696763666, "grad_norm": 0.42578125, "learning_rate": 1.3076978738701507e-07, "loss": 0.7088, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7012 }, { "epoch": 0.9536306771824857, "grad_norm": 0.296875, "learning_rate": 1.3000559752789354e-07, "loss": 0.6012, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7013 }, { "epoch": 0.9537666576013054, "grad_norm": 0.20703125, "learning_rate": 1.2924363248618122e-07, "loss": 0.4287, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7014 }, { "epoch": 0.9539026380201251, "grad_norm": 0.59765625, "learning_rate": 1.2848389243363514e-07, "loss": 0.5016, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7015 }, { "epoch": 0.9540386184389448, "grad_norm": 0.6015625, "learning_rate": 1.2772637754150941e-07, "loss": 0.495, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7016 }, { "epoch": 0.9541745988577645, "grad_norm": 0.3125, "learning_rate": 1.2697108798055745e-07, "loss": 0.6499, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7017 }, { "epoch": 0.9543105792765841, "grad_norm": 0.306640625, "learning_rate": 1.262180239210309e-07, "loss": 0.5376, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7018 }, { "epoch": 0.9544465596954038, "grad_norm": 0.640625, "learning_rate": 1.2546718553267944e-07, "loss": 0.6836, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7019 }, { "epoch": 0.9545825401142235, "grad_norm": 0.3359375, "learning_rate": 1.2471857298475108e-07, "loss": 0.6908, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7020 }, { "epoch": 0.9547185205330433, "grad_norm": 0.6015625, "learning_rate": 1.2397218644599197e-07, "loss": 0.7731, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7021 }, { "epoch": 0.954854500951863, "grad_norm": 0.474609375, "learning_rate": 1.2322802608464746e-07, "loss": 0.7031, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7022 }, { "epoch": 0.9549904813706827, "grad_norm": 0.39453125, "learning_rate": 1.2248609206846008e-07, "loss": 0.7008, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7023 }, { "epoch": 0.9551264617895023, "grad_norm": 0.28515625, "learning_rate": 1.2174638456467046e-07, "loss": 0.5412, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7024 }, { "epoch": 0.955262442208322, "grad_norm": 0.35546875, "learning_rate": 1.210089037400186e-07, "loss": 0.5365, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7025 }, { "epoch": 0.9553984226271417, "grad_norm": 0.9921875, "learning_rate": 1.2027364976074152e-07, "loss": 0.9142, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7026 }, { "epoch": 0.9555344030459614, "grad_norm": 0.5625, "learning_rate": 1.1954062279257328e-07, "loss": 0.7563, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7027 }, { "epoch": 0.9556703834647811, "grad_norm": 0.337890625, "learning_rate": 1.1880982300074839e-07, "loss": 0.7264, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7028 }, { "epoch": 0.9558063638836007, "grad_norm": 0.451171875, "learning_rate": 1.180812505499973e-07, "loss": 0.7448, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7029 }, { "epoch": 0.9559423443024204, "grad_norm": 0.302734375, "learning_rate": 1.1735490560454976e-07, "loss": 0.5132, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7030 }, { "epoch": 0.9560783247212401, "grad_norm": 0.69921875, "learning_rate": 1.1663078832813146e-07, "loss": 0.6971, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7031 }, { "epoch": 0.9562143051400598, "grad_norm": 0.181640625, "learning_rate": 1.1590889888396962e-07, "loss": 0.3283, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7032 }, { "epoch": 0.9563502855588796, "grad_norm": 0.53515625, "learning_rate": 1.1518923743478516e-07, "loss": 0.5511, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7033 }, { "epoch": 0.9564862659776993, "grad_norm": 0.259765625, "learning_rate": 1.1447180414279946e-07, "loss": 0.4567, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7034 }, { "epoch": 0.9566222463965189, "grad_norm": 0.45703125, "learning_rate": 1.137565991697298e-07, "loss": 0.5381, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7035 }, { "epoch": 0.9567582268153386, "grad_norm": 0.30078125, "learning_rate": 1.130436226767928e-07, "loss": 0.5542, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7036 }, { "epoch": 0.9568942072341583, "grad_norm": 0.57421875, "learning_rate": 1.1233287482470212e-07, "loss": 0.5578, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7037 }, { "epoch": 0.957030187652978, "grad_norm": 0.494140625, "learning_rate": 1.116243557736696e-07, "loss": 0.3091, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7038 }, { "epoch": 0.9571661680717977, "grad_norm": 0.54296875, "learning_rate": 1.109180656834019e-07, "loss": 0.668, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7039 }, { "epoch": 0.9573021484906173, "grad_norm": 0.34765625, "learning_rate": 1.102140047131084e-07, "loss": 0.5037, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7040 }, { "epoch": 0.957438128909437, "grad_norm": 0.7421875, "learning_rate": 1.0951217302148986e-07, "loss": 0.5964, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7041 }, { "epoch": 0.9575741093282567, "grad_norm": 0.2314453125, "learning_rate": 1.0881257076675088e-07, "loss": 0.4455, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7042 }, { "epoch": 0.9577100897470764, "grad_norm": 0.49609375, "learning_rate": 1.0811519810658866e-07, "loss": 0.5879, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7043 }, { "epoch": 0.9578460701658961, "grad_norm": 0.353515625, "learning_rate": 1.0742005519819964e-07, "loss": 0.665, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7044 }, { "epoch": 0.9579820505847157, "grad_norm": 0.390625, "learning_rate": 1.0672714219827629e-07, "loss": 0.721, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7045 }, { "epoch": 0.9581180310035355, "grad_norm": 0.177734375, "learning_rate": 1.0603645926301254e-07, "loss": 0.2944, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7046 }, { "epoch": 0.9582540114223552, "grad_norm": 0.40234375, "learning_rate": 1.0534800654809496e-07, "loss": 0.6499, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7047 }, { "epoch": 0.9583899918411749, "grad_norm": 0.3515625, "learning_rate": 1.0466178420870943e-07, "loss": 0.6336, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7048 }, { "epoch": 0.9585259722599946, "grad_norm": 0.431640625, "learning_rate": 1.0397779239953887e-07, "loss": 0.7622, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7049 }, { "epoch": 0.9586619526788143, "grad_norm": 0.396484375, "learning_rate": 1.0329603127476439e-07, "loss": 0.6463, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7050 }, { "epoch": 0.958797933097634, "grad_norm": 0.34765625, "learning_rate": 1.0261650098806086e-07, "loss": 0.7451, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7051 }, { "epoch": 0.9589339135164536, "grad_norm": 0.2578125, "learning_rate": 1.0193920169260574e-07, "loss": 0.5392, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7052 }, { "epoch": 0.9590698939352733, "grad_norm": 0.26171875, "learning_rate": 1.012641335410669e-07, "loss": 0.4539, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7053 }, { "epoch": 0.959205874354093, "grad_norm": 0.408203125, "learning_rate": 1.0059129668561707e-07, "loss": 0.597, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7054 }, { "epoch": 0.9593418547729127, "grad_norm": 0.30859375, "learning_rate": 9.992069127791937e-08, "loss": 0.543, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7055 }, { "epoch": 0.9594778351917324, "grad_norm": 0.486328125, "learning_rate": 9.925231746913622e-08, "loss": 0.7303, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7056 }, { "epoch": 0.959613815610552, "grad_norm": 0.322265625, "learning_rate": 9.858617540992932e-08, "loss": 0.5731, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7057 }, { "epoch": 0.9597497960293717, "grad_norm": 0.3359375, "learning_rate": 9.792226525045301e-08, "loss": 0.627, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7058 }, { "epoch": 0.9598857764481915, "grad_norm": 0.37109375, "learning_rate": 9.726058714036201e-08, "loss": 0.5093, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7059 }, { "epoch": 0.9600217568670112, "grad_norm": 0.3984375, "learning_rate": 9.660114122880593e-08, "loss": 0.5973, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7060 }, { "epoch": 0.9601577372858309, "grad_norm": 0.8828125, "learning_rate": 9.59439276644325e-08, "loss": 0.6021, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7061 }, { "epoch": 0.9602937177046506, "grad_norm": 0.34765625, "learning_rate": 9.528894659538657e-08, "loss": 0.6806, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7062 }, { "epoch": 0.9604296981234702, "grad_norm": 0.353515625, "learning_rate": 9.46361981693067e-08, "loss": 0.633, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7063 }, { "epoch": 0.9605656785422899, "grad_norm": 0.375, "learning_rate": 9.398568253333185e-08, "loss": 0.6802, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7064 }, { "epoch": 0.9607016589611096, "grad_norm": 0.72265625, "learning_rate": 9.333739983409584e-08, "loss": 0.7339, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7065 }, { "epoch": 0.9608376393799293, "grad_norm": 0.296875, "learning_rate": 9.269135021773068e-08, "loss": 0.5213, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7066 }, { "epoch": 0.960973619798749, "grad_norm": 0.640625, "learning_rate": 9.204753382986097e-08, "loss": 0.6225, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7067 }, { "epoch": 0.9611096002175686, "grad_norm": 0.404296875, "learning_rate": 9.140595081561399e-08, "loss": 0.6958, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7068 }, { "epoch": 0.9612455806363883, "grad_norm": 0.361328125, "learning_rate": 9.076660131960735e-08, "loss": 0.5936, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7069 }, { "epoch": 0.961381561055208, "grad_norm": 0.31640625, "learning_rate": 9.012948548596134e-08, "loss": 0.6383, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7070 }, { "epoch": 0.9615175414740278, "grad_norm": 0.345703125, "learning_rate": 8.949460345828776e-08, "loss": 0.6662, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7071 }, { "epoch": 0.9616535218928475, "grad_norm": 0.52734375, "learning_rate": 8.886195537969655e-08, "loss": 0.8561, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7072 }, { "epoch": 0.9617895023116672, "grad_norm": 0.451171875, "learning_rate": 8.823154139279588e-08, "loss": 0.7039, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7073 }, { "epoch": 0.9619254827304868, "grad_norm": 0.52734375, "learning_rate": 8.760336163968764e-08, "loss": 0.5137, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7074 }, { "epoch": 0.9620614631493065, "grad_norm": 0.3671875, "learning_rate": 8.697741626197076e-08, "loss": 0.5241, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7075 }, { "epoch": 0.9621974435681262, "grad_norm": 0.498046875, "learning_rate": 8.63537054007424e-08, "loss": 0.5682, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7076 }, { "epoch": 0.9623334239869459, "grad_norm": 0.3515625, "learning_rate": 8.573222919659341e-08, "loss": 0.6107, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7077 }, { "epoch": 0.9624694044057656, "grad_norm": 0.3359375, "learning_rate": 8.511298778961286e-08, "loss": 0.6883, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7078 }, { "epoch": 0.9626053848245852, "grad_norm": 0.8203125, "learning_rate": 8.449598131938464e-08, "loss": 0.7429, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7079 }, { "epoch": 0.9627413652434049, "grad_norm": 0.35546875, "learning_rate": 8.388120992499083e-08, "loss": 0.5654, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7080 }, { "epoch": 0.9628773456622246, "grad_norm": 0.4296875, "learning_rate": 8.326867374500613e-08, "loss": 0.8461, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7081 }, { "epoch": 0.9630133260810443, "grad_norm": 0.53125, "learning_rate": 8.265837291750677e-08, "loss": 0.7292, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7082 }, { "epoch": 0.963149306499864, "grad_norm": 0.19921875, "learning_rate": 8.205030758005938e-08, "loss": 0.3501, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7083 }, { "epoch": 0.9632852869186838, "grad_norm": 0.28125, "learning_rate": 8.144447786972986e-08, "loss": 0.5788, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7084 }, { "epoch": 0.9634212673375034, "grad_norm": 0.369140625, "learning_rate": 8.084088392308121e-08, "loss": 0.603, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7085 }, { "epoch": 0.9635572477563231, "grad_norm": 0.48828125, "learning_rate": 8.023952587616901e-08, "loss": 0.6683, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7086 }, { "epoch": 0.9636932281751428, "grad_norm": 0.357421875, "learning_rate": 7.964040386454818e-08, "loss": 0.703, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7087 }, { "epoch": 0.9638292085939625, "grad_norm": 0.62109375, "learning_rate": 7.904351802326626e-08, "loss": 0.9741, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7088 }, { "epoch": 0.9639651890127822, "grad_norm": 0.51171875, "learning_rate": 7.844886848687116e-08, "loss": 0.6733, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7089 }, { "epoch": 0.9641011694316018, "grad_norm": 0.40234375, "learning_rate": 7.785645538940234e-08, "loss": 0.7578, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7090 }, { "epoch": 0.9642371498504215, "grad_norm": 0.31640625, "learning_rate": 7.726627886439741e-08, "loss": 0.5441, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7091 }, { "epoch": 0.9643731302692412, "grad_norm": 0.546875, "learning_rate": 7.667833904488886e-08, "loss": 0.6862, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7092 }, { "epoch": 0.9645091106880609, "grad_norm": 0.453125, "learning_rate": 7.609263606340622e-08, "loss": 0.5804, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7093 }, { "epoch": 0.9646450911068806, "grad_norm": 0.375, "learning_rate": 7.550917005197389e-08, "loss": 0.7155, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7094 }, { "epoch": 0.9647810715257003, "grad_norm": 0.46484375, "learning_rate": 7.492794114211222e-08, "loss": 0.7228, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7095 }, { "epoch": 0.96491705194452, "grad_norm": 0.474609375, "learning_rate": 7.434894946483751e-08, "loss": 0.7469, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7096 }, { "epoch": 0.9650530323633397, "grad_norm": 0.64453125, "learning_rate": 7.377219515066092e-08, "loss": 0.5586, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7097 }, { "epoch": 0.9651890127821594, "grad_norm": 0.55859375, "learning_rate": 7.319767832959069e-08, "loss": 0.5807, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7098 }, { "epoch": 0.9653249932009791, "grad_norm": 0.330078125, "learning_rate": 7.262539913112987e-08, "loss": 0.6068, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7099 }, { "epoch": 0.9654609736197988, "grad_norm": 0.37109375, "learning_rate": 7.20553576842764e-08, "loss": 0.7135, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7100 }, { "epoch": 0.9655969540386184, "grad_norm": 0.3984375, "learning_rate": 7.14875541175264e-08, "loss": 0.6561, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7101 }, { "epoch": 0.9657329344574381, "grad_norm": 0.44921875, "learning_rate": 7.092198855886746e-08, "loss": 0.8646, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7102 }, { "epoch": 0.9658689148762578, "grad_norm": 0.318359375, "learning_rate": 7.035866113578649e-08, "loss": 0.486, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7103 }, { "epoch": 0.9660048952950775, "grad_norm": 0.4296875, "learning_rate": 6.979757197526305e-08, "loss": 0.688, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7104 }, { "epoch": 0.9661408757138972, "grad_norm": 0.486328125, "learning_rate": 6.923872120377484e-08, "loss": 0.7069, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7105 }, { "epoch": 0.9662768561327169, "grad_norm": 0.376953125, "learning_rate": 6.868210894729333e-08, "loss": 0.5265, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7106 }, { "epoch": 0.9664128365515365, "grad_norm": 0.50390625, "learning_rate": 6.812773533128592e-08, "loss": 0.7492, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7107 }, { "epoch": 0.9665488169703562, "grad_norm": 0.392578125, "learning_rate": 6.757560048071376e-08, "loss": 0.5143, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7108 }, { "epoch": 0.966684797389176, "grad_norm": 0.55078125, "learning_rate": 6.70257045200362e-08, "loss": 0.7918, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7109 }, { "epoch": 0.9668207778079957, "grad_norm": 0.2890625, "learning_rate": 6.64780475732063e-08, "loss": 0.5181, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7110 }, { "epoch": 0.9669567582268154, "grad_norm": 0.55078125, "learning_rate": 6.593262976367309e-08, "loss": 0.6398, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7111 }, { "epoch": 0.967092738645635, "grad_norm": 0.2138671875, "learning_rate": 6.538945121437934e-08, "loss": 0.4111, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7112 }, { "epoch": 0.9672287190644547, "grad_norm": 0.447265625, "learning_rate": 6.484851204776488e-08, "loss": 0.6525, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7113 }, { "epoch": 0.9673646994832744, "grad_norm": 0.419921875, "learning_rate": 6.430981238576328e-08, "loss": 0.6687, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7114 }, { "epoch": 0.9675006799020941, "grad_norm": 1.078125, "learning_rate": 6.377335234980408e-08, "loss": 0.6344, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7115 }, { "epoch": 0.9676366603209138, "grad_norm": 0.392578125, "learning_rate": 6.323913206081277e-08, "loss": 0.6623, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7116 }, { "epoch": 0.9677726407397335, "grad_norm": 0.263671875, "learning_rate": 6.270715163920971e-08, "loss": 0.3328, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7117 }, { "epoch": 0.9679086211585531, "grad_norm": 0.390625, "learning_rate": 6.217741120490783e-08, "loss": 0.5638, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7118 }, { "epoch": 0.9680446015773728, "grad_norm": 0.375, "learning_rate": 6.164991087731831e-08, "loss": 0.5905, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7119 }, { "epoch": 0.9681805819961925, "grad_norm": 0.447265625, "learning_rate": 6.112465077534602e-08, "loss": 0.5905, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7120 }, { "epoch": 0.9683165624150122, "grad_norm": 0.466796875, "learning_rate": 6.06016310173907e-08, "loss": 0.7699, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7121 }, { "epoch": 0.968452542833832, "grad_norm": 0.369140625, "learning_rate": 6.008085172134804e-08, "loss": 0.659, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7122 }, { "epoch": 0.9685885232526517, "grad_norm": 0.46484375, "learning_rate": 5.956231300460747e-08, "loss": 0.6921, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7123 }, { "epoch": 0.9687245036714713, "grad_norm": 0.466796875, "learning_rate": 5.904601498405327e-08, "loss": 0.6045, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7124 }, { "epoch": 0.968860484090291, "grad_norm": 0.9765625, "learning_rate": 5.8531957776067905e-08, "loss": 0.6423, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7125 }, { "epoch": 0.9689964645091107, "grad_norm": 0.35546875, "learning_rate": 5.802014149652313e-08, "loss": 0.4189, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7126 }, { "epoch": 0.9691324449279304, "grad_norm": 0.28125, "learning_rate": 5.751056626078999e-08, "loss": 0.6416, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7127 }, { "epoch": 0.9692684253467501, "grad_norm": 0.76953125, "learning_rate": 5.700323218373327e-08, "loss": 0.7277, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7128 }, { "epoch": 0.9694044057655697, "grad_norm": 0.44140625, "learning_rate": 5.649813937971149e-08, "loss": 0.7427, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7129 }, { "epoch": 0.9695403861843894, "grad_norm": 0.482421875, "learning_rate": 5.5995287962579135e-08, "loss": 0.7188, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7130 }, { "epoch": 0.9696763666032091, "grad_norm": 0.58984375, "learning_rate": 5.5494678045685534e-08, "loss": 0.7376, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7131 }, { "epoch": 0.9698123470220288, "grad_norm": 0.451171875, "learning_rate": 5.4996309741873755e-08, "loss": 0.7375, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7132 }, { "epoch": 0.9699483274408485, "grad_norm": 0.423828125, "learning_rate": 5.4500183163481714e-08, "loss": 0.776, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7133 }, { "epoch": 0.9700843078596683, "grad_norm": 0.275390625, "learning_rate": 5.400629842234328e-08, "loss": 0.5075, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7134 }, { "epoch": 0.9702202882784879, "grad_norm": 0.419921875, "learning_rate": 5.351465562978608e-08, "loss": 0.592, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7135 }, { "epoch": 0.9703562686973076, "grad_norm": 0.28515625, "learning_rate": 5.302525489663146e-08, "loss": 0.6097, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7136 }, { "epoch": 0.9704922491161273, "grad_norm": 0.3515625, "learning_rate": 5.253809633319673e-08, "loss": 0.7227, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7137 }, { "epoch": 0.970628229534947, "grad_norm": 0.443359375, "learning_rate": 5.205318004929405e-08, "loss": 0.7233, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7138 }, { "epoch": 0.9707642099537667, "grad_norm": 0.376953125, "learning_rate": 5.15705061542282e-08, "loss": 0.672, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7139 }, { "epoch": 0.9709001903725863, "grad_norm": 0.28125, "learning_rate": 5.109007475680217e-08, "loss": 0.5823, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7140 }, { "epoch": 0.971036170791406, "grad_norm": 0.341796875, "learning_rate": 5.0611885965307086e-08, "loss": 0.5602, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7141 }, { "epoch": 0.9711721512102257, "grad_norm": 0.3828125, "learning_rate": 5.0135939887536736e-08, "loss": 0.73, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7142 }, { "epoch": 0.9713081316290454, "grad_norm": 0.349609375, "learning_rate": 4.9662236630771963e-08, "loss": 0.6063, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7143 }, { "epoch": 0.9714441120478651, "grad_norm": 0.65625, "learning_rate": 4.919077630179403e-08, "loss": 0.7606, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7144 }, { "epoch": 0.9715800924666848, "grad_norm": 0.30859375, "learning_rate": 4.8721559006873473e-08, "loss": 0.3643, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7145 }, { "epoch": 0.9717160728855044, "grad_norm": 0.53125, "learning_rate": 4.825458485178014e-08, "loss": 0.9007, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7146 }, { "epoch": 0.9718520533043242, "grad_norm": 0.2890625, "learning_rate": 4.778985394177204e-08, "loss": 0.5863, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7147 }, { "epoch": 0.9719880337231439, "grad_norm": 0.396484375, "learning_rate": 4.732736638160984e-08, "loss": 0.7362, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7148 }, { "epoch": 0.9721240141419636, "grad_norm": 0.2890625, "learning_rate": 4.686712227553902e-08, "loss": 0.6263, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7149 }, { "epoch": 0.9722599945607833, "grad_norm": 0.271484375, "learning_rate": 4.640912172730883e-08, "loss": 0.5648, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7150 }, { "epoch": 0.972395974979603, "grad_norm": 0.49609375, "learning_rate": 4.595336484015445e-08, "loss": 0.8118, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7151 }, { "epoch": 0.9725319553984226, "grad_norm": 0.62109375, "learning_rate": 4.5499851716811484e-08, "loss": 0.7702, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7152 }, { "epoch": 0.9726679358172423, "grad_norm": 0.291015625, "learning_rate": 4.504858245950594e-08, "loss": 0.5182, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7153 }, { "epoch": 0.972803916236062, "grad_norm": 0.294921875, "learning_rate": 4.4599557169959785e-08, "loss": 0.6087, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7154 }, { "epoch": 0.9729398966548817, "grad_norm": 0.349609375, "learning_rate": 4.4152775949388716e-08, "loss": 0.5207, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7155 }, { "epoch": 0.9730758770737014, "grad_norm": 0.50390625, "learning_rate": 4.370823889850439e-08, "loss": 0.7826, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7156 }, { "epoch": 0.973211857492521, "grad_norm": 0.65625, "learning_rate": 4.3265946117507783e-08, "loss": 0.4829, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7157 }, { "epoch": 0.9733478379113407, "grad_norm": 0.31640625, "learning_rate": 4.2825897706100237e-08, "loss": 0.596, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7158 }, { "epoch": 0.9734838183301605, "grad_norm": 0.5078125, "learning_rate": 4.23880937634713e-08, "loss": 0.8052, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7159 }, { "epoch": 0.9736197987489802, "grad_norm": 0.63671875, "learning_rate": 4.195253438830871e-08, "loss": 0.717, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7160 }, { "epoch": 0.9737557791677999, "grad_norm": 0.453125, "learning_rate": 4.15192196787928e-08, "loss": 0.4373, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7161 }, { "epoch": 0.9738917595866196, "grad_norm": 0.31640625, "learning_rate": 4.108814973259878e-08, "loss": 0.6247, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7162 }, { "epoch": 0.9740277400054392, "grad_norm": 0.74609375, "learning_rate": 4.065932464689337e-08, "loss": 0.5295, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7163 }, { "epoch": 0.9741637204242589, "grad_norm": 0.53515625, "learning_rate": 4.023274451834036e-08, "loss": 0.8397, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7164 }, { "epoch": 0.9742997008430786, "grad_norm": 0.54296875, "learning_rate": 3.9808409443095054e-08, "loss": 0.5467, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7165 }, { "epoch": 0.9744356812618983, "grad_norm": 0.1865234375, "learning_rate": 3.9386319516809826e-08, "loss": 0.3516, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7166 }, { "epoch": 0.974571661680718, "grad_norm": 0.4140625, "learning_rate": 3.896647483462634e-08, "loss": 0.6722, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7167 }, { "epoch": 0.9747076420995376, "grad_norm": 0.41796875, "learning_rate": 3.8548875491183355e-08, "loss": 0.7572, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7168 }, { "epoch": 0.9748436225183573, "grad_norm": 0.26953125, "learning_rate": 3.813352158061445e-08, "loss": 0.4861, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7169 }, { "epoch": 0.974979602937177, "grad_norm": 0.470703125, "learning_rate": 3.7720413196543625e-08, "loss": 0.6857, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7170 }, { "epoch": 0.9751155833559967, "grad_norm": 0.392578125, "learning_rate": 3.730955043209083e-08, "loss": 0.7303, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7171 }, { "epoch": 0.9752515637748165, "grad_norm": 0.267578125, "learning_rate": 3.6900933379870887e-08, "loss": 0.4318, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7172 }, { "epoch": 0.9753875441936362, "grad_norm": 0.267578125, "learning_rate": 3.6494562131990096e-08, "loss": 0.4836, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7173 }, { "epoch": 0.9755235246124558, "grad_norm": 0.4296875, "learning_rate": 3.609043678004964e-08, "loss": 0.7493, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7174 }, { "epoch": 0.9756595050312755, "grad_norm": 0.27734375, "learning_rate": 3.568855741514332e-08, "loss": 0.5103, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7175 }, { "epoch": 0.9757954854500952, "grad_norm": 0.279296875, "learning_rate": 3.5288924127862e-08, "loss": 0.3923, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7176 }, { "epoch": 0.9759314658689149, "grad_norm": 0.423828125, "learning_rate": 3.489153700828585e-08, "loss": 0.4967, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7177 }, { "epoch": 0.9760674462877346, "grad_norm": 0.515625, "learning_rate": 3.44963961459921e-08, "loss": 0.7318, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7178 }, { "epoch": 0.9762034267065542, "grad_norm": 0.48828125, "learning_rate": 3.410350163004839e-08, "loss": 0.8638, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7179 }, { "epoch": 0.9763394071253739, "grad_norm": 0.875, "learning_rate": 3.371285354902054e-08, "loss": 0.6755, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7180 }, { "epoch": 0.9764753875441936, "grad_norm": 0.310546875, "learning_rate": 3.332445199096368e-08, "loss": 0.5845, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7181 }, { "epoch": 0.9766113679630133, "grad_norm": 0.431640625, "learning_rate": 3.293829704342777e-08, "loss": 0.4958, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7182 }, { "epoch": 0.976747348381833, "grad_norm": 0.302734375, "learning_rate": 3.2554388793458735e-08, "loss": 0.5203, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7183 }, { "epoch": 0.9768833288006527, "grad_norm": 0.291015625, "learning_rate": 3.2172727327594024e-08, "loss": 0.515, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7184 }, { "epoch": 0.9770193092194724, "grad_norm": 0.375, "learning_rate": 3.179331273186259e-08, "loss": 0.6504, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7185 }, { "epoch": 0.9771552896382921, "grad_norm": 0.337890625, "learning_rate": 3.141614509179158e-08, "loss": 0.6403, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7186 }, { "epoch": 0.9772912700571118, "grad_norm": 0.28515625, "learning_rate": 3.1041224492397436e-08, "loss": 0.5887, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7187 }, { "epoch": 0.9774272504759315, "grad_norm": 0.7265625, "learning_rate": 3.066855101819255e-08, "loss": 0.639, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7188 }, { "epoch": 0.9775632308947512, "grad_norm": 0.337890625, "learning_rate": 3.029812475318306e-08, "loss": 0.6143, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7189 }, { "epoch": 0.9776992113135708, "grad_norm": 0.330078125, "learning_rate": 2.9929945780865496e-08, "loss": 0.5106, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7190 }, { "epoch": 0.9778351917323905, "grad_norm": 0.400390625, "learning_rate": 2.956401418423349e-08, "loss": 0.7538, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7191 }, { "epoch": 0.9779711721512102, "grad_norm": 0.349609375, "learning_rate": 2.9200330045772164e-08, "loss": 0.6087, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7192 }, { "epoch": 0.9781071525700299, "grad_norm": 0.279296875, "learning_rate": 2.883889344746038e-08, "loss": 0.5946, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7193 }, { "epoch": 0.9782431329888496, "grad_norm": 0.435546875, "learning_rate": 2.8479704470769642e-08, "loss": 0.6414, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7194 }, { "epoch": 0.9783791134076693, "grad_norm": 0.365234375, "learning_rate": 2.812276319666629e-08, "loss": 0.6935, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7195 }, { "epoch": 0.9785150938264889, "grad_norm": 0.326171875, "learning_rate": 2.7768069705609302e-08, "loss": 0.4113, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7196 }, { "epoch": 0.9786510742453087, "grad_norm": 0.306640625, "learning_rate": 2.7415624077551383e-08, "loss": 0.6611, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7197 }, { "epoch": 0.9787870546641284, "grad_norm": 0.349609375, "learning_rate": 2.7065426391935655e-08, "loss": 0.5719, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7198 }, { "epoch": 0.9789230350829481, "grad_norm": 0.392578125, "learning_rate": 2.6717476727704527e-08, "loss": 0.7575, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7199 }, { "epoch": 0.9790590155017678, "grad_norm": 0.42578125, "learning_rate": 2.6371775163287482e-08, "loss": 0.8467, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7200 }, { "epoch": 0.9791949959205875, "grad_norm": 0.345703125, "learning_rate": 2.6028321776609966e-08, "loss": 0.7337, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7201 }, { "epoch": 0.9793309763394071, "grad_norm": 0.408203125, "learning_rate": 2.568711664509227e-08, "loss": 0.7803, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7202 }, { "epoch": 0.9794669567582268, "grad_norm": 0.875, "learning_rate": 2.534815984564509e-08, "loss": 0.7726, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7203 }, { "epoch": 0.9796029371770465, "grad_norm": 0.267578125, "learning_rate": 2.5011451454672876e-08, "loss": 0.5085, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7204 }, { "epoch": 0.9797389175958662, "grad_norm": 0.35546875, "learning_rate": 2.4676991548074902e-08, "loss": 0.5074, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7205 }, { "epoch": 0.9798748980146859, "grad_norm": 0.421875, "learning_rate": 2.434478020124309e-08, "loss": 0.6707, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7206 }, { "epoch": 0.9800108784335055, "grad_norm": 0.38671875, "learning_rate": 2.4014817489059762e-08, "loss": 0.8781, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7207 }, { "epoch": 0.9801468588523252, "grad_norm": 0.38671875, "learning_rate": 2.3687103485904307e-08, "loss": 0.7793, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7208 }, { "epoch": 0.9802828392711449, "grad_norm": 0.298828125, "learning_rate": 2.3361638265646524e-08, "loss": 0.5145, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7209 }, { "epoch": 0.9804188196899647, "grad_norm": 0.40625, "learning_rate": 2.3038421901651064e-08, "loss": 0.6374, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7210 }, { "epoch": 0.9805548001087844, "grad_norm": 0.58203125, "learning_rate": 2.2717454466774092e-08, "loss": 0.6868, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7211 }, { "epoch": 0.9806907805276041, "grad_norm": 0.353515625, "learning_rate": 2.239873603336662e-08, "loss": 0.6318, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7212 }, { "epoch": 0.9808267609464237, "grad_norm": 1.2421875, "learning_rate": 2.2082266673270068e-08, "loss": 0.7624, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7213 }, { "epoch": 0.9809627413652434, "grad_norm": 0.361328125, "learning_rate": 2.1768046457821822e-08, "loss": 0.6346, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7214 }, { "epoch": 0.9810987217840631, "grad_norm": 0.466796875, "learning_rate": 2.1456075457850777e-08, "loss": 0.6826, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7215 }, { "epoch": 0.9812347022028828, "grad_norm": 0.404296875, "learning_rate": 2.114635374367846e-08, "loss": 0.7482, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7216 }, { "epoch": 0.9813706826217025, "grad_norm": 0.625, "learning_rate": 2.0838881385121245e-08, "loss": 0.555, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7217 }, { "epoch": 0.9815066630405221, "grad_norm": 0.330078125, "learning_rate": 2.0533658451484807e-08, "loss": 0.4993, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7218 }, { "epoch": 0.9816426434593418, "grad_norm": 0.2373046875, "learning_rate": 2.0230685011571883e-08, "loss": 0.4022, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7219 }, { "epoch": 0.9817786238781615, "grad_norm": 0.3125, "learning_rate": 1.9929961133676733e-08, "loss": 0.6049, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7220 }, { "epoch": 0.9819146042969812, "grad_norm": 0.318359375, "learning_rate": 1.9631486885585137e-08, "loss": 0.5999, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7221 }, { "epoch": 0.982050584715801, "grad_norm": 0.423828125, "learning_rate": 1.933526233457661e-08, "loss": 0.7974, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7222 }, { "epoch": 0.9821865651346207, "grad_norm": 0.390625, "learning_rate": 1.9041287547424404e-08, "loss": 0.5765, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7223 }, { "epoch": 0.9823225455534403, "grad_norm": 0.4296875, "learning_rate": 1.87495625903944e-08, "loss": 0.6774, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7224 }, { "epoch": 0.98245852597226, "grad_norm": 0.373046875, "learning_rate": 1.8460087529245107e-08, "loss": 0.6307, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7225 }, { "epoch": 0.9825945063910797, "grad_norm": 0.353515625, "learning_rate": 1.8172862429226557e-08, "loss": 0.6657, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7226 }, { "epoch": 0.9827304868098994, "grad_norm": 0.470703125, "learning_rate": 1.7887887355082513e-08, "loss": 0.6608, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7227 }, { "epoch": 0.9828664672287191, "grad_norm": 0.498046875, "learning_rate": 1.7605162371051587e-08, "loss": 0.8581, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7228 }, { "epoch": 0.9830024476475387, "grad_norm": 0.380859375, "learning_rate": 1.7324687540862804e-08, "loss": 0.6336, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7229 }, { "epoch": 0.9831384280663584, "grad_norm": 0.59765625, "learning_rate": 1.7046462927738926e-08, "loss": 0.6688, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7230 }, { "epoch": 0.9832744084851781, "grad_norm": 0.361328125, "learning_rate": 1.677048859439423e-08, "loss": 0.6304, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7231 }, { "epoch": 0.9834103889039978, "grad_norm": 0.384765625, "learning_rate": 1.649676460303673e-08, "loss": 0.6338, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7232 }, { "epoch": 0.9835463693228175, "grad_norm": 0.69921875, "learning_rate": 1.6225291015368183e-08, "loss": 0.8159, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7233 }, { "epoch": 0.9836823497416372, "grad_norm": 0.349609375, "learning_rate": 1.595606789258075e-08, "loss": 0.5335, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7234 }, { "epoch": 0.9838183301604569, "grad_norm": 0.50390625, "learning_rate": 1.5689095295361444e-08, "loss": 0.7357, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7235 }, { "epoch": 0.9839543105792766, "grad_norm": 0.26953125, "learning_rate": 1.5424373283889904e-08, "loss": 0.4556, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7236 }, { "epoch": 0.9840902909980963, "grad_norm": 0.228515625, "learning_rate": 1.5161901917836175e-08, "loss": 0.4836, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7237 }, { "epoch": 0.984226271416916, "grad_norm": 0.4296875, "learning_rate": 1.4901681256365153e-08, "loss": 0.6745, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7238 }, { "epoch": 0.9843622518357357, "grad_norm": 0.3125, "learning_rate": 1.464371135813436e-08, "loss": 0.6045, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7239 }, { "epoch": 0.9844982322545553, "grad_norm": 0.296875, "learning_rate": 1.438799228129173e-08, "loss": 0.6138, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7240 }, { "epoch": 0.984634212673375, "grad_norm": 0.486328125, "learning_rate": 1.413452408348115e-08, "loss": 0.6991, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7241 }, { "epoch": 0.9847701930921947, "grad_norm": 0.296875, "learning_rate": 1.3883306821836918e-08, "loss": 0.4803, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7242 }, { "epoch": 0.9849061735110144, "grad_norm": 0.29296875, "learning_rate": 1.3634340552985958e-08, "loss": 0.6506, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7243 }, { "epoch": 0.9850421539298341, "grad_norm": 0.490234375, "learning_rate": 1.3387625333048936e-08, "loss": 0.5083, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7244 }, { "epoch": 0.9851781343486538, "grad_norm": 0.322265625, "learning_rate": 1.3143161217639144e-08, "loss": 0.6115, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7245 }, { "epoch": 0.9853141147674734, "grad_norm": 0.443359375, "learning_rate": 1.290094826186028e-08, "loss": 0.6357, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7246 }, { "epoch": 0.9854500951862932, "grad_norm": 0.546875, "learning_rate": 1.2660986520309781e-08, "loss": 0.7983, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7247 }, { "epoch": 0.9855860756051129, "grad_norm": 0.39453125, "learning_rate": 1.2423276047079936e-08, "loss": 0.7541, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7248 }, { "epoch": 0.9857220560239326, "grad_norm": 0.359375, "learning_rate": 1.2187816895752324e-08, "loss": 0.4966, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7249 }, { "epoch": 0.9858580364427523, "grad_norm": 0.41796875, "learning_rate": 1.1954609119403382e-08, "loss": 0.759, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7250 }, { "epoch": 0.985994016861572, "grad_norm": 0.263671875, "learning_rate": 1.1723652770598836e-08, "loss": 0.5094, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7251 }, { "epoch": 0.9861299972803916, "grad_norm": 0.322265625, "learning_rate": 1.1494947901401487e-08, "loss": 0.6128, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7252 }, { "epoch": 0.9862659776992113, "grad_norm": 0.56640625, "learning_rate": 1.126849456336232e-08, "loss": 0.8371, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7253 }, { "epoch": 0.986401958118031, "grad_norm": 0.3125, "learning_rate": 1.1044292807528278e-08, "loss": 0.6457, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7254 }, { "epoch": 0.9865379385368507, "grad_norm": 0.365234375, "learning_rate": 1.0822342684435605e-08, "loss": 0.7723, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7255 }, { "epoch": 0.9866739189556704, "grad_norm": 0.322265625, "learning_rate": 1.06026442441165e-08, "loss": 0.605, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7256 }, { "epoch": 0.98680989937449, "grad_norm": 0.310546875, "learning_rate": 1.0385197536091351e-08, "loss": 0.585, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7257 }, { "epoch": 0.9869458797933097, "grad_norm": 0.37890625, "learning_rate": 1.0170002609377616e-08, "loss": 0.6577, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7258 }, { "epoch": 0.9870818602121294, "grad_norm": 0.3671875, "learning_rate": 9.957059512480938e-09, "loss": 0.6517, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7259 }, { "epoch": 0.9872178406309492, "grad_norm": 0.4296875, "learning_rate": 9.746368293401809e-09, "loss": 0.7051, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7260 }, { "epoch": 0.9873538210497689, "grad_norm": 0.5546875, "learning_rate": 9.53792899963335e-09, "loss": 0.661, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7261 }, { "epoch": 0.9874898014685886, "grad_norm": 0.80078125, "learning_rate": 9.3317416781602e-09, "loss": 0.7466, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7262 }, { "epoch": 0.9876257818874082, "grad_norm": 0.328125, "learning_rate": 9.127806375458514e-09, "loss": 0.5833, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7263 }, { "epoch": 0.9877617623062279, "grad_norm": 0.3515625, "learning_rate": 8.926123137499298e-09, "loss": 0.6353, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7264 }, { "epoch": 0.9878977427250476, "grad_norm": 0.5, "learning_rate": 8.726692009742855e-09, "loss": 0.66, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7265 }, { "epoch": 0.9880337231438673, "grad_norm": 0.486328125, "learning_rate": 8.529513037144333e-09, "loss": 0.5181, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7266 }, { "epoch": 0.988169703562687, "grad_norm": 0.51953125, "learning_rate": 8.334586264150401e-09, "loss": 0.7362, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7267 }, { "epoch": 0.9883056839815066, "grad_norm": 0.388671875, "learning_rate": 8.141911734699248e-09, "loss": 0.647, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7268 }, { "epoch": 0.9884416644003263, "grad_norm": 0.369140625, "learning_rate": 7.951489492222797e-09, "loss": 0.6719, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7269 }, { "epoch": 0.988577644819146, "grad_norm": 0.185546875, "learning_rate": 7.76331957964449e-09, "loss": 0.3774, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7270 }, { "epoch": 0.9887136252379657, "grad_norm": 0.470703125, "learning_rate": 7.57740203937929e-09, "loss": 0.5378, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7271 }, { "epoch": 0.9888496056567854, "grad_norm": 0.34765625, "learning_rate": 7.3937369133358914e-09, "loss": 0.6213, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7272 }, { "epoch": 0.9889855860756052, "grad_norm": 0.26953125, "learning_rate": 7.212324242914514e-09, "loss": 0.5042, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7273 }, { "epoch": 0.9891215664944248, "grad_norm": 0.27734375, "learning_rate": 7.033164069008003e-09, "loss": 0.4004, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7274 }, { "epoch": 0.9892575469132445, "grad_norm": 0.52734375, "learning_rate": 6.856256432000719e-09, "loss": 0.4466, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7275 }, { "epoch": 0.9893935273320642, "grad_norm": 0.435546875, "learning_rate": 6.681601371770763e-09, "loss": 0.7614, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7276 }, { "epoch": 0.9895295077508839, "grad_norm": 0.3984375, "learning_rate": 6.5091989276866445e-09, "loss": 0.5947, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7277 }, { "epoch": 0.9896654881697036, "grad_norm": 0.41015625, "learning_rate": 6.339049138610609e-09, "loss": 0.6299, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7278 }, { "epoch": 0.9898014685885232, "grad_norm": 0.6953125, "learning_rate": 6.171152042895312e-09, "loss": 0.6017, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7279 }, { "epoch": 0.9899374490073429, "grad_norm": 0.50390625, "learning_rate": 6.005507678388256e-09, "loss": 0.9302, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7280 }, { "epoch": 0.9900734294261626, "grad_norm": 0.380859375, "learning_rate": 5.842116082427351e-09, "loss": 0.7192, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7281 }, { "epoch": 0.9902094098449823, "grad_norm": 0.296875, "learning_rate": 5.680977291843137e-09, "loss": 0.5648, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7282 }, { "epoch": 0.990345390263802, "grad_norm": 0.6015625, "learning_rate": 5.5220913429576696e-09, "loss": 0.8332, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7283 }, { "epoch": 0.9904813706826217, "grad_norm": 0.275390625, "learning_rate": 5.365458271585633e-09, "loss": 0.516, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7284 }, { "epoch": 0.9906173511014414, "grad_norm": 0.38671875, "learning_rate": 5.21107811303545e-09, "loss": 0.6619, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7285 }, { "epoch": 0.9907533315202611, "grad_norm": 0.412109375, "learning_rate": 5.058950902104842e-09, "loss": 0.605, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7286 }, { "epoch": 0.9908893119390808, "grad_norm": 0.43359375, "learning_rate": 4.909076673085267e-09, "loss": 0.8289, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7287 }, { "epoch": 0.9910252923579005, "grad_norm": 0.404296875, "learning_rate": 4.761455459760811e-09, "loss": 0.7201, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7288 }, { "epoch": 0.9911612727767202, "grad_norm": 0.333984375, "learning_rate": 4.616087295407079e-09, "loss": 0.6016, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7289 }, { "epoch": 0.9912972531955399, "grad_norm": 0.73046875, "learning_rate": 4.4729722127911934e-09, "loss": 0.6611, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7290 }, { "epoch": 0.9914332336143595, "grad_norm": 5.46875, "learning_rate": 4.332110244174015e-09, "loss": 0.6313, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7291 }, { "epoch": 0.9915692140331792, "grad_norm": 0.2412109375, "learning_rate": 4.193501421306811e-09, "loss": 0.3556, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7292 }, { "epoch": 0.9917051944519989, "grad_norm": 0.490234375, "learning_rate": 4.05714577543459e-09, "loss": 0.6221, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7293 }, { "epoch": 0.9918411748708186, "grad_norm": 0.2216796875, "learning_rate": 3.923043337291654e-09, "loss": 0.4414, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7294 }, { "epoch": 0.9919771552896383, "grad_norm": 0.337890625, "learning_rate": 3.7911941371082674e-09, "loss": 0.5516, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7295 }, { "epoch": 0.9921131357084579, "grad_norm": 0.498046875, "learning_rate": 3.661598204603989e-09, "loss": 0.6799, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7296 }, { "epoch": 0.9922491161272776, "grad_norm": 0.53515625, "learning_rate": 3.5342555689921177e-09, "loss": 0.9676, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7297 }, { "epoch": 0.9923850965460974, "grad_norm": 0.443359375, "learning_rate": 3.4091662589752494e-09, "loss": 0.665, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7298 }, { "epoch": 0.9925210769649171, "grad_norm": 0.76953125, "learning_rate": 3.2863303027530492e-09, "loss": 0.8717, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7299 }, { "epoch": 0.9926570573837368, "grad_norm": 0.375, "learning_rate": 3.165747728011148e-09, "loss": 0.6016, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7300 }, { "epoch": 0.9927930378025565, "grad_norm": 0.47265625, "learning_rate": 3.0474185619333573e-09, "loss": 0.7399, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7301 }, { "epoch": 0.9929290182213761, "grad_norm": 0.66015625, "learning_rate": 2.9313428311894542e-09, "loss": 0.6981, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7302 }, { "epoch": 0.9930649986401958, "grad_norm": 0.302734375, "learning_rate": 2.8175205619462853e-09, "loss": 0.4207, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7303 }, { "epoch": 0.9932009790590155, "grad_norm": 0.41796875, "learning_rate": 2.7059517798611046e-09, "loss": 0.71, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7304 }, { "epoch": 0.9933369594778352, "grad_norm": 0.369140625, "learning_rate": 2.596636510081574e-09, "loss": 0.6698, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7305 }, { "epoch": 0.9934729398966549, "grad_norm": 0.58203125, "learning_rate": 2.489574777249093e-09, "loss": 0.7716, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7306 }, { "epoch": 0.9936089203154745, "grad_norm": 0.294921875, "learning_rate": 2.38476660549658e-09, "loss": 0.5998, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7307 }, { "epoch": 0.9937449007342942, "grad_norm": 0.296875, "learning_rate": 2.282212018449581e-09, "loss": 0.5488, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7308 }, { "epoch": 0.9938808811531139, "grad_norm": 0.7734375, "learning_rate": 2.1819110392251597e-09, "loss": 0.9181, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7309 }, { "epoch": 0.9940168615719337, "grad_norm": 0.2412109375, "learning_rate": 2.0838636904318974e-09, "loss": 0.3257, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7310 }, { "epoch": 0.9941528419907534, "grad_norm": 0.400390625, "learning_rate": 1.9880699941710048e-09, "loss": 0.8262, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7311 }, { "epoch": 0.9942888224095731, "grad_norm": 0.287109375, "learning_rate": 1.8945299720352086e-09, "loss": 0.6011, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7312 }, { "epoch": 0.9944248028283927, "grad_norm": 0.4921875, "learning_rate": 1.8032436451109746e-09, "loss": 0.8154, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7313 }, { "epoch": 0.9945607832472124, "grad_norm": 0.91796875, "learning_rate": 1.7142110339740669e-09, "loss": 0.8638, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7314 }, { "epoch": 0.9946967636660321, "grad_norm": 0.322265625, "learning_rate": 1.6274321586928765e-09, "loss": 0.6785, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7315 }, { "epoch": 0.9948327440848518, "grad_norm": 0.373046875, "learning_rate": 1.5429070388306434e-09, "loss": 0.7204, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7316 }, { "epoch": 0.9949687245036715, "grad_norm": 0.5, "learning_rate": 1.4606356934387944e-09, "loss": 0.5552, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7317 }, { "epoch": 0.9951047049224911, "grad_norm": 0.27734375, "learning_rate": 1.3806181410624952e-09, "loss": 0.5934, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7318 }, { "epoch": 0.9952406853413108, "grad_norm": 0.349609375, "learning_rate": 1.302854399738429e-09, "loss": 0.5238, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7319 }, { "epoch": 0.9953766657601305, "grad_norm": 0.2216796875, "learning_rate": 1.2273444869959072e-09, "loss": 0.4198, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7320 }, { "epoch": 0.9955126461789502, "grad_norm": 0.5, "learning_rate": 1.154088419855759e-09, "loss": 0.8483, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7321 }, { "epoch": 0.9956486265977699, "grad_norm": 0.478515625, "learning_rate": 1.0830862148314413e-09, "loss": 0.8921, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7322 }, { "epoch": 0.9957846070165897, "grad_norm": 0.439453125, "learning_rate": 1.0143378879257093e-09, "loss": 0.7799, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7323 }, { "epoch": 0.9959205874354093, "grad_norm": 0.484375, "learning_rate": 9.47843454637276e-10, "loss": 0.6916, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7324 }, { "epoch": 0.996056567854229, "grad_norm": 0.4453125, "learning_rate": 8.836029299552628e-10, "loss": 0.675, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7325 }, { "epoch": 0.9961925482730487, "grad_norm": 0.9296875, "learning_rate": 8.21616328358088e-10, "loss": 0.5949, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7326 }, { "epoch": 0.9963285286918684, "grad_norm": 0.3125, "learning_rate": 7.618836638190186e-10, "loss": 0.638, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7327 }, { "epoch": 0.9964645091106881, "grad_norm": 0.3125, "learning_rate": 7.044049498028393e-10, "loss": 0.6576, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7328 }, { "epoch": 0.9966004895295077, "grad_norm": 0.447265625, "learning_rate": 6.491801992669633e-10, "loss": 0.8953, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7329 }, { "epoch": 0.9967364699483274, "grad_norm": 0.251953125, "learning_rate": 5.962094246581007e-10, "loss": 0.5161, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7330 }, { "epoch": 0.9968724503671471, "grad_norm": 0.75390625, "learning_rate": 5.454926379167003e-10, "loss": 0.6047, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7331 }, { "epoch": 0.9970084307859668, "grad_norm": 0.423828125, "learning_rate": 4.970298504758386e-10, "loss": 0.741, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7332 }, { "epoch": 0.9971444112047865, "grad_norm": 0.62890625, "learning_rate": 4.508210732601104e-10, "loss": 0.5462, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7333 }, { "epoch": 0.9972803916236062, "grad_norm": 0.53125, "learning_rate": 4.068663166834075e-10, "loss": 0.783, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7334 }, { "epoch": 0.9974163720424258, "grad_norm": 0.26953125, "learning_rate": 3.6516559065558064e-10, "loss": 0.533, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7335 }, { "epoch": 0.9975523524612456, "grad_norm": 0.5859375, "learning_rate": 3.2571890457466783e-10, "loss": 0.6906, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7336 }, { "epoch": 0.9976883328800653, "grad_norm": 0.47265625, "learning_rate": 2.8852626733466557e-10, "loss": 0.702, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7337 }, { "epoch": 0.997824313298885, "grad_norm": 0.61328125, "learning_rate": 2.5358768731775783e-10, "loss": 0.66, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7338 }, { "epoch": 0.9979602937177047, "grad_norm": 0.400390625, "learning_rate": 2.2090317239875648e-10, "loss": 0.7363, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7339 }, { "epoch": 0.9980962741365244, "grad_norm": 0.330078125, "learning_rate": 1.904727299473219e-10, "loss": 0.6437, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7340 }, { "epoch": 0.998232254555344, "grad_norm": 0.279296875, "learning_rate": 1.62296366822412e-10, "loss": 0.4985, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7341 }, { "epoch": 0.9983682349741637, "grad_norm": 0.75390625, "learning_rate": 1.3637408937339224e-10, "loss": 0.5636, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7342 }, { "epoch": 0.9985042153929834, "grad_norm": 0.228515625, "learning_rate": 1.1270590344558685e-10, "loss": 0.3901, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7343 }, { "epoch": 0.9986401958118031, "grad_norm": 0.435546875, "learning_rate": 9.129181437361745e-11, "loss": 0.7904, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7344 }, { "epoch": 0.9987761762306228, "grad_norm": 0.36328125, "learning_rate": 7.213182698362353e-11, "loss": 0.769, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7345 }, { "epoch": 0.9989121566494424, "grad_norm": 0.240234375, "learning_rate": 5.522594559548289e-11, "loss": 0.5444, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7346 }, { "epoch": 0.9990481370682621, "grad_norm": 0.85546875, "learning_rate": 4.057417401837071e-11, "loss": 0.7067, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7347 }, { "epoch": 0.9991841174870819, "grad_norm": 0.40625, "learning_rate": 2.8176515557420957e-11, "loss": 0.7075, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7348 }, { "epoch": 0.9993200979059016, "grad_norm": 0.443359375, "learning_rate": 1.8032973004844524e-11, "loss": 0.7359, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7349 }, { "epoch": 0.9994560783247213, "grad_norm": 0.28515625, "learning_rate": 1.0143548648811064e-11, "loss": 0.5378, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7350 }, { "epoch": 0.999592058743541, "grad_norm": 0.453125, "learning_rate": 4.5082442678978655e-12, "loss": 0.6157, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7351 }, { "epoch": 0.9997280391623606, "grad_norm": 0.625, "learning_rate": 1.127061129979623e-12, "loss": 0.6589, "memory/device_mem_reserved(gib)": 47.75, "memory/max_mem_active(gib)": 39.5, "memory/max_mem_allocated(gib)": 39.5, "step": 7352 } ], "logging_steps": 1, "max_steps": 7352, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.5772130034993217e+21, "train_batch_size": 1, "trial_name": null, "trial_params": null }