diff --git "a/logs/000_c2e7a920-6eca-4f21-8a3c-6022d81a4f29.txt" "b/logs/000_c2e7a920-6eca-4f21-8a3c-6022d81a4f29.txt" new file mode 100644--- /dev/null +++ "b/logs/000_c2e7a920-6eca-4f21-8a3c-6022d81a4f29.txt" @@ -0,0 +1,73417 @@ +import os +import sys + +with open(sys.argv[0]) as f: + code = f.read() # read the code of this file ASAP, for logging +import copy +import time +import uuid +from dataclasses import dataclass +from functools import lru_cache +from pathlib import Path + +from tqdm import tqdm + +os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True" +import torch + +torch.empty( + 1, device="cuda", requires_grad=True +).backward() # prevents a bug on some systems +import torch._inductor.codecache # noqa: E402 +import torch._inductor.graph # noqa: E402 +import torch.nn.functional as F +from torch import Tensor, nn +from torch._logging._internal import trace_structured # noqa: E402 + +# use of FlexAttention contributed by @KoszarskyB +from torch.nn.attention.flex_attention import BlockMask, flex_attention + +torch._inductor.config.coordinate_descent_tuning = ( + True # we allow this flag for medium track +) +torch._dynamo.config.compiled_autograd = False + +# ----------------------------------------------------------------------------- +# Muon optimizer + + +def zeropower_via_newtonschulz5(G: Tensor) -> Tensor: + """ + Newton-Schulz iteration to compute the zeroth power / orthogonalization of G. We opt to use a + quintic iteration whose coefficients are selected to maximize the slope at zero. For the purpose + of minimizing steps, it turns out to be empirically effective to keep increasing the slope at + zero even beyond the point where the iteration no longer converges all the way to one everywhere + on the interval. This iteration therefore does not produce UV^T but rather something like US'V^T + where S' is diagonal with S_{ii}' ∈ [1 - l, 1 + r], which turns out not to hurt model + performance at all relative to UV^T, where USV^T = G is the SVD. + """ + assert ( + G.ndim >= 2 + ) # batched Muon implementation by @scottjmaddox, and put into practice in the record by @YouJiacheng + X = G.bfloat16() + if G.size(-2) > G.size(-1): + X = X.mT + + # Ensure spectral norm is at most 1 + X = X / (X.norm(dim=(-2, -1), keepdim=True) + 1e-7) + # Perform the NS iterations + for a, b, c in [ + (4.0848, -6.8946, 2.9270), + (3.9505, -6.3029, 2.6377), + (3.7418, -5.5913, 2.3037), + (2.8769, -3.1427, 1.2046), + (2.8366, -3.0525, 1.2012), + ]: + A = X @ X.mT + B = ( + b * A + c * A @ A + ) # quintic computation strategy adapted from suggestion by @jxbz, @leloykun, and @YouJiacheng + X = a * X + B @ X + + if G.size(-2) > G.size(-1): + X = X.mT + return X + + +@torch.compile +def update( + acc_bf16_view_u16: Tensor, + mantissa: Tensor, + momentum_buffer: Tensor, + grad: Tensor, + momentum: Tensor, + eff_lr: Tensor, + eff_weight_decay: Tensor, +): + """ + Memory-efficient parameter update that reconstructs float32 precision from bfloat16 + mantissa. + + The key insight: bfloat16 has 16 bits total, but we can reconstruct higher precision by: + 1. Storing the bfloat16 parameter normally + 2. Maintaining extra mantissa bits as a separate uint16 tensor + 3. Combining them into uint32 (effectively float32) for precise computation + 4. Splitting the result back into bfloat16 + mantissa for storage + + This gives us ~23 bits of effective precision instead of bfloat16's 16 bits. + """ + assert acc_bf16_view_u16.dtype == mantissa.dtype == torch.uint16 + grad = grad.float() + + # Standard momentum update in float32 precision + momentum_buffer.copy_(momentum * momentum_buffer + (1 - momentum) * grad) + + # Compute orthogonalized update using Newton-Schulz iteration + v = zeropower_via_newtonschulz5(momentum * momentum_buffer + (1 - momentum) * grad) + + # Reconstruct float32 precision from bfloat16 + extra mantissa bits + # This is the clever part: combine stored bfloat16 with extra mantissa to get float32 + acc_m_u32 = (acc_bf16_view_u16.to(torch.uint32) << 16) | mantissa.to(torch.uint32) + + # Perform the actual parameter update in float32 precision + acc_m_u32.view(torch.float32).mul_(1 - eff_weight_decay) # weight decay + acc_m_u32.view(torch.float32).add_(other=v, alpha=-eff_lr) # gradient step + + # Split the result back: high 16 bits → bfloat16, low 16 bits → extra mantissa + acc_bf16_view_u16.copy_((acc_m_u32 >> 16).to(torch.uint16)) # bfloat16 part + mantissa.copy_(acc_m_u32.to(torch.uint16)) # extra mantissa bits + + +class Muon(torch.optim.Optimizer): + """ + Muon - MomentUm Orthogonalized by Newton-schulz + + https://kellerjordan.github.io/posts/muon/ + + Muon internally runs standard SGD-momentum, and then performs an orthogonalization post- + processing step, in which each 2D parameter's update is replaced with the nearest orthogonal + matrix. To efficiently orthogonalize each update, we use a Newton-Schulz iteration, which has + the advantage that it can be stably run in bfloat16 on the GPU. + + Warning: This optimizer should not be used for the embedding layer, the final fully connected layer, + or any {0,1}-D parameters; those should all be optimized by a standard method (e.g., AdamW). + """ + + def __init__(self, params, lr=0.02, weight_decay=0.01, momentum=0.95): + defaults = dict(lr=lr, weight_decay=weight_decay, momentum=momentum) + super().__init__(params, defaults) + assert all( + p.dtype == torch.bfloat16 + for group in self.param_groups + for p in group["params"] + ) + + @torch.no_grad() + def step(self): + for group in self.param_groups: + params: list[Tensor] = group["params"] + momentum = torch._as_tensor_fullprec(group["momentum"]) + for p in params: + state = self.state[p] + if len(state) == 0: + # Initialize extra mantissa bits - starts as zeros, accumulates precision over time + state["mantissa"] = torch.zeros_like(p, dtype=torch.uint16) + # Standard momentum buffer for gradient accumulation + state["momentum_buffer"] = torch.zeros_like(p, dtype=torch.float32) + update( + p.view(torch.uint16), + state["mantissa"], + state["momentum_buffer"], + p.grad, + momentum, + eff_lr=torch._as_tensor_fullprec( + group["lr"] * max(1, p.size(-2) / p.size(-1)) ** 0.5 + ), + eff_weight_decay=torch._as_tensor_fullprec( + group["lr"] * group["weight_decay"] * getattr(p, "wd_mul", 1.0) + ), + ) + + +# ----------------------------------------------------------------------------- +# PyTorch nn.Module definitions for the model + + +def norm(x: Tensor): + return F.rms_norm(x, (x.size(-1),)) + + +@torch.no_grad() +def init_linear(w: Tensor): + std = 0.5 * (w.size(-1) ** -0.5) # 0.5 is a bit better than the default 1/sqrt(3) + bound = (3**0.5) * std + return w.uniform_(-bound, bound) + + +class Rotary(nn.Module): + def __init__(self, dim: int, max_seq_len: int): + super().__init__() + # half-truncate RoPE by @YouJiacheng (w/ base freq tuning) + angular_freq = (1 / 1024) ** torch.linspace( + 0, 1, steps=dim // 4, dtype=torch.float32 + ) + angular_freq = torch.cat([angular_freq, angular_freq.new_zeros(dim // 4)]) + t = torch.arange(max_seq_len, dtype=torch.float32) + theta = torch.einsum("i,j -> ij", t, angular_freq) + self.cos = nn.Buffer(theta.cos(), persistent=False) + self.sin = nn.Buffer(theta.sin(), persistent=False) + + def forward(self, x_BTHD: Tensor): + assert self.cos.size(0) >= x_BTHD.size(-3) + cos, sin = ( + self.cos[None, : x_BTHD.size(-3), None, :], + self.sin[None, : x_BTHD.size(-3), None, :], + ) + x1, x2 = x_BTHD.to(dtype=torch.float32).chunk(2, dim=-1) + y1 = x1 * cos + x2 * sin + y2 = x1 * (-sin) + x2 * cos + return torch.cat((y1, y2), 3).type_as(x_BTHD) + + +class CausalSelfAttention(nn.Module): + def __init__(self, dim: int, num_heads: int, max_seq_len: int, head_dim=128): + super().__init__() + self.num_heads = num_heads + self.head_dim = head_dim + hdim = num_heads * head_dim + # merged QKV weights: suggested by many, implemented by @fernbear.bsky.social, and further improved by @YouJiacheng + # https://x.com/hi_tysam/status/1879699187107033311 + self.qkvo_w = nn.Parameter(init_linear(torch.empty(4, hdim, dim)).bfloat16()) + self.qkvo_w.detach()[3].zero_() # out zero init suggested by @Grad62304977 + self.rotary = Rotary(head_dim, max_seq_len) + # scale the attention logits by given constant, instead of the default head_dim**-0.5, by @leloykun + # inspired by learnable scalars used by @brendanh0gan https://x.com/hi_tysam/status/1879693583898591283 + self.attn_scale = 0.12 + + def forward( + self, x: Tensor, ve: Tensor | None, block_mask: BlockMask, lambdas: Tensor + ): + B, T = x.size(0), x.size(1) # batch size, sequence length + assert B == 1, "Must use batch size = 1 for FlexAttention" + q, k, v = ( + F.linear(x, self.qkvo_w[:3].flatten(end_dim=1)) + .view(B, T, 3 * self.num_heads, self.head_dim) + .chunk(3, dim=-2) + ) + q, k = norm(q), norm(k) # QK norm @Grad62304977 + q, k = self.rotary(q), self.rotary(k) + v = norm(v) + if ve is not None: + v = lambdas[0] * v + lambdas[1] * ve.view_as( + v + ) # @KoszarskyB & @Grad62304977 + else: # skip mid-layers token value embeddings by @YouJiacheng + v = lambdas[0] * v + y = flex_attention( + q.transpose(1, 2), + k.transpose(1, 2), + v.transpose(1, 2), + block_mask=block_mask, + scale=self.attn_scale, + ).transpose(1, 2) + y = y.contiguous().view( + B, T, self.num_heads * self.head_dim + ) # re-assemble all head outputs side by side + y = F.linear(y, self.qkvo_w[3]) + return y + + +class MLP(nn.Module): + def __init__(self, dim: int): + super().__init__() + hdim = 4 * dim + self.fc_w = nn.Parameter(init_linear(torch.empty(hdim, dim)).bfloat16()) + self.proj_w = nn.Parameter(torch.zeros(dim, hdim).bfloat16()) + self.fc_w.wd_mul = 2.0 + self.proj_w.wd_mul = 2.0 + + def forward(self, x: Tensor): + x = F.linear(x, self.fc_w) + x = F.relu( + x + ).square() # https://arxiv.org/abs/2109.08668v2; ~1-2% better than GELU; suggested by @SKYLINEZ007 and @Grad62304977 + x = F.linear(x, self.proj_w) + return x + + +class Block(nn.Module): + def __init__(self, dim: int, num_heads: int, max_seq_len: int, layer_idx: int): + super().__init__() + # skip attention of blocks.7 (the 8th layer) by @YouJiacheng + self.attn = ( + CausalSelfAttention(dim, num_heads, max_seq_len) if layer_idx != 7 else None + ) + self.mlp = MLP(dim) + + def forward( + self, + x: Tensor, + ve: Tensor | None, + x0: Tensor, + block_mask: BlockMask, + lambdas: Tensor, + sa_lambdas: Tensor, + ): + x = lambdas[0] * x + lambdas[1] * x0 + if self.attn is not None: + x = x + self.attn(x, ve, block_mask, sa_lambdas) + x = x + self.mlp(norm(x)) + return x + + +# ----------------------------------------------------------------------------- +# The main model + + +def next_multiple_of_n(v: float | int, *, n: int): + return next(x for x in range(n, int(v) + 1 + n, n) if x >= v) + + +class GPT(nn.Module): + def __init__( + self, + vocab_size: int, + num_layers: int, + num_heads: int, + model_dim: int, + max_seq_len: int, + ): + super().__init__() + self.embed = nn.Embedding(vocab_size, model_dim) + # token value embeddings by @KoszarskyB - inspired by @Grad62304977's value residual implementation following https://arxiv.org/abs/2410.17897 + # value embedding code simplification inspired by @ragulpr https://github.com/KellerJordan/modded-nanogpt/pull/78 + self.value_embeds = nn.ModuleList( + [nn.Embedding(vocab_size, model_dim) for _ in range(3)] + ) + self.blocks = nn.ModuleList( + [Block(model_dim, num_heads, max_seq_len, i) for i in range(num_layers)] + ) + # there are only 50257 unique GPT-2 tokens; we extend to nearest multiple of 128 for efficiency. + # suggested to me by @Grad62304977. this originates from Karpathy's experiments. + self.lm_head_w = nn.Parameter( + torch.zeros(next_multiple_of_n(vocab_size, n=128), model_dim) + ) + # Add learnable skip connection weights for decoder layers + assert num_layers % 2 == 0 + self.scalars = nn.Parameter( + torch.cat( + [ + torch.ones(num_layers), # skip_weights + *[ + torch.tensor([1.0, 0.0]) for _ in range(num_layers) + ], # block lambdas + *[ + torch.tensor([0.5, 0.5]) for _ in range(num_layers) + ], # SA lambdas + ] + ) + ) + + def create_blockmasks(self, input_seq: Tensor, sliding_window_num_blocks: Tensor): + BLOCK_SIZE = 128 + docs = (input_seq == 50256).cumsum(0) + + def document_causal(b, h, q_idx, kv_idx): + causal_mask = q_idx >= kv_idx + document_mask = docs[q_idx] == docs[kv_idx] + return causal_mask & document_mask + + def dense_to_ordered(dense_blockmask: Tensor): + num_blocks = dense_blockmask.sum(dim=-1, dtype=torch.int32) + indices = ( + dense_blockmask.argsort(dim=-1, descending=False, stable=True) + .flip(-1) + .to(torch.int32) + ) + return num_blocks[None, None].contiguous(), indices[None, None].contiguous() + + # manual block mask creation by @YouJiacheng + assert len(input_seq) % BLOCK_SIZE == 0 + NUM_BLOCKS = len(input_seq) // BLOCK_SIZE + block_idx = torch.arange(NUM_BLOCKS, dtype=torch.int32, device="cuda") + causal_blockmask_any = block_idx[:, None] >= block_idx + causal_blockmask_all = block_idx[:, None] > block_idx + docs_low = docs.view(-1, BLOCK_SIZE)[:, 0].contiguous() + docs_high = docs.view(-1, BLOCK_SIZE)[:, -1].contiguous() + document_blockmask_any = (docs_low[:, None] <= docs_high) & ( + docs_high[:, None] >= docs_low + ) + document_blockmask_all = (docs_low[:, None] == docs_high) & ( + docs_high[:, None] == docs_low + ) + blockmask_any = causal_blockmask_any & document_blockmask_any + blockmask_all = causal_blockmask_all & document_blockmask_all + partial_kv_num_blocks, partial_kv_indices = dense_to_ordered( + blockmask_any & ~blockmask_all + ) + full_kv_num_blocks, full_kv_indices = dense_to_ordered(blockmask_all) + + def build_bm(window_size_blocks: Tensor) -> BlockMask: + return BlockMask.from_kv_blocks( + torch.clamp_max( + partial_kv_num_blocks, + torch.clamp_min(window_size_blocks - full_kv_num_blocks, 1), + ), + partial_kv_indices, + torch.clamp_max(full_kv_num_blocks, window_size_blocks - 1), + full_kv_indices, + BLOCK_SIZE=BLOCK_SIZE, + mask_mod=document_causal, + ) + + # Long-short SWA block masks by @leloykun & @YouJiacheng, adapated from suggestion by @Grad62304977, following Gemma 2 paper + return build_bm(sliding_window_num_blocks), build_bm( + sliding_window_num_blocks // 2 + ) + + def forward( + self, input_seq: Tensor, target_seq: Tensor, sliding_window_num_blocks: Tensor + ): + assert input_seq.ndim == 1 + + ve = [value_embed(input_seq) for value_embed in self.value_embeds] + # 012 ... 012 structure on token value embeddings by @YouJiacheng, improved on @leloykun's U-net structure + ve = ( + [ve[0], ve[1], ve[2]] + + [None] * (len(self.blocks) - 6) + + [ve[0], ve[1], ve[2]] + ) + assert len(ve) == len(self.blocks) + + long_bm, short_bm = self.create_blockmasks(input_seq, sliding_window_num_blocks) + block_masks = [ + long_bm, + short_bm, + short_bm, + short_bm, + long_bm, + short_bm, + short_bm, + short_bm, + short_bm, + short_bm, + short_bm, + long_bm, + short_bm, + short_bm, + short_bm, + long_bm, + ] + assert len(block_masks) == len(self.blocks) + + x = x0 = norm(self.embed(input_seq)[None]) # use of norm here by @Grad62304977 + + skip_connections = [] + skip_map = { + 9: 6, + 10: 4, + 11: 2, + } + skip_weights = self.scalars[: len(self.blocks)] + lambdas = self.scalars[1 * len(self.blocks) : 3 * len(self.blocks)].view(-1, 2) + sa_lambdas = self.scalars[3 * len(self.blocks) : 5 * len(self.blocks)].view( + -1, 2 + ) + for i in range(len(self.blocks)): + if i in skip_map: + x = x + skip_weights[skip_map[i]] * skip_connections[skip_map[i]] + x = self.blocks[i](x, ve[i], x0, block_masks[i], lambdas[i], sa_lambdas[i]) + skip_connections.append(x) + + x = norm(x) + if self.training: + logits: Tensor = F.linear( + x.flatten(end_dim=1), self.lm_head_w.bfloat16() + ).float() + loss = F.cross_entropy( + 15 * logits * torch.rsqrt(logits.square() + 225), target_seq + ) + return loss + + loss = 0 + for i in range(4): + logits: Tensor = F.linear( + x.flatten(end_dim=1).chunk(4)[i], self.lm_head_w.bfloat16() + ).float() + loss += ( + F.cross_entropy( + 15 * logits * torch.rsqrt(logits.square() + 225), + target_seq.chunk(4)[i], + ) + / 4 + ) + return loss + + +# ----------------------------------------------------------------------------- +# Our own simple Distributed Data Loader + + +def _load_data_shard(file: Path): + header = torch.from_file( + str(file), False, 256, dtype=torch.int32 + ) # header is 256 int32 + assert header[0] == 20240520, "magic number mismatch in the data .bin file" + assert header[1] == 1, "unsupported version" + num_tokens = int(header[2]) # number of tokens (claimed) + with file.open("rb", buffering=0) as f: + tokens = torch.empty( + num_tokens, dtype=torch.uint16, pin_memory=True + ) # avoid pin_memory copy by @YouJiacheng + f.seek(256 * 4) + nbytes = f.readinto(tokens.numpy()) # avoid bytes->array copy by @YouJiacheng + assert nbytes == 2 * num_tokens, "number of tokens read does not match header" + return tokens + + +def data_generator(filename_pattern: str, batch_size: int): + files = sorted(Path.cwd().glob(filename_pattern)) + file_iter = iter( + files + ) # use itertools.cycle(files) instead if you want to do multi-epoch training + tokens, pos = _load_data_shard(next(file_iter)), 0 + while True: + if pos + batch_size + 1 >= len(tokens): + tokens, pos = _load_data_shard(next(file_iter)), 0 + buf = tokens[pos : pos + batch_size + 1] + inputs = buf[:-1].to(device="cuda", dtype=torch.int32, non_blocking=True) + targets = buf[1:].to(device="cuda", dtype=torch.int64, non_blocking=True) + pos += batch_size + yield inputs, targets + + +# ----------------------------------------------------------------------------- +# int main + + +@dataclass +class Hyperparameters: + # data + train_files = "data/fineweb10B/fineweb_train_*.bin" # input .bin to train on + val_files = ( + "data/fineweb10B/fineweb_val_*.bin" # input .bin to eval validation loss on + ) + train_seq_len = 42 * 1024 # FlexAttention sequence length + val_tokens = ( + 64 * 4 * 42 * 1024 + ) # how many tokens of validation data? it's important to keep this fixed for consistent comparisons + val_seq_len = 4 * 42 * 1024 # FlexAttention sequence length for validation + # optimization + grad_accum_steps = 4 + num_iterations = 14_336 # number of iterations to run + cooldown_frac = 0.7 # fraction of training spent cooling down the learning rate + # architecture + vocab_size = 50257 + # evaluation and logging + val_loss_every = ( + grad_accum_steps + * 16 # every how many steps to evaluate val loss? 0 for only at the end + ) + save_checkpoint = True + + +args = Hyperparameters() + + +# learning rate schedule: stable then decay +def get_lr(step: int): + x = step / args.num_iterations # progress in training + assert 0 <= x < 1 + if x < 1 - args.cooldown_frac: + return 1.0 + else: + return (1 - x) / args.cooldown_frac + + +# attention window size schedule: linearly increase +@lru_cache(1) +def get_window_size_blocks_helper(window_size: int): + return torch.tensor(window_size // 128, dtype=torch.int32, pin_memory=True).cuda( + non_blocking=True + ) + + +def get_window_size_blocks(step: int): + x = step / args.num_iterations # progress in training + assert 0 <= x <= 1 + # Linearly increase the block-wise sliding window size over training 128 -> 1792 + # increase by @fernbear.bsky.social; block-wise by @YouJiacheng + factor = 4 * x**3 - 6 * x**2 + 3 * x # cubic schedule by @jadenj3o + window_size = next_multiple_of_n(3456 * factor, n=128) + return get_window_size_blocks_helper(window_size) + + +def main(): + start_time = time.time() + + run_id = int(os.environ.get("RUN_ID", 0)) + # Single GPU setup + assert torch.cuda.is_available() + device = torch.device("cuda", 0) + torch.cuda.set_device(device) + + # begin logging + run_id_full = f"{run_id:03d}_{uuid.uuid4()}" + os.makedirs("logs", exist_ok=True) + logfile = f"logs/{run_id_full}.txt" + print(logfile) + + def print0(s, console=False): + with open(logfile, "a") as f: + if console: + print(s) + print(s, file=f) + + def _patched_trace_structured(name, *args, **kwargs): + if name == "inductor_output_code": + metadata_fn = args[0] + print0(f"inductor_output_code: {metadata_fn().get('filename', 'Unknown')}") + trace_structured(name, *args, **kwargs) + + torch._inductor.codecache.trace_structured = _patched_trace_structured + torch._inductor.graph.trace_structured = _patched_trace_structured + + # begin by printing this file (the Python code) + print0(code) + print0("=" * 100) + # log information about the hardware/software environment this is running on + print0(f"Running Python {sys.version}") + print0( + f"Running PyTorch {torch.version.__version__} compiled for CUDA {torch.version.cuda}" + ) + + def nvidia_smi(): + import subprocess # avoid top level import + + return subprocess.run( + ["nvidia-smi"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + ).stdout + + print0(nvidia_smi()) + print0("=" * 100) + print0(f"Start time: {start_time}") + + ######################################## + # Construct model and optimizer # + ######################################## + + model: nn.Module = GPT( + vocab_size=args.vocab_size, + num_layers=16, + num_heads=8, + model_dim=1024, + max_seq_len=max(args.train_seq_len, args.val_seq_len), + ).cuda() + for m in model.modules(): + if isinstance(m, nn.Embedding): + m.bfloat16() + + # collect the parameters to optimize + hidden_matrix_params = sorted( + (p for p in model.blocks.parameters() if p.ndim >= 2), + key=lambda x: x.size(), + reverse=True, + ) + embed_params = [*model.embed.parameters(), *model.value_embeds.parameters()] + scalar_params = [model.scalars] + head_params: list[nn.Parameter] = [model.lm_head_w] + # sanity check + params_collections = [ + hidden_matrix_params, + embed_params, + scalar_params, + head_params, + ] + optimized_parameters_set = {p for params in params_collections for p in params} + assert optimized_parameters_set == {*model.parameters()} + assert len(optimized_parameters_set) == sum(len(lst) for lst in params_collections) + + # init the optimizer(s) + adam_param_groups = [ + dict(params=head_params, lr=1 / 320), + dict(params=embed_params, lr=0.3), + dict(params=scalar_params, lr=0.015), + ] + # small adam epsilon by @YouJiacheng. this is an alternate method of fixing the world_size dependence + # discovered by @fernbear.bsky.social https://x.com/hi_tysam/status/1879692937589875094 + optimizer1 = torch.optim.AdamW( + adam_param_groups, betas=(0.8, 0.95), eps=1e-10, weight_decay=0.0, fused=True + ) + optimizer2 = Muon(hidden_matrix_params, lr=0.025, momentum=0.95) + optimizers: list[torch.optim.Optimizer] = [optimizer1, optimizer2] + + for opt in optimizers: + for group in opt.param_groups: + group["initial_lr"] = group["lr"] + + model: nn.Module = torch.compile(model, dynamic=False) + + ######################################## + # Warmup kernels # + ######################################## + + # Warmup the training kernels, then re-initialize the state so we aren't cheating + warmup_steps = 10 + initial_state = copy.deepcopy( + dict( + model=model.state_dict(), + optimizers=[opt.state_dict() for opt in optimizers], + ) + ) + for _ in range(warmup_steps): + inputs = targets = torch.randint( + 0, args.vocab_size, size=(args.train_seq_len,), device="cuda" + ) + model(inputs.to(torch.int32), targets, get_window_size_blocks(0)).backward() + for opt in optimizers: + opt.step() + model.zero_grad(set_to_none=True) + model.load_state_dict(initial_state["model"]) + for opt, opt_state in zip(optimizers, initial_state["optimizers"]): + opt.load_state_dict(opt_state) + del initial_state + + ######################################## + # Training and validation # + ######################################## + + torch.cuda.reset_peak_memory_stats() + train_loader = data_generator(args.train_files, args.train_seq_len) + training_time_ms = 0 + # start the clock + t0 = time.perf_counter() + # begin training + grad_accum_steps = args.grad_accum_steps + train_steps = args.num_iterations * grad_accum_steps + for step in tqdm(range(train_steps + 1), desc="Training", total=train_steps + 1): + last_step = step == train_steps + update_step = step // grad_accum_steps + + # --------------- VALIDATION SECTION ----------------- + if last_step or (args.val_loss_every > 0 and step % args.val_loss_every == 0): + # stop the clock + training_time_ms += 1000 * (time.perf_counter() - t0) + model.eval() + val_batch_size = args.val_seq_len + assert args.val_tokens % val_batch_size == 0 + val_steps = args.val_tokens // val_batch_size + val_loader = data_generator(args.val_files, val_batch_size) + val_loss = 0 + with torch.no_grad(): + for _ in range(val_steps): + inputs, targets = next(val_loader) + val_loss += model( + inputs, targets, get_window_size_blocks(update_step) + ) + val_loss /= val_steps + del val_loader + print0( + f"step:{step}/{train_steps} val_loss:{val_loss:.6f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms / max(step, 1):.2f}ms", + console=True, + ) + model.train() + # start the clock again + t0 = time.perf_counter() + + if last_step: + if args.save_checkpoint: + log = dict( + step=step, + code=code, + model=model.state_dict(), + optimizers=[opt.state_dict() for opt in optimizers], + ) + os.makedirs(f"logs/{run_id_full}", exist_ok=True) + torch.save(log, f"logs/{run_id_full}/state_step{step:06d}.pt") + torch.save(log["model"], f"logs/{run_id_full}/latest_model.pt") + # the last step only has the validation loop, so break to avoid training + break + + # --------------- TRAINING SECTION ----------------- + inputs, targets = next(train_loader) + model(inputs, targets, get_window_size_blocks(update_step)).backward() + if step % grad_accum_steps == (grad_accum_steps - 1): + # set optimization hyperparameters + for opt in optimizers: + for group in opt.param_groups: + group["lr"] = group["initial_lr"] * get_lr(update_step) + for group in optimizer2.param_groups: + frac = min(update_step / 300, 1) # momentum warmup for muon + group["momentum"] = (1 - frac) * 0.85 + frac * 0.95 + # step the optimizers + for opt in optimizers: + opt.step() + # null the gradients + model.zero_grad(set_to_none=True) + print0( + f"grad accum step:{update_step + 1}/{train_steps // grad_accum_steps}" + ) + + # logging + approx_training_time_ms = training_time_ms + 1000 * (time.perf_counter() - t0) + print0( + f"step:{step + 1}/{train_steps} train_time:{approx_training_time_ms:.0f}ms step_avg:{approx_training_time_ms / (step + 1):.2f}ms", + ) + end_time = time.time() + + print0( + f"peak memory allocated: {torch.cuda.max_memory_allocated() // 1024 // 1024} MiB " + f"reserved: {torch.cuda.max_memory_reserved() // 1024 // 1024} MiB", + f"end time: {end_time}", + f"total training time: {end_time - start_time / 60:.2f}m ({end_time - start_time / 60 / 60:.2f}h)", + console=True, + ) + + +if __name__ == "__main__": + main() + +==================================================================================================== +Running Python 3.12.7 (main, Oct 16 2024, 04:37:19) [Clang 18.1.8 ] +Running PyTorch 2.8.0+cu128 compiled for CUDA 12.8 +Thu Oct 2 14:11:58 2025 ++---------------------------------------------------------------------------------------+ +| NVIDIA-SMI 535.261.03 Driver Version: 535.261.03 CUDA Version: 12.2 | +|-----------------------------------------+----------------------+----------------------+ +| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC | +| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | +| | | MIG M. | +|=========================================+======================+======================| +| 0 NVIDIA A100-PCIE-40GB On | 00000000:D8:00.0 Off | Off | +| N/A 22C P0 32W / 250W | 501MiB / 40960MiB | 0% Default | +| | | Disabled | ++-----------------------------------------+----------------------+----------------------+ + ++---------------------------------------------------------------------------------------+ +| Processes: | +| GPU GI CI PID Type Process name GPU Memory | +| ID ID Usage | +|=======================================================================================| +| 0 N/A N/A 1608595 C ...lo/modded-nanogpt/.venv/bin/python3 492MiB | ++---------------------------------------------------------------------------------------+ + +==================================================================================================== +Start time: 1759432318.7282798 +inductor_output_code: /tmp/job.16700835/torchinductor_henrycastillo/au/cauepvgz5xvqqe2yvk3lioq3hfctbr5cv6nlhv6blqvsruh2k5w5.py +inductor_output_code: /tmp/job.16700835/torchinductor_henrycastillo/25/c25ej2djpgyuo7tfjiuhif5vnzctt7jpmzwrwrgoff5nxxbzzodf.py +inductor_output_code: /tmp/job.16700835/torchinductor_henrycastillo/nx/cnxbg3bvb47pbvlk7wkkvsaaraz7yepysps2vsuzvgfi6rxct7be.py +inductor_output_code: /tmp/job.16700835/torchinductor_henrycastillo/qu/cquvn55pxy2x26kwbdzhwmrohus672kafhfvdcpie2nbg3sfejkf.py +inductor_output_code: /tmp/job.16700835/torchinductor_henrycastillo/jk/cjkudycmcek2vtrabti3ubdkiqbjwysu7o6fhadcm2azowysknlo.py +inductor_output_code: /tmp/job.16700835/torchinductor_henrycastillo/ko/ckozkvhhqaftrkc3ri7lqvbvud6lffj44uxsrwr7k5iqprjehn4e.py +step:0/57344 val_loss:10.825846 train_time:49497ms step_avg:49496.54ms +step:1/57344 train_time:50040ms step_avg:50040.27ms +step:2/57344 train_time:58698ms step_avg:29348.97ms +step:3/57344 train_time:58711ms step_avg:19570.42ms +grad accum step:1/14336 +step:4/57344 train_time:59537ms step_avg:14884.23ms +step:5/57344 train_time:59552ms step_avg:11910.40ms +step:6/57344 train_time:59768ms step_avg:9961.30ms +step:7/57344 train_time:60213ms step_avg:8601.84ms +grad accum step:2/14336 +step:8/57344 train_time:61258ms step_avg:7657.27ms +step:9/57344 train_time:61275ms step_avg:6808.38ms +step:10/57344 train_time:61495ms step_avg:6149.48ms +step:11/57344 train_time:61945ms step_avg:5631.39ms +grad accum step:3/14336 +step:12/57344 train_time:63001ms step_avg:5250.11ms +step:13/57344 train_time:63018ms step_avg:4847.55ms +step:14/57344 train_time:63239ms step_avg:4517.04ms +step:15/57344 train_time:63689ms step_avg:4245.93ms +grad accum step:4/14336 +step:16/57344 train_time:64745ms step_avg:4046.58ms +step:17/57344 train_time:64763ms step_avg:3809.58ms +step:18/57344 train_time:64983ms step_avg:3610.18ms +step:19/57344 train_time:65435ms step_avg:3443.96ms +grad accum step:5/14336 +step:20/57344 train_time:66493ms step_avg:3324.66ms +step:21/57344 train_time:66511ms step_avg:3167.17ms +step:22/57344 train_time:66732ms step_avg:3033.26ms +step:23/57344 train_time:67185ms step_avg:2921.09ms +grad accum step:6/14336 +step:24/57344 train_time:68242ms step_avg:2843.42ms +step:25/57344 train_time:68259ms step_avg:2730.38ms +step:26/57344 train_time:68481ms step_avg:2633.88ms +step:27/57344 train_time:68933ms step_avg:2553.06ms +grad accum step:7/14336 +step:28/57344 train_time:69994ms step_avg:2499.80ms +step:29/57344 train_time:70012ms step_avg:2414.20ms +step:30/57344 train_time:70234ms step_avg:2341.12ms +step:31/57344 train_time:70687ms step_avg:2280.23ms +grad accum step:8/14336 +step:32/57344 train_time:71748ms step_avg:2242.13ms +step:33/57344 train_time:71766ms step_avg:2174.72ms +step:34/57344 train_time:71987ms step_avg:2117.28ms +step:35/57344 train_time:72440ms step_avg:2069.72ms +grad accum step:9/14336 +step:36/57344 train_time:73506ms step_avg:2041.82ms +step:37/57344 train_time:73523ms step_avg:1987.11ms +step:38/57344 train_time:73743ms step_avg:1940.61ms +step:39/57344 train_time:74197ms step_avg:1902.48ms +grad accum step:10/14336 +step:40/57344 train_time:75258ms step_avg:1881.44ms +step:41/57344 train_time:75275ms step_avg:1835.98ms +step:42/57344 train_time:75497ms step_avg:1797.56ms +step:43/57344 train_time:75952ms step_avg:1766.34ms +grad accum step:11/14336 +step:44/57344 train_time:77015ms step_avg:1750.33ms +step:45/57344 train_time:77032ms step_avg:1711.83ms +step:46/57344 train_time:77253ms step_avg:1679.42ms +step:47/57344 train_time:77707ms step_avg:1653.34ms +grad accum step:12/14336 +step:48/57344 train_time:78767ms step_avg:1640.99ms +step:49/57344 train_time:78784ms step_avg:1607.85ms +step:50/57344 train_time:79006ms step_avg:1580.12ms +step:51/57344 train_time:79458ms step_avg:1558.00ms +grad accum step:13/14336 +step:52/57344 train_time:80516ms step_avg:1548.39ms +step:53/57344 train_time:80534ms step_avg:1519.50ms +step:54/57344 train_time:80756ms step_avg:1495.49ms +step:55/57344 train_time:81211ms step_avg:1476.56ms +grad accum step:14/14336 +step:56/57344 train_time:82270ms step_avg:1469.11ms +step:57/57344 train_time:82287ms step_avg:1443.63ms +step:58/57344 train_time:82507ms step_avg:1422.54ms +step:59/57344 train_time:109435ms step_avg:1854.82ms +grad accum step:15/14336 +step:60/57344 train_time:110331ms step_avg:1838.84ms +step:61/57344 train_time:110348ms step_avg:1808.98ms +step:62/57344 train_time:110567ms step_avg:1783.34ms +step:63/57344 train_time:111016ms step_avg:1762.15ms +grad accum step:16/14336 +step:64/57344 train_time:112067ms step_avg:1751.05ms +step:64/57344 val_loss:7.268824 train_time:112068ms step_avg:1751.06ms +step:65/57344 train_time:112082ms step_avg:1724.35ms +step:66/57344 train_time:112278ms step_avg:1701.19ms +step:67/57344 train_time:112733ms step_avg:1682.58ms +grad accum step:17/14336 +step:68/57344 train_time:113797ms step_avg:1673.49ms +step:69/57344 train_time:113815ms step_avg:1649.49ms +step:70/57344 train_time:114035ms step_avg:1629.06ms +step:71/57344 train_time:114487ms step_avg:1612.50ms +grad accum step:18/14336 +step:72/57344 train_time:115547ms step_avg:1604.82ms +step:73/57344 train_time:115564ms step_avg:1583.08ms +step:74/57344 train_time:115786ms step_avg:1564.67ms +step:75/57344 train_time:116236ms step_avg:1549.81ms +grad accum step:19/14336 +step:76/57344 train_time:117296ms step_avg:1543.37ms +step:77/57344 train_time:117314ms step_avg:1523.55ms +step:78/57344 train_time:117533ms step_avg:1506.84ms +step:79/57344 train_time:117985ms step_avg:1493.49ms +grad accum step:20/14336 +step:80/57344 train_time:119046ms step_avg:1488.07ms +step:81/57344 train_time:119063ms step_avg:1469.92ms +step:82/57344 train_time:119282ms step_avg:1454.66ms +step:83/57344 train_time:119734ms step_avg:1442.57ms +grad accum step:21/14336 +step:84/57344 train_time:120797ms step_avg:1438.05ms +step:85/57344 train_time:120814ms step_avg:1421.34ms +step:86/57344 train_time:121033ms step_avg:1407.36ms +step:87/57344 train_time:121485ms step_avg:1396.38ms +grad accum step:22/14336 +step:88/57344 train_time:122544ms step_avg:1392.54ms +step:89/57344 train_time:122561ms step_avg:1377.09ms +step:90/57344 train_time:122782ms step_avg:1364.25ms +step:91/57344 train_time:123234ms step_avg:1354.22ms +grad accum step:23/14336 +step:92/57344 train_time:124294ms step_avg:1351.02ms +step:93/57344 train_time:124311ms step_avg:1336.68ms +step:94/57344 train_time:124529ms step_avg:1324.78ms +step:95/57344 train_time:124981ms step_avg:1315.59ms +grad accum step:24/14336 +step:96/57344 train_time:126039ms step_avg:1312.90ms +step:97/57344 train_time:126056ms step_avg:1299.55ms +step:98/57344 train_time:126273ms step_avg:1288.50ms +step:99/57344 train_time:126724ms step_avg:1280.05ms +grad accum step:25/14336 +step:100/57344 train_time:127785ms step_avg:1277.85ms +step:101/57344 train_time:127802ms step_avg:1265.37ms +step:102/57344 train_time:128022ms step_avg:1255.11ms +step:103/57344 train_time:128473ms step_avg:1247.31ms +grad accum step:26/14336 +step:104/57344 train_time:129528ms step_avg:1245.46ms +step:105/57344 train_time:129545ms step_avg:1233.76ms +step:106/57344 train_time:129764ms step_avg:1224.19ms +step:107/57344 train_time:130215ms step_avg:1216.96ms +grad accum step:27/14336 +step:108/57344 train_time:131272ms step_avg:1215.48ms +step:109/57344 train_time:131289ms step_avg:1204.49ms +step:110/57344 train_time:131507ms step_avg:1195.52ms +step:111/57344 train_time:131958ms step_avg:1188.81ms +grad accum step:28/14336 +step:112/57344 train_time:133015ms step_avg:1187.63ms +step:113/57344 train_time:133032ms step_avg:1177.28ms +step:114/57344 train_time:133251ms step_avg:1168.87ms +step:115/57344 train_time:133700ms step_avg:1162.61ms +grad accum step:29/14336 +step:116/57344 train_time:134755ms step_avg:1161.68ms +step:117/57344 train_time:134773ms step_avg:1151.90ms +step:118/57344 train_time:134990ms step_avg:1143.98ms +step:119/57344 train_time:135440ms step_avg:1138.15ms +grad accum step:30/14336 +step:120/57344 train_time:136494ms step_avg:1137.45ms +step:121/57344 train_time:136511ms step_avg:1128.19ms +step:122/57344 train_time:136729ms step_avg:1120.73ms +step:123/57344 train_time:137179ms step_avg:1115.27ms +grad accum step:31/14336 +step:124/57344 train_time:138233ms step_avg:1114.78ms +step:125/57344 train_time:138250ms step_avg:1106.00ms +step:126/57344 train_time:138468ms step_avg:1098.95ms +step:127/57344 train_time:138918ms step_avg:1093.84ms +grad accum step:32/14336 +step:128/57344 train_time:139973ms step_avg:1093.54ms +step:128/57344 val_loss:7.868536 train_time:139973ms step_avg:1093.54ms +step:129/57344 train_time:139988ms step_avg:1085.18ms +step:130/57344 train_time:140181ms step_avg:1078.32ms +step:131/57344 train_time:140632ms step_avg:1073.53ms +grad accum step:33/14336 +step:132/57344 train_time:141690ms step_avg:1073.41ms +step:133/57344 train_time:141707ms step_avg:1065.47ms +step:134/57344 train_time:141927ms step_avg:1059.15ms +step:135/57344 train_time:142378ms step_avg:1054.65ms +grad accum step:34/14336 +step:136/57344 train_time:143434ms step_avg:1054.66ms +step:137/57344 train_time:143451ms step_avg:1047.09ms +step:138/57344 train_time:143671ms step_avg:1041.09ms +step:139/57344 train_time:144121ms step_avg:1036.84ms +grad accum step:35/14336 +step:140/57344 train_time:145181ms step_avg:1037.01ms +step:141/57344 train_time:145198ms step_avg:1029.78ms +step:142/57344 train_time:145416ms step_avg:1024.06ms +step:143/57344 train_time:145868ms step_avg:1020.06ms +grad accum step:36/14336 +step:144/57344 train_time:146924ms step_avg:1020.31ms +step:145/57344 train_time:146942ms step_avg:1013.39ms +step:146/57344 train_time:147160ms step_avg:1007.95ms +step:147/57344 train_time:147612ms step_avg:1004.16ms +grad accum step:37/14336 +step:148/57344 train_time:148672ms step_avg:1004.54ms +step:149/57344 train_time:148689ms step_avg:997.91ms +step:150/57344 train_time:148908ms step_avg:992.72ms +step:151/57344 train_time:149358ms step_avg:989.12ms +grad accum step:38/14336 +step:152/57344 train_time:150420ms step_avg:989.61ms +step:153/57344 train_time:150438ms step_avg:983.25ms +step:154/57344 train_time:150656ms step_avg:978.28ms +step:155/57344 train_time:151107ms step_avg:974.89ms +grad accum step:39/14336 +step:156/57344 train_time:152164ms step_avg:975.41ms +step:157/57344 train_time:152182ms step_avg:969.31ms +step:158/57344 train_time:152399ms step_avg:964.55ms +step:159/57344 train_time:152849ms step_avg:961.32ms +grad accum step:40/14336 +step:160/57344 train_time:153908ms step_avg:961.93ms +step:161/57344 train_time:153926ms step_avg:956.06ms +step:162/57344 train_time:154146ms step_avg:951.52ms +step:163/57344 train_time:154597ms step_avg:948.45ms +grad accum step:41/14336 +step:164/57344 train_time:155657ms step_avg:949.13ms +step:165/57344 train_time:155674ms step_avg:943.48ms +step:166/57344 train_time:155893ms step_avg:939.12ms +step:167/57344 train_time:156344ms step_avg:936.19ms +grad accum step:42/14336 +step:168/57344 train_time:157402ms step_avg:936.92ms +step:169/57344 train_time:157420ms step_avg:931.48ms +step:170/57344 train_time:157637ms step_avg:927.28ms +step:171/57344 train_time:158088ms step_avg:924.49ms +grad accum step:43/14336 +step:172/57344 train_time:159147ms step_avg:925.27ms +step:173/57344 train_time:159164ms step_avg:920.03ms +step:174/57344 train_time:159383ms step_avg:916.00ms +step:175/57344 train_time:159835ms step_avg:913.34ms +grad accum step:44/14336 +step:176/57344 train_time:160895ms step_avg:914.18ms +step:177/57344 train_time:160913ms step_avg:909.11ms +step:178/57344 train_time:161130ms step_avg:905.22ms +step:179/57344 train_time:161583ms step_avg:902.70ms +grad accum step:45/14336 +step:180/57344 train_time:162644ms step_avg:903.58ms +step:181/57344 train_time:162662ms step_avg:898.68ms +step:182/57344 train_time:162880ms step_avg:894.95ms +step:183/57344 train_time:163332ms step_avg:892.52ms +grad accum step:46/14336 +step:184/57344 train_time:164394ms step_avg:893.44ms +step:185/57344 train_time:164411ms step_avg:888.71ms +step:186/57344 train_time:164628ms step_avg:885.10ms +step:187/57344 train_time:165081ms step_avg:882.78ms +grad accum step:47/14336 +step:188/57344 train_time:166140ms step_avg:883.72ms +step:189/57344 train_time:166158ms step_avg:879.14ms +step:190/57344 train_time:166376ms step_avg:875.66ms +step:191/57344 train_time:166829ms step_avg:873.45ms +grad accum step:48/14336 +step:192/57344 train_time:167885ms step_avg:874.40ms +step:192/57344 val_loss:7.894432 train_time:167886ms step_avg:874.40ms +step:193/57344 train_time:167900ms step_avg:869.95ms +step:194/57344 train_time:168094ms step_avg:866.46ms +step:195/57344 train_time:168546ms step_avg:864.34ms +grad accum step:49/14336 +step:196/57344 train_time:169606ms step_avg:865.34ms +step:197/57344 train_time:169624ms step_avg:861.03ms +step:198/57344 train_time:169843ms step_avg:857.79ms +step:199/57344 train_time:170295ms step_avg:855.75ms +grad accum step:50/14336 +step:200/57344 train_time:171356ms step_avg:856.78ms +step:201/57344 train_time:171374ms step_avg:852.61ms +step:202/57344 train_time:171593ms step_avg:849.47ms +step:203/57344 train_time:172045ms step_avg:847.51ms +grad accum step:51/14336 +step:204/57344 train_time:173107ms step_avg:848.57ms +step:205/57344 train_time:173125ms step_avg:844.51ms +step:206/57344 train_time:173343ms step_avg:841.47ms +step:207/57344 train_time:173795ms step_avg:839.59ms +grad accum step:52/14336 +step:208/57344 train_time:174856ms step_avg:840.65ms +step:209/57344 train_time:174874ms step_avg:836.72ms +step:210/57344 train_time:175092ms step_avg:833.77ms +step:211/57344 train_time:175544ms step_avg:831.96ms +grad accum step:53/14336 +step:212/57344 train_time:176605ms step_avg:833.04ms +step:213/57344 train_time:176623ms step_avg:829.21ms +step:214/57344 train_time:176843ms step_avg:826.37ms +step:215/57344 train_time:177294ms step_avg:824.62ms +grad accum step:54/14336 +step:216/57344 train_time:178357ms step_avg:825.73ms +step:217/57344 train_time:178375ms step_avg:822.00ms +step:218/57344 train_time:178593ms step_avg:819.24ms +step:219/57344 train_time:179044ms step_avg:817.55ms +grad accum step:55/14336 +step:220/57344 train_time:180106ms step_avg:818.66ms +step:221/57344 train_time:180123ms step_avg:815.04ms +step:222/57344 train_time:180343ms step_avg:812.35ms +step:223/57344 train_time:180795ms step_avg:810.74ms +grad accum step:56/14336 +step:224/57344 train_time:181856ms step_avg:811.86ms +step:225/57344 train_time:181874ms step_avg:808.33ms +step:226/57344 train_time:182092ms step_avg:805.72ms +step:227/57344 train_time:182543ms step_avg:804.16ms +grad accum step:57/14336 +step:228/57344 train_time:183604ms step_avg:805.28ms +step:229/57344 train_time:183622ms step_avg:801.84ms +step:230/57344 train_time:183841ms step_avg:799.31ms +step:231/57344 train_time:184293ms step_avg:797.80ms +grad accum step:58/14336 +step:232/57344 train_time:185354ms step_avg:798.94ms +step:233/57344 train_time:185372ms step_avg:795.59ms +step:234/57344 train_time:185590ms step_avg:793.12ms +step:235/57344 train_time:186041ms step_avg:791.66ms +grad accum step:59/14336 +step:236/57344 train_time:187102ms step_avg:792.81ms +step:237/57344 train_time:187120ms step_avg:789.54ms +step:238/57344 train_time:187339ms step_avg:787.14ms +step:239/57344 train_time:187791ms step_avg:785.74ms +grad accum step:60/14336 +step:240/57344 train_time:188853ms step_avg:786.89ms +step:241/57344 train_time:188871ms step_avg:783.70ms +step:242/57344 train_time:189090ms step_avg:781.36ms +step:243/57344 train_time:189540ms step_avg:780.00ms +grad accum step:61/14336 +step:244/57344 train_time:190600ms step_avg:781.15ms +step:245/57344 train_time:190618ms step_avg:778.03ms +step:246/57344 train_time:190837ms step_avg:775.76ms +step:247/57344 train_time:191288ms step_avg:774.44ms +grad accum step:62/14336 +step:248/57344 train_time:192348ms step_avg:775.60ms +step:249/57344 train_time:192366ms step_avg:772.56ms +step:250/57344 train_time:192585ms step_avg:770.34ms +step:251/57344 train_time:193036ms step_avg:769.07ms +grad accum step:63/14336 +step:252/57344 train_time:194097ms step_avg:770.23ms +step:253/57344 train_time:194115ms step_avg:767.25ms +step:254/57344 train_time:194334ms step_avg:765.09ms +step:255/57344 train_time:194785ms step_avg:763.86ms +grad accum step:64/14336 +step:256/57344 train_time:195847ms step_avg:765.03ms +step:256/57344 val_loss:7.892108 train_time:195848ms step_avg:765.03ms +step:257/57344 train_time:195862ms step_avg:762.11ms +step:258/57344 train_time:196055ms step_avg:759.90ms +step:259/57344 train_time:196509ms step_avg:758.72ms +grad accum step:65/14336 +step:260/57344 train_time:197568ms step_avg:759.88ms +step:261/57344 train_time:197586ms step_avg:757.04ms +step:262/57344 train_time:197805ms step_avg:754.98ms +step:263/57344 train_time:198258ms step_avg:753.83ms +grad accum step:66/14336 +step:264/57344 train_time:199315ms step_avg:754.98ms +step:265/57344 train_time:199333ms step_avg:752.20ms +step:266/57344 train_time:199552ms step_avg:750.19ms +step:267/57344 train_time:200004ms step_avg:749.08ms +grad accum step:67/14336 +step:268/57344 train_time:201066ms step_avg:750.25ms +step:269/57344 train_time:201084ms step_avg:747.52ms +step:270/57344 train_time:201302ms step_avg:745.56ms +step:271/57344 train_time:201754ms step_avg:744.48ms +grad accum step:68/14336 +step:272/57344 train_time:202815ms step_avg:745.64ms +step:273/57344 train_time:202832ms step_avg:742.97ms +step:274/57344 train_time:203051ms step_avg:741.06ms +step:275/57344 train_time:203503ms step_avg:740.01ms +grad accum step:69/14336 +step:276/57344 train_time:204564ms step_avg:741.18ms +step:277/57344 train_time:204582ms step_avg:738.56ms +step:278/57344 train_time:204800ms step_avg:736.69ms +step:279/57344 train_time:205253ms step_avg:735.67ms +grad accum step:70/14336 +step:280/57344 train_time:206310ms step_avg:736.82ms +step:281/57344 train_time:206328ms step_avg:734.26ms +step:282/57344 train_time:206548ms step_avg:732.44ms +step:283/57344 train_time:206998ms step_avg:731.44ms +grad accum step:71/14336 +step:284/57344 train_time:208061ms step_avg:732.61ms +step:285/57344 train_time:208079ms step_avg:730.10ms +step:286/57344 train_time:208296ms step_avg:728.31ms +step:287/57344 train_time:208747ms step_avg:727.34ms +grad accum step:72/14336 +step:288/57344 train_time:209809ms step_avg:728.50ms +step:289/57344 train_time:209826ms step_avg:726.04ms +step:290/57344 train_time:210044ms step_avg:724.29ms +step:291/57344 train_time:210497ms step_avg:723.36ms +grad accum step:73/14336 +step:292/57344 train_time:211553ms step_avg:724.50ms +step:293/57344 train_time:211571ms step_avg:722.09ms +step:294/57344 train_time:211789ms step_avg:720.37ms +step:295/57344 train_time:212241ms step_avg:719.46ms +grad accum step:74/14336 +step:296/57344 train_time:213302ms step_avg:720.61ms +step:297/57344 train_time:213319ms step_avg:718.25ms +step:298/57344 train_time:213537ms step_avg:716.57ms +step:299/57344 train_time:213987ms step_avg:715.68ms +grad accum step:75/14336 +step:300/57344 train_time:215048ms step_avg:716.83ms +step:301/57344 train_time:215064ms step_avg:714.50ms +step:302/57344 train_time:215283ms step_avg:712.86ms +step:303/57344 train_time:215735ms step_avg:712.00ms +grad accum step:76/14336 +step:304/57344 train_time:216795ms step_avg:713.14ms +step:305/57344 train_time:216813ms step_avg:710.86ms +step:306/57344 train_time:217031ms step_avg:709.25ms +step:307/57344 train_time:217483ms step_avg:708.41ms +grad accum step:77/14336 +step:308/57344 train_time:218541ms step_avg:709.55ms +step:309/57344 train_time:218558ms step_avg:707.31ms +step:310/57344 train_time:218776ms step_avg:705.73ms +step:311/57344 train_time:219226ms step_avg:704.91ms +grad accum step:78/14336 +step:312/57344 train_time:220286ms step_avg:706.04ms +step:313/57344 train_time:220303ms step_avg:703.84ms +step:314/57344 train_time:220522ms step_avg:702.30ms +step:315/57344 train_time:220973ms step_avg:701.50ms +grad accum step:79/14336 +step:316/57344 train_time:222034ms step_avg:702.64ms +step:317/57344 train_time:222052ms step_avg:700.48ms +step:318/57344 train_time:222270ms step_avg:698.96ms +step:319/57344 train_time:222721ms step_avg:698.18ms +grad accum step:80/14336 +step:320/57344 train_time:223782ms step_avg:699.32ms +step:320/57344 val_loss:7.884940 train_time:223782ms step_avg:699.32ms +step:321/57344 train_time:223795ms step_avg:697.18ms +step:322/57344 train_time:223989ms step_avg:695.62ms +step:323/57344 train_time:224442ms step_avg:694.87ms +grad accum step:81/14336 +step:324/57344 train_time:225499ms step_avg:695.98ms +step:325/57344 train_time:225516ms step_avg:693.90ms +step:326/57344 train_time:225735ms step_avg:692.44ms +step:327/57344 train_time:226185ms step_avg:691.70ms +grad accum step:82/14336 +step:328/57344 train_time:227242ms step_avg:692.81ms +step:329/57344 train_time:227260ms step_avg:690.76ms +step:330/57344 train_time:227479ms step_avg:689.33ms +step:331/57344 train_time:227928ms step_avg:688.60ms +grad accum step:83/14336 +step:332/57344 train_time:228984ms step_avg:689.71ms +step:333/57344 train_time:229002ms step_avg:687.69ms +step:334/57344 train_time:229220ms step_avg:686.29ms +step:335/57344 train_time:229672ms step_avg:685.59ms +grad accum step:84/14336 +step:336/57344 train_time:230728ms step_avg:686.69ms +step:337/57344 train_time:230746ms step_avg:684.70ms +step:338/57344 train_time:230965ms step_avg:683.33ms +step:339/57344 train_time:231415ms step_avg:682.64ms +grad accum step:85/14336 +step:340/57344 train_time:232471ms step_avg:683.74ms +step:341/57344 train_time:232489ms step_avg:681.79ms +step:342/57344 train_time:232707ms step_avg:680.43ms +step:343/57344 train_time:233158ms step_avg:679.76ms +grad accum step:86/14336 +step:344/57344 train_time:234215ms step_avg:680.86ms +step:345/57344 train_time:234232ms step_avg:678.93ms +step:346/57344 train_time:234450ms step_avg:677.60ms +step:347/57344 train_time:234901ms step_avg:676.95ms +grad accum step:87/14336 +step:348/57344 train_time:235960ms step_avg:678.05ms +step:349/57344 train_time:235978ms step_avg:676.15ms +step:350/57344 train_time:236195ms step_avg:674.84ms +step:351/57344 train_time:236648ms step_avg:674.21ms +grad accum step:88/14336 +step:352/57344 train_time:237706ms step_avg:675.30ms +step:353/57344 train_time:237723ms step_avg:673.44ms +step:354/57344 train_time:237941ms step_avg:672.15ms +step:355/57344 train_time:238392ms step_avg:671.53ms +grad accum step:89/14336 +step:356/57344 train_time:239448ms step_avg:672.61ms +step:357/57344 train_time:239465ms step_avg:670.77ms +step:358/57344 train_time:239683ms step_avg:669.51ms +step:359/57344 train_time:240137ms step_avg:668.90ms +grad accum step:90/14336 +step:360/57344 train_time:241191ms step_avg:669.98ms +step:361/57344 train_time:241209ms step_avg:668.17ms +step:362/57344 train_time:241427ms step_avg:666.92ms +step:363/57344 train_time:241878ms step_avg:666.33ms +grad accum step:91/14336 +step:364/57344 train_time:242934ms step_avg:667.40ms +step:365/57344 train_time:242951ms step_avg:665.62ms +step:366/57344 train_time:243169ms step_avg:664.40ms +step:367/57344 train_time:243621ms step_avg:663.82ms +grad accum step:92/14336 +step:368/57344 train_time:244676ms step_avg:664.88ms +step:369/57344 train_time:244694ms step_avg:663.13ms +step:370/57344 train_time:244912ms step_avg:661.92ms +step:371/57344 train_time:245361ms step_avg:661.35ms +grad accum step:93/14336 +step:372/57344 train_time:246421ms step_avg:662.42ms +step:373/57344 train_time:246438ms step_avg:660.69ms +step:374/57344 train_time:246656ms step_avg:659.51ms +step:375/57344 train_time:247107ms step_avg:658.95ms +grad accum step:94/14336 +step:376/57344 train_time:248163ms step_avg:660.01ms +step:377/57344 train_time:248180ms step_avg:658.30ms +step:378/57344 train_time:248398ms step_avg:657.14ms +step:379/57344 train_time:248848ms step_avg:656.59ms +grad accum step:95/14336 +step:380/57344 train_time:249905ms step_avg:657.64ms +step:381/57344 train_time:249923ms step_avg:655.96ms +step:382/57344 train_time:250141ms step_avg:654.82ms +step:383/57344 train_time:250591ms step_avg:654.29ms +grad accum step:96/14336 +step:384/57344 train_time:251648ms step_avg:655.33ms +step:384/57344 val_loss:7.894324 train_time:251648ms step_avg:655.33ms +step:385/57344 train_time:251660ms step_avg:653.66ms +step:386/57344 train_time:251855ms step_avg:652.47ms +step:387/57344 train_time:252305ms step_avg:651.95ms +grad accum step:97/14336 +step:388/57344 train_time:253361ms step_avg:652.99ms +step:389/57344 train_time:253379ms step_avg:651.36ms +step:390/57344 train_time:253598ms step_avg:650.25ms +step:391/57344 train_time:254047ms step_avg:649.74ms +grad accum step:98/14336 +step:392/57344 train_time:255103ms step_avg:650.77ms +step:393/57344 train_time:255121ms step_avg:649.16ms +step:394/57344 train_time:255339ms step_avg:648.07ms +step:395/57344 train_time:255790ms step_avg:647.57ms +grad accum step:99/14336 +step:396/57344 train_time:256845ms step_avg:648.60ms +step:397/57344 train_time:256862ms step_avg:647.01ms +step:398/57344 train_time:257081ms step_avg:645.93ms +step:399/57344 train_time:257530ms step_avg:645.44ms +grad accum step:100/14336 +step:400/57344 train_time:258584ms step_avg:646.46ms +step:401/57344 train_time:258602ms step_avg:644.89ms +step:402/57344 train_time:258821ms step_avg:643.83ms +step:403/57344 train_time:259270ms step_avg:643.35ms +grad accum step:101/14336 +step:404/57344 train_time:260325ms step_avg:644.37ms +step:405/57344 train_time:260343ms step_avg:642.82ms +step:406/57344 train_time:260560ms step_avg:641.77ms +step:407/57344 train_time:261010ms step_avg:641.30ms +grad accum step:102/14336 +step:408/57344 train_time:262067ms step_avg:642.32ms +step:409/57344 train_time:262085ms step_avg:640.79ms +step:410/57344 train_time:262302ms step_avg:639.76ms +step:411/57344 train_time:262753ms step_avg:639.30ms +grad accum step:103/14336 +step:412/57344 train_time:263809ms step_avg:640.31ms +step:413/57344 train_time:263827ms step_avg:638.81ms +step:414/57344 train_time:264045ms step_avg:637.79ms +step:415/57344 train_time:264496ms step_avg:637.34ms +grad accum step:104/14336 +step:416/57344 train_time:265553ms step_avg:638.35ms +step:417/57344 train_time:265571ms step_avg:636.86ms +step:418/57344 train_time:265788ms step_avg:635.86ms +step:419/57344 train_time:266239ms step_avg:635.41ms +grad accum step:105/14336 +step:420/57344 train_time:267295ms step_avg:636.42ms +step:421/57344 train_time:267312ms step_avg:634.95ms +step:422/57344 train_time:267530ms step_avg:633.96ms +step:423/57344 train_time:267981ms step_avg:633.52ms +grad accum step:106/14336 +step:424/57344 train_time:269035ms step_avg:634.52ms +step:425/57344 train_time:269053ms step_avg:633.07ms +step:426/57344 train_time:269271ms step_avg:632.09ms +step:427/57344 train_time:269722ms step_avg:631.67ms +grad accum step:107/14336 +step:428/57344 train_time:270779ms step_avg:632.66ms +step:429/57344 train_time:270797ms step_avg:631.23ms +step:430/57344 train_time:271013ms step_avg:630.26ms +step:431/57344 train_time:271463ms step_avg:629.84ms +grad accum step:108/14336 +step:432/57344 train_time:272520ms step_avg:630.83ms +step:433/57344 train_time:272538ms step_avg:629.42ms +step:434/57344 train_time:272756ms step_avg:628.47ms +step:435/57344 train_time:273206ms step_avg:628.06ms +grad accum step:109/14336 +step:436/57344 train_time:274263ms step_avg:629.04ms +step:437/57344 train_time:274281ms step_avg:627.64ms +step:438/57344 train_time:274498ms step_avg:626.71ms +step:439/57344 train_time:274950ms step_avg:626.31ms +grad accum step:110/14336 +step:440/57344 train_time:276006ms step_avg:627.29ms +step:441/57344 train_time:276023ms step_avg:625.90ms +step:442/57344 train_time:276241ms step_avg:624.98ms +step:443/57344 train_time:276691ms step_avg:624.58ms +grad accum step:111/14336 +step:444/57344 train_time:277746ms step_avg:625.55ms +step:445/57344 train_time:277764ms step_avg:624.19ms +step:446/57344 train_time:277980ms step_avg:623.27ms +step:447/57344 train_time:278429ms step_avg:622.88ms +grad accum step:112/14336 +step:448/57344 train_time:279486ms step_avg:623.85ms +step:448/57344 val_loss:7.885835 train_time:279487ms step_avg:623.85ms +step:449/57344 train_time:279499ms step_avg:622.49ms +step:450/57344 train_time:279692ms step_avg:621.54ms +step:451/57344 train_time:280143ms step_avg:621.16ms +grad accum step:113/14336 +step:452/57344 train_time:281199ms step_avg:622.12ms +step:453/57344 train_time:281216ms step_avg:620.79ms +step:454/57344 train_time:281433ms step_avg:619.90ms +step:455/57344 train_time:281883ms step_avg:619.52ms +grad accum step:114/14336 +step:456/57344 train_time:282938ms step_avg:620.48ms +step:457/57344 train_time:282956ms step_avg:619.16ms +step:458/57344 train_time:283173ms step_avg:618.28ms +step:459/57344 train_time:283623ms step_avg:617.92ms +grad accum step:115/14336 +step:460/57344 train_time:284678ms step_avg:618.87ms +step:461/57344 train_time:284696ms step_avg:617.56ms +step:462/57344 train_time:284913ms step_avg:616.70ms +step:463/57344 train_time:285366ms step_avg:616.34ms +grad accum step:116/14336 +step:464/57344 train_time:286420ms step_avg:617.28ms +step:465/57344 train_time:286438ms step_avg:616.00ms +step:466/57344 train_time:286656ms step_avg:615.14ms +step:467/57344 train_time:287106ms step_avg:614.79ms +grad accum step:117/14336 +step:468/57344 train_time:288161ms step_avg:615.73ms +step:469/57344 train_time:288179ms step_avg:614.45ms +step:470/57344 train_time:288397ms step_avg:613.61ms +step:471/57344 train_time:288846ms step_avg:613.26ms +grad accum step:118/14336 +step:472/57344 train_time:289901ms step_avg:614.20ms +step:473/57344 train_time:289919ms step_avg:612.94ms +step:474/57344 train_time:290137ms step_avg:612.10ms +step:475/57344 train_time:290588ms step_avg:611.76ms +grad accum step:119/14336 +step:476/57344 train_time:291642ms step_avg:612.69ms +step:477/57344 train_time:291660ms step_avg:611.45ms +step:478/57344 train_time:291877ms step_avg:610.62ms +step:479/57344 train_time:292327ms step_avg:610.29ms +grad accum step:120/14336 +step:480/57344 train_time:293382ms step_avg:611.21ms +step:481/57344 train_time:293399ms step_avg:609.98ms +step:482/57344 train_time:293617ms step_avg:609.16ms +step:483/57344 train_time:294066ms step_avg:608.83ms +grad accum step:121/14336 +step:484/57344 train_time:295120ms step_avg:609.75ms +step:485/57344 train_time:295137ms step_avg:608.53ms +step:486/57344 train_time:295355ms step_avg:607.73ms +step:487/57344 train_time:295805ms step_avg:607.40ms +grad accum step:122/14336 +step:488/57344 train_time:296861ms step_avg:608.32ms +step:489/57344 train_time:296879ms step_avg:607.11ms +step:490/57344 train_time:297095ms step_avg:606.32ms +step:491/57344 train_time:297543ms step_avg:605.99ms +grad accum step:123/14336 +step:492/57344 train_time:298602ms step_avg:606.91ms +step:493/57344 train_time:298619ms step_avg:605.72ms +step:494/57344 train_time:298835ms step_avg:604.93ms +step:495/57344 train_time:299285ms step_avg:604.62ms +grad accum step:124/14336 +step:496/57344 train_time:300343ms step_avg:605.53ms +step:497/57344 train_time:300361ms step_avg:604.35ms +step:498/57344 train_time:300578ms step_avg:603.57ms +step:499/57344 train_time:301027ms step_avg:603.26ms +grad accum step:125/14336 +step:500/57344 train_time:302083ms step_avg:604.17ms +step:501/57344 train_time:302101ms step_avg:603.00ms +step:502/57344 train_time:302317ms step_avg:602.22ms +step:503/57344 train_time:302767ms step_avg:601.92ms +grad accum step:126/14336 +step:504/57344 train_time:303820ms step_avg:602.82ms +step:505/57344 train_time:303838ms step_avg:601.66ms +step:506/57344 train_time:304056ms step_avg:600.90ms +step:507/57344 train_time:304505ms step_avg:600.60ms +grad accum step:127/14336 +step:508/57344 train_time:305562ms step_avg:601.50ms +step:509/57344 train_time:305579ms step_avg:600.35ms +step:510/57344 train_time:305796ms step_avg:599.60ms +step:511/57344 train_time:306246ms step_avg:599.31ms +grad accum step:128/14336 +step:512/57344 train_time:307303ms step_avg:600.20ms +step:512/57344 val_loss:7.861404 train_time:307303ms step_avg:600.20ms +step:513/57344 train_time:307315ms step_avg:599.05ms +step:514/57344 train_time:307509ms step_avg:598.27ms +step:515/57344 train_time:307959ms step_avg:597.98ms +grad accum step:129/14336 +step:516/57344 train_time:309014ms step_avg:598.87ms +step:517/57344 train_time:309032ms step_avg:597.74ms +step:518/57344 train_time:309250ms step_avg:597.01ms +step:519/57344 train_time:309702ms step_avg:596.73ms +grad accum step:130/14336 +step:520/57344 train_time:310755ms step_avg:597.61ms +step:521/57344 train_time:310772ms step_avg:596.49ms +step:522/57344 train_time:310990ms step_avg:595.77ms +step:523/57344 train_time:311440ms step_avg:595.49ms +grad accum step:131/14336 +step:524/57344 train_time:312494ms step_avg:596.36ms +step:525/57344 train_time:312512ms step_avg:595.26ms +step:526/57344 train_time:312730ms step_avg:594.54ms +step:527/57344 train_time:313180ms step_avg:594.27ms +grad accum step:132/14336 +step:528/57344 train_time:314234ms step_avg:595.14ms +step:529/57344 train_time:314252ms step_avg:594.05ms +step:530/57344 train_time:314470ms step_avg:593.34ms +step:531/57344 train_time:314919ms step_avg:593.07ms +grad accum step:133/14336 +step:532/57344 train_time:315974ms step_avg:593.94ms +step:533/57344 train_time:315991ms step_avg:592.85ms +step:534/57344 train_time:316209ms step_avg:592.15ms +step:535/57344 train_time:316658ms step_avg:591.88ms +grad accum step:134/14336 +step:536/57344 train_time:317713ms step_avg:592.75ms +step:537/57344 train_time:317731ms step_avg:591.68ms +step:538/57344 train_time:317949ms step_avg:590.98ms +step:539/57344 train_time:318398ms step_avg:590.72ms +grad accum step:135/14336 +step:540/57344 train_time:319454ms step_avg:591.58ms +step:541/57344 train_time:319471ms step_avg:590.52ms +step:542/57344 train_time:319689ms step_avg:589.83ms +step:543/57344 train_time:320139ms step_avg:589.57ms +grad accum step:136/14336 +step:544/57344 train_time:321195ms step_avg:590.43ms +step:545/57344 train_time:321213ms step_avg:589.38ms +step:546/57344 train_time:321429ms step_avg:588.70ms +step:547/57344 train_time:321878ms step_avg:588.44ms +grad accum step:137/14336 +step:548/57344 train_time:322933ms step_avg:589.29ms +step:549/57344 train_time:322951ms step_avg:588.25ms +step:550/57344 train_time:323169ms step_avg:587.58ms +step:551/57344 train_time:323618ms step_avg:587.33ms +grad accum step:138/14336 +step:552/57344 train_time:324672ms step_avg:588.17ms +step:553/57344 train_time:324690ms step_avg:587.14ms +step:554/57344 train_time:324906ms step_avg:586.47ms +step:555/57344 train_time:325356ms step_avg:586.23ms +grad accum step:139/14336 +step:556/57344 train_time:326412ms step_avg:587.07ms +step:557/57344 train_time:326430ms step_avg:586.05ms +step:558/57344 train_time:326646ms step_avg:585.39ms +step:559/57344 train_time:327096ms step_avg:585.14ms +grad accum step:140/14336 +step:560/57344 train_time:328151ms step_avg:585.98ms +step:561/57344 train_time:328168ms step_avg:584.97ms +step:562/57344 train_time:328386ms step_avg:584.32ms +step:563/57344 train_time:328835ms step_avg:584.08ms +grad accum step:141/14336 +step:564/57344 train_time:329892ms step_avg:584.91ms +step:565/57344 train_time:329909ms step_avg:583.91ms +step:566/57344 train_time:330127ms step_avg:583.26ms +step:567/57344 train_time:330578ms step_avg:583.03ms +grad accum step:142/14336 +step:568/57344 train_time:331634ms step_avg:583.86ms +step:569/57344 train_time:331651ms step_avg:582.87ms +step:570/57344 train_time:331868ms step_avg:582.22ms +step:571/57344 train_time:332317ms step_avg:581.99ms +grad accum step:143/14336 +step:572/57344 train_time:333373ms step_avg:582.82ms +step:573/57344 train_time:333391ms step_avg:581.83ms +step:574/57344 train_time:333609ms step_avg:581.20ms +step:575/57344 train_time:334060ms step_avg:580.97ms +grad accum step:144/14336 +step:576/57344 train_time:335114ms step_avg:581.80ms +step:576/57344 val_loss:7.886768 train_time:335115ms step_avg:581.80ms +step:577/57344 train_time:335126ms step_avg:580.81ms +step:578/57344 train_time:335321ms step_avg:580.14ms +step:579/57344 train_time:335771ms step_avg:579.92ms +grad accum step:145/14336 +step:580/57344 train_time:336825ms step_avg:580.73ms +step:581/57344 train_time:336843ms step_avg:579.76ms +step:582/57344 train_time:337060ms step_avg:579.14ms +step:583/57344 train_time:337511ms step_avg:578.92ms +grad accum step:146/14336 +step:584/57344 train_time:338564ms step_avg:579.73ms +step:585/57344 train_time:338581ms step_avg:578.77ms +step:586/57344 train_time:338798ms step_avg:578.15ms +step:587/57344 train_time:339249ms step_avg:577.94ms +grad accum step:147/14336 +step:588/57344 train_time:340303ms step_avg:578.75ms +step:589/57344 train_time:340320ms step_avg:577.79ms +step:590/57344 train_time:340538ms step_avg:577.18ms +step:591/57344 train_time:340988ms step_avg:576.97ms +grad accum step:148/14336 +step:592/57344 train_time:342043ms step_avg:577.78ms +step:593/57344 train_time:342061ms step_avg:576.83ms +step:594/57344 train_time:342278ms step_avg:576.23ms +step:595/57344 train_time:342728ms step_avg:576.01ms +grad accum step:149/14336 +step:596/57344 train_time:343784ms step_avg:576.82ms +step:597/57344 train_time:343802ms step_avg:575.88ms +step:598/57344 train_time:344020ms step_avg:575.28ms +step:599/57344 train_time:344469ms step_avg:575.07ms +grad accum step:150/14336 +step:600/57344 train_time:345524ms step_avg:575.87ms +step:601/57344 train_time:345542ms step_avg:574.94ms +step:602/57344 train_time:345760ms step_avg:574.35ms +step:603/57344 train_time:346209ms step_avg:574.14ms +grad accum step:151/14336 +step:604/57344 train_time:347266ms step_avg:574.94ms +step:605/57344 train_time:347283ms step_avg:574.02ms +step:606/57344 train_time:347503ms step_avg:573.44ms +step:607/57344 train_time:347953ms step_avg:573.23ms +grad accum step:152/14336 +step:608/57344 train_time:349008ms step_avg:574.03ms +step:609/57344 train_time:349026ms step_avg:573.11ms +step:610/57344 train_time:349242ms step_avg:572.53ms +step:611/57344 train_time:349692ms step_avg:572.33ms +grad accum step:153/14336 +step:612/57344 train_time:350750ms step_avg:573.12ms +step:613/57344 train_time:350767ms step_avg:572.21ms +step:614/57344 train_time:350984ms step_avg:571.64ms +step:615/57344 train_time:351432ms step_avg:571.43ms +grad accum step:154/14336 +step:616/57344 train_time:352489ms step_avg:572.22ms +step:617/57344 train_time:352507ms step_avg:571.32ms +step:618/57344 train_time:352724ms step_avg:570.75ms +step:619/57344 train_time:353175ms step_avg:570.56ms +grad accum step:155/14336 +step:620/57344 train_time:354231ms step_avg:571.34ms +step:621/57344 train_time:354249ms step_avg:570.45ms +step:622/57344 train_time:354466ms step_avg:569.88ms +step:623/57344 train_time:354915ms step_avg:569.69ms +grad accum step:156/14336 +step:624/57344 train_time:355971ms step_avg:570.47ms +step:625/57344 train_time:355989ms step_avg:569.58ms +step:626/57344 train_time:356206ms step_avg:569.02ms +step:627/57344 train_time:356655ms step_avg:568.83ms +grad accum step:157/14336 +step:628/57344 train_time:357712ms step_avg:569.60ms +step:629/57344 train_time:357730ms step_avg:568.73ms +step:630/57344 train_time:357947ms step_avg:568.17ms +step:631/57344 train_time:358397ms step_avg:567.98ms +grad accum step:158/14336 +step:632/57344 train_time:359453ms step_avg:568.76ms +step:633/57344 train_time:359471ms step_avg:567.88ms +step:634/57344 train_time:359688ms step_avg:567.33ms +step:635/57344 train_time:360137ms step_avg:567.15ms +grad accum step:159/14336 +step:636/57344 train_time:361195ms step_avg:567.92ms +step:637/57344 train_time:361212ms step_avg:567.05ms +step:638/57344 train_time:361429ms step_avg:566.50ms +step:639/57344 train_time:361880ms step_avg:566.32ms +grad accum step:160/14336 +step:640/57344 train_time:362938ms step_avg:567.09ms +step:640/57344 val_loss:7.896187 train_time:362939ms step_avg:567.09ms +step:641/57344 train_time:362950ms step_avg:566.23ms +step:642/57344 train_time:363145ms step_avg:565.65ms +step:643/57344 train_time:363597ms step_avg:565.47ms +grad accum step:161/14336 +step:644/57344 train_time:364652ms step_avg:566.23ms +step:645/57344 train_time:364670ms step_avg:565.38ms +step:646/57344 train_time:364888ms step_avg:564.84ms +step:647/57344 train_time:365337ms step_avg:564.66ms +grad accum step:162/14336 +step:648/57344 train_time:366391ms step_avg:565.42ms +step:649/57344 train_time:366409ms step_avg:564.57ms +step:650/57344 train_time:366626ms step_avg:564.04ms +step:651/57344 train_time:367075ms step_avg:563.86ms +grad accum step:163/14336 +step:652/57344 train_time:368130ms step_avg:564.62ms +step:653/57344 train_time:368147ms step_avg:563.78ms +step:654/57344 train_time:368365ms step_avg:563.25ms +step:655/57344 train_time:368815ms step_avg:563.08ms +grad accum step:164/14336 +step:656/57344 train_time:369869ms step_avg:563.82ms +step:657/57344 train_time:369886ms step_avg:562.99ms +step:658/57344 train_time:370104ms step_avg:562.47ms +step:659/57344 train_time:370554ms step_avg:562.30ms +grad accum step:165/14336 +step:660/57344 train_time:371606ms step_avg:563.04ms +step:661/57344 train_time:371624ms step_avg:562.22ms +step:662/57344 train_time:371842ms step_avg:561.70ms +step:663/57344 train_time:372292ms step_avg:561.53ms +grad accum step:166/14336 +step:664/57344 train_time:373345ms step_avg:562.27ms +step:665/57344 train_time:373362ms step_avg:561.45ms +step:666/57344 train_time:373580ms step_avg:560.93ms +step:667/57344 train_time:374029ms step_avg:560.76ms +grad accum step:167/14336 +step:668/57344 train_time:375083ms step_avg:561.50ms +step:669/57344 train_time:375101ms step_avg:560.69ms +step:670/57344 train_time:375318ms step_avg:560.18ms +step:671/57344 train_time:375769ms step_avg:560.01ms +grad accum step:168/14336 +step:672/57344 train_time:376822ms step_avg:560.75ms +step:673/57344 train_time:376840ms step_avg:559.94ms +step:674/57344 train_time:377057ms step_avg:559.43ms +step:675/57344 train_time:377506ms step_avg:559.27ms +grad accum step:169/14336 +step:676/57344 train_time:378559ms step_avg:560.00ms +step:677/57344 train_time:378577ms step_avg:559.20ms +step:678/57344 train_time:378794ms step_avg:558.69ms +step:679/57344 train_time:379244ms step_avg:558.53ms +grad accum step:170/14336 +step:680/57344 train_time:380299ms step_avg:559.26ms +step:681/57344 train_time:380317ms step_avg:558.47ms +step:682/57344 train_time:380534ms step_avg:557.97ms +step:683/57344 train_time:380983ms step_avg:557.81ms +grad accum step:171/14336 +step:684/57344 train_time:382038ms step_avg:558.54ms +step:685/57344 train_time:382056ms step_avg:557.75ms +step:686/57344 train_time:382273ms step_avg:557.25ms +step:687/57344 train_time:382724ms step_avg:557.09ms +grad accum step:172/14336 +step:688/57344 train_time:383780ms step_avg:557.82ms +step:689/57344 train_time:383798ms step_avg:557.04ms +step:690/57344 train_time:384014ms step_avg:556.54ms +step:691/57344 train_time:384463ms step_avg:556.39ms +grad accum step:173/14336 +step:692/57344 train_time:385520ms step_avg:557.11ms +step:693/57344 train_time:385538ms step_avg:556.33ms +step:694/57344 train_time:385755ms step_avg:555.84ms +step:695/57344 train_time:386205ms step_avg:555.69ms +grad accum step:174/14336 +step:696/57344 train_time:387261ms step_avg:556.41ms +step:697/57344 train_time:387278ms step_avg:555.64ms +step:698/57344 train_time:387495ms step_avg:555.15ms +step:699/57344 train_time:387944ms step_avg:555.00ms +grad accum step:175/14336 +step:700/57344 train_time:389000ms step_avg:555.71ms +step:701/57344 train_time:389018ms step_avg:554.95ms +step:702/57344 train_time:389235ms step_avg:554.47ms +step:703/57344 train_time:389685ms step_avg:554.32ms +grad accum step:176/14336 +step:704/57344 train_time:390740ms step_avg:555.03ms +step:704/57344 val_loss:7.831467 train_time:390740ms step_avg:555.03ms +step:705/57344 train_time:390752ms step_avg:554.26ms +step:706/57344 train_time:390946ms step_avg:553.75ms +step:707/57344 train_time:391398ms step_avg:553.60ms +grad accum step:177/14336 +step:708/57344 train_time:392452ms step_avg:554.31ms +step:709/57344 train_time:392470ms step_avg:553.55ms +step:710/57344 train_time:392687ms step_avg:553.08ms +step:711/57344 train_time:393136ms step_avg:552.93ms +grad accum step:178/14336 +step:712/57344 train_time:394191ms step_avg:553.64ms +step:713/57344 train_time:394209ms step_avg:552.89ms +step:714/57344 train_time:394427ms step_avg:552.42ms +step:715/57344 train_time:394876ms step_avg:552.27ms +grad accum step:179/14336 +step:716/57344 train_time:395930ms step_avg:552.97ms +step:717/57344 train_time:395948ms step_avg:552.23ms +step:718/57344 train_time:396165ms step_avg:551.76ms +step:719/57344 train_time:396614ms step_avg:551.62ms +grad accum step:180/14336 +step:720/57344 train_time:397668ms step_avg:552.32ms +step:721/57344 train_time:397686ms step_avg:551.58ms +step:722/57344 train_time:397903ms step_avg:551.11ms +step:723/57344 train_time:398353ms step_avg:550.97ms +grad accum step:181/14336 +step:724/57344 train_time:399409ms step_avg:551.67ms +step:725/57344 train_time:399427ms step_avg:550.93ms +step:726/57344 train_time:399644ms step_avg:550.47ms +step:727/57344 train_time:400092ms step_avg:550.33ms +grad accum step:182/14336 +step:728/57344 train_time:401151ms step_avg:551.03ms +step:729/57344 train_time:401169ms step_avg:550.30ms +step:730/57344 train_time:401389ms step_avg:549.85ms +step:731/57344 train_time:401843ms step_avg:549.72ms +grad accum step:183/14336 +step:732/57344 train_time:402910ms step_avg:550.42ms +step:733/57344 train_time:402927ms step_avg:549.70ms +step:734/57344 train_time:403146ms step_avg:549.24ms +step:735/57344 train_time:403599ms step_avg:549.11ms +grad accum step:184/14336 +step:736/57344 train_time:404670ms step_avg:549.82ms +step:737/57344 train_time:404688ms step_avg:549.10ms +step:738/57344 train_time:404906ms step_avg:548.65ms +step:739/57344 train_time:405359ms step_avg:548.52ms +grad accum step:185/14336 +step:740/57344 train_time:406425ms step_avg:549.22ms +step:741/57344 train_time:406442ms step_avg:548.51ms +step:742/57344 train_time:406661ms step_avg:548.06ms +step:743/57344 train_time:407116ms step_avg:547.94ms +grad accum step:186/14336 +step:744/57344 train_time:408185ms step_avg:548.64ms +step:745/57344 train_time:408203ms step_avg:547.92ms +step:746/57344 train_time:408421ms step_avg:547.48ms +step:747/57344 train_time:408876ms step_avg:547.36ms +grad accum step:187/14336 +step:748/57344 train_time:409941ms step_avg:548.05ms +step:749/57344 train_time:409959ms step_avg:547.34ms +step:750/57344 train_time:410177ms step_avg:546.90ms +step:751/57344 train_time:410631ms step_avg:546.78ms +grad accum step:188/14336 +step:752/57344 train_time:411699ms step_avg:547.47ms +step:753/57344 train_time:411717ms step_avg:546.77ms +step:754/57344 train_time:411935ms step_avg:546.33ms +step:755/57344 train_time:412389ms step_avg:546.21ms +grad accum step:189/14336 +step:756/57344 train_time:413453ms step_avg:546.90ms +step:757/57344 train_time:413471ms step_avg:546.20ms +step:758/57344 train_time:413691ms step_avg:545.77ms +step:759/57344 train_time:414146ms step_avg:545.65ms +grad accum step:190/14336 +step:760/57344 train_time:415212ms step_avg:546.33ms +step:761/57344 train_time:415230ms step_avg:545.64ms +step:762/57344 train_time:415447ms step_avg:545.21ms +step:763/57344 train_time:415901ms step_avg:545.09ms +grad accum step:191/14336 +step:764/57344 train_time:416969ms step_avg:545.77ms +step:765/57344 train_time:416987ms step_avg:545.08ms +step:766/57344 train_time:417206ms step_avg:544.66ms +step:767/57344 train_time:417660ms step_avg:544.54ms +grad accum step:192/14336 +step:768/57344 train_time:418727ms step_avg:545.22ms +step:768/57344 val_loss:7.814738 train_time:418728ms step_avg:545.22ms +step:769/57344 train_time:418740ms step_avg:544.52ms +step:770/57344 train_time:418935ms step_avg:544.07ms +step:771/57344 train_time:419388ms step_avg:543.95ms +grad accum step:193/14336 +step:772/57344 train_time:420457ms step_avg:544.63ms +step:773/57344 train_time:420474ms step_avg:543.95ms +step:774/57344 train_time:420693ms step_avg:543.53ms +step:775/57344 train_time:421145ms step_avg:543.41ms +grad accum step:194/14336 +step:776/57344 train_time:422209ms step_avg:544.08ms +step:777/57344 train_time:422227ms step_avg:543.41ms +step:778/57344 train_time:422446ms step_avg:542.99ms +step:779/57344 train_time:422901ms step_avg:542.88ms +grad accum step:195/14336 +step:780/57344 train_time:423967ms step_avg:543.55ms +step:781/57344 train_time:423984ms step_avg:542.87ms +step:782/57344 train_time:424203ms step_avg:542.46ms +step:783/57344 train_time:424659ms step_avg:542.35ms +grad accum step:196/14336 +step:784/57344 train_time:425727ms step_avg:543.02ms +step:785/57344 train_time:425744ms step_avg:542.35ms +step:786/57344 train_time:425963ms step_avg:541.94ms +step:787/57344 train_time:426418ms step_avg:541.83ms +grad accum step:197/14336 +step:788/57344 train_time:427486ms step_avg:542.50ms +step:789/57344 train_time:427504ms step_avg:541.83ms +step:790/57344 train_time:427723ms step_avg:541.42ms +step:791/57344 train_time:428177ms step_avg:541.31ms +grad accum step:198/14336 +step:792/57344 train_time:429246ms step_avg:541.98ms +step:793/57344 train_time:429264ms step_avg:541.32ms +step:794/57344 train_time:429482ms step_avg:540.91ms +step:795/57344 train_time:429937ms step_avg:540.80ms +grad accum step:199/14336 +step:796/57344 train_time:431007ms step_avg:541.47ms +step:797/57344 train_time:431024ms step_avg:540.81ms +step:798/57344 train_time:431242ms step_avg:540.40ms +step:799/57344 train_time:431698ms step_avg:540.30ms +grad accum step:200/14336 +step:800/57344 train_time:432766ms step_avg:540.96ms +step:801/57344 train_time:432784ms step_avg:540.30ms +step:802/57344 train_time:433001ms step_avg:539.90ms +step:803/57344 train_time:433456ms step_avg:539.80ms +grad accum step:201/14336 +step:804/57344 train_time:434526ms step_avg:540.45ms +step:805/57344 train_time:434543ms step_avg:539.81ms +step:806/57344 train_time:434762ms step_avg:539.41ms +step:807/57344 train_time:435216ms step_avg:539.30ms +grad accum step:202/14336 +step:808/57344 train_time:436284ms step_avg:539.96ms +step:809/57344 train_time:436301ms step_avg:539.31ms +step:810/57344 train_time:436520ms step_avg:538.91ms +step:811/57344 train_time:436976ms step_avg:538.81ms +grad accum step:203/14336 +step:812/57344 train_time:438044ms step_avg:539.46ms +step:813/57344 train_time:438062ms step_avg:538.82ms +step:814/57344 train_time:438280ms step_avg:538.43ms +step:815/57344 train_time:438737ms step_avg:538.33ms +grad accum step:204/14336 +step:816/57344 train_time:439803ms step_avg:538.97ms +step:817/57344 train_time:439821ms step_avg:538.34ms +step:818/57344 train_time:440041ms step_avg:537.95ms +step:819/57344 train_time:440497ms step_avg:537.85ms +grad accum step:205/14336 +step:820/57344 train_time:441564ms step_avg:538.49ms +step:821/57344 train_time:441582ms step_avg:537.86ms +step:822/57344 train_time:441801ms step_avg:537.47ms +step:823/57344 train_time:442259ms step_avg:537.37ms +grad accum step:206/14336 +step:824/57344 train_time:443325ms step_avg:538.02ms +step:825/57344 train_time:443342ms step_avg:537.38ms +step:826/57344 train_time:443561ms step_avg:537.00ms +step:827/57344 train_time:444018ms step_avg:536.90ms +grad accum step:207/14336 +step:828/57344 train_time:445087ms step_avg:537.55ms +step:829/57344 train_time:445105ms step_avg:536.92ms +step:830/57344 train_time:445324ms step_avg:536.53ms +step:831/57344 train_time:445780ms step_avg:536.44ms +grad accum step:208/14336 +step:832/57344 train_time:446848ms step_avg:537.08ms +step:832/57344 val_loss:7.819521 train_time:446848ms step_avg:537.08ms +step:833/57344 train_time:446860ms step_avg:536.45ms +step:834/57344 train_time:447055ms step_avg:536.04ms +step:835/57344 train_time:447511ms step_avg:535.94ms +grad accum step:209/14336 +step:836/57344 train_time:448581ms step_avg:536.58ms +step:837/57344 train_time:448599ms step_avg:535.96ms +step:838/57344 train_time:448819ms step_avg:535.58ms +step:839/57344 train_time:449273ms step_avg:535.49ms +grad accum step:210/14336 +step:840/57344 train_time:450344ms step_avg:536.12ms +step:841/57344 train_time:450361ms step_avg:535.51ms +step:842/57344 train_time:450580ms step_avg:535.13ms +step:843/57344 train_time:451037ms step_avg:535.04ms +grad accum step:211/14336 +step:844/57344 train_time:452105ms step_avg:535.67ms +step:845/57344 train_time:452122ms step_avg:535.06ms +step:846/57344 train_time:452342ms step_avg:534.68ms +step:847/57344 train_time:452798ms step_avg:534.59ms +grad accum step:212/14336 +step:848/57344 train_time:453866ms step_avg:535.22ms +step:849/57344 train_time:453884ms step_avg:534.61ms +step:850/57344 train_time:454103ms step_avg:534.24ms +step:851/57344 train_time:454559ms step_avg:534.15ms +grad accum step:213/14336 +step:852/57344 train_time:455627ms step_avg:534.77ms +step:853/57344 train_time:455644ms step_avg:534.17ms +step:854/57344 train_time:455864ms step_avg:533.80ms +step:855/57344 train_time:456320ms step_avg:533.71ms +grad accum step:214/14336 +step:856/57344 train_time:457386ms step_avg:534.33ms +step:857/57344 train_time:457404ms step_avg:533.73ms +step:858/57344 train_time:457623ms step_avg:533.36ms +step:859/57344 train_time:458079ms step_avg:533.27ms +grad accum step:215/14336 +step:860/57344 train_time:459144ms step_avg:533.89ms +step:861/57344 train_time:459162ms step_avg:533.29ms +step:862/57344 train_time:459382ms step_avg:532.93ms +step:863/57344 train_time:459838ms step_avg:532.84ms +grad accum step:216/14336 +step:864/57344 train_time:460906ms step_avg:533.46ms +step:865/57344 train_time:460924ms step_avg:532.86ms +step:866/57344 train_time:461143ms step_avg:532.50ms +step:867/57344 train_time:461599ms step_avg:532.41ms +grad accum step:217/14336 +step:868/57344 train_time:462665ms step_avg:533.02ms +step:869/57344 train_time:462683ms step_avg:532.43ms +step:870/57344 train_time:462902ms step_avg:532.07ms +step:871/57344 train_time:463357ms step_avg:531.98ms +grad accum step:218/14336 +step:872/57344 train_time:464427ms step_avg:532.60ms +step:873/57344 train_time:464444ms step_avg:532.01ms +step:874/57344 train_time:464664ms step_avg:531.65ms +step:875/57344 train_time:465118ms step_avg:531.56ms +grad accum step:219/14336 +step:876/57344 train_time:466189ms step_avg:532.18ms +step:877/57344 train_time:466207ms step_avg:531.59ms +step:878/57344 train_time:466427ms step_avg:531.24ms +step:879/57344 train_time:466883ms step_avg:531.15ms +grad accum step:220/14336 +step:880/57344 train_time:467952ms step_avg:531.76ms +step:881/57344 train_time:467970ms step_avg:531.18ms +step:882/57344 train_time:468190ms step_avg:530.83ms +step:883/57344 train_time:468647ms step_avg:530.74ms +grad accum step:221/14336 +step:884/57344 train_time:469717ms step_avg:531.35ms +step:885/57344 train_time:469734ms step_avg:530.77ms +step:886/57344 train_time:469954ms step_avg:530.42ms +step:887/57344 train_time:470409ms step_avg:530.34ms +grad accum step:222/14336 +step:888/57344 train_time:471479ms step_avg:530.95ms +step:889/57344 train_time:471497ms step_avg:530.37ms +step:890/57344 train_time:471717ms step_avg:530.02ms +step:891/57344 train_time:472173ms step_avg:529.94ms +grad accum step:223/14336 +step:892/57344 train_time:473240ms step_avg:530.54ms +step:893/57344 train_time:473258ms step_avg:529.96ms +step:894/57344 train_time:473477ms step_avg:529.62ms +step:895/57344 train_time:473933ms step_avg:529.53ms +grad accum step:224/14336 +step:896/57344 train_time:475002ms step_avg:530.14ms +step:896/57344 val_loss:7.808165 train_time:475003ms step_avg:530.14ms +step:897/57344 train_time:475015ms step_avg:529.56ms +step:898/57344 train_time:475285ms step_avg:529.27ms +step:899/57344 train_time:475739ms step_avg:529.19ms +grad accum step:225/14336 +step:900/57344 train_time:476805ms step_avg:529.78ms +step:901/57344 train_time:476823ms step_avg:529.22ms +step:902/57344 train_time:477043ms step_avg:528.87ms +step:903/57344 train_time:477500ms step_avg:528.79ms +grad accum step:226/14336 +step:904/57344 train_time:478569ms step_avg:529.39ms +step:905/57344 train_time:478586ms step_avg:528.82ms +step:906/57344 train_time:478805ms step_avg:528.48ms +step:907/57344 train_time:479261ms step_avg:528.40ms +grad accum step:227/14336 +step:908/57344 train_time:480331ms step_avg:529.00ms +step:909/57344 train_time:480348ms step_avg:528.44ms +step:910/57344 train_time:480567ms step_avg:528.10ms +step:911/57344 train_time:481021ms step_avg:528.01ms +grad accum step:228/14336 +step:912/57344 train_time:482090ms step_avg:528.61ms +step:913/57344 train_time:482108ms step_avg:528.05ms +step:914/57344 train_time:482327ms step_avg:527.71ms +step:915/57344 train_time:482783ms step_avg:527.63ms +grad accum step:229/14336 +step:916/57344 train_time:483853ms step_avg:528.22ms +step:917/57344 train_time:483871ms step_avg:527.67ms +step:918/57344 train_time:484090ms step_avg:527.33ms +step:919/57344 train_time:484544ms step_avg:527.25ms +grad accum step:230/14336 +step:920/57344 train_time:485614ms step_avg:527.84ms +step:921/57344 train_time:485631ms step_avg:527.29ms +step:922/57344 train_time:485851ms step_avg:526.95ms +step:923/57344 train_time:486306ms step_avg:526.88ms +grad accum step:231/14336 +step:924/57344 train_time:487377ms step_avg:527.46ms +step:925/57344 train_time:487394ms step_avg:526.91ms +step:926/57344 train_time:487614ms step_avg:526.58ms +step:927/57344 train_time:488071ms step_avg:526.51ms +grad accum step:232/14336 +step:928/57344 train_time:489137ms step_avg:527.09ms +step:929/57344 train_time:489154ms step_avg:526.54ms +step:930/57344 train_time:489375ms step_avg:526.21ms +step:931/57344 train_time:495550ms step_avg:532.28ms +grad accum step:233/14336 +step:932/57344 train_time:496449ms step_avg:532.67ms +step:933/57344 train_time:496466ms step_avg:532.12ms +step:934/57344 train_time:496685ms step_avg:531.78ms +step:935/57344 train_time:497137ms step_avg:531.70ms +grad accum step:234/14336 +step:936/57344 train_time:498200ms step_avg:532.27ms +step:937/57344 train_time:498218ms step_avg:531.72ms +step:938/57344 train_time:498436ms step_avg:531.38ms +step:939/57344 train_time:498891ms step_avg:531.30ms +grad accum step:235/14336 +step:940/57344 train_time:499957ms step_avg:531.87ms +step:941/57344 train_time:499975ms step_avg:531.32ms +step:942/57344 train_time:500193ms step_avg:530.99ms +step:943/57344 train_time:500647ms step_avg:530.91ms +grad accum step:236/14336 +step:944/57344 train_time:501714ms step_avg:531.48ms +step:945/57344 train_time:501732ms step_avg:530.93ms +step:946/57344 train_time:501949ms step_avg:530.60ms +step:947/57344 train_time:502403ms step_avg:530.52ms +grad accum step:237/14336 +step:948/57344 train_time:503469ms step_avg:531.09ms +step:949/57344 train_time:503487ms step_avg:530.54ms +step:950/57344 train_time:503706ms step_avg:530.22ms +step:951/57344 train_time:504161ms step_avg:530.14ms +grad accum step:238/14336 +step:952/57344 train_time:505228ms step_avg:530.70ms +step:953/57344 train_time:505246ms step_avg:530.16ms +step:954/57344 train_time:505465ms step_avg:529.84ms +step:955/57344 train_time:505921ms step_avg:529.76ms +grad accum step:239/14336 +step:956/57344 train_time:506988ms step_avg:530.32ms +step:957/57344 train_time:507005ms step_avg:529.79ms +step:958/57344 train_time:507223ms step_avg:529.46ms +step:959/57344 train_time:507679ms step_avg:529.38ms +grad accum step:240/14336 +step:960/57344 train_time:508745ms step_avg:529.94ms +step:960/57344 val_loss:8.015900 train_time:508745ms step_avg:529.94ms +step:961/57344 train_time:508757ms step_avg:529.40ms +step:962/57344 train_time:508953ms step_avg:529.06ms +step:963/57344 train_time:509412ms step_avg:528.98ms +grad accum step:241/14336 +step:964/57344 train_time:510480ms step_avg:529.54ms +step:965/57344 train_time:510498ms step_avg:529.01ms +step:966/57344 train_time:510717ms step_avg:528.69ms +step:967/57344 train_time:511173ms step_avg:528.62ms +grad accum step:242/14336 +step:968/57344 train_time:512243ms step_avg:529.18ms +step:969/57344 train_time:512261ms step_avg:528.65ms +step:970/57344 train_time:512481ms step_avg:528.33ms +step:971/57344 train_time:512939ms step_avg:528.26ms +grad accum step:243/14336 +step:972/57344 train_time:514004ms step_avg:528.81ms +step:973/57344 train_time:514022ms step_avg:528.29ms +step:974/57344 train_time:514241ms step_avg:527.97ms +step:975/57344 train_time:514699ms step_avg:527.90ms +grad accum step:244/14336 +step:976/57344 train_time:515764ms step_avg:528.45ms +step:977/57344 train_time:515782ms step_avg:527.92ms +step:978/57344 train_time:516002ms step_avg:527.61ms +step:979/57344 train_time:516458ms step_avg:527.54ms +grad accum step:245/14336 +step:980/57344 train_time:517525ms step_avg:528.09ms +step:981/57344 train_time:517542ms step_avg:527.57ms +step:982/57344 train_time:517761ms step_avg:527.25ms +step:983/57344 train_time:518218ms step_avg:527.18ms +grad accum step:246/14336 +step:984/57344 train_time:519284ms step_avg:527.73ms +step:985/57344 train_time:519302ms step_avg:527.21ms +step:986/57344 train_time:519522ms step_avg:526.90ms +step:987/57344 train_time:519978ms step_avg:526.83ms +grad accum step:247/14336 +step:988/57344 train_time:521045ms step_avg:527.37ms +step:989/57344 train_time:521063ms step_avg:526.86ms +step:990/57344 train_time:521282ms step_avg:526.55ms +step:991/57344 train_time:521738ms step_avg:526.48ms +grad accum step:248/14336 +step:992/57344 train_time:522805ms step_avg:527.02ms +step:993/57344 train_time:522822ms step_avg:526.51ms +step:994/57344 train_time:523042ms step_avg:526.20ms +step:995/57344 train_time:523497ms step_avg:526.13ms +grad accum step:249/14336 +step:996/57344 train_time:524563ms step_avg:526.67ms +step:997/57344 train_time:524581ms step_avg:526.16ms +step:998/57344 train_time:524801ms step_avg:525.85ms +step:999/57344 train_time:525256ms step_avg:525.78ms +grad accum step:250/14336 +step:1000/57344 train_time:526325ms step_avg:526.32ms +step:1001/57344 train_time:526343ms step_avg:525.82ms +step:1002/57344 train_time:526563ms step_avg:525.51ms +step:1003/57344 train_time:527017ms step_avg:525.44ms +grad accum step:251/14336 +step:1004/57344 train_time:528084ms step_avg:525.98ms +step:1005/57344 train_time:528102ms step_avg:525.47ms +step:1006/57344 train_time:528322ms step_avg:525.17ms +step:1007/57344 train_time:528776ms step_avg:525.10ms +grad accum step:252/14336 +step:1008/57344 train_time:529846ms step_avg:525.64ms +step:1009/57344 train_time:529864ms step_avg:525.14ms +step:1010/57344 train_time:530084ms step_avg:524.84ms +step:1011/57344 train_time:530540ms step_avg:524.77ms +grad accum step:253/14336 +step:1012/57344 train_time:531610ms step_avg:525.31ms +step:1013/57344 train_time:531627ms step_avg:524.81ms +step:1014/57344 train_time:531847ms step_avg:524.50ms +step:1015/57344 train_time:532303ms step_avg:524.44ms +grad accum step:254/14336 +step:1016/57344 train_time:533374ms step_avg:524.97ms +step:1017/57344 train_time:533391ms step_avg:524.47ms +step:1018/57344 train_time:533610ms step_avg:524.18ms +step:1019/57344 train_time:534065ms step_avg:524.11ms +grad accum step:255/14336 +step:1020/57344 train_time:535135ms step_avg:524.64ms +step:1021/57344 train_time:535152ms step_avg:524.15ms +step:1022/57344 train_time:535372ms step_avg:523.85ms +step:1023/57344 train_time:535829ms step_avg:523.78ms +grad accum step:256/14336 +step:1024/57344 train_time:536895ms step_avg:524.31ms +step:1024/57344 val_loss:7.776239 train_time:536896ms step_avg:524.31ms +step:1025/57344 train_time:536908ms step_avg:523.81ms +step:1026/57344 train_time:537104ms step_avg:523.49ms +step:1027/57344 train_time:537561ms step_avg:523.43ms +grad accum step:257/14336 +step:1028/57344 train_time:538630ms step_avg:523.96ms +step:1029/57344 train_time:538647ms step_avg:523.47ms +step:1030/57344 train_time:538866ms step_avg:523.17ms +step:1031/57344 train_time:539323ms step_avg:523.11ms +grad accum step:258/14336 +step:1032/57344 train_time:540393ms step_avg:523.64ms +step:1033/57344 train_time:540410ms step_avg:523.15ms +step:1034/57344 train_time:540630ms step_avg:522.85ms +step:1035/57344 train_time:541088ms step_avg:522.79ms +grad accum step:259/14336 +step:1036/57344 train_time:542157ms step_avg:523.32ms +step:1037/57344 train_time:542175ms step_avg:522.83ms +step:1038/57344 train_time:542395ms step_avg:522.54ms +step:1039/57344 train_time:542849ms step_avg:522.47ms +grad accum step:260/14336 +step:1040/57344 train_time:543920ms step_avg:523.00ms +step:1041/57344 train_time:543938ms step_avg:522.51ms +step:1042/57344 train_time:544156ms step_avg:522.22ms +step:1043/57344 train_time:544613ms step_avg:522.16ms +grad accum step:261/14336 +step:1044/57344 train_time:545679ms step_avg:522.68ms +step:1045/57344 train_time:545697ms step_avg:522.20ms +step:1046/57344 train_time:545917ms step_avg:521.91ms +step:1047/57344 train_time:546375ms step_avg:521.85ms +grad accum step:262/14336 +step:1048/57344 train_time:547444ms step_avg:522.37ms +step:1049/57344 train_time:547461ms step_avg:521.89ms +step:1050/57344 train_time:547681ms step_avg:521.60ms +step:1051/57344 train_time:548135ms step_avg:521.54ms +grad accum step:263/14336 +step:1052/57344 train_time:549202ms step_avg:522.05ms +step:1053/57344 train_time:549219ms step_avg:521.58ms +step:1054/57344 train_time:549439ms step_avg:521.29ms +step:1055/57344 train_time:549892ms step_avg:521.22ms +grad accum step:264/14336 +step:1056/57344 train_time:550959ms step_avg:521.74ms +step:1057/57344 train_time:550976ms step_avg:521.26ms +step:1058/57344 train_time:551196ms step_avg:520.98ms +step:1059/57344 train_time:551652ms step_avg:520.92ms +grad accum step:265/14336 +step:1060/57344 train_time:552719ms step_avg:521.43ms +step:1061/57344 train_time:552736ms step_avg:520.96ms +step:1062/57344 train_time:552956ms step_avg:520.67ms +step:1063/57344 train_time:553410ms step_avg:520.61ms +grad accum step:266/14336 +step:1064/57344 train_time:554478ms step_avg:521.13ms +step:1065/57344 train_time:554496ms step_avg:520.65ms +step:1066/57344 train_time:554714ms step_avg:520.37ms +step:1067/57344 train_time:555170ms step_avg:520.31ms +grad accum step:267/14336 +step:1068/57344 train_time:556236ms step_avg:520.82ms +step:1069/57344 train_time:556254ms step_avg:520.35ms +step:1070/57344 train_time:556475ms step_avg:520.07ms +step:1071/57344 train_time:556930ms step_avg:520.01ms +grad accum step:268/14336 +step:1072/57344 train_time:557995ms step_avg:520.52ms +step:1073/57344 train_time:558013ms step_avg:520.05ms +step:1074/57344 train_time:558232ms step_avg:519.77ms +step:1075/57344 train_time:558688ms step_avg:519.71ms +grad accum step:269/14336 +step:1076/57344 train_time:559753ms step_avg:520.22ms +step:1077/57344 train_time:559771ms step_avg:519.75ms +step:1078/57344 train_time:559990ms step_avg:519.47ms +step:1079/57344 train_time:560446ms step_avg:519.41ms +grad accum step:270/14336 +step:1080/57344 train_time:561514ms step_avg:519.92ms +step:1081/57344 train_time:561531ms step_avg:519.46ms +step:1082/57344 train_time:561752ms step_avg:519.18ms +step:1083/57344 train_time:562205ms step_avg:519.12ms +grad accum step:271/14336 +step:1084/57344 train_time:563271ms step_avg:519.62ms +step:1085/57344 train_time:563289ms step_avg:519.16ms +step:1086/57344 train_time:563508ms step_avg:518.88ms +step:1087/57344 train_time:563964ms step_avg:518.83ms +grad accum step:272/14336 +step:1088/57344 train_time:565031ms step_avg:519.33ms +step:1088/57344 val_loss:7.767220 train_time:565031ms step_avg:519.33ms +step:1089/57344 train_time:565043ms step_avg:518.86ms +step:1090/57344 train_time:565817ms step_avg:519.10ms +step:1091/57344 train_time:566038ms step_avg:518.83ms +grad accum step:273/14336 +step:1092/57344 train_time:567108ms step_avg:519.33ms +step:1093/57344 train_time:567125ms step_avg:518.87ms +step:1094/57344 train_time:567344ms step_avg:518.60ms +step:1095/57344 train_time:567801ms step_avg:518.54ms +grad accum step:274/14336 +step:1096/57344 train_time:568870ms step_avg:519.04ms +step:1097/57344 train_time:568888ms step_avg:518.58ms +step:1098/57344 train_time:569108ms step_avg:518.31ms +step:1099/57344 train_time:569563ms step_avg:518.26ms +grad accum step:275/14336 +step:1100/57344 train_time:570634ms step_avg:518.76ms +step:1101/57344 train_time:570651ms step_avg:518.30ms +step:1102/57344 train_time:570871ms step_avg:518.03ms +step:1103/57344 train_time:571328ms step_avg:517.98ms +grad accum step:276/14336 +step:1104/57344 train_time:572397ms step_avg:518.48ms +step:1105/57344 train_time:572415ms step_avg:518.02ms +step:1106/57344 train_time:572634ms step_avg:517.75ms +step:1107/57344 train_time:573090ms step_avg:517.70ms +grad accum step:277/14336 +step:1108/57344 train_time:574160ms step_avg:518.20ms +step:1109/57344 train_time:574178ms step_avg:517.74ms +step:1110/57344 train_time:574397ms step_avg:517.48ms +step:1111/57344 train_time:574856ms step_avg:517.42ms +grad accum step:278/14336 +step:1112/57344 train_time:575923ms step_avg:517.92ms +step:1113/57344 train_time:575940ms step_avg:517.47ms +step:1114/57344 train_time:576159ms step_avg:517.20ms +step:1115/57344 train_time:576617ms step_avg:517.14ms +grad accum step:279/14336 +step:1116/57344 train_time:577683ms step_avg:517.64ms +step:1117/57344 train_time:577701ms step_avg:517.19ms +step:1118/57344 train_time:577920ms step_avg:516.92ms +step:1119/57344 train_time:578378ms step_avg:516.87ms +grad accum step:280/14336 +step:1120/57344 train_time:579443ms step_avg:517.36ms +step:1121/57344 train_time:579461ms step_avg:516.91ms +step:1122/57344 train_time:579681ms step_avg:516.65ms +step:1123/57344 train_time:580138ms step_avg:516.60ms +grad accum step:281/14336 +step:1124/57344 train_time:581204ms step_avg:517.09ms +step:1125/57344 train_time:581222ms step_avg:516.64ms +step:1126/57344 train_time:581443ms step_avg:516.38ms +step:1127/57344 train_time:581899ms step_avg:516.33ms +grad accum step:282/14336 +step:1128/57344 train_time:582966ms step_avg:516.81ms +step:1129/57344 train_time:582984ms step_avg:516.37ms +step:1130/57344 train_time:583203ms step_avg:516.11ms +step:1131/57344 train_time:583658ms step_avg:516.05ms +grad accum step:283/14336 +step:1132/57344 train_time:584725ms step_avg:516.54ms +step:1133/57344 train_time:584743ms step_avg:516.10ms +step:1134/57344 train_time:584962ms step_avg:515.84ms +step:1135/57344 train_time:585418ms step_avg:515.79ms +grad accum step:284/14336 +step:1136/57344 train_time:586486ms step_avg:516.27ms +step:1137/57344 train_time:586504ms step_avg:515.83ms +step:1138/57344 train_time:586725ms step_avg:515.58ms +step:1139/57344 train_time:587179ms step_avg:515.52ms +grad accum step:285/14336 +step:1140/57344 train_time:588247ms step_avg:516.01ms +step:1141/57344 train_time:588264ms step_avg:515.57ms +step:1142/57344 train_time:588484ms step_avg:515.31ms +step:1143/57344 train_time:588939ms step_avg:515.26ms +grad accum step:286/14336 +step:1144/57344 train_time:590008ms step_avg:515.74ms +step:1145/57344 train_time:590026ms step_avg:515.31ms +step:1146/57344 train_time:590245ms step_avg:515.05ms +step:1147/57344 train_time:590701ms step_avg:515.00ms +grad accum step:287/14336 +step:1148/57344 train_time:591771ms step_avg:515.48ms +step:1149/57344 train_time:591788ms step_avg:515.05ms +step:1150/57344 train_time:592009ms step_avg:514.79ms +step:1151/57344 train_time:592465ms step_avg:514.74ms +grad accum step:288/14336 +step:1152/57344 train_time:593535ms step_avg:515.22ms +step:1152/57344 val_loss:7.740644 train_time:593535ms step_avg:515.22ms +step:1153/57344 train_time:593547ms step_avg:514.78ms +step:1154/57344 train_time:593743ms step_avg:514.51ms +step:1155/57344 train_time:594197ms step_avg:514.46ms +grad accum step:289/14336 +step:1156/57344 train_time:595265ms step_avg:514.94ms +step:1157/57344 train_time:595282ms step_avg:514.51ms +step:1158/57344 train_time:595501ms step_avg:514.25ms +step:1159/57344 train_time:595960ms step_avg:514.20ms +grad accum step:290/14336 +step:1160/57344 train_time:597029ms step_avg:514.68ms +step:1161/57344 train_time:597046ms step_avg:514.25ms +step:1162/57344 train_time:597267ms step_avg:514.00ms +step:1163/57344 train_time:597722ms step_avg:513.95ms +grad accum step:291/14336 +step:1164/57344 train_time:598793ms step_avg:514.43ms +step:1165/57344 train_time:598811ms step_avg:514.00ms +step:1166/57344 train_time:599029ms step_avg:513.75ms +step:1167/57344 train_time:599485ms step_avg:513.70ms +grad accum step:292/14336 +step:1168/57344 train_time:600556ms step_avg:514.17ms +step:1169/57344 train_time:600573ms step_avg:513.75ms +step:1170/57344 train_time:600793ms step_avg:513.50ms +step:1171/57344 train_time:601249ms step_avg:513.45ms +grad accum step:293/14336 +step:1172/57344 train_time:602318ms step_avg:513.92ms +step:1173/57344 train_time:602336ms step_avg:513.50ms +step:1174/57344 train_time:602555ms step_avg:513.25ms +step:1175/57344 train_time:603011ms step_avg:513.20ms +grad accum step:294/14336 +step:1176/57344 train_time:604080ms step_avg:513.67ms +step:1177/57344 train_time:604098ms step_avg:513.25ms +step:1178/57344 train_time:604318ms step_avg:513.00ms +step:1179/57344 train_time:604775ms step_avg:512.96ms +grad accum step:295/14336 +step:1180/57344 train_time:605843ms step_avg:513.43ms +step:1181/57344 train_time:605861ms step_avg:513.01ms +step:1182/57344 train_time:606080ms step_avg:512.76ms +step:1183/57344 train_time:606536ms step_avg:512.71ms +grad accum step:296/14336 +step:1184/57344 train_time:607609ms step_avg:513.18ms +step:1185/57344 train_time:607626ms step_avg:512.76ms +step:1186/57344 train_time:607845ms step_avg:512.52ms +step:1187/57344 train_time:608304ms step_avg:512.47ms +grad accum step:297/14336 +step:1188/57344 train_time:609373ms step_avg:512.94ms +step:1189/57344 train_time:609390ms step_avg:512.52ms +step:1190/57344 train_time:609610ms step_avg:512.28ms +step:1191/57344 train_time:610066ms step_avg:512.23ms +grad accum step:298/14336 +step:1192/57344 train_time:611135ms step_avg:512.70ms +step:1193/57344 train_time:611153ms step_avg:512.28ms +step:1194/57344 train_time:611373ms step_avg:512.04ms +step:1195/57344 train_time:611828ms step_avg:511.99ms +grad accum step:299/14336 +step:1196/57344 train_time:612897ms step_avg:512.46ms +step:1197/57344 train_time:612914ms step_avg:512.04ms +step:1198/57344 train_time:613135ms step_avg:511.80ms +step:1199/57344 train_time:613592ms step_avg:511.75ms +grad accum step:300/14336 +step:1200/57344 train_time:614664ms step_avg:512.22ms +step:1201/57344 train_time:614682ms step_avg:511.81ms +step:1202/57344 train_time:614901ms step_avg:511.56ms +step:1203/57344 train_time:615357ms step_avg:511.52ms +grad accum step:301/14336 +step:1204/57344 train_time:616427ms step_avg:511.98ms +step:1205/57344 train_time:616444ms step_avg:511.57ms +step:1206/57344 train_time:616664ms step_avg:511.33ms +step:1207/57344 train_time:617121ms step_avg:511.28ms +grad accum step:302/14336 +step:1208/57344 train_time:618188ms step_avg:511.74ms +step:1209/57344 train_time:618205ms step_avg:511.34ms +step:1210/57344 train_time:618425ms step_avg:511.10ms +step:1211/57344 train_time:618881ms step_avg:511.05ms +grad accum step:303/14336 +step:1212/57344 train_time:619950ms step_avg:511.51ms +step:1213/57344 train_time:619968ms step_avg:511.10ms +step:1214/57344 train_time:620187ms step_avg:510.86ms +step:1215/57344 train_time:620641ms step_avg:510.82ms +grad accum step:304/14336 +step:1216/57344 train_time:621712ms step_avg:511.28ms +step:1216/57344 val_loss:7.748327 train_time:621713ms step_avg:511.28ms +step:1217/57344 train_time:621725ms step_avg:510.87ms +step:1218/57344 train_time:621920ms step_avg:510.61ms +step:1219/57344 train_time:622377ms step_avg:510.56ms +grad accum step:305/14336 +step:1220/57344 train_time:623445ms step_avg:511.02ms +step:1221/57344 train_time:623463ms step_avg:510.62ms +step:1222/57344 train_time:623683ms step_avg:510.38ms +step:1223/57344 train_time:624139ms step_avg:510.33ms +grad accum step:306/14336 +step:1224/57344 train_time:625209ms step_avg:510.79ms +step:1225/57344 train_time:625227ms step_avg:510.39ms +step:1226/57344 train_time:625446ms step_avg:510.15ms +step:1227/57344 train_time:625901ms step_avg:510.11ms +grad accum step:307/14336 +step:1228/57344 train_time:626972ms step_avg:510.56ms +step:1229/57344 train_time:626990ms step_avg:510.16ms +step:1230/57344 train_time:627210ms step_avg:509.93ms +step:1231/57344 train_time:627665ms step_avg:509.88ms +grad accum step:308/14336 +step:1232/57344 train_time:628735ms step_avg:510.34ms +step:1233/57344 train_time:628752ms step_avg:509.94ms +step:1234/57344 train_time:628972ms step_avg:509.70ms +step:1235/57344 train_time:629429ms step_avg:509.66ms +grad accum step:309/14336 +step:1236/57344 train_time:630497ms step_avg:510.11ms +step:1237/57344 train_time:630515ms step_avg:509.71ms +step:1238/57344 train_time:630735ms step_avg:509.48ms +step:1239/57344 train_time:631192ms step_avg:509.44ms +grad accum step:310/14336 +step:1240/57344 train_time:632262ms step_avg:509.89ms +step:1241/57344 train_time:632279ms step_avg:509.49ms +step:1242/57344 train_time:632499ms step_avg:509.26ms +step:1243/57344 train_time:632956ms step_avg:509.22ms +grad accum step:311/14336 +step:1244/57344 train_time:634025ms step_avg:509.67ms +step:1245/57344 train_time:634042ms step_avg:509.27ms +step:1246/57344 train_time:634263ms step_avg:509.04ms +step:1247/57344 train_time:634720ms step_avg:509.00ms +grad accum step:312/14336 +step:1248/57344 train_time:635790ms step_avg:509.45ms +step:1249/57344 train_time:635808ms step_avg:509.05ms +step:1250/57344 train_time:636027ms step_avg:508.82ms +step:1251/57344 train_time:636484ms step_avg:508.78ms +grad accum step:313/14336 +step:1252/57344 train_time:637554ms step_avg:509.23ms +step:1253/57344 train_time:637571ms step_avg:508.84ms +step:1254/57344 train_time:637791ms step_avg:508.61ms +step:1255/57344 train_time:638247ms step_avg:508.56ms +grad accum step:314/14336 +step:1256/57344 train_time:639319ms step_avg:509.01ms +step:1257/57344 train_time:639337ms step_avg:508.62ms +step:1258/57344 train_time:639556ms step_avg:508.39ms +step:1259/57344 train_time:640015ms step_avg:508.35ms +grad accum step:315/14336 +step:1260/57344 train_time:641085ms step_avg:508.80ms +step:1261/57344 train_time:641103ms step_avg:508.41ms +step:1262/57344 train_time:641322ms step_avg:508.18ms +step:1263/57344 train_time:641779ms step_avg:508.14ms +grad accum step:316/14336 +step:1264/57344 train_time:642847ms step_avg:508.58ms +step:1265/57344 train_time:642864ms step_avg:508.19ms +step:1266/57344 train_time:643084ms step_avg:507.97ms +step:1267/57344 train_time:643540ms step_avg:507.92ms +grad accum step:317/14336 +step:1268/57344 train_time:644607ms step_avg:508.36ms +step:1269/57344 train_time:644624ms step_avg:507.98ms +step:1270/57344 train_time:644844ms step_avg:507.75ms +step:1271/57344 train_time:645300ms step_avg:507.71ms +grad accum step:318/14336 +step:1272/57344 train_time:646368ms step_avg:508.15ms +step:1273/57344 train_time:646386ms step_avg:507.77ms +step:1274/57344 train_time:646606ms step_avg:507.54ms +step:1275/57344 train_time:647062ms step_avg:507.50ms +grad accum step:319/14336 +step:1276/57344 train_time:648133ms step_avg:507.94ms +step:1277/57344 train_time:648150ms step_avg:507.56ms +step:1278/57344 train_time:648369ms step_avg:507.33ms +step:1279/57344 train_time:648826ms step_avg:507.29ms +grad accum step:320/14336 +step:1280/57344 train_time:649897ms step_avg:507.73ms +step:1280/57344 val_loss:7.739236 train_time:649898ms step_avg:507.73ms +step:1281/57344 train_time:649910ms step_avg:507.35ms +step:1282/57344 train_time:650106ms step_avg:507.10ms +step:1283/57344 train_time:650564ms step_avg:507.06ms +grad accum step:321/14336 +step:1284/57344 train_time:651636ms step_avg:507.50ms +step:1285/57344 train_time:651653ms step_avg:507.12ms +step:1286/57344 train_time:651872ms step_avg:506.90ms +step:1287/57344 train_time:652328ms step_avg:506.86ms +grad accum step:322/14336 +step:1288/57344 train_time:653398ms step_avg:507.30ms +step:1289/57344 train_time:653416ms step_avg:506.92ms +step:1290/57344 train_time:653636ms step_avg:506.69ms +step:1291/57344 train_time:654094ms step_avg:506.66ms +grad accum step:323/14336 +step:1292/57344 train_time:655161ms step_avg:507.09ms +step:1293/57344 train_time:655179ms step_avg:506.71ms +step:1294/57344 train_time:655399ms step_avg:506.49ms +step:1295/57344 train_time:655856ms step_avg:506.45ms +grad accum step:324/14336 +step:1296/57344 train_time:656924ms step_avg:506.89ms +step:1297/57344 train_time:656941ms step_avg:506.51ms +step:1298/57344 train_time:657161ms step_avg:506.29ms +step:1299/57344 train_time:657618ms step_avg:506.25ms +grad accum step:325/14336 +step:1300/57344 train_time:658685ms step_avg:506.68ms +step:1301/57344 train_time:658703ms step_avg:506.31ms +step:1302/57344 train_time:658922ms step_avg:506.08ms +step:1303/57344 train_time:659380ms step_avg:506.05ms +grad accum step:326/14336 +step:1304/57344 train_time:660448ms step_avg:506.48ms +step:1305/57344 train_time:660466ms step_avg:506.10ms +step:1306/57344 train_time:660684ms step_avg:505.88ms +step:1307/57344 train_time:661140ms step_avg:505.85ms +grad accum step:327/14336 +step:1308/57344 train_time:662211ms step_avg:506.28ms +step:1309/57344 train_time:662228ms step_avg:505.90ms +step:1310/57344 train_time:662448ms step_avg:505.69ms +step:1311/57344 train_time:662903ms step_avg:505.65ms +grad accum step:328/14336 +step:1312/57344 train_time:663975ms step_avg:506.08ms +step:1313/57344 train_time:663992ms step_avg:505.71ms +step:1314/57344 train_time:664212ms step_avg:505.49ms +step:1315/57344 train_time:664669ms step_avg:505.45ms +grad accum step:329/14336 +step:1316/57344 train_time:665739ms step_avg:505.88ms +step:1317/57344 train_time:665756ms step_avg:505.51ms +step:1318/57344 train_time:665976ms step_avg:505.29ms +step:1319/57344 train_time:666433ms step_avg:505.26ms +grad accum step:330/14336 +step:1320/57344 train_time:667504ms step_avg:505.68ms +step:1321/57344 train_time:667522ms step_avg:505.32ms +step:1322/57344 train_time:667740ms step_avg:505.10ms +step:1323/57344 train_time:668196ms step_avg:505.06ms +grad accum step:331/14336 +step:1324/57344 train_time:669267ms step_avg:505.49ms +step:1325/57344 train_time:669284ms step_avg:505.12ms +step:1326/57344 train_time:669504ms step_avg:504.90ms +step:1327/57344 train_time:669959ms step_avg:504.87ms +grad accum step:332/14336 +step:1328/57344 train_time:671029ms step_avg:505.29ms +step:1329/57344 train_time:671046ms step_avg:504.93ms +step:1330/57344 train_time:671266ms step_avg:504.71ms +step:1331/57344 train_time:671723ms step_avg:504.68ms +grad accum step:333/14336 +step:1332/57344 train_time:672794ms step_avg:505.10ms +step:1333/57344 train_time:672811ms step_avg:504.73ms +step:1334/57344 train_time:673031ms step_avg:504.52ms +step:1335/57344 train_time:673488ms step_avg:504.49ms +grad accum step:334/14336 +step:1336/57344 train_time:674558ms step_avg:504.91ms +step:1337/57344 train_time:674575ms step_avg:504.54ms +step:1338/57344 train_time:674795ms step_avg:504.33ms +step:1339/57344 train_time:675253ms step_avg:504.30ms +grad accum step:335/14336 +step:1340/57344 train_time:676324ms step_avg:504.72ms +step:1341/57344 train_time:676342ms step_avg:504.36ms +step:1342/57344 train_time:676562ms step_avg:504.14ms +step:1343/57344 train_time:677019ms step_avg:504.11ms +grad accum step:336/14336 +step:1344/57344 train_time:678089ms step_avg:504.53ms +step:1344/57344 val_loss:7.731204 train_time:678089ms step_avg:504.53ms +step:1345/57344 train_time:678101ms step_avg:504.16ms +step:1346/57344 train_time:678297ms step_avg:503.94ms +step:1347/57344 train_time:678753ms step_avg:503.90ms +grad accum step:337/14336 +step:1348/57344 train_time:679823ms step_avg:504.32ms +step:1349/57344 train_time:679841ms step_avg:503.96ms +step:1350/57344 train_time:680060ms step_avg:503.75ms +step:1351/57344 train_time:680518ms step_avg:503.71ms +grad accum step:338/14336 +step:1352/57344 train_time:681587ms step_avg:504.13ms +step:1353/57344 train_time:681605ms step_avg:503.77ms +step:1354/57344 train_time:681825ms step_avg:503.56ms +step:1355/57344 train_time:682283ms step_avg:503.53ms +grad accum step:339/14336 +step:1356/57344 train_time:683350ms step_avg:503.95ms +step:1357/57344 train_time:683368ms step_avg:503.59ms +step:1358/57344 train_time:683588ms step_avg:503.38ms +step:1359/57344 train_time:684043ms step_avg:503.34ms +grad accum step:340/14336 +step:1360/57344 train_time:685114ms step_avg:503.76ms +step:1361/57344 train_time:685132ms step_avg:503.40ms +step:1362/57344 train_time:685352ms step_avg:503.20ms +step:1363/57344 train_time:685806ms step_avg:503.16ms +grad accum step:341/14336 +step:1364/57344 train_time:686875ms step_avg:503.57ms +step:1365/57344 train_time:686893ms step_avg:503.22ms +step:1366/57344 train_time:687113ms step_avg:503.01ms +step:1367/57344 train_time:687568ms step_avg:502.98ms +grad accum step:342/14336 +step:1368/57344 train_time:688639ms step_avg:503.39ms +step:1369/57344 train_time:688656ms step_avg:503.04ms +step:1370/57344 train_time:688876ms step_avg:502.83ms +step:1371/57344 train_time:689333ms step_avg:502.80ms +grad accum step:343/14336 +step:1372/57344 train_time:690404ms step_avg:503.21ms +step:1373/57344 train_time:690422ms step_avg:502.86ms +step:1374/57344 train_time:690642ms step_avg:502.65ms +step:1375/57344 train_time:691098ms step_avg:502.62ms +grad accum step:344/14336 +step:1376/57344 train_time:692169ms step_avg:503.03ms +step:1377/57344 train_time:692187ms step_avg:502.68ms +step:1378/57344 train_time:692407ms step_avg:502.47ms +step:1379/57344 train_time:692861ms step_avg:502.44ms +grad accum step:345/14336 +step:1380/57344 train_time:693935ms step_avg:502.85ms +step:1381/57344 train_time:693952ms step_avg:502.50ms +step:1382/57344 train_time:694172ms step_avg:502.30ms +step:1383/57344 train_time:694629ms step_avg:502.26ms +grad accum step:346/14336 +step:1384/57344 train_time:695700ms step_avg:502.67ms +step:1385/57344 train_time:695717ms step_avg:502.32ms +step:1386/57344 train_time:695936ms step_avg:502.12ms +step:1387/57344 train_time:696393ms step_avg:502.09ms +grad accum step:347/14336 +step:1388/57344 train_time:697465ms step_avg:502.50ms +step:1389/57344 train_time:697483ms step_avg:502.15ms +step:1390/57344 train_time:697702ms step_avg:501.94ms +step:1391/57344 train_time:698159ms step_avg:501.91ms +grad accum step:348/14336 +step:1392/57344 train_time:699230ms step_avg:502.32ms +step:1393/57344 train_time:699247ms step_avg:501.97ms +step:1394/57344 train_time:699466ms step_avg:501.77ms +step:1395/57344 train_time:699926ms step_avg:501.74ms +grad accum step:349/14336 +step:1396/57344 train_time:700996ms step_avg:502.15ms +step:1397/57344 train_time:701013ms step_avg:501.80ms +step:1398/57344 train_time:701233ms step_avg:501.60ms +step:1399/57344 train_time:701691ms step_avg:501.57ms +grad accum step:350/14336 +step:1400/57344 train_time:702761ms step_avg:501.97ms +step:1401/57344 train_time:702778ms step_avg:501.63ms +step:1402/57344 train_time:702998ms step_avg:501.42ms +step:1403/57344 train_time:703456ms step_avg:501.39ms +grad accum step:351/14336 +step:1404/57344 train_time:704527ms step_avg:501.80ms +step:1405/57344 train_time:704544ms step_avg:501.45ms +step:1406/57344 train_time:704763ms step_avg:501.25ms +step:1407/57344 train_time:705221ms step_avg:501.22ms +grad accum step:352/14336 +step:1408/57344 train_time:706295ms step_avg:501.63ms +step:1408/57344 val_loss:7.720304 train_time:706296ms step_avg:501.63ms +step:1409/57344 train_time:706308ms step_avg:501.28ms +step:1410/57344 train_time:706505ms step_avg:501.07ms +step:1411/57344 train_time:706960ms step_avg:501.03ms +grad accum step:353/14336 +step:1412/57344 train_time:708030ms step_avg:501.44ms +step:1413/57344 train_time:708047ms step_avg:501.10ms +step:1414/57344 train_time:708268ms step_avg:500.90ms +step:1415/57344 train_time:708725ms step_avg:500.87ms +grad accum step:354/14336 +step:1416/57344 train_time:709797ms step_avg:501.27ms +step:1417/57344 train_time:709815ms step_avg:500.93ms +step:1418/57344 train_time:710034ms step_avg:500.73ms +step:1419/57344 train_time:710491ms step_avg:500.70ms +grad accum step:355/14336 +step:1420/57344 train_time:711562ms step_avg:501.10ms +step:1421/57344 train_time:711579ms step_avg:500.76ms +step:1422/57344 train_time:711799ms step_avg:500.56ms +step:1423/57344 train_time:712255ms step_avg:500.53ms +grad accum step:356/14336 +step:1424/57344 train_time:713326ms step_avg:500.93ms +step:1425/57344 train_time:713343ms step_avg:500.59ms +step:1426/57344 train_time:713562ms step_avg:500.39ms +step:1427/57344 train_time:714021ms step_avg:500.37ms +grad accum step:357/14336 +step:1428/57344 train_time:715089ms step_avg:500.76ms +step:1429/57344 train_time:715107ms step_avg:500.42ms +step:1430/57344 train_time:715328ms step_avg:500.23ms +step:1431/57344 train_time:715786ms step_avg:500.20ms +grad accum step:358/14336 +step:1432/57344 train_time:716854ms step_avg:500.60ms +step:1433/57344 train_time:716872ms step_avg:500.26ms +step:1434/57344 train_time:717093ms step_avg:500.06ms +step:1435/57344 train_time:717548ms step_avg:500.03ms +grad accum step:359/14336 +step:1436/57344 train_time:718615ms step_avg:500.43ms +step:1437/57344 train_time:718632ms step_avg:500.09ms +step:1438/57344 train_time:718853ms step_avg:499.90ms +step:1439/57344 train_time:719310ms step_avg:499.87ms +grad accum step:360/14336 +step:1440/57344 train_time:720378ms step_avg:500.26ms +step:1441/57344 train_time:720395ms step_avg:499.93ms +step:1442/57344 train_time:720615ms step_avg:499.73ms +step:1443/57344 train_time:721071ms step_avg:499.70ms +grad accum step:361/14336 +step:1444/57344 train_time:722141ms step_avg:500.10ms +step:1445/57344 train_time:722158ms step_avg:499.76ms +step:1446/57344 train_time:722378ms step_avg:499.57ms +step:1447/57344 train_time:722834ms step_avg:499.54ms +grad accum step:362/14336 +step:1448/57344 train_time:723903ms step_avg:499.93ms +step:1449/57344 train_time:723920ms step_avg:499.60ms +step:1450/57344 train_time:724140ms step_avg:499.41ms +step:1451/57344 train_time:724596ms step_avg:499.38ms +grad accum step:363/14336 +step:1452/57344 train_time:725665ms step_avg:499.77ms +step:1453/57344 train_time:725683ms step_avg:499.44ms +step:1454/57344 train_time:725904ms step_avg:499.25ms +step:1455/57344 train_time:726358ms step_avg:499.22ms +grad accum step:364/14336 +step:1456/57344 train_time:727429ms step_avg:499.61ms +step:1457/57344 train_time:727446ms step_avg:499.28ms +step:1458/57344 train_time:727667ms step_avg:499.09ms +step:1459/57344 train_time:728124ms step_avg:499.06ms +grad accum step:365/14336 +step:1460/57344 train_time:729190ms step_avg:499.45ms +step:1461/57344 train_time:729208ms step_avg:499.12ms +step:1462/57344 train_time:729428ms step_avg:498.92ms +step:1463/57344 train_time:729885ms step_avg:498.90ms +grad accum step:366/14336 +step:1464/57344 train_time:730955ms step_avg:499.29ms +step:1465/57344 train_time:730973ms step_avg:498.96ms +step:1466/57344 train_time:731192ms step_avg:498.77ms +step:1467/57344 train_time:731651ms step_avg:498.74ms +grad accum step:367/14336 +step:1468/57344 train_time:732722ms step_avg:499.13ms +step:1469/57344 train_time:732740ms step_avg:498.80ms +step:1470/57344 train_time:732959ms step_avg:498.61ms +step:1471/57344 train_time:733416ms step_avg:498.58ms +grad accum step:368/14336 +step:1472/57344 train_time:734484ms step_avg:498.97ms +step:1472/57344 val_loss:7.720869 train_time:734485ms step_avg:498.97ms +step:1473/57344 train_time:734496ms step_avg:498.64ms +step:1474/57344 train_time:734693ms step_avg:498.43ms +step:1475/57344 train_time:735151ms step_avg:498.41ms +grad accum step:369/14336 +step:1476/57344 train_time:736221ms step_avg:498.79ms +step:1477/57344 train_time:736239ms step_avg:498.47ms +step:1478/57344 train_time:736459ms step_avg:498.28ms +step:1479/57344 train_time:736915ms step_avg:498.25ms +grad accum step:370/14336 +step:1480/57344 train_time:737985ms step_avg:498.64ms +step:1481/57344 train_time:738003ms step_avg:498.31ms +step:1482/57344 train_time:738222ms step_avg:498.13ms +step:1483/57344 train_time:738679ms step_avg:498.10ms +grad accum step:371/14336 +step:1484/57344 train_time:739750ms step_avg:498.48ms +step:1485/57344 train_time:739767ms step_avg:498.16ms +step:1486/57344 train_time:739987ms step_avg:497.97ms +step:1487/57344 train_time:740445ms step_avg:497.95ms +grad accum step:372/14336 +step:1488/57344 train_time:741515ms step_avg:498.33ms +step:1489/57344 train_time:741532ms step_avg:498.01ms +step:1490/57344 train_time:741752ms step_avg:497.82ms +step:1491/57344 train_time:742207ms step_avg:497.79ms +grad accum step:373/14336 +step:1492/57344 train_time:743279ms step_avg:498.18ms +step:1493/57344 train_time:743297ms step_avg:497.85ms +step:1494/57344 train_time:743516ms step_avg:497.67ms +step:1495/57344 train_time:743975ms step_avg:497.64ms +grad accum step:374/14336 +step:1496/57344 train_time:745048ms step_avg:498.03ms +step:1497/57344 train_time:745066ms step_avg:497.71ms +step:1498/57344 train_time:745287ms step_avg:497.52ms +step:1499/57344 train_time:745748ms step_avg:497.50ms +grad accum step:375/14336 +step:1500/57344 train_time:746834ms step_avg:497.89ms +step:1501/57344 train_time:746852ms step_avg:497.57ms +step:1502/57344 train_time:747074ms step_avg:497.39ms +step:1503/57344 train_time:747534ms step_avg:497.36ms +grad accum step:376/14336 +step:1504/57344 train_time:748616ms step_avg:497.75ms +step:1505/57344 train_time:748633ms step_avg:497.43ms +step:1506/57344 train_time:748854ms step_avg:497.25ms +step:1507/57344 train_time:749315ms step_avg:497.22ms +grad accum step:377/14336 +step:1508/57344 train_time:750398ms step_avg:497.61ms +step:1509/57344 train_time:750415ms step_avg:497.29ms +step:1510/57344 train_time:750637ms step_avg:497.11ms +step:1511/57344 train_time:751098ms step_avg:497.09ms +grad accum step:378/14336 +step:1512/57344 train_time:752182ms step_avg:497.47ms +step:1513/57344 train_time:752199ms step_avg:497.16ms +step:1514/57344 train_time:752420ms step_avg:496.97ms +step:1515/57344 train_time:752882ms step_avg:496.95ms +grad accum step:379/14336 +step:1516/57344 train_time:753965ms step_avg:497.34ms +step:1517/57344 train_time:753983ms step_avg:497.02ms +step:1518/57344 train_time:754204ms step_avg:496.84ms +step:1519/57344 train_time:754664ms step_avg:496.82ms +grad accum step:380/14336 +step:1520/57344 train_time:755745ms step_avg:497.20ms +step:1521/57344 train_time:755762ms step_avg:496.89ms +step:1522/57344 train_time:755984ms step_avg:496.70ms +step:1523/57344 train_time:756445ms step_avg:496.68ms +grad accum step:381/14336 +step:1524/57344 train_time:757527ms step_avg:497.06ms +step:1525/57344 train_time:757544ms step_avg:496.75ms +step:1526/57344 train_time:757765ms step_avg:496.57ms +step:1527/57344 train_time:758226ms step_avg:496.55ms +grad accum step:382/14336 +step:1528/57344 train_time:759310ms step_avg:496.93ms +step:1529/57344 train_time:759327ms step_avg:496.62ms +step:1530/57344 train_time:759550ms step_avg:496.44ms +step:1531/57344 train_time:760011ms step_avg:496.42ms +grad accum step:383/14336 +step:1532/57344 train_time:761096ms step_avg:496.80ms +step:1533/57344 train_time:761113ms step_avg:496.49ms +step:1534/57344 train_time:761334ms step_avg:496.31ms +step:1535/57344 train_time:761796ms step_avg:496.28ms +grad accum step:384/14336 +step:1536/57344 train_time:762880ms step_avg:496.67ms +step:1536/57344 val_loss:7.711709 train_time:762880ms step_avg:496.67ms +step:1537/57344 train_time:762892ms step_avg:496.35ms +step:1538/57344 train_time:763157ms step_avg:496.20ms +step:1539/57344 train_time:763613ms step_avg:496.17ms +grad accum step:385/14336 +step:1540/57344 train_time:764691ms step_avg:496.55ms +step:1541/57344 train_time:764708ms step_avg:496.24ms +step:1542/57344 train_time:764929ms step_avg:496.06ms +step:1543/57344 train_time:765389ms step_avg:496.04ms +grad accum step:386/14336 +step:1544/57344 train_time:766467ms step_avg:496.42ms +step:1545/57344 train_time:766484ms step_avg:496.11ms +step:1546/57344 train_time:766705ms step_avg:495.93ms +step:1547/57344 train_time:767165ms step_avg:495.90ms +grad accum step:387/14336 +step:1548/57344 train_time:768242ms step_avg:496.28ms +step:1549/57344 train_time:768260ms step_avg:495.97ms +step:1550/57344 train_time:768481ms step_avg:495.79ms +step:1551/57344 train_time:768939ms step_avg:495.77ms +grad accum step:388/14336 +step:1552/57344 train_time:770018ms step_avg:496.15ms +step:1553/57344 train_time:770035ms step_avg:495.84ms +step:1554/57344 train_time:770257ms step_avg:495.66ms +step:1555/57344 train_time:770715ms step_avg:495.64ms +grad accum step:389/14336 +step:1556/57344 train_time:771793ms step_avg:496.01ms +step:1557/57344 train_time:771810ms step_avg:495.70ms +step:1558/57344 train_time:772033ms step_avg:495.53ms +step:1559/57344 train_time:772493ms step_avg:495.51ms +grad accum step:390/14336 +step:1560/57344 train_time:773573ms step_avg:495.88ms +step:1561/57344 train_time:773590ms step_avg:495.57ms +step:1562/57344 train_time:773812ms step_avg:495.40ms +step:1563/57344 train_time:774273ms step_avg:495.38ms +grad accum step:391/14336 +step:1564/57344 train_time:775352ms step_avg:495.75ms +step:1565/57344 train_time:775370ms step_avg:495.44ms +step:1566/57344 train_time:775592ms step_avg:495.27ms +step:1567/57344 train_time:776052ms step_avg:495.25ms +grad accum step:392/14336 +step:1568/57344 train_time:777134ms step_avg:495.62ms +step:1569/57344 train_time:777151ms step_avg:495.32ms +step:1570/57344 train_time:777373ms step_avg:495.14ms +step:1571/57344 train_time:777834ms step_avg:495.12ms +grad accum step:393/14336 +step:1572/57344 train_time:778914ms step_avg:495.49ms +step:1573/57344 train_time:778931ms step_avg:495.19ms +step:1574/57344 train_time:779154ms step_avg:495.01ms +step:1575/57344 train_time:779616ms step_avg:494.99ms +grad accum step:394/14336 +step:1576/57344 train_time:780695ms step_avg:495.37ms +step:1577/57344 train_time:780713ms step_avg:495.06ms +step:1578/57344 train_time:780935ms step_avg:494.89ms +step:1579/57344 train_time:781396ms step_avg:494.87ms +grad accum step:395/14336 +step:1580/57344 train_time:782477ms step_avg:495.24ms +step:1581/57344 train_time:782495ms step_avg:494.94ms +step:1582/57344 train_time:782717ms step_avg:494.76ms +step:1583/57344 train_time:783178ms step_avg:494.74ms +grad accum step:396/14336 +step:1584/57344 train_time:784259ms step_avg:495.11ms +step:1585/57344 train_time:784278ms step_avg:494.81ms +step:1586/57344 train_time:784500ms step_avg:494.64ms +step:1587/57344 train_time:784960ms step_avg:494.62ms +grad accum step:397/14336 +step:1588/57344 train_time:786042ms step_avg:494.99ms +step:1589/57344 train_time:786060ms step_avg:494.69ms +step:1590/57344 train_time:786281ms step_avg:494.52ms +step:1591/57344 train_time:786742ms step_avg:494.50ms +grad accum step:398/14336 +step:1592/57344 train_time:787822ms step_avg:494.86ms +step:1593/57344 train_time:787839ms step_avg:494.56ms +step:1594/57344 train_time:788060ms step_avg:494.39ms +step:1595/57344 train_time:788520ms step_avg:494.37ms +grad accum step:399/14336 +step:1596/57344 train_time:789603ms step_avg:494.74ms +step:1597/57344 train_time:789620ms step_avg:494.44ms +step:1598/57344 train_time:789841ms step_avg:494.27ms +step:1599/57344 train_time:790303ms step_avg:494.25ms +grad accum step:400/14336 +step:1600/57344 train_time:791385ms step_avg:494.62ms +step:1600/57344 val_loss:7.718039 train_time:791386ms step_avg:494.62ms +step:1601/57344 train_time:791398ms step_avg:494.31ms +step:1602/57344 train_time:791597ms step_avg:494.13ms +step:1603/57344 train_time:792058ms step_avg:494.11ms +grad accum step:401/14336 +step:1604/57344 train_time:793142ms step_avg:494.48ms +step:1605/57344 train_time:793159ms step_avg:494.18ms +step:1606/57344 train_time:793381ms step_avg:494.01ms +step:1607/57344 train_time:793844ms step_avg:493.99ms +grad accum step:402/14336 +step:1608/57344 train_time:794926ms step_avg:494.36ms +step:1609/57344 train_time:794943ms step_avg:494.06ms +step:1610/57344 train_time:795166ms step_avg:493.89ms +step:1611/57344 train_time:795628ms step_avg:493.87ms +grad accum step:403/14336 +step:1612/57344 train_time:796712ms step_avg:494.24ms +step:1613/57344 train_time:796729ms step_avg:493.94ms +step:1614/57344 train_time:796952ms step_avg:493.77ms +step:1615/57344 train_time:797413ms step_avg:493.75ms +grad accum step:404/14336 +step:1616/57344 train_time:798495ms step_avg:494.12ms +step:1617/57344 train_time:798513ms step_avg:493.82ms +step:1618/57344 train_time:798735ms step_avg:493.66ms +step:1619/57344 train_time:799195ms step_avg:493.64ms +grad accum step:405/14336 +step:1620/57344 train_time:800277ms step_avg:494.00ms +step:1621/57344 train_time:800295ms step_avg:493.70ms +step:1622/57344 train_time:800516ms step_avg:493.54ms +step:1623/57344 train_time:800976ms step_avg:493.52ms +grad accum step:406/14336 +step:1624/57344 train_time:802059ms step_avg:493.88ms +step:1625/57344 train_time:802076ms step_avg:493.59ms +step:1626/57344 train_time:802298ms step_avg:493.42ms +step:1627/57344 train_time:802760ms step_avg:493.40ms +grad accum step:407/14336 +step:1628/57344 train_time:803843ms step_avg:493.76ms +step:1629/57344 train_time:803860ms step_avg:493.47ms +step:1630/57344 train_time:804082ms step_avg:493.30ms +step:1631/57344 train_time:804542ms step_avg:493.28ms +grad accum step:408/14336 +step:1632/57344 train_time:808208ms step_avg:495.23ms +step:1633/57344 train_time:808220ms step_avg:494.93ms +step:1634/57344 train_time:808476ms step_avg:494.78ms +step:1635/57344 train_time:808956ms step_avg:494.77ms +grad accum step:409/14336 +step:1636/57344 train_time:810036ms step_avg:495.13ms +step:1637/57344 train_time:810053ms step_avg:494.84ms +step:1638/57344 train_time:810274ms step_avg:494.67ms +step:1639/57344 train_time:810734ms step_avg:494.65ms +grad accum step:410/14336 +step:1640/57344 train_time:811815ms step_avg:495.01ms +step:1641/57344 train_time:811833ms step_avg:494.72ms +step:1642/57344 train_time:812055ms step_avg:494.55ms +step:1643/57344 train_time:812515ms step_avg:494.53ms +grad accum step:411/14336 +step:1644/57344 train_time:813598ms step_avg:494.89ms +step:1645/57344 train_time:813615ms step_avg:494.60ms +step:1646/57344 train_time:813836ms step_avg:494.43ms +step:1647/57344 train_time:814297ms step_avg:494.41ms +grad accum step:412/14336 +step:1648/57344 train_time:815379ms step_avg:494.77ms +step:1649/57344 train_time:815397ms step_avg:494.48ms +step:1650/57344 train_time:815619ms step_avg:494.31ms +step:1651/57344 train_time:816079ms step_avg:494.29ms +grad accum step:413/14336 +step:1652/57344 train_time:817161ms step_avg:494.65ms +step:1653/57344 train_time:817179ms step_avg:494.36ms +step:1654/57344 train_time:817400ms step_avg:494.20ms +step:1655/57344 train_time:817862ms step_avg:494.18ms +grad accum step:414/14336 +step:1656/57344 train_time:818943ms step_avg:494.53ms +step:1657/57344 train_time:818960ms step_avg:494.24ms +step:1658/57344 train_time:819182ms step_avg:494.08ms +step:1659/57344 train_time:819642ms step_avg:494.06ms +grad accum step:415/14336 +step:1660/57344 train_time:820726ms step_avg:494.41ms +step:1661/57344 train_time:820744ms step_avg:494.13ms +step:1662/57344 train_time:820966ms step_avg:493.96ms +step:1663/57344 train_time:821426ms step_avg:493.94ms +grad accum step:416/14336 +step:1664/57344 train_time:822510ms step_avg:494.30ms +step:1664/57344 val_loss:7.701055 train_time:822510ms step_avg:494.30ms +step:1665/57344 train_time:822522ms step_avg:494.01ms +step:1666/57344 train_time:822721ms step_avg:493.83ms +step:1667/57344 train_time:823183ms step_avg:493.81ms +grad accum step:417/14336 +step:1668/57344 train_time:824268ms step_avg:494.17ms +step:1669/57344 train_time:824285ms step_avg:493.88ms +step:1670/57344 train_time:824507ms step_avg:493.72ms +step:1671/57344 train_time:824969ms step_avg:493.70ms +grad accum step:418/14336 +step:1672/57344 train_time:826053ms step_avg:494.05ms +step:1673/57344 train_time:826070ms step_avg:493.77ms +step:1674/57344 train_time:826292ms step_avg:493.60ms +step:1675/57344 train_time:826754ms step_avg:493.58ms +grad accum step:419/14336 +step:1676/57344 train_time:827841ms step_avg:493.94ms +step:1677/57344 train_time:827859ms step_avg:493.65ms +step:1678/57344 train_time:828081ms step_avg:493.49ms +step:1679/57344 train_time:828543ms step_avg:493.47ms +grad accum step:420/14336 +step:1680/57344 train_time:829627ms step_avg:493.83ms +step:1681/57344 train_time:829644ms step_avg:493.54ms +step:1682/57344 train_time:829867ms step_avg:493.38ms +step:1683/57344 train_time:830330ms step_avg:493.36ms +grad accum step:421/14336 +step:1684/57344 train_time:831414ms step_avg:493.71ms +step:1685/57344 train_time:831431ms step_avg:493.43ms +step:1686/57344 train_time:831653ms step_avg:493.27ms +step:1687/57344 train_time:832113ms step_avg:493.25ms +grad accum step:422/14336 +step:1688/57344 train_time:833196ms step_avg:493.60ms +step:1689/57344 train_time:833214ms step_avg:493.32ms +step:1690/57344 train_time:833436ms step_avg:493.16ms +step:1691/57344 train_time:833896ms step_avg:493.14ms +grad accum step:423/14336 +step:1692/57344 train_time:834980ms step_avg:493.49ms +step:1693/57344 train_time:834997ms step_avg:493.21ms +step:1694/57344 train_time:835219ms step_avg:493.05ms +step:1695/57344 train_time:835680ms step_avg:493.03ms +grad accum step:424/14336 +step:1696/57344 train_time:836766ms step_avg:493.38ms +step:1697/57344 train_time:836784ms step_avg:493.10ms +step:1698/57344 train_time:837007ms step_avg:492.94ms +step:1699/57344 train_time:837468ms step_avg:492.92ms +grad accum step:425/14336 +step:1700/57344 train_time:838553ms step_avg:493.27ms +step:1701/57344 train_time:838570ms step_avg:492.99ms +step:1702/57344 train_time:838793ms step_avg:492.83ms +step:1703/57344 train_time:839255ms step_avg:492.81ms +grad accum step:426/14336 +step:1704/57344 train_time:840339ms step_avg:493.16ms +step:1705/57344 train_time:840357ms step_avg:492.88ms +step:1706/57344 train_time:840578ms step_avg:492.72ms +step:1707/57344 train_time:841040ms step_avg:492.70ms +grad accum step:427/14336 +step:1708/57344 train_time:842123ms step_avg:493.05ms +step:1709/57344 train_time:842141ms step_avg:492.77ms +step:1710/57344 train_time:842363ms step_avg:492.61ms +step:1711/57344 train_time:842824ms step_avg:492.59ms +grad accum step:428/14336 +step:1712/57344 train_time:843909ms step_avg:492.94ms +step:1713/57344 train_time:843924ms step_avg:492.66ms +step:1714/57344 train_time:844147ms step_avg:492.50ms +step:1715/57344 train_time:844608ms step_avg:492.48ms +grad accum step:429/14336 +step:1716/57344 train_time:845692ms step_avg:492.83ms +step:1717/57344 train_time:845709ms step_avg:492.55ms +step:1718/57344 train_time:845932ms step_avg:492.39ms +step:1719/57344 train_time:846395ms step_avg:492.38ms +grad accum step:430/14336 +step:1720/57344 train_time:847479ms step_avg:492.72ms +step:1721/57344 train_time:847496ms step_avg:492.44ms +step:1722/57344 train_time:847718ms step_avg:492.29ms +step:1723/57344 train_time:848181ms step_avg:492.27ms +grad accum step:431/14336 +step:1724/57344 train_time:849267ms step_avg:492.61ms +step:1725/57344 train_time:849284ms step_avg:492.34ms +step:1726/57344 train_time:849506ms step_avg:492.18ms +step:1727/57344 train_time:849969ms step_avg:492.17ms +grad accum step:432/14336 +step:1728/57344 train_time:851054ms step_avg:492.51ms +step:1728/57344 val_loss:7.690996 train_time:851055ms step_avg:492.51ms +step:1729/57344 train_time:851067ms step_avg:492.23ms +step:1730/57344 train_time:851265ms step_avg:492.06ms +step:1731/57344 train_time:851728ms step_avg:492.04ms +grad accum step:433/14336 +step:1732/57344 train_time:852813ms step_avg:492.39ms +step:1733/57344 train_time:852830ms step_avg:492.11ms +step:1734/57344 train_time:853053ms step_avg:491.96ms +step:1735/57344 train_time:853515ms step_avg:491.94ms +grad accum step:434/14336 +step:1736/57344 train_time:854599ms step_avg:492.28ms +step:1737/57344 train_time:854616ms step_avg:492.01ms +step:1738/57344 train_time:854840ms step_avg:491.85ms +step:1739/57344 train_time:855299ms step_avg:491.83ms +grad accum step:435/14336 +step:1740/57344 train_time:856386ms step_avg:492.18ms +step:1741/57344 train_time:856404ms step_avg:491.90ms +step:1742/57344 train_time:856626ms step_avg:491.75ms +step:1743/57344 train_time:857088ms step_avg:491.73ms +grad accum step:436/14336 +step:1744/57344 train_time:858175ms step_avg:492.07ms +step:1745/57344 train_time:858192ms step_avg:491.80ms +step:1746/57344 train_time:858415ms step_avg:491.65ms +step:1747/57344 train_time:858876ms step_avg:491.63ms +grad accum step:437/14336 +step:1748/57344 train_time:859961ms step_avg:491.97ms +step:1749/57344 train_time:859979ms step_avg:491.70ms +step:1750/57344 train_time:860201ms step_avg:491.54ms +step:1751/57344 train_time:860665ms step_avg:491.53ms +grad accum step:438/14336 +step:1752/57344 train_time:861751ms step_avg:491.87ms +step:1753/57344 train_time:861768ms step_avg:491.60ms +step:1754/57344 train_time:861990ms step_avg:491.44ms +step:1755/57344 train_time:862453ms step_avg:491.43ms +grad accum step:439/14336 +step:1756/57344 train_time:863538ms step_avg:491.76ms +step:1757/57344 train_time:863555ms step_avg:491.49ms +step:1758/57344 train_time:863778ms step_avg:491.34ms +step:1759/57344 train_time:864239ms step_avg:491.32ms +grad accum step:440/14336 +step:1760/57344 train_time:865324ms step_avg:491.66ms +step:1761/57344 train_time:865341ms step_avg:491.39ms +step:1762/57344 train_time:865564ms step_avg:491.24ms +step:1763/57344 train_time:866027ms step_avg:491.22ms +grad accum step:441/14336 +step:1764/57344 train_time:867114ms step_avg:491.56ms +step:1765/57344 train_time:867132ms step_avg:491.29ms +step:1766/57344 train_time:867355ms step_avg:491.14ms +step:1767/57344 train_time:867816ms step_avg:491.12ms +grad accum step:442/14336 +step:1768/57344 train_time:868902ms step_avg:491.46ms +step:1769/57344 train_time:868919ms step_avg:491.19ms +step:1770/57344 train_time:869142ms step_avg:491.04ms +step:1771/57344 train_time:869603ms step_avg:491.02ms +grad accum step:443/14336 +step:1772/57344 train_time:870688ms step_avg:491.36ms +step:1773/57344 train_time:870706ms step_avg:491.09ms +step:1774/57344 train_time:870928ms step_avg:490.94ms +step:1775/57344 train_time:871390ms step_avg:490.92ms +grad accum step:444/14336 +step:1776/57344 train_time:872474ms step_avg:491.26ms +step:1777/57344 train_time:872492ms step_avg:490.99ms +step:1778/57344 train_time:872715ms step_avg:490.84ms +step:1779/57344 train_time:873178ms step_avg:490.83ms +grad accum step:445/14336 +step:1780/57344 train_time:874262ms step_avg:491.16ms +step:1781/57344 train_time:874280ms step_avg:490.89ms +step:1782/57344 train_time:874502ms step_avg:490.74ms +step:1783/57344 train_time:874965ms step_avg:490.73ms +grad accum step:446/14336 +step:1784/57344 train_time:876052ms step_avg:491.06ms +step:1785/57344 train_time:876070ms step_avg:490.80ms +step:1786/57344 train_time:876292ms step_avg:490.65ms +step:1787/57344 train_time:876752ms step_avg:490.63ms +grad accum step:447/14336 +step:1788/57344 train_time:877840ms step_avg:490.96ms +step:1789/57344 train_time:877858ms step_avg:490.70ms +step:1790/57344 train_time:878080ms step_avg:490.55ms +step:1791/57344 train_time:878542ms step_avg:490.53ms +grad accum step:448/14336 +step:1792/57344 train_time:879628ms step_avg:490.86ms +step:1792/57344 val_loss:7.680806 train_time:879628ms step_avg:490.86ms +step:1793/57344 train_time:879640ms step_avg:490.60ms +step:1794/57344 train_time:879840ms step_avg:490.43ms +step:1795/57344 train_time:880302ms step_avg:490.42ms +grad accum step:449/14336 +step:1796/57344 train_time:881387ms step_avg:490.75ms +step:1797/57344 train_time:881405ms step_avg:490.49ms +step:1798/57344 train_time:881628ms step_avg:490.34ms +step:1799/57344 train_time:882090ms step_avg:490.32ms +grad accum step:450/14336 +step:1800/57344 train_time:883175ms step_avg:490.65ms +step:1801/57344 train_time:883192ms step_avg:490.39ms +step:1802/57344 train_time:883415ms step_avg:490.24ms +step:1803/57344 train_time:883878ms step_avg:490.23ms +grad accum step:451/14336 +step:1804/57344 train_time:884963ms step_avg:490.56ms +step:1805/57344 train_time:884981ms step_avg:490.29ms +step:1806/57344 train_time:885205ms step_avg:490.15ms +step:1807/57344 train_time:885668ms step_avg:490.13ms +grad accum step:452/14336 +step:1808/57344 train_time:886752ms step_avg:490.46ms +step:1809/57344 train_time:886770ms step_avg:490.20ms +step:1810/57344 train_time:886993ms step_avg:490.05ms +step:1811/57344 train_time:887456ms step_avg:490.04ms +grad accum step:453/14336 +step:1812/57344 train_time:888544ms step_avg:490.37ms +step:1813/57344 train_time:888561ms step_avg:490.11ms +step:1814/57344 train_time:888784ms step_avg:489.96ms +step:1815/57344 train_time:889247ms step_avg:489.94ms +grad accum step:454/14336 +step:1816/57344 train_time:890333ms step_avg:490.27ms +step:1817/57344 train_time:890351ms step_avg:490.01ms +step:1818/57344 train_time:890574ms step_avg:489.86ms +step:1819/57344 train_time:891038ms step_avg:489.85ms +grad accum step:455/14336 +step:1820/57344 train_time:892125ms step_avg:490.18ms +step:1821/57344 train_time:892142ms step_avg:489.92ms +step:1822/57344 train_time:892365ms step_avg:489.77ms +step:1823/57344 train_time:892826ms step_avg:489.76ms +grad accum step:456/14336 +step:1824/57344 train_time:893911ms step_avg:490.08ms +step:1825/57344 train_time:893929ms step_avg:489.82ms +step:1826/57344 train_time:894153ms step_avg:489.68ms +step:1827/57344 train_time:894618ms step_avg:489.66ms +grad accum step:457/14336 +step:1828/57344 train_time:895704ms step_avg:489.99ms +step:1829/57344 train_time:895721ms step_avg:489.73ms +step:1830/57344 train_time:895943ms step_avg:489.59ms +step:1831/57344 train_time:896406ms step_avg:489.57ms +grad accum step:458/14336 +step:1832/57344 train_time:897489ms step_avg:489.90ms +step:1833/57344 train_time:897506ms step_avg:489.64ms +step:1834/57344 train_time:897729ms step_avg:489.49ms +step:1835/57344 train_time:898192ms step_avg:489.48ms +grad accum step:459/14336 +step:1836/57344 train_time:899277ms step_avg:489.80ms +step:1837/57344 train_time:899295ms step_avg:489.55ms +step:1838/57344 train_time:899518ms step_avg:489.40ms +step:1839/57344 train_time:899982ms step_avg:489.39ms +grad accum step:460/14336 +step:1840/57344 train_time:901070ms step_avg:489.71ms +step:1841/57344 train_time:901087ms step_avg:489.46ms +step:1842/57344 train_time:901310ms step_avg:489.31ms +step:1843/57344 train_time:901772ms step_avg:489.30ms +grad accum step:461/14336 +step:1844/57344 train_time:902858ms step_avg:489.62ms +step:1845/57344 train_time:902875ms step_avg:489.36ms +step:1846/57344 train_time:903098ms step_avg:489.22ms +step:1847/57344 train_time:903561ms step_avg:489.20ms +grad accum step:462/14336 +step:1848/57344 train_time:904648ms step_avg:489.53ms +step:1849/57344 train_time:904665ms step_avg:489.27ms +step:1850/57344 train_time:904888ms step_avg:489.13ms +step:1851/57344 train_time:905350ms step_avg:489.11ms +grad accum step:463/14336 +step:1852/57344 train_time:906438ms step_avg:489.44ms +step:1853/57344 train_time:906455ms step_avg:489.18ms +step:1854/57344 train_time:906679ms step_avg:489.04ms +step:1855/57344 train_time:907141ms step_avg:489.02ms +grad accum step:464/14336 +step:1856/57344 train_time:908226ms step_avg:489.35ms +step:1856/57344 val_loss:7.694116 train_time:908227ms step_avg:489.35ms +step:1857/57344 train_time:908239ms step_avg:489.09ms +step:1858/57344 train_time:908440ms step_avg:488.93ms +step:1859/57344 train_time:908903ms step_avg:488.92ms +grad accum step:465/14336 +step:1860/57344 train_time:909991ms step_avg:489.24ms +step:1861/57344 train_time:910008ms step_avg:488.99ms +step:1862/57344 train_time:910231ms step_avg:488.85ms +step:1863/57344 train_time:910694ms step_avg:488.83ms +grad accum step:466/14336 +step:1864/57344 train_time:911781ms step_avg:489.15ms +step:1865/57344 train_time:911798ms step_avg:488.90ms +step:1866/57344 train_time:912021ms step_avg:488.76ms +step:1867/57344 train_time:912485ms step_avg:488.74ms +grad accum step:467/14336 +step:1868/57344 train_time:913574ms step_avg:489.07ms +step:1869/57344 train_time:913591ms step_avg:488.81ms +step:1870/57344 train_time:913814ms step_avg:488.67ms +step:1871/57344 train_time:914277ms step_avg:488.66ms +grad accum step:468/14336 +step:1872/57344 train_time:915364ms step_avg:488.98ms +step:1873/57344 train_time:915382ms step_avg:488.72ms +step:1874/57344 train_time:915606ms step_avg:488.58ms +step:1875/57344 train_time:916068ms step_avg:488.57ms +grad accum step:469/14336 +step:1876/57344 train_time:917154ms step_avg:488.89ms +step:1877/57344 train_time:917172ms step_avg:488.64ms +step:1878/57344 train_time:917394ms step_avg:488.50ms +step:1879/57344 train_time:917856ms step_avg:488.48ms +grad accum step:470/14336 +step:1880/57344 train_time:918944ms step_avg:488.80ms +step:1881/57344 train_time:918962ms step_avg:488.55ms +step:1882/57344 train_time:919185ms step_avg:488.41ms +step:1883/57344 train_time:919649ms step_avg:488.40ms +grad accum step:471/14336 +step:1884/57344 train_time:920735ms step_avg:488.71ms +step:1885/57344 train_time:920753ms step_avg:488.46ms +step:1886/57344 train_time:920976ms step_avg:488.32ms +step:1887/57344 train_time:921437ms step_avg:488.31ms +grad accum step:472/14336 +step:1888/57344 train_time:922524ms step_avg:488.62ms +step:1889/57344 train_time:922542ms step_avg:488.38ms +step:1890/57344 train_time:922765ms step_avg:488.24ms +step:1891/57344 train_time:923228ms step_avg:488.22ms +grad accum step:473/14336 +step:1892/57344 train_time:924315ms step_avg:488.54ms +step:1893/57344 train_time:924332ms step_avg:488.29ms +step:1894/57344 train_time:924556ms step_avg:488.15ms +step:1895/57344 train_time:925017ms step_avg:488.14ms +grad accum step:474/14336 +step:1896/57344 train_time:926105ms step_avg:488.45ms +step:1897/57344 train_time:926122ms step_avg:488.20ms +step:1898/57344 train_time:926347ms step_avg:488.06ms +step:1899/57344 train_time:926812ms step_avg:488.05ms +grad accum step:475/14336 +step:1900/57344 train_time:927898ms step_avg:488.37ms +step:1901/57344 train_time:927915ms step_avg:488.12ms +step:1902/57344 train_time:928138ms step_avg:487.98ms +step:1903/57344 train_time:928599ms step_avg:487.97ms +grad accum step:476/14336 +step:1904/57344 train_time:929687ms step_avg:488.28ms +step:1905/57344 train_time:929705ms step_avg:488.03ms +step:1906/57344 train_time:929929ms step_avg:487.90ms +step:1907/57344 train_time:930392ms step_avg:487.88ms +grad accum step:477/14336 +step:1908/57344 train_time:931479ms step_avg:488.20ms +step:1909/57344 train_time:931497ms step_avg:487.95ms +step:1910/57344 train_time:931720ms step_avg:487.81ms +step:1911/57344 train_time:932182ms step_avg:487.80ms +grad accum step:478/14336 +step:1912/57344 train_time:933269ms step_avg:488.11ms +step:1913/57344 train_time:933287ms step_avg:487.87ms +step:1914/57344 train_time:933510ms step_avg:487.73ms +step:1915/57344 train_time:933972ms step_avg:487.71ms +grad accum step:479/14336 +step:1916/57344 train_time:935060ms step_avg:488.03ms +step:1917/57344 train_time:935078ms step_avg:487.78ms +step:1918/57344 train_time:935301ms step_avg:487.64ms +step:1919/57344 train_time:935764ms step_avg:487.63ms +grad accum step:480/14336 +step:1920/57344 train_time:936851ms step_avg:487.94ms +step:1920/57344 val_loss:7.679022 train_time:936851ms step_avg:487.94ms +step:1921/57344 train_time:936863ms step_avg:487.70ms +step:1922/57344 train_time:937062ms step_avg:487.55ms +step:1923/57344 train_time:937525ms step_avg:487.53ms +grad accum step:481/14336 +step:1924/57344 train_time:938613ms step_avg:487.84ms +step:1925/57344 train_time:938630ms step_avg:487.60ms +step:1926/57344 train_time:938854ms step_avg:487.46ms +step:1927/57344 train_time:939317ms step_avg:487.45ms +grad accum step:482/14336 +step:1928/57344 train_time:940404ms step_avg:487.76ms +step:1929/57344 train_time:940421ms step_avg:487.52ms +step:1930/57344 train_time:940644ms step_avg:487.38ms +step:1931/57344 train_time:941107ms step_avg:487.37ms +grad accum step:483/14336 +step:1932/57344 train_time:942196ms step_avg:487.68ms +step:1933/57344 train_time:942213ms step_avg:487.44ms +step:1934/57344 train_time:942437ms step_avg:487.30ms +step:1935/57344 train_time:942900ms step_avg:487.29ms +grad accum step:484/14336 +step:1936/57344 train_time:943988ms step_avg:487.60ms +step:1937/57344 train_time:944005ms step_avg:487.35ms +step:1938/57344 train_time:944228ms step_avg:487.22ms +step:1939/57344 train_time:944690ms step_avg:487.20ms +grad accum step:485/14336 +step:1940/57344 train_time:945778ms step_avg:487.51ms +step:1941/57344 train_time:945873ms step_avg:487.31ms +step:1942/57344 train_time:946068ms step_avg:487.16ms +step:1943/57344 train_time:946530ms step_avg:487.15ms +grad accum step:486/14336 +step:1944/57344 train_time:947619ms step_avg:487.46ms +step:1945/57344 train_time:947636ms step_avg:487.22ms +step:1946/57344 train_time:947859ms step_avg:487.08ms +step:1947/57344 train_time:948322ms step_avg:487.07ms +grad accum step:487/14336 +step:1948/57344 train_time:949410ms step_avg:487.38ms +step:1949/57344 train_time:949426ms step_avg:487.14ms +step:1950/57344 train_time:949649ms step_avg:487.00ms +step:1951/57344 train_time:950111ms step_avg:486.99ms +grad accum step:488/14336 +step:1952/57344 train_time:951201ms step_avg:487.30ms +step:1953/57344 train_time:951217ms step_avg:487.05ms +step:1954/57344 train_time:951441ms step_avg:486.92ms +step:1955/57344 train_time:951903ms step_avg:486.91ms +grad accum step:489/14336 +step:1956/57344 train_time:952991ms step_avg:487.21ms +step:1957/57344 train_time:953008ms step_avg:486.97ms +step:1958/57344 train_time:953232ms step_avg:486.84ms +step:1959/57344 train_time:953694ms step_avg:486.83ms +grad accum step:490/14336 +step:1960/57344 train_time:954781ms step_avg:487.13ms +step:1961/57344 train_time:954798ms step_avg:486.89ms +step:1962/57344 train_time:955021ms step_avg:486.76ms +step:1963/57344 train_time:955485ms step_avg:486.75ms +grad accum step:491/14336 +step:1964/57344 train_time:956571ms step_avg:487.05ms +step:1965/57344 train_time:956589ms step_avg:486.81ms +step:1966/57344 train_time:956812ms step_avg:486.68ms +step:1967/57344 train_time:957275ms step_avg:486.67ms +grad accum step:492/14336 +step:1968/57344 train_time:959205ms step_avg:487.40ms +step:1969/57344 train_time:959217ms step_avg:487.16ms +step:1970/57344 train_time:959424ms step_avg:487.02ms +step:1971/57344 train_time:959887ms step_avg:487.01ms +grad accum step:493/14336 +step:1972/57344 train_time:960975ms step_avg:487.31ms +step:1973/57344 train_time:960993ms step_avg:487.07ms +step:1974/57344 train_time:961216ms step_avg:486.94ms +step:1975/57344 train_time:961678ms step_avg:486.93ms +grad accum step:494/14336 +step:1976/57344 train_time:962765ms step_avg:487.23ms +step:1977/57344 train_time:962783ms step_avg:486.99ms +step:1978/57344 train_time:963006ms step_avg:486.86ms +step:1979/57344 train_time:963470ms step_avg:486.85ms +grad accum step:495/14336 +step:1980/57344 train_time:964558ms step_avg:487.15ms +step:1981/57344 train_time:964575ms step_avg:486.91ms +step:1982/57344 train_time:964798ms step_avg:486.78ms +step:1983/57344 train_time:965261ms step_avg:486.77ms +grad accum step:496/14336 +step:1984/57344 train_time:966348ms step_avg:487.07ms +step:1984/57344 val_loss:7.671133 train_time:966348ms step_avg:487.07ms +step:1985/57344 train_time:966360ms step_avg:486.83ms +step:1986/57344 train_time:966559ms step_avg:486.69ms +step:1987/57344 train_time:967023ms step_avg:486.68ms +grad accum step:497/14336 +step:1988/57344 train_time:968111ms step_avg:486.98ms +step:1989/57344 train_time:968129ms step_avg:486.74ms +step:1990/57344 train_time:968353ms step_avg:486.61ms +step:1991/57344 train_time:968817ms step_avg:486.60ms +grad accum step:498/14336 +step:1992/57344 train_time:969904ms step_avg:486.90ms +step:1993/57344 train_time:969921ms step_avg:486.66ms +step:1994/57344 train_time:970144ms step_avg:486.53ms +step:1995/57344 train_time:970606ms step_avg:486.52ms +grad accum step:499/14336 +step:1996/57344 train_time:971694ms step_avg:486.82ms +step:1997/57344 train_time:971711ms step_avg:486.59ms +step:1998/57344 train_time:971934ms step_avg:486.45ms +step:1999/57344 train_time:972398ms step_avg:486.44ms +grad accum step:500/14336 +step:2000/57344 train_time:973484ms step_avg:486.74ms +step:2001/57344 train_time:973502ms step_avg:486.51ms +step:2002/57344 train_time:973724ms step_avg:486.38ms +step:2003/57344 train_time:974185ms step_avg:486.36ms +grad accum step:501/14336 +step:2004/57344 train_time:975273ms step_avg:486.66ms +step:2005/57344 train_time:975290ms step_avg:486.43ms +step:2006/57344 train_time:975513ms step_avg:486.30ms +step:2007/57344 train_time:975977ms step_avg:486.29ms +grad accum step:502/14336 +step:2008/57344 train_time:977064ms step_avg:486.59ms +step:2009/57344 train_time:977081ms step_avg:486.35ms +step:2010/57344 train_time:977303ms step_avg:486.22ms +step:2011/57344 train_time:977764ms step_avg:486.21ms +grad accum step:503/14336 +step:2012/57344 train_time:978851ms step_avg:486.51ms +step:2013/57344 train_time:978869ms step_avg:486.27ms +step:2014/57344 train_time:979091ms step_avg:486.14ms +step:2015/57344 train_time:979555ms step_avg:486.13ms +grad accum step:504/14336 +step:2016/57344 train_time:980640ms step_avg:486.43ms +step:2017/57344 train_time:980657ms step_avg:486.20ms +step:2018/57344 train_time:980880ms step_avg:486.07ms +step:2019/57344 train_time:981343ms step_avg:486.05ms +grad accum step:505/14336 +step:2020/57344 train_time:982450ms step_avg:486.36ms +step:2021/57344 train_time:982462ms step_avg:486.13ms +step:2022/57344 train_time:982671ms step_avg:485.99ms +step:2023/57344 train_time:983134ms step_avg:485.98ms +grad accum step:506/14336 +step:2024/57344 train_time:984222ms step_avg:486.28ms +step:2025/57344 train_time:984240ms step_avg:486.04ms +step:2026/57344 train_time:984463ms step_avg:485.91ms +step:2027/57344 train_time:984924ms step_avg:485.90ms +grad accum step:507/14336 +step:2028/57344 train_time:986010ms step_avg:486.20ms +step:2029/57344 train_time:986027ms step_avg:485.97ms +step:2030/57344 train_time:986251ms step_avg:485.84ms +step:2031/57344 train_time:986714ms step_avg:485.83ms +grad accum step:508/14336 +step:2032/57344 train_time:987801ms step_avg:486.12ms +step:2033/57344 train_time:987819ms step_avg:485.89ms +step:2034/57344 train_time:988042ms step_avg:485.76ms +step:2035/57344 train_time:988502ms step_avg:485.75ms +grad accum step:509/14336 +step:2036/57344 train_time:989587ms step_avg:486.04ms +step:2037/57344 train_time:989605ms step_avg:485.81ms +step:2038/57344 train_time:989828ms step_avg:485.69ms +step:2039/57344 train_time:990292ms step_avg:485.68ms +grad accum step:510/14336 +step:2040/57344 train_time:991376ms step_avg:485.97ms +step:2041/57344 train_time:991394ms step_avg:485.74ms +step:2042/57344 train_time:991617ms step_avg:485.61ms +step:2043/57344 train_time:992079ms step_avg:485.60ms +grad accum step:511/14336 +step:2044/57344 train_time:993163ms step_avg:485.89ms +step:2045/57344 train_time:993181ms step_avg:485.66ms +step:2046/57344 train_time:993404ms step_avg:485.53ms +step:2047/57344 train_time:993866ms step_avg:485.52ms +grad accum step:512/14336 +step:2048/57344 train_time:994951ms step_avg:485.82ms +step:2048/57344 val_loss:7.682602 train_time:994951ms step_avg:485.82ms +step:2049/57344 train_time:994963ms step_avg:485.58ms +step:2050/57344 train_time:995164ms step_avg:485.45ms +step:2051/57344 train_time:995624ms step_avg:485.43ms +grad accum step:513/14336 +step:2052/57344 train_time:996709ms step_avg:485.73ms +step:2053/57344 train_time:996726ms step_avg:485.50ms +step:2054/57344 train_time:996949ms step_avg:485.37ms +step:2055/57344 train_time:997413ms step_avg:485.36ms +grad accum step:514/14336 +step:2056/57344 train_time:998501ms step_avg:485.65ms +step:2057/57344 train_time:998518ms step_avg:485.42ms +step:2058/57344 train_time:998740ms step_avg:485.30ms +step:2059/57344 train_time:999201ms step_avg:485.28ms +grad accum step:515/14336 +step:2060/57344 train_time:1000285ms step_avg:485.58ms +step:2061/57344 train_time:1000302ms step_avg:485.35ms +step:2062/57344 train_time:1000525ms step_avg:485.22ms +step:2063/57344 train_time:1000987ms step_avg:485.21ms +grad accum step:516/14336 +step:2064/57344 train_time:1002072ms step_avg:485.50ms +step:2065/57344 train_time:1002090ms step_avg:485.27ms +step:2066/57344 train_time:1002314ms step_avg:485.15ms +step:2067/57344 train_time:1002777ms step_avg:485.14ms +grad accum step:517/14336 +step:2068/57344 train_time:1003863ms step_avg:485.43ms +step:2069/57344 train_time:1003880ms step_avg:485.20ms +step:2070/57344 train_time:1004103ms step_avg:485.07ms +step:2071/57344 train_time:1004565ms step_avg:485.06ms +grad accum step:518/14336 +step:2072/57344 train_time:1005651ms step_avg:485.35ms +step:2073/57344 train_time:1005669ms step_avg:485.13ms +step:2074/57344 train_time:1005893ms step_avg:485.00ms +step:2075/57344 train_time:1006355ms step_avg:484.99ms +grad accum step:519/14336 +step:2076/57344 train_time:1007440ms step_avg:485.28ms +step:2077/57344 train_time:1007458ms step_avg:485.05ms +step:2078/57344 train_time:1007681ms step_avg:484.93ms +step:2079/57344 train_time:1008144ms step_avg:484.92ms +grad accum step:520/14336 +step:2080/57344 train_time:1009232ms step_avg:485.21ms +step:2081/57344 train_time:1009249ms step_avg:484.98ms +step:2082/57344 train_time:1009472ms step_avg:484.86ms +step:2083/57344 train_time:1009934ms step_avg:484.85ms +grad accum step:521/14336 +step:2084/57344 train_time:1011021ms step_avg:485.14ms +step:2085/57344 train_time:1011039ms step_avg:484.91ms +step:2086/57344 train_time:1011262ms step_avg:484.79ms +step:2087/57344 train_time:1011723ms step_avg:484.77ms +grad accum step:522/14336 +step:2088/57344 train_time:1012807ms step_avg:485.06ms +step:2089/57344 train_time:1012825ms step_avg:484.84ms +step:2090/57344 train_time:1013047ms step_avg:484.71ms +step:2091/57344 train_time:1013508ms step_avg:484.70ms +grad accum step:523/14336 +step:2092/57344 train_time:1014595ms step_avg:484.99ms +step:2093/57344 train_time:1014612ms step_avg:484.76ms +step:2094/57344 train_time:1014835ms step_avg:484.64ms +step:2095/57344 train_time:1015298ms step_avg:484.63ms +grad accum step:524/14336 +step:2096/57344 train_time:1016385ms step_avg:484.92ms +step:2097/57344 train_time:1016402ms step_avg:484.69ms +step:2098/57344 train_time:1016626ms step_avg:484.57ms +step:2099/57344 train_time:1017086ms step_avg:484.56ms +grad accum step:525/14336 +step:2100/57344 train_time:1018174ms step_avg:484.84ms +step:2101/57344 train_time:1018191ms step_avg:484.62ms +step:2102/57344 train_time:1018415ms step_avg:484.50ms +step:2103/57344 train_time:1018876ms step_avg:484.49ms +grad accum step:526/14336 +step:2104/57344 train_time:1019963ms step_avg:484.77ms +step:2105/57344 train_time:1019981ms step_avg:484.55ms +step:2106/57344 train_time:1020202ms step_avg:484.43ms +step:2107/57344 train_time:1020665ms step_avg:484.42ms +grad accum step:527/14336 +step:2108/57344 train_time:1021748ms step_avg:484.70ms +step:2109/57344 train_time:1021765ms step_avg:484.48ms +step:2110/57344 train_time:1021988ms step_avg:484.35ms +step:2111/57344 train_time:1022448ms step_avg:484.34ms +grad accum step:528/14336 +step:2112/57344 train_time:1023533ms step_avg:484.63ms +step:2112/57344 val_loss:7.683117 train_time:1023534ms step_avg:484.63ms +step:2113/57344 train_time:1023546ms step_avg:484.40ms +step:2114/57344 train_time:1023744ms step_avg:484.27ms +step:2115/57344 train_time:1024208ms step_avg:484.26ms +grad accum step:529/14336 +step:2116/57344 train_time:1025293ms step_avg:484.54ms +step:2117/57344 train_time:1025310ms step_avg:484.32ms +step:2118/57344 train_time:1025533ms step_avg:484.20ms +step:2119/57344 train_time:1025994ms step_avg:484.19ms +grad accum step:530/14336 +step:2120/57344 train_time:1027078ms step_avg:484.47ms +step:2121/57344 train_time:1027095ms step_avg:484.25ms +step:2122/57344 train_time:1027317ms step_avg:484.13ms +step:2123/57344 train_time:1027778ms step_avg:484.12ms +grad accum step:531/14336 +step:2124/57344 train_time:1028865ms step_avg:484.40ms +step:2125/57344 train_time:1028881ms step_avg:484.18ms +step:2126/57344 train_time:1029104ms step_avg:484.06ms +step:2127/57344 train_time:1029567ms step_avg:484.05ms +grad accum step:532/14336 +step:2128/57344 train_time:1030652ms step_avg:484.33ms +step:2129/57344 train_time:1030668ms step_avg:484.11ms +step:2130/57344 train_time:1030892ms step_avg:483.99ms +step:2131/57344 train_time:1031355ms step_avg:483.98ms +grad accum step:533/14336 +step:2132/57344 train_time:1032445ms step_avg:484.26ms +step:2133/57344 train_time:1032457ms step_avg:484.04ms +step:2134/57344 train_time:1032679ms step_avg:483.92ms +step:2135/57344 train_time:1033140ms step_avg:483.91ms +grad accum step:534/14336 +step:2136/57344 train_time:1034227ms step_avg:484.19ms +step:2137/57344 train_time:1034243ms step_avg:483.97ms +step:2138/57344 train_time:1034466ms step_avg:483.85ms +step:2139/57344 train_time:1034929ms step_avg:483.84ms +grad accum step:535/14336 +step:2140/57344 train_time:1036017ms step_avg:484.12ms +step:2141/57344 train_time:1036035ms step_avg:483.90ms +step:2142/57344 train_time:1036257ms step_avg:483.78ms +step:2143/57344 train_time:1036719ms step_avg:483.77ms +grad accum step:536/14336 +step:2144/57344 train_time:1037804ms step_avg:484.05ms +step:2145/57344 train_time:1037821ms step_avg:483.83ms +step:2146/57344 train_time:1038044ms step_avg:483.71ms +step:2147/57344 train_time:1038505ms step_avg:483.70ms +grad accum step:537/14336 +step:2148/57344 train_time:1039590ms step_avg:483.98ms +step:2149/57344 train_time:1039607ms step_avg:483.76ms +step:2150/57344 train_time:1039829ms step_avg:483.64ms +step:2151/57344 train_time:1040291ms step_avg:483.63ms +grad accum step:538/14336 +step:2152/57344 train_time:1041378ms step_avg:483.91ms +step:2153/57344 train_time:1041394ms step_avg:483.69ms +step:2154/57344 train_time:1041616ms step_avg:483.57ms +step:2155/57344 train_time:1042077ms step_avg:483.56ms +grad accum step:539/14336 +step:2156/57344 train_time:1043161ms step_avg:483.84ms +step:2157/57344 train_time:1043178ms step_avg:483.62ms +step:2158/57344 train_time:1043401ms step_avg:483.50ms +step:2159/57344 train_time:1043862ms step_avg:483.49ms +grad accum step:540/14336 +step:2160/57344 train_time:1044949ms step_avg:483.77ms +step:2161/57344 train_time:1044965ms step_avg:483.56ms +step:2162/57344 train_time:1045188ms step_avg:483.44ms +step:2163/57344 train_time:1045649ms step_avg:483.43ms +grad accum step:541/14336 +step:2164/57344 train_time:1046738ms step_avg:483.71ms +step:2165/57344 train_time:1046754ms step_avg:483.49ms +step:2166/57344 train_time:1046976ms step_avg:483.37ms +step:2167/57344 train_time:1047437ms step_avg:483.36ms +grad accum step:542/14336 +step:2168/57344 train_time:1048521ms step_avg:483.63ms +step:2169/57344 train_time:1048538ms step_avg:483.42ms +step:2170/57344 train_time:1048761ms step_avg:483.30ms +step:2171/57344 train_time:1049223ms step_avg:483.29ms +grad accum step:543/14336 +step:2172/57344 train_time:1050310ms step_avg:483.57ms +step:2173/57344 train_time:1050327ms step_avg:483.35ms +step:2174/57344 train_time:1050551ms step_avg:483.23ms +step:2175/57344 train_time:1051012ms step_avg:483.22ms +grad accum step:544/14336 +step:2176/57344 train_time:1052098ms step_avg:483.50ms +step:2176/57344 val_loss:7.671322 train_time:1052098ms step_avg:483.50ms +step:2177/57344 train_time:1052110ms step_avg:483.28ms +step:2178/57344 train_time:1052309ms step_avg:483.15ms +step:2179/57344 train_time:1052771ms step_avg:483.14ms +grad accum step:545/14336 +step:2180/57344 train_time:1053857ms step_avg:483.42ms +step:2181/57344 train_time:1053874ms step_avg:483.21ms +step:2182/57344 train_time:1054096ms step_avg:483.09ms +step:2183/57344 train_time:1054557ms step_avg:483.08ms +grad accum step:546/14336 +step:2184/57344 train_time:1055642ms step_avg:483.35ms +step:2185/57344 train_time:1055660ms step_avg:483.14ms +step:2186/57344 train_time:1055882ms step_avg:483.02ms +step:2187/57344 train_time:1056345ms step_avg:483.01ms +grad accum step:547/14336 +step:2188/57344 train_time:1057428ms step_avg:483.29ms +step:2189/57344 train_time:1057445ms step_avg:483.07ms +step:2190/57344 train_time:1057668ms step_avg:482.95ms +step:2191/57344 train_time:1058128ms step_avg:482.94ms +grad accum step:548/14336 +step:2192/57344 train_time:1059215ms step_avg:483.22ms +step:2193/57344 train_time:1059232ms step_avg:483.01ms +step:2194/57344 train_time:1059455ms step_avg:482.89ms +step:2195/57344 train_time:1059916ms step_avg:482.88ms +grad accum step:549/14336 +step:2196/57344 train_time:1061001ms step_avg:483.15ms +step:2197/57344 train_time:1061018ms step_avg:482.94ms +step:2198/57344 train_time:1061240ms step_avg:482.82ms +step:2199/57344 train_time:1061703ms step_avg:482.81ms +grad accum step:550/14336 +step:2200/57344 train_time:1062791ms step_avg:483.09ms +step:2201/57344 train_time:1062808ms step_avg:482.88ms +step:2202/57344 train_time:1063031ms step_avg:482.76ms +step:2203/57344 train_time:1063492ms step_avg:482.75ms +grad accum step:551/14336 +step:2204/57344 train_time:1064579ms step_avg:483.02ms +step:2205/57344 train_time:1064597ms step_avg:482.81ms +step:2206/57344 train_time:1064819ms step_avg:482.69ms +step:2207/57344 train_time:1065282ms step_avg:482.68ms +grad accum step:552/14336 +step:2208/57344 train_time:1066366ms step_avg:482.96ms +step:2209/57344 train_time:1066384ms step_avg:482.75ms +step:2210/57344 train_time:1066606ms step_avg:482.63ms +step:2211/57344 train_time:1067069ms step_avg:482.62ms +grad accum step:553/14336 +step:2212/57344 train_time:1068153ms step_avg:482.89ms +step:2213/57344 train_time:1068170ms step_avg:482.68ms +step:2214/57344 train_time:1068394ms step_avg:482.56ms +step:2215/57344 train_time:1068855ms step_avg:482.55ms +grad accum step:554/14336 +step:2216/57344 train_time:1069939ms step_avg:482.82ms +step:2217/57344 train_time:1069957ms step_avg:482.61ms +step:2218/57344 train_time:1070180ms step_avg:482.50ms +step:2219/57344 train_time:1070643ms step_avg:482.49ms +grad accum step:555/14336 +step:2220/57344 train_time:1071730ms step_avg:482.76ms +step:2221/57344 train_time:1071747ms step_avg:482.55ms +step:2222/57344 train_time:1071969ms step_avg:482.43ms +step:2223/57344 train_time:1072432ms step_avg:482.43ms +grad accum step:556/14336 +step:2224/57344 train_time:1073515ms step_avg:482.70ms +step:2225/57344 train_time:1073533ms step_avg:482.49ms +step:2226/57344 train_time:1073755ms step_avg:482.37ms +step:2227/57344 train_time:1074216ms step_avg:482.36ms +grad accum step:557/14336 +step:2228/57344 train_time:1075301ms step_avg:482.63ms +step:2229/57344 train_time:1075318ms step_avg:482.42ms +step:2230/57344 train_time:1075541ms step_avg:482.31ms +step:2231/57344 train_time:1076003ms step_avg:482.30ms +grad accum step:558/14336 +step:2232/57344 train_time:1077088ms step_avg:482.57ms +step:2233/57344 train_time:1077106ms step_avg:482.36ms +step:2234/57344 train_time:1077329ms step_avg:482.24ms +step:2235/57344 train_time:1077790ms step_avg:482.23ms +grad accum step:559/14336 +step:2236/57344 train_time:1078874ms step_avg:482.50ms +step:2237/57344 train_time:1078891ms step_avg:482.29ms +step:2238/57344 train_time:1079113ms step_avg:482.18ms +step:2239/57344 train_time:1079574ms step_avg:482.17ms +grad accum step:560/14336 +step:2240/57344 train_time:1080661ms step_avg:482.44ms +step:2240/57344 val_loss:7.671769 train_time:1080662ms step_avg:482.44ms +step:2241/57344 train_time:1080674ms step_avg:482.23ms +step:2242/57344 train_time:1080871ms step_avg:482.10ms +step:2243/57344 train_time:1081333ms step_avg:482.09ms +grad accum step:561/14336 +step:2244/57344 train_time:1082418ms step_avg:482.36ms +step:2245/57344 train_time:1082434ms step_avg:482.15ms +step:2246/57344 train_time:1082657ms step_avg:482.04ms +step:2247/57344 train_time:1083118ms step_avg:482.03ms +grad accum step:562/14336 +step:2248/57344 train_time:1084203ms step_avg:482.30ms +step:2249/57344 train_time:1084219ms step_avg:482.09ms +step:2250/57344 train_time:1084442ms step_avg:481.97ms +step:2251/57344 train_time:1084904ms step_avg:481.97ms +grad accum step:563/14336 +step:2252/57344 train_time:1085988ms step_avg:482.23ms +step:2253/57344 train_time:1086005ms step_avg:482.03ms +step:2254/57344 train_time:1086228ms step_avg:481.91ms +step:2255/57344 train_time:1086688ms step_avg:481.90ms +grad accum step:564/14336 +step:2256/57344 train_time:1087771ms step_avg:482.17ms +step:2257/57344 train_time:1087789ms step_avg:481.96ms +step:2258/57344 train_time:1088010ms step_avg:481.85ms +step:2259/57344 train_time:1088472ms step_avg:481.84ms +grad accum step:565/14336 +step:2260/57344 train_time:1089554ms step_avg:482.10ms +step:2261/57344 train_time:1089572ms step_avg:481.90ms +step:2262/57344 train_time:1089793ms step_avg:481.78ms +step:2263/57344 train_time:1090256ms step_avg:481.77ms +grad accum step:566/14336 +step:2264/57344 train_time:1091340ms step_avg:482.04ms +step:2265/57344 train_time:1091357ms step_avg:481.84ms +step:2266/57344 train_time:1091580ms step_avg:481.72ms +step:2267/57344 train_time:1092041ms step_avg:481.71ms +grad accum step:567/14336 +step:2268/57344 train_time:1093129ms step_avg:481.98ms +step:2269/57344 train_time:1093145ms step_avg:481.77ms +step:2270/57344 train_time:1093368ms step_avg:481.66ms +step:2271/57344 train_time:1093830ms step_avg:481.65ms +grad accum step:568/14336 +step:2272/57344 train_time:1094916ms step_avg:481.92ms +step:2273/57344 train_time:1094932ms step_avg:481.71ms +step:2274/57344 train_time:1095154ms step_avg:481.60ms +step:2275/57344 train_time:1095617ms step_avg:481.59ms +grad accum step:569/14336 +step:2276/57344 train_time:1096701ms step_avg:481.85ms +step:2277/57344 train_time:1096718ms step_avg:481.65ms +step:2278/57344 train_time:1096941ms step_avg:481.54ms +step:2279/57344 train_time:1097403ms step_avg:481.53ms +grad accum step:570/14336 +step:2280/57344 train_time:1098488ms step_avg:481.79ms +step:2281/57344 train_time:1098505ms step_avg:481.59ms +step:2282/57344 train_time:1098728ms step_avg:481.48ms +step:2283/57344 train_time:1099191ms step_avg:481.47ms +grad accum step:571/14336 +step:2284/57344 train_time:1100276ms step_avg:481.73ms +step:2285/57344 train_time:1100294ms step_avg:481.53ms +step:2286/57344 train_time:1100516ms step_avg:481.42ms +step:2287/57344 train_time:1100979ms step_avg:481.41ms +grad accum step:572/14336 +step:2288/57344 train_time:1102063ms step_avg:481.67ms +step:2289/57344 train_time:1102080ms step_avg:481.47ms +step:2290/57344 train_time:1102303ms step_avg:481.35ms +step:2291/57344 train_time:1102763ms step_avg:481.35ms +grad accum step:573/14336 +step:2292/57344 train_time:1103849ms step_avg:481.61ms +step:2293/57344 train_time:1103867ms step_avg:481.41ms +step:2294/57344 train_time:1104090ms step_avg:481.29ms +step:2295/57344 train_time:1104552ms step_avg:481.29ms +grad accum step:574/14336 +step:2296/57344 train_time:1105638ms step_avg:481.55ms +step:2297/57344 train_time:1105656ms step_avg:481.35ms +step:2298/57344 train_time:1105877ms step_avg:481.23ms +step:2299/57344 train_time:1106340ms step_avg:481.23ms +grad accum step:575/14336 +step:2300/57344 train_time:1107425ms step_avg:481.49ms +step:2301/57344 train_time:1107441ms step_avg:481.29ms +step:2302/57344 train_time:1107663ms step_avg:481.17ms +step:2303/57344 train_time:1108125ms step_avg:481.17ms +grad accum step:576/14336 +step:2304/57344 train_time:1109210ms step_avg:481.43ms +step:2304/57344 val_loss:7.721270 train_time:1109210ms step_avg:481.43ms +step:2305/57344 train_time:1109222ms step_avg:481.22ms +step:2306/57344 train_time:1109420ms step_avg:481.10ms +step:2307/57344 train_time:1109884ms step_avg:481.09ms +grad accum step:577/14336 +step:2308/57344 train_time:1110969ms step_avg:481.36ms +step:2309/57344 train_time:1110986ms step_avg:481.15ms +step:2310/57344 train_time:1111213ms step_avg:481.04ms +step:2311/57344 train_time:1111694ms step_avg:481.04ms +grad accum step:578/14336 +step:2312/57344 train_time:1112832ms step_avg:481.33ms +step:2313/57344 train_time:1112848ms step_avg:481.13ms +step:2314/57344 train_time:1113075ms step_avg:481.02ms +step:2315/57344 train_time:1113558ms step_avg:481.02ms +grad accum step:579/14336 +step:2316/57344 train_time:1114694ms step_avg:481.30ms +step:2317/57344 train_time:1114711ms step_avg:481.10ms +step:2318/57344 train_time:1114938ms step_avg:480.99ms +step:2319/57344 train_time:1115420ms step_avg:480.99ms +grad accum step:580/14336 +step:2320/57344 train_time:1116559ms step_avg:481.28ms +step:2321/57344 train_time:1116576ms step_avg:481.08ms +step:2322/57344 train_time:1116803ms step_avg:480.97ms +step:2323/57344 train_time:1117286ms step_avg:480.97ms +grad accum step:581/14336 +step:2324/57344 train_time:1118424ms step_avg:481.25ms +step:2325/57344 train_time:1118441ms step_avg:481.05ms +step:2326/57344 train_time:1118970ms step_avg:481.07ms +step:2327/57344 train_time:1119200ms step_avg:480.96ms +grad accum step:582/14336 +step:2328/57344 train_time:1120336ms step_avg:481.24ms +step:2329/57344 train_time:1120352ms step_avg:481.04ms +step:2330/57344 train_time:1120579ms step_avg:480.94ms +step:2331/57344 train_time:1121063ms step_avg:480.94ms +grad accum step:583/14336 +step:2332/57344 train_time:1122198ms step_avg:481.22ms +step:2333/57344 train_time:1122215ms step_avg:481.02ms +step:2334/57344 train_time:1122441ms step_avg:480.91ms +step:2335/57344 train_time:1122924ms step_avg:480.91ms +grad accum step:584/14336 +step:2336/57344 train_time:1124062ms step_avg:481.19ms +step:2337/57344 train_time:1124080ms step_avg:480.99ms +step:2338/57344 train_time:1124307ms step_avg:480.88ms +step:2339/57344 train_time:1124790ms step_avg:480.88ms +grad accum step:585/14336 +step:2340/57344 train_time:1125929ms step_avg:481.17ms +step:2341/57344 train_time:1125946ms step_avg:480.97ms +step:2342/57344 train_time:1126173ms step_avg:480.86ms +step:2343/57344 train_time:1126656ms step_avg:480.86ms +grad accum step:586/14336 +step:2344/57344 train_time:1127793ms step_avg:481.14ms +step:2345/57344 train_time:1127810ms step_avg:480.94ms +step:2346/57344 train_time:1128036ms step_avg:480.83ms +step:2347/57344 train_time:1128518ms step_avg:480.83ms +grad accum step:587/14336 +step:2348/57344 train_time:1129655ms step_avg:481.11ms +step:2349/57344 train_time:1129672ms step_avg:480.92ms +step:2350/57344 train_time:1129899ms step_avg:480.81ms +step:2351/57344 train_time:1130379ms step_avg:480.81ms +grad accum step:588/14336 +step:2352/57344 train_time:1131517ms step_avg:481.09ms +step:2353/57344 train_time:1131534ms step_avg:480.89ms +step:2354/57344 train_time:1131762ms step_avg:480.78ms +step:2355/57344 train_time:1132246ms step_avg:480.78ms +grad accum step:589/14336 +step:2356/57344 train_time:1133383ms step_avg:481.06ms +step:2357/57344 train_time:1133401ms step_avg:480.87ms +step:2358/57344 train_time:1133628ms step_avg:480.76ms +step:2359/57344 train_time:1134112ms step_avg:480.76ms +grad accum step:590/14336 +step:2360/57344 train_time:1135249ms step_avg:481.04ms +step:2361/57344 train_time:1135265ms step_avg:480.84ms +step:2362/57344 train_time:1135493ms step_avg:480.73ms +step:2363/57344 train_time:1135976ms step_avg:480.73ms +grad accum step:591/14336 +step:2364/57344 train_time:1137109ms step_avg:481.01ms +step:2365/57344 train_time:1137126ms step_avg:480.81ms +step:2366/57344 train_time:1137353ms step_avg:480.71ms +step:2367/57344 train_time:1137836ms step_avg:480.71ms +grad accum step:592/14336 +step:2368/57344 train_time:1138975ms step_avg:480.99ms +step:2368/57344 val_loss:7.682425 train_time:1138975ms step_avg:480.99ms +step:2369/57344 train_time:1138987ms step_avg:480.79ms +step:2370/57344 train_time:1139235ms step_avg:480.69ms +step:2371/57344 train_time:1139707ms step_avg:480.69ms +grad accum step:593/14336 +step:2372/57344 train_time:1140826ms step_avg:480.96ms +step:2373/57344 train_time:1140843ms step_avg:480.76ms +step:2374/57344 train_time:1141065ms step_avg:480.65ms +step:2375/57344 train_time:1141539ms step_avg:480.65ms +grad accum step:594/14336 +step:2376/57344 train_time:1142654ms step_avg:480.92ms +step:2377/57344 train_time:1142671ms step_avg:480.72ms +step:2378/57344 train_time:1142894ms step_avg:480.61ms +step:2379/57344 train_time:1143368ms step_avg:480.61ms +grad accum step:595/14336 +step:2380/57344 train_time:1144486ms step_avg:480.88ms +step:2381/57344 train_time:1144501ms step_avg:480.68ms +step:2382/57344 train_time:1144725ms step_avg:480.57ms +step:2383/57344 train_time:1145199ms step_avg:480.57ms +grad accum step:596/14336 +step:2384/57344 train_time:1146319ms step_avg:480.84ms +step:2385/57344 train_time:1146335ms step_avg:480.64ms +step:2386/57344 train_time:1146559ms step_avg:480.54ms +step:2387/57344 train_time:1147033ms step_avg:480.53ms +grad accum step:597/14336 +step:2388/57344 train_time:1148158ms step_avg:480.80ms +step:2389/57344 train_time:1148174ms step_avg:480.61ms +step:2390/57344 train_time:1148397ms step_avg:480.50ms +step:2391/57344 train_time:1148872ms step_avg:480.50ms +grad accum step:598/14336 +step:2392/57344 train_time:1149994ms step_avg:480.77ms +step:2393/57344 train_time:1150010ms step_avg:480.57ms +step:2394/57344 train_time:1150235ms step_avg:480.47ms +step:2395/57344 train_time:1150712ms step_avg:480.46ms +grad accum step:599/14336 +step:2396/57344 train_time:1151835ms step_avg:480.73ms +step:2397/57344 train_time:1151852ms step_avg:480.54ms +step:2398/57344 train_time:1152076ms step_avg:480.43ms +step:2399/57344 train_time:1152552ms step_avg:480.43ms +grad accum step:600/14336 +step:2400/57344 train_time:1153672ms step_avg:480.70ms +step:2401/57344 train_time:1153688ms step_avg:480.50ms +step:2402/57344 train_time:1153912ms step_avg:480.40ms +step:2403/57344 train_time:1154387ms step_avg:480.39ms +grad accum step:601/14336 +step:2404/57344 train_time:1155510ms step_avg:480.66ms +step:2405/57344 train_time:1155526ms step_avg:480.47ms +step:2406/57344 train_time:1155750ms step_avg:480.36ms +step:2407/57344 train_time:1156226ms step_avg:480.36ms +grad accum step:602/14336 +step:2408/57344 train_time:1157347ms step_avg:480.63ms +step:2409/57344 train_time:1157363ms step_avg:480.43ms +step:2410/57344 train_time:1157589ms step_avg:480.33ms +step:2411/57344 train_time:1158066ms step_avg:480.33ms +grad accum step:603/14336 +step:2412/57344 train_time:1159187ms step_avg:480.59ms +step:2413/57344 train_time:1159204ms step_avg:480.40ms +step:2414/57344 train_time:1159427ms step_avg:480.29ms +step:2415/57344 train_time:1159905ms step_avg:480.29ms +grad accum step:604/14336 +step:2416/57344 train_time:1161030ms step_avg:480.56ms +step:2417/57344 train_time:1161045ms step_avg:480.37ms +step:2418/57344 train_time:1161271ms step_avg:480.26ms +step:2419/57344 train_time:1161747ms step_avg:480.26ms +grad accum step:605/14336 +step:2420/57344 train_time:1162871ms step_avg:480.53ms +step:2421/57344 train_time:1162887ms step_avg:480.33ms +step:2422/57344 train_time:1163112ms step_avg:480.23ms +step:2423/57344 train_time:1163588ms step_avg:480.23ms +grad accum step:606/14336 +step:2424/57344 train_time:1164713ms step_avg:480.49ms +step:2425/57344 train_time:1164730ms step_avg:480.30ms +step:2426/57344 train_time:1164955ms step_avg:480.20ms +step:2427/57344 train_time:1165430ms step_avg:480.19ms +grad accum step:607/14336 +step:2428/57344 train_time:1166554ms step_avg:480.46ms +step:2429/57344 train_time:1166571ms step_avg:480.27ms +step:2430/57344 train_time:1166794ms step_avg:480.16ms +step:2431/57344 train_time:1167273ms step_avg:480.16ms +grad accum step:608/14336 +step:2432/57344 train_time:1168401ms step_avg:480.43ms +step:2432/57344 val_loss:7.659926 train_time:1168401ms step_avg:480.43ms +step:2433/57344 train_time:1168413ms step_avg:480.24ms +step:2434/57344 train_time:1168692ms step_avg:480.15ms +step:2435/57344 train_time:1169164ms step_avg:480.15ms +grad accum step:609/14336 +step:2436/57344 train_time:1170277ms step_avg:480.41ms +step:2437/57344 train_time:1170293ms step_avg:480.22ms +step:2438/57344 train_time:1170515ms step_avg:480.11ms +step:2439/57344 train_time:1170989ms step_avg:480.11ms +grad accum step:610/14336 +step:2440/57344 train_time:1172103ms step_avg:480.37ms +step:2441/57344 train_time:1172120ms step_avg:480.18ms +step:2442/57344 train_time:1172344ms step_avg:480.08ms +step:2443/57344 train_time:1172817ms step_avg:480.07ms +grad accum step:611/14336 +step:2444/57344 train_time:1173930ms step_avg:480.33ms +step:2445/57344 train_time:1173947ms step_avg:480.14ms +step:2446/57344 train_time:1174169ms step_avg:480.04ms +step:2447/57344 train_time:1174644ms step_avg:480.03ms +grad accum step:612/14336 +step:2448/57344 train_time:1175758ms step_avg:480.29ms +step:2449/57344 train_time:1175775ms step_avg:480.10ms +step:2450/57344 train_time:1175998ms step_avg:480.00ms +step:2451/57344 train_time:1176471ms step_avg:480.00ms +grad accum step:613/14336 +step:2452/57344 train_time:1177587ms step_avg:480.26ms +step:2453/57344 train_time:1177604ms step_avg:480.07ms +step:2454/57344 train_time:1177827ms step_avg:479.96ms +step:2455/57344 train_time:1178300ms step_avg:479.96ms +grad accum step:614/14336 +step:2456/57344 train_time:1179417ms step_avg:480.22ms +step:2457/57344 train_time:1179434ms step_avg:480.03ms +step:2458/57344 train_time:1179657ms step_avg:479.93ms +step:2459/57344 train_time:1180130ms step_avg:479.92ms +grad accum step:615/14336 +step:2460/57344 train_time:1181248ms step_avg:480.18ms +step:2461/57344 train_time:1181265ms step_avg:479.99ms +step:2462/57344 train_time:1181488ms step_avg:479.89ms +step:2463/57344 train_time:1181963ms step_avg:479.89ms +grad accum step:616/14336 +step:2464/57344 train_time:1183078ms step_avg:480.15ms +step:2465/57344 train_time:1183095ms step_avg:479.96ms +step:2466/57344 train_time:1183318ms step_avg:479.85ms +step:2467/57344 train_time:1183794ms step_avg:479.85ms +grad accum step:617/14336 +step:2468/57344 train_time:1184912ms step_avg:480.11ms +step:2469/57344 train_time:1184929ms step_avg:479.92ms +step:2470/57344 train_time:1185153ms step_avg:479.82ms +step:2471/57344 train_time:1185627ms step_avg:479.82ms +grad accum step:618/14336 +step:2472/57344 train_time:1186746ms step_avg:480.08ms +step:2473/57344 train_time:1186763ms step_avg:479.89ms +step:2474/57344 train_time:1186987ms step_avg:479.78ms +step:2475/57344 train_time:1187462ms step_avg:479.78ms +grad accum step:619/14336 +step:2476/57344 train_time:1188584ms step_avg:480.04ms +step:2477/57344 train_time:1188601ms step_avg:479.86ms +step:2478/57344 train_time:1188825ms step_avg:479.75ms +step:2479/57344 train_time:1189301ms step_avg:479.75ms +grad accum step:620/14336 +step:2480/57344 train_time:1190422ms step_avg:480.01ms +step:2481/57344 train_time:1190440ms step_avg:479.82ms +step:2482/57344 train_time:1190664ms step_avg:479.72ms +step:2483/57344 train_time:1191139ms step_avg:479.72ms +grad accum step:621/14336 +step:2484/57344 train_time:1192260ms step_avg:479.98ms +step:2485/57344 train_time:1192276ms step_avg:479.79ms +step:2486/57344 train_time:1192501ms step_avg:479.69ms +step:2487/57344 train_time:1192977ms step_avg:479.69ms +grad accum step:622/14336 +step:2488/57344 train_time:1194100ms step_avg:479.94ms +step:2489/57344 train_time:1194117ms step_avg:479.76ms +step:2490/57344 train_time:1194341ms step_avg:479.65ms +step:2491/57344 train_time:1194819ms step_avg:479.65ms +grad accum step:623/14336 +step:2492/57344 train_time:1195942ms step_avg:479.91ms +step:2493/57344 train_time:1195959ms step_avg:479.73ms +step:2494/57344 train_time:1196183ms step_avg:479.62ms +step:2495/57344 train_time:1196659ms step_avg:479.62ms +grad accum step:624/14336 +step:2496/57344 train_time:1197781ms step_avg:479.88ms +step:2496/57344 val_loss:7.657366 train_time:1197782ms step_avg:479.88ms +step:2497/57344 train_time:1197794ms step_avg:479.69ms +step:2498/57344 train_time:1197996ms step_avg:479.58ms +step:2499/57344 train_time:1198477ms step_avg:479.58ms +grad accum step:625/14336 +step:2500/57344 train_time:1199610ms step_avg:479.84ms +step:2501/57344 train_time:1199627ms step_avg:479.66ms +step:2502/57344 train_time:1199853ms step_avg:479.56ms +step:2503/57344 train_time:1200335ms step_avg:479.56ms +grad accum step:626/14336 +step:2504/57344 train_time:1201463ms step_avg:479.82ms +step:2505/57344 train_time:1201481ms step_avg:479.63ms +step:2506/57344 train_time:1201707ms step_avg:479.53ms +step:2507/57344 train_time:1202187ms step_avg:479.53ms +grad accum step:627/14336 +step:2508/57344 train_time:1203316ms step_avg:479.79ms +step:2509/57344 train_time:1203333ms step_avg:479.61ms +step:2510/57344 train_time:1203558ms step_avg:479.51ms +step:2511/57344 train_time:1204038ms step_avg:479.51ms +grad accum step:628/14336 +step:2512/57344 train_time:1205169ms step_avg:479.76ms +step:2513/57344 train_time:1205186ms step_avg:479.58ms +step:2514/57344 train_time:1205412ms step_avg:479.48ms +step:2515/57344 train_time:1205894ms step_avg:479.48ms +grad accum step:629/14336 +step:2516/57344 train_time:1207023ms step_avg:479.74ms +step:2517/57344 train_time:1207040ms step_avg:479.56ms +step:2518/57344 train_time:1207267ms step_avg:479.45ms +step:2519/57344 train_time:1207746ms step_avg:479.45ms +grad accum step:630/14336 +step:2520/57344 train_time:1208879ms step_avg:479.71ms +step:2521/57344 train_time:1208896ms step_avg:479.53ms +step:2522/57344 train_time:1209122ms step_avg:479.43ms +step:2523/57344 train_time:1209601ms step_avg:479.43ms +grad accum step:631/14336 +step:2524/57344 train_time:1210731ms step_avg:479.69ms +step:2525/57344 train_time:1210748ms step_avg:479.50ms +step:2526/57344 train_time:1210974ms step_avg:479.40ms +step:2527/57344 train_time:1211456ms step_avg:479.40ms +grad accum step:632/14336 +step:2528/57344 train_time:1212584ms step_avg:479.66ms +step:2529/57344 train_time:1212602ms step_avg:479.48ms +step:2530/57344 train_time:1212827ms step_avg:479.38ms +step:2531/57344 train_time:1213306ms step_avg:479.38ms +grad accum step:633/14336 +step:2532/57344 train_time:1214438ms step_avg:479.64ms +step:2533/57344 train_time:1214455ms step_avg:479.45ms +step:2534/57344 train_time:1214682ms step_avg:479.35ms +step:2535/57344 train_time:1215161ms step_avg:479.35ms +grad accum step:634/14336 +step:2536/57344 train_time:1216292ms step_avg:479.61ms +step:2537/57344 train_time:1216309ms step_avg:479.43ms +step:2538/57344 train_time:1216536ms step_avg:479.33ms +step:2539/57344 train_time:1217015ms step_avg:479.33ms +grad accum step:635/14336 +step:2540/57344 train_time:1218148ms step_avg:479.59ms +step:2541/57344 train_time:1218165ms step_avg:479.40ms +step:2542/57344 train_time:1218391ms step_avg:479.30ms +step:2543/57344 train_time:1218870ms step_avg:479.30ms +grad accum step:636/14336 +step:2544/57344 train_time:1220000ms step_avg:479.56ms +step:2545/57344 train_time:1220018ms step_avg:479.38ms +step:2546/57344 train_time:1220244ms step_avg:479.28ms +step:2547/57344 train_time:1220723ms step_avg:479.28ms +grad accum step:637/14336 +step:2548/57344 train_time:1221854ms step_avg:479.53ms +step:2549/57344 train_time:1221871ms step_avg:479.35ms +step:2550/57344 train_time:1222097ms step_avg:479.25ms +step:2551/57344 train_time:1222578ms step_avg:479.25ms +grad accum step:638/14336 +step:2552/57344 train_time:1223709ms step_avg:479.51ms +step:2553/57344 train_time:1223727ms step_avg:479.33ms +step:2554/57344 train_time:1223953ms step_avg:479.23ms +step:2555/57344 train_time:1224434ms step_avg:479.23ms +grad accum step:639/14336 +step:2556/57344 train_time:1225566ms step_avg:479.49ms +step:2557/57344 train_time:1225583ms step_avg:479.30ms +step:2558/57344 train_time:1225809ms step_avg:479.21ms +step:2559/57344 train_time:1226290ms step_avg:479.21ms +grad accum step:640/14336 +step:2560/57344 train_time:1227423ms step_avg:479.46ms +step:2560/57344 val_loss:7.671144 train_time:1227424ms step_avg:479.46ms +step:2561/57344 train_time:1227435ms step_avg:479.28ms +step:2562/57344 train_time:1227638ms step_avg:479.17ms +step:2563/57344 train_time:1228120ms step_avg:479.17ms +grad accum step:641/14336 +step:2564/57344 train_time:1229255ms step_avg:479.43ms +step:2565/57344 train_time:1229273ms step_avg:479.25ms +step:2566/57344 train_time:1229499ms step_avg:479.15ms +step:2567/57344 train_time:1229980ms step_avg:479.15ms +grad accum step:642/14336 +step:2568/57344 train_time:1231115ms step_avg:479.41ms +step:2569/57344 train_time:1231132ms step_avg:479.23ms +step:2570/57344 train_time:1231358ms step_avg:479.13ms +step:2571/57344 train_time:1231840ms step_avg:479.13ms +grad accum step:643/14336 +step:2572/57344 train_time:1232975ms step_avg:479.38ms +step:2573/57344 train_time:1232992ms step_avg:479.20ms +step:2574/57344 train_time:1233219ms step_avg:479.11ms +step:2575/57344 train_time:1233701ms step_avg:479.11ms +grad accum step:644/14336 +step:2576/57344 train_time:1234835ms step_avg:479.36ms +step:2577/57344 train_time:1234853ms step_avg:479.18ms +step:2578/57344 train_time:1235080ms step_avg:479.08ms +step:2579/57344 train_time:1235562ms step_avg:479.09ms +grad accum step:645/14336 +step:2580/57344 train_time:1236697ms step_avg:479.34ms +step:2581/57344 train_time:1236714ms step_avg:479.16ms +step:2582/57344 train_time:1236941ms step_avg:479.06ms +step:2583/57344 train_time:1237422ms step_avg:479.06ms +grad accum step:646/14336 +step:2584/57344 train_time:1238558ms step_avg:479.32ms +step:2585/57344 train_time:1238575ms step_avg:479.14ms +step:2586/57344 train_time:1238803ms step_avg:479.04ms +step:2587/57344 train_time:1239284ms step_avg:479.04ms +grad accum step:647/14336 +step:2588/57344 train_time:1240420ms step_avg:479.30ms +step:2589/57344 train_time:1240437ms step_avg:479.12ms +step:2590/57344 train_time:1240664ms step_avg:479.02ms +step:2591/57344 train_time:1241146ms step_avg:479.02ms +grad accum step:648/14336 +step:2592/57344 train_time:1242281ms step_avg:479.28ms +step:2593/57344 train_time:1242298ms step_avg:479.10ms +step:2594/57344 train_time:1242525ms step_avg:479.00ms +step:2595/57344 train_time:1243007ms step_avg:479.00ms +grad accum step:649/14336 +step:2596/57344 train_time:1244144ms step_avg:479.25ms +step:2597/57344 train_time:1244161ms step_avg:479.08ms +step:2598/57344 train_time:1244388ms step_avg:478.98ms +step:2599/57344 train_time:1244871ms step_avg:478.98ms +grad accum step:650/14336 +step:2600/57344 train_time:1246007ms step_avg:479.23ms +step:2601/57344 train_time:1246024ms step_avg:479.06ms +step:2602/57344 train_time:1246251ms step_avg:478.96ms +step:2603/57344 train_time:1246733ms step_avg:478.96ms +grad accum step:651/14336 +step:2604/57344 train_time:1247872ms step_avg:479.21ms +step:2605/57344 train_time:1247889ms step_avg:479.04ms +step:2606/57344 train_time:1248115ms step_avg:478.94ms +step:2607/57344 train_time:1248600ms step_avg:478.94ms +grad accum step:652/14336 +step:2608/57344 train_time:1249738ms step_avg:479.19ms +step:2609/57344 train_time:1249755ms step_avg:479.02ms +step:2610/57344 train_time:1249983ms step_avg:478.92ms +step:2611/57344 train_time:1250465ms step_avg:478.92ms +grad accum step:653/14336 +step:2612/57344 train_time:1251602ms step_avg:479.17ms +step:2613/57344 train_time:1251620ms step_avg:479.00ms +step:2614/57344 train_time:1251847ms step_avg:478.90ms +step:2615/57344 train_time:1252330ms step_avg:478.90ms +grad accum step:654/14336 +step:2616/57344 train_time:1253468ms step_avg:479.15ms +step:2617/57344 train_time:1253486ms step_avg:478.98ms +step:2618/57344 train_time:1253713ms step_avg:478.88ms +step:2619/57344 train_time:1254197ms step_avg:478.88ms +grad accum step:655/14336 +step:2620/57344 train_time:1255337ms step_avg:479.14ms +step:2621/57344 train_time:1255354ms step_avg:478.96ms +step:2622/57344 train_time:1255581ms step_avg:478.86ms +step:2623/57344 train_time:1256063ms step_avg:478.87ms +grad accum step:656/14336 +step:2624/57344 train_time:1257202ms step_avg:479.12ms +step:2624/57344 val_loss:7.665736 train_time:1257202ms step_avg:479.12ms +step:2625/57344 train_time:1257214ms step_avg:478.94ms +step:2626/57344 train_time:1257418ms step_avg:478.83ms +step:2627/57344 train_time:1257903ms step_avg:478.84ms +grad accum step:657/14336 +step:2628/57344 train_time:1259042ms step_avg:479.09ms +step:2629/57344 train_time:1259060ms step_avg:478.91ms +step:2630/57344 train_time:1259288ms step_avg:478.82ms +step:2631/57344 train_time:1259770ms step_avg:478.82ms +grad accum step:658/14336 +step:2632/57344 train_time:1260912ms step_avg:479.07ms +step:2633/57344 train_time:1260930ms step_avg:478.89ms +step:2634/57344 train_time:1261157ms step_avg:478.80ms +step:2635/57344 train_time:1261641ms step_avg:478.80ms +grad accum step:659/14336 +step:2636/57344 train_time:1262781ms step_avg:479.05ms +step:2637/57344 train_time:1262798ms step_avg:478.88ms +step:2638/57344 train_time:1263026ms step_avg:478.78ms +step:2639/57344 train_time:1263508ms step_avg:478.78ms +grad accum step:660/14336 +step:2640/57344 train_time:1264646ms step_avg:479.03ms +step:2641/57344 train_time:1264663ms step_avg:478.86ms +step:2642/57344 train_time:1264891ms step_avg:478.76ms +step:2643/57344 train_time:1265373ms step_avg:478.76ms +grad accum step:661/14336 +step:2644/57344 train_time:1266514ms step_avg:479.01ms +step:2645/57344 train_time:1266531ms step_avg:478.84ms +step:2646/57344 train_time:1266759ms step_avg:478.75ms +step:2647/57344 train_time:1267244ms step_avg:478.75ms +grad accum step:662/14336 +step:2648/57344 train_time:1268384ms step_avg:479.00ms +step:2649/57344 train_time:1268401ms step_avg:478.82ms +step:2650/57344 train_time:1268628ms step_avg:478.73ms +step:2651/57344 train_time:1269112ms step_avg:478.73ms +grad accum step:663/14336 +step:2652/57344 train_time:1270252ms step_avg:478.98ms +step:2653/57344 train_time:1270269ms step_avg:478.80ms +step:2654/57344 train_time:1270497ms step_avg:478.71ms +step:2655/57344 train_time:1270981ms step_avg:478.71ms +grad accum step:664/14336 +step:2656/57344 train_time:1272121ms step_avg:478.96ms +step:2657/57344 train_time:1272138ms step_avg:478.79ms +step:2658/57344 train_time:1272366ms step_avg:478.69ms +step:2659/57344 train_time:1272849ms step_avg:478.69ms +grad accum step:665/14336 +step:2660/57344 train_time:1273987ms step_avg:478.94ms +step:2661/57344 train_time:1274004ms step_avg:478.77ms +step:2662/57344 train_time:1274232ms step_avg:478.67ms +step:2663/57344 train_time:1274715ms step_avg:478.68ms +grad accum step:666/14336 +step:2664/57344 train_time:1275855ms step_avg:478.92ms +step:2665/57344 train_time:1275872ms step_avg:478.75ms +step:2666/57344 train_time:1276100ms step_avg:478.66ms +step:2667/57344 train_time:1276584ms step_avg:478.66ms +grad accum step:667/14336 +step:2668/57344 train_time:1277725ms step_avg:478.91ms +step:2669/57344 train_time:1277742ms step_avg:478.73ms +step:2670/57344 train_time:1277970ms step_avg:478.64ms +step:2671/57344 train_time:1278453ms step_avg:478.64ms +grad accum step:668/14336 +step:2672/57344 train_time:1279592ms step_avg:478.89ms +step:2673/57344 train_time:1279609ms step_avg:478.72ms +step:2674/57344 train_time:1279837ms step_avg:478.62ms +step:2675/57344 train_time:1280319ms step_avg:478.62ms +grad accum step:669/14336 +step:2676/57344 train_time:1281460ms step_avg:478.87ms +step:2677/57344 train_time:1281477ms step_avg:478.70ms +step:2678/57344 train_time:1281705ms step_avg:478.61ms +step:2679/57344 train_time:1282187ms step_avg:478.61ms +grad accum step:670/14336 +step:2680/57344 train_time:1283323ms step_avg:478.85ms +step:2681/57344 train_time:1283341ms step_avg:478.68ms +step:2682/57344 train_time:1283569ms step_avg:478.59ms +step:2683/57344 train_time:1284053ms step_avg:478.59ms +grad accum step:671/14336 +step:2684/57344 train_time:1285191ms step_avg:478.83ms +step:2685/57344 train_time:1285209ms step_avg:478.66ms +step:2686/57344 train_time:1285436ms step_avg:478.57ms +step:2687/57344 train_time:1285921ms step_avg:478.57ms +grad accum step:672/14336 +step:2688/57344 train_time:1287060ms step_avg:478.82ms +step:2688/57344 val_loss:7.658612 train_time:1287061ms step_avg:478.82ms +step:2689/57344 train_time:1287073ms step_avg:478.64ms +step:2690/57344 train_time:1287278ms step_avg:478.54ms +step:2691/57344 train_time:1287763ms step_avg:478.54ms +grad accum step:673/14336 +step:2692/57344 train_time:1288902ms step_avg:478.79ms +step:2693/57344 train_time:1288919ms step_avg:478.62ms +step:2694/57344 train_time:1289146ms step_avg:478.52ms +step:2695/57344 train_time:1289628ms step_avg:478.53ms +grad accum step:674/14336 +step:2696/57344 train_time:1290767ms step_avg:478.77ms +step:2697/57344 train_time:1290784ms step_avg:478.60ms +step:2698/57344 train_time:1291010ms step_avg:478.51ms +step:2699/57344 train_time:1291493ms step_avg:478.51ms +grad accum step:675/14336 +step:2700/57344 train_time:1292633ms step_avg:478.75ms +step:2701/57344 train_time:1292650ms step_avg:478.58ms +step:2702/57344 train_time:1292877ms step_avg:478.49ms +step:2703/57344 train_time:1293361ms step_avg:478.49ms +grad accum step:676/14336 +step:2704/57344 train_time:1294498ms step_avg:478.73ms +step:2705/57344 train_time:1294515ms step_avg:478.56ms +step:2706/57344 train_time:1294743ms step_avg:478.47ms +step:2707/57344 train_time:1295226ms step_avg:478.47ms +grad accum step:677/14336 +step:2708/57344 train_time:1296366ms step_avg:478.72ms +step:2709/57344 train_time:1296383ms step_avg:478.55ms +step:2710/57344 train_time:1296610ms step_avg:478.45ms +step:2711/57344 train_time:1297094ms step_avg:478.46ms +grad accum step:678/14336 +step:2712/57344 train_time:1298232ms step_avg:478.70ms +step:2713/57344 train_time:1298249ms step_avg:478.53ms +step:2714/57344 train_time:1298476ms step_avg:478.44ms +step:2715/57344 train_time:1298958ms step_avg:478.44ms +grad accum step:679/14336 +step:2716/57344 train_time:1300098ms step_avg:478.68ms +step:2717/57344 train_time:1300115ms step_avg:478.51ms +step:2718/57344 train_time:1300343ms step_avg:478.42ms +step:2719/57344 train_time:1300826ms step_avg:478.42ms +grad accum step:680/14336 +step:2720/57344 train_time:1301963ms step_avg:478.66ms +step:2721/57344 train_time:1301980ms step_avg:478.49ms +step:2722/57344 train_time:1302207ms step_avg:478.40ms +step:2723/57344 train_time:1302691ms step_avg:478.40ms +grad accum step:681/14336 +step:2724/57344 train_time:1303829ms step_avg:478.64ms +step:2725/57344 train_time:1303846ms step_avg:478.48ms +step:2726/57344 train_time:1304073ms step_avg:478.38ms +step:2727/57344 train_time:1304555ms step_avg:478.38ms +grad accum step:682/14336 +step:2728/57344 train_time:1305692ms step_avg:478.63ms +step:2729/57344 train_time:1305709ms step_avg:478.46ms +step:2730/57344 train_time:1305935ms step_avg:478.36ms +step:2731/57344 train_time:1306418ms step_avg:478.37ms +grad accum step:683/14336 +step:2732/57344 train_time:1307555ms step_avg:478.61ms +step:2733/57344 train_time:1307576ms step_avg:478.44ms +step:2734/57344 train_time:1307800ms step_avg:478.35ms +step:2735/57344 train_time:1308282ms step_avg:478.35ms +grad accum step:684/14336 +step:2736/57344 train_time:1309420ms step_avg:478.59ms +step:2737/57344 train_time:1309436ms step_avg:478.42ms +step:2738/57344 train_time:1309664ms step_avg:478.33ms +step:2739/57344 train_time:1310146ms step_avg:478.33ms +grad accum step:685/14336 +step:2740/57344 train_time:1311285ms step_avg:478.57ms +step:2741/57344 train_time:1311550ms step_avg:478.49ms +step:2742/57344 train_time:1311748ms step_avg:478.39ms +step:2743/57344 train_time:1312231ms step_avg:478.39ms +grad accum step:686/14336 +step:2744/57344 train_time:1313372ms step_avg:478.63ms +step:2745/57344 train_time:1313389ms step_avg:478.47ms +step:2746/57344 train_time:1313616ms step_avg:478.37ms +step:2747/57344 train_time:1314098ms step_avg:478.38ms +grad accum step:687/14336 +step:2748/57344 train_time:1315236ms step_avg:478.62ms +step:2749/57344 train_time:1315253ms step_avg:478.45ms +step:2750/57344 train_time:1315480ms step_avg:478.36ms +step:2751/57344 train_time:1315963ms step_avg:478.36ms +grad accum step:688/14336 +step:2752/57344 train_time:1317099ms step_avg:478.60ms +step:2752/57344 val_loss:7.654680 train_time:1317100ms step_avg:478.60ms +step:2753/57344 train_time:1323830ms step_avg:480.87ms +step:2754/57344 train_time:1323905ms step_avg:480.72ms +step:2755/57344 train_time:1324331ms step_avg:480.70ms +grad accum step:689/14336 +step:2756/57344 train_time:1325711ms step_avg:481.03ms +step:2757/57344 train_time:1325723ms step_avg:480.86ms +step:2758/57344 train_time:1325928ms step_avg:480.76ms +step:2759/57344 train_time:1326408ms step_avg:480.76ms +grad accum step:690/14336 +step:2760/57344 train_time:1327539ms step_avg:480.99ms +step:2761/57344 train_time:1327556ms step_avg:480.82ms +step:2762/57344 train_time:1327782ms step_avg:480.73ms +step:2763/57344 train_time:1328265ms step_avg:480.73ms +grad accum step:691/14336 +step:2764/57344 train_time:1332995ms step_avg:482.27ms +step:2765/57344 train_time:1333007ms step_avg:482.10ms +step:2766/57344 train_time:1333297ms step_avg:482.03ms +step:2767/57344 train_time:1333773ms step_avg:482.03ms +grad accum step:692/14336 +step:2768/57344 train_time:1334901ms step_avg:482.26ms +step:2769/57344 train_time:1334919ms step_avg:482.09ms +step:2770/57344 train_time:1335145ms step_avg:482.00ms +step:2771/57344 train_time:1335624ms step_avg:482.00ms +grad accum step:693/14336 +step:2772/57344 train_time:1336753ms step_avg:482.23ms +step:2773/57344 train_time:1336770ms step_avg:482.07ms +step:2774/57344 train_time:1336996ms step_avg:481.97ms +step:2775/57344 train_time:1337473ms step_avg:481.97ms +grad accum step:694/14336 +step:2776/57344 train_time:1338603ms step_avg:482.21ms +step:2777/57344 train_time:1338621ms step_avg:482.04ms +step:2778/57344 train_time:1338847ms step_avg:481.95ms +step:2779/57344 train_time:1339325ms step_avg:481.95ms +grad accum step:695/14336 +step:2780/57344 train_time:1340454ms step_avg:482.18ms +step:2781/57344 train_time:1340471ms step_avg:482.01ms +step:2782/57344 train_time:1340696ms step_avg:481.92ms +step:2783/57344 train_time:1341174ms step_avg:481.92ms +grad accum step:696/14336 +step:2784/57344 train_time:1342302ms step_avg:482.15ms +step:2785/57344 train_time:1347665ms step_avg:483.90ms +step:2786/57344 train_time:1347906ms step_avg:483.81ms +step:2787/57344 train_time:1348390ms step_avg:483.81ms +grad accum step:697/14336 +step:2788/57344 train_time:1349514ms step_avg:484.04ms +step:2789/57344 train_time:1349531ms step_avg:483.88ms +step:2790/57344 train_time:1349755ms step_avg:483.78ms +step:2791/57344 train_time:1353199ms step_avg:484.84ms +grad accum step:698/14336 +step:2792/57344 train_time:1354179ms step_avg:485.02ms +step:2793/57344 train_time:1354196ms step_avg:484.85ms +step:2794/57344 train_time:1354420ms step_avg:484.76ms +step:2795/57344 train_time:1354897ms step_avg:484.76ms +grad accum step:699/14336 +step:2796/57344 train_time:1356022ms step_avg:484.99ms +step:2797/57344 train_time:1356039ms step_avg:484.82ms +step:2798/57344 train_time:1356264ms step_avg:484.73ms +step:2799/57344 train_time:1356743ms step_avg:484.72ms +grad accum step:700/14336 +step:2800/57344 train_time:1357872ms step_avg:484.95ms +step:2801/57344 train_time:1357889ms step_avg:484.79ms +step:2802/57344 train_time:1358114ms step_avg:484.69ms +step:2803/57344 train_time:1359183ms step_avg:484.90ms +grad accum step:701/14336 +step:2804/57344 train_time:1360050ms step_avg:485.04ms +step:2805/57344 train_time:1360062ms step_avg:484.87ms +step:2806/57344 train_time:1360266ms step_avg:484.77ms +step:2807/57344 train_time:1360745ms step_avg:484.77ms +grad accum step:702/14336 +step:2808/57344 train_time:1361874ms step_avg:485.00ms +step:2809/57344 train_time:1361891ms step_avg:484.83ms +step:2810/57344 train_time:1362115ms step_avg:484.74ms +step:2811/57344 train_time:1362595ms step_avg:484.74ms +grad accum step:703/14336 +step:2812/57344 train_time:1363721ms step_avg:484.96ms +step:2813/57344 train_time:1363738ms step_avg:484.80ms +step:2814/57344 train_time:1363963ms step_avg:484.71ms +step:2815/57344 train_time:1364440ms step_avg:484.70ms +grad accum step:704/14336 +step:2816/57344 train_time:1365570ms step_avg:484.93ms +step:2816/57344 val_loss:7.668962 train_time:1365570ms step_avg:484.93ms +step:2817/57344 train_time:1365582ms step_avg:484.76ms +step:2818/57344 train_time:1365784ms step_avg:484.66ms +step:2819/57344 train_time:1366264ms step_avg:484.66ms +grad accum step:705/14336 +step:2820/57344 train_time:1367397ms step_avg:484.89ms +step:2821/57344 train_time:1367414ms step_avg:484.73ms +step:2822/57344 train_time:1367641ms step_avg:484.64ms +step:2823/57344 train_time:1368122ms step_avg:484.63ms +grad accum step:706/14336 +step:2824/57344 train_time:1369255ms step_avg:484.86ms +step:2825/57344 train_time:1369272ms step_avg:484.70ms +step:2826/57344 train_time:1369499ms step_avg:484.61ms +step:2827/57344 train_time:1369980ms step_avg:484.61ms +grad accum step:707/14336 +step:2828/57344 train_time:1371111ms step_avg:484.83ms +step:2829/57344 train_time:1371128ms step_avg:484.67ms +step:2830/57344 train_time:1371355ms step_avg:484.58ms +step:2831/57344 train_time:1371837ms step_avg:484.58ms +grad accum step:708/14336 +step:2832/57344 train_time:1372969ms step_avg:484.81ms +step:2833/57344 train_time:1372986ms step_avg:484.64ms +step:2834/57344 train_time:1373213ms step_avg:484.55ms +step:2835/57344 train_time:1373696ms step_avg:484.55ms +grad accum step:709/14336 +step:2836/57344 train_time:1374828ms step_avg:484.78ms +step:2837/57344 train_time:1374845ms step_avg:484.61ms +step:2838/57344 train_time:1375071ms step_avg:484.52ms +step:2839/57344 train_time:1375554ms step_avg:484.52ms +grad accum step:710/14336 +step:2840/57344 train_time:1376685ms step_avg:484.75ms +step:2841/57344 train_time:1376703ms step_avg:484.58ms +step:2842/57344 train_time:1376929ms step_avg:484.49ms +step:2843/57344 train_time:1377410ms step_avg:484.49ms +grad accum step:711/14336 +step:2844/57344 train_time:1378544ms step_avg:484.72ms +step:2845/57344 train_time:1378561ms step_avg:484.56ms +step:2846/57344 train_time:1378787ms step_avg:484.46ms +step:2847/57344 train_time:1379268ms step_avg:484.46ms +grad accum step:712/14336 +step:2848/57344 train_time:1380403ms step_avg:484.69ms +step:2849/57344 train_time:1380420ms step_avg:484.53ms +step:2850/57344 train_time:1380647ms step_avg:484.44ms +step:2851/57344 train_time:1381129ms step_avg:484.44ms +grad accum step:713/14336 +step:2852/57344 train_time:1382263ms step_avg:484.66ms +step:2853/57344 train_time:1382281ms step_avg:484.50ms +step:2854/57344 train_time:1382508ms step_avg:484.41ms +step:2855/57344 train_time:1382991ms step_avg:484.41ms +grad accum step:714/14336 +step:2856/57344 train_time:1384125ms step_avg:484.64ms +step:2857/57344 train_time:1384143ms step_avg:484.47ms +step:2858/57344 train_time:1384369ms step_avg:484.38ms +step:2859/57344 train_time:1384850ms step_avg:484.38ms +grad accum step:715/14336 +step:2860/57344 train_time:1385982ms step_avg:484.61ms +step:2861/57344 train_time:1385999ms step_avg:484.45ms +step:2862/57344 train_time:1386225ms step_avg:484.36ms +step:2863/57344 train_time:1386706ms step_avg:484.35ms +grad accum step:716/14336 +step:2864/57344 train_time:1387841ms step_avg:484.58ms +step:2865/57344 train_time:1387858ms step_avg:484.42ms +step:2866/57344 train_time:1388084ms step_avg:484.33ms +step:2867/57344 train_time:1388567ms step_avg:484.33ms +grad accum step:717/14336 +step:2868/57344 train_time:1389703ms step_avg:484.55ms +step:2869/57344 train_time:1389721ms step_avg:484.39ms +step:2870/57344 train_time:1389947ms step_avg:484.30ms +step:2871/57344 train_time:1390429ms step_avg:484.30ms +grad accum step:718/14336 +step:2872/57344 train_time:1391561ms step_avg:484.53ms +step:2873/57344 train_time:1391579ms step_avg:484.36ms +step:2874/57344 train_time:1391806ms step_avg:484.27ms +step:2875/57344 train_time:1392289ms step_avg:484.27ms +grad accum step:719/14336 +step:2876/57344 train_time:1393424ms step_avg:484.50ms +step:2877/57344 train_time:1393441ms step_avg:484.34ms +step:2878/57344 train_time:1393667ms step_avg:484.25ms +step:2879/57344 train_time:1394147ms step_avg:484.25ms +grad accum step:720/14336 +step:2880/57344 train_time:1395282ms step_avg:484.47ms +step:2880/57344 val_loss:7.658406 train_time:1395282ms step_avg:484.47ms +step:2881/57344 train_time:1395293ms step_avg:484.31ms +step:2882/57344 train_time:1395497ms step_avg:484.21ms +step:2883/57344 train_time:1395978ms step_avg:484.21ms +grad accum step:721/14336 +step:2884/57344 train_time:1397111ms step_avg:484.44ms +step:2885/57344 train_time:1397128ms step_avg:484.27ms +step:2886/57344 train_time:1397355ms step_avg:484.18ms +step:2887/57344 train_time:1397836ms step_avg:484.18ms +grad accum step:722/14336 +step:2888/57344 train_time:1398970ms step_avg:484.41ms +step:2889/57344 train_time:1398988ms step_avg:484.25ms +step:2890/57344 train_time:1399214ms step_avg:484.16ms +step:2891/57344 train_time:1399696ms step_avg:484.16ms +grad accum step:723/14336 +step:2892/57344 train_time:1400834ms step_avg:484.38ms +step:2893/57344 train_time:1400851ms step_avg:484.22ms +step:2894/57344 train_time:1401077ms step_avg:484.13ms +step:2895/57344 train_time:1401558ms step_avg:484.13ms +grad accum step:724/14336 +step:2896/57344 train_time:1402694ms step_avg:484.36ms +step:2897/57344 train_time:1402711ms step_avg:484.19ms +step:2898/57344 train_time:1402939ms step_avg:484.11ms +step:2899/57344 train_time:1403421ms step_avg:484.11ms +grad accum step:725/14336 +step:2900/57344 train_time:1404557ms step_avg:484.33ms +step:2901/57344 train_time:1404574ms step_avg:484.17ms +step:2902/57344 train_time:1404801ms step_avg:484.08ms +step:2903/57344 train_time:1405282ms step_avg:484.08ms +grad accum step:726/14336 +step:2904/57344 train_time:1406417ms step_avg:484.30ms +step:2905/57344 train_time:1406435ms step_avg:484.14ms +step:2906/57344 train_time:1406661ms step_avg:484.05ms +step:2907/57344 train_time:1407143ms step_avg:484.05ms +grad accum step:727/14336 +step:2908/57344 train_time:1408281ms step_avg:484.28ms +step:2909/57344 train_time:1408298ms step_avg:484.12ms +step:2910/57344 train_time:1408525ms step_avg:484.03ms +step:2911/57344 train_time:1409006ms step_avg:484.03ms +grad accum step:728/14336 +step:2912/57344 train_time:1410141ms step_avg:484.25ms +step:2913/57344 train_time:1410158ms step_avg:484.09ms +step:2914/57344 train_time:1410384ms step_avg:484.00ms +step:2915/57344 train_time:1410866ms step_avg:484.00ms +grad accum step:729/14336 +step:2916/57344 train_time:1412002ms step_avg:484.23ms +step:2917/57344 train_time:1412019ms step_avg:484.07ms +step:2918/57344 train_time:1412246ms step_avg:483.98ms +step:2919/57344 train_time:1412727ms step_avg:483.98ms +grad accum step:730/14336 +step:2920/57344 train_time:1413862ms step_avg:484.20ms +step:2921/57344 train_time:1413880ms step_avg:484.04ms +step:2922/57344 train_time:1414106ms step_avg:483.95ms +step:2923/57344 train_time:1414589ms step_avg:483.95ms +grad accum step:731/14336 +step:2924/57344 train_time:1415723ms step_avg:484.17ms +step:2925/57344 train_time:1415740ms step_avg:484.01ms +step:2926/57344 train_time:1415967ms step_avg:483.93ms +step:2927/57344 train_time:1416449ms step_avg:483.93ms +grad accum step:732/14336 +step:2928/57344 train_time:1417584ms step_avg:484.15ms +step:2929/57344 train_time:1417602ms step_avg:483.99ms +step:2930/57344 train_time:1417829ms step_avg:483.90ms +step:2931/57344 train_time:1418313ms step_avg:483.90ms +grad accum step:733/14336 +step:2932/57344 train_time:1419448ms step_avg:484.12ms +step:2933/57344 train_time:1419466ms step_avg:483.96ms +step:2934/57344 train_time:1419693ms step_avg:483.88ms +step:2935/57344 train_time:1420177ms step_avg:483.88ms +grad accum step:734/14336 +step:2936/57344 train_time:1421312ms step_avg:484.10ms +step:2937/57344 train_time:1421329ms step_avg:483.94ms +step:2938/57344 train_time:1421556ms step_avg:483.85ms +step:2939/57344 train_time:1422038ms step_avg:483.85ms +grad accum step:735/14336 +step:2940/57344 train_time:1423173ms step_avg:484.07ms +step:2941/57344 train_time:1423190ms step_avg:483.91ms +step:2942/57344 train_time:1423418ms step_avg:483.83ms +step:2943/57344 train_time:1423900ms step_avg:483.83ms +grad accum step:736/14336 +step:2944/57344 train_time:1425036ms step_avg:484.05ms +step:2944/57344 val_loss:7.661676 train_time:1425036ms step_avg:484.05ms +step:2945/57344 train_time:1425048ms step_avg:483.89ms +step:2946/57344 train_time:1425251ms step_avg:483.79ms +step:2947/57344 train_time:1425735ms step_avg:483.79ms +grad accum step:737/14336 +step:2948/57344 train_time:1426872ms step_avg:484.01ms +step:2949/57344 train_time:1426889ms step_avg:483.86ms +step:2950/57344 train_time:1427115ms step_avg:483.77ms +step:2951/57344 train_time:1427597ms step_avg:483.77ms +grad accum step:738/14336 +step:2952/57344 train_time:1428733ms step_avg:483.99ms +step:2953/57344 train_time:1428750ms step_avg:483.83ms +step:2954/57344 train_time:1428977ms step_avg:483.74ms +step:2955/57344 train_time:1429457ms step_avg:483.74ms +grad accum step:739/14336 +step:2956/57344 train_time:1430594ms step_avg:483.96ms +step:2957/57344 train_time:1430611ms step_avg:483.80ms +step:2958/57344 train_time:1430838ms step_avg:483.72ms +step:2959/57344 train_time:1431320ms step_avg:483.72ms +grad accum step:740/14336 +step:2960/57344 train_time:1432457ms step_avg:483.94ms +step:2961/57344 train_time:1432474ms step_avg:483.78ms +step:2962/57344 train_time:1432701ms step_avg:483.69ms +step:2963/57344 train_time:1433184ms step_avg:483.69ms +grad accum step:741/14336 +step:2964/57344 train_time:1434319ms step_avg:483.91ms +step:2965/57344 train_time:1434336ms step_avg:483.76ms +step:2966/57344 train_time:1434563ms step_avg:483.67ms +step:2967/57344 train_time:1435045ms step_avg:483.67ms +grad accum step:742/14336 +step:2968/57344 train_time:1436180ms step_avg:483.89ms +step:2969/57344 train_time:1436197ms step_avg:483.73ms +step:2970/57344 train_time:1436423ms step_avg:483.64ms +step:2971/57344 train_time:1436905ms step_avg:483.64ms +grad accum step:743/14336 +step:2972/57344 train_time:1438041ms step_avg:483.86ms +step:2973/57344 train_time:1438058ms step_avg:483.71ms +step:2974/57344 train_time:1438285ms step_avg:483.62ms +step:2975/57344 train_time:1438765ms step_avg:483.62ms +grad accum step:744/14336 +step:2976/57344 train_time:1439901ms step_avg:483.84ms +step:2977/57344 train_time:1439918ms step_avg:483.68ms +step:2978/57344 train_time:1440145ms step_avg:483.59ms +step:2979/57344 train_time:1440628ms step_avg:483.59ms +grad accum step:745/14336 +step:2980/57344 train_time:1441765ms step_avg:483.81ms +step:2981/57344 train_time:1441782ms step_avg:483.66ms +step:2982/57344 train_time:1442009ms step_avg:483.57ms +step:2983/57344 train_time:1442492ms step_avg:483.57ms +grad accum step:746/14336 +step:2984/57344 train_time:1443629ms step_avg:483.79ms +step:2985/57344 train_time:1443646ms step_avg:483.63ms +step:2986/57344 train_time:1443873ms step_avg:483.55ms +step:2987/57344 train_time:1444353ms step_avg:483.55ms +grad accum step:747/14336 +step:2988/57344 train_time:1445490ms step_avg:483.77ms +step:2989/57344 train_time:1445508ms step_avg:483.61ms +step:2990/57344 train_time:1445735ms step_avg:483.52ms +step:2991/57344 train_time:1446216ms step_avg:483.52ms +grad accum step:748/14336 +step:2992/57344 train_time:1447352ms step_avg:483.74ms +step:2993/57344 train_time:1447369ms step_avg:483.58ms +step:2994/57344 train_time:1447596ms step_avg:483.50ms +step:2995/57344 train_time:1448077ms step_avg:483.50ms +grad accum step:749/14336 +step:2996/57344 train_time:1449215ms step_avg:483.72ms +step:2997/57344 train_time:1449232ms step_avg:483.56ms +step:2998/57344 train_time:1449458ms step_avg:483.48ms +step:2999/57344 train_time:1449941ms step_avg:483.47ms +grad accum step:750/14336 +step:3000/57344 train_time:1451081ms step_avg:483.69ms +step:3001/57344 train_time:1451098ms step_avg:483.54ms +step:3002/57344 train_time:1451325ms step_avg:483.45ms +step:3003/57344 train_time:1451807ms step_avg:483.45ms +grad accum step:751/14336 +step:3004/57344 train_time:1452942ms step_avg:483.67ms +step:3005/57344 train_time:1452960ms step_avg:483.51ms +step:3006/57344 train_time:1453187ms step_avg:483.43ms +step:3007/57344 train_time:1453669ms step_avg:483.43ms +grad accum step:752/14336 +step:3008/57344 train_time:1454807ms step_avg:483.65ms +step:3008/57344 val_loss:7.658646 train_time:1454808ms step_avg:483.65ms +step:3009/57344 train_time:1454820ms step_avg:483.49ms +step:3010/57344 train_time:1455023ms step_avg:483.40ms +step:3011/57344 train_time:1455507ms step_avg:483.40ms +grad accum step:753/14336 +step:3012/57344 train_time:1456640ms step_avg:483.61ms +step:3013/57344 train_time:1456658ms step_avg:483.46ms +step:3014/57344 train_time:1456886ms step_avg:483.37ms +step:3015/57344 train_time:1457368ms step_avg:483.37ms +grad accum step:754/14336 +step:3016/57344 train_time:1458504ms step_avg:483.59ms +step:3017/57344 train_time:1458521ms step_avg:483.43ms +step:3018/57344 train_time:1458749ms step_avg:483.35ms +step:3019/57344 train_time:1459232ms step_avg:483.35ms +grad accum step:755/14336 +step:3020/57344 train_time:1460369ms step_avg:483.57ms +step:3021/57344 train_time:1460386ms step_avg:483.41ms +step:3022/57344 train_time:1460613ms step_avg:483.33ms +step:3023/57344 train_time:1461096ms step_avg:483.33ms +grad accum step:756/14336 +step:3024/57344 train_time:1462230ms step_avg:483.54ms +step:3025/57344 train_time:1462248ms step_avg:483.39ms +step:3026/57344 train_time:1462475ms step_avg:483.30ms +step:3027/57344 train_time:1462957ms step_avg:483.30ms +grad accum step:757/14336 +step:3028/57344 train_time:1464093ms step_avg:483.52ms +step:3029/57344 train_time:1464110ms step_avg:483.36ms +step:3030/57344 train_time:1464337ms step_avg:483.28ms +step:3031/57344 train_time:1464819ms step_avg:483.28ms +grad accum step:758/14336 +step:3032/57344 train_time:1465955ms step_avg:483.49ms +step:3033/57344 train_time:1465972ms step_avg:483.34ms +step:3034/57344 train_time:1466199ms step_avg:483.26ms +step:3035/57344 train_time:1466681ms step_avg:483.26ms +grad accum step:759/14336 +step:3036/57344 train_time:1467819ms step_avg:483.47ms +step:3037/57344 train_time:1467836ms step_avg:483.32ms +step:3038/57344 train_time:1468063ms step_avg:483.23ms +step:3039/57344 train_time:1468545ms step_avg:483.23ms +grad accum step:760/14336 +step:3040/57344 train_time:1469681ms step_avg:483.45ms +step:3041/57344 train_time:1469698ms step_avg:483.29ms +step:3042/57344 train_time:1469924ms step_avg:483.21ms +step:3043/57344 train_time:1470406ms step_avg:483.21ms +grad accum step:761/14336 +step:3044/57344 train_time:1471543ms step_avg:483.42ms +step:3045/57344 train_time:1471560ms step_avg:483.27ms +step:3046/57344 train_time:1471786ms step_avg:483.19ms +step:3047/57344 train_time:1472267ms step_avg:483.19ms +grad accum step:762/14336 +step:3048/57344 train_time:1473403ms step_avg:483.40ms +step:3049/57344 train_time:1473420ms step_avg:483.25ms +step:3050/57344 train_time:1473647ms step_avg:483.16ms +step:3051/57344 train_time:1474128ms step_avg:483.16ms +grad accum step:763/14336 +step:3052/57344 train_time:1475265ms step_avg:483.38ms +step:3053/57344 train_time:1475282ms step_avg:483.22ms +step:3054/57344 train_time:1475509ms step_avg:483.14ms +step:3055/57344 train_time:1475989ms step_avg:483.14ms +grad accum step:764/14336 +step:3056/57344 train_time:1477127ms step_avg:483.35ms +step:3057/57344 train_time:1477145ms step_avg:483.20ms +step:3058/57344 train_time:1477371ms step_avg:483.12ms +step:3059/57344 train_time:1477852ms step_avg:483.12ms +grad accum step:765/14336 +step:3060/57344 train_time:1478989ms step_avg:483.33ms +step:3061/57344 train_time:1479007ms step_avg:483.18ms +step:3062/57344 train_time:1479234ms step_avg:483.09ms +step:3063/57344 train_time:1479717ms step_avg:483.09ms +grad accum step:766/14336 +step:3064/57344 train_time:1480853ms step_avg:483.31ms +step:3065/57344 train_time:1480870ms step_avg:483.16ms +step:3066/57344 train_time:1481096ms step_avg:483.07ms +step:3067/57344 train_time:1481579ms step_avg:483.07ms +grad accum step:767/14336 +step:3068/57344 train_time:1482714ms step_avg:483.28ms +step:3069/57344 train_time:1482731ms step_avg:483.13ms +step:3070/57344 train_time:1482959ms step_avg:483.05ms +step:3071/57344 train_time:1483440ms step_avg:483.05ms +grad accum step:768/14336 +step:3072/57344 train_time:1484576ms step_avg:483.26ms +step:3072/57344 val_loss:7.662162 train_time:1484577ms step_avg:483.26ms +step:3073/57344 train_time:1484589ms step_avg:483.11ms +step:3074/57344 train_time:1484792ms step_avg:483.02ms +step:3075/57344 train_time:1485274ms step_avg:483.02ms +grad accum step:769/14336 +step:3076/57344 train_time:1486409ms step_avg:483.23ms +step:3077/57344 train_time:1486426ms step_avg:483.08ms +step:3078/57344 train_time:1486652ms step_avg:482.99ms +step:3079/57344 train_time:1487134ms step_avg:482.99ms +grad accum step:770/14336 +step:3080/57344 train_time:1488269ms step_avg:483.20ms +step:3081/57344 train_time:1488286ms step_avg:483.05ms +step:3082/57344 train_time:1488514ms step_avg:482.97ms +step:3083/57344 train_time:1488996ms step_avg:482.97ms +grad accum step:771/14336 +step:3084/57344 train_time:1490134ms step_avg:483.18ms +step:3085/57344 train_time:1490152ms step_avg:483.03ms +step:3086/57344 train_time:1490380ms step_avg:482.95ms +step:3087/57344 train_time:1490863ms step_avg:482.95ms +grad accum step:772/14336 +step:3088/57344 train_time:1491998ms step_avg:483.16ms +step:3089/57344 train_time:1492015ms step_avg:483.01ms +step:3090/57344 train_time:1492242ms step_avg:482.93ms +step:3091/57344 train_time:1492725ms step_avg:482.93ms +grad accum step:773/14336 +step:3092/57344 train_time:1493860ms step_avg:483.14ms +step:3093/57344 train_time:1493878ms step_avg:482.99ms +step:3094/57344 train_time:1494105ms step_avg:482.90ms +step:3095/57344 train_time:1494588ms step_avg:482.90ms +grad accum step:774/14336 +step:3096/57344 train_time:1495724ms step_avg:483.12ms +step:3097/57344 train_time:1495741ms step_avg:482.96ms +step:3098/57344 train_time:1495968ms step_avg:482.88ms +step:3099/57344 train_time:1496450ms step_avg:482.88ms +grad accum step:775/14336 +step:3100/57344 train_time:1497589ms step_avg:483.09ms +step:3101/57344 train_time:1497606ms step_avg:482.94ms +step:3102/57344 train_time:1497833ms step_avg:482.86ms +step:3103/57344 train_time:1498314ms step_avg:482.86ms +grad accum step:776/14336 +step:3104/57344 train_time:1499452ms step_avg:483.07ms +step:3105/57344 train_time:1499469ms step_avg:482.92ms +step:3106/57344 train_time:1499696ms step_avg:482.84ms +step:3107/57344 train_time:1500177ms step_avg:482.84ms +grad accum step:777/14336 +step:3108/57344 train_time:1501312ms step_avg:483.05ms +step:3109/57344 train_time:1501330ms step_avg:482.90ms +step:3110/57344 train_time:1501556ms step_avg:482.82ms +step:3111/57344 train_time:1502038ms step_avg:482.82ms +grad accum step:778/14336 +step:3112/57344 train_time:1503174ms step_avg:483.02ms +step:3113/57344 train_time:1503191ms step_avg:482.88ms +step:3114/57344 train_time:1503417ms step_avg:482.79ms +step:3115/57344 train_time:1503900ms step_avg:482.79ms +grad accum step:779/14336 +step:3116/57344 train_time:1505038ms step_avg:483.00ms +step:3117/57344 train_time:1505055ms step_avg:482.85ms +step:3118/57344 train_time:1505282ms step_avg:482.77ms +step:3119/57344 train_time:1505763ms step_avg:482.77ms +grad accum step:780/14336 +step:3120/57344 train_time:1506902ms step_avg:482.98ms +step:3121/57344 train_time:1506919ms step_avg:482.83ms +step:3122/57344 train_time:1507146ms step_avg:482.75ms +step:3123/57344 train_time:1507629ms step_avg:482.75ms +grad accum step:781/14336 +step:3124/57344 train_time:1508767ms step_avg:482.96ms +step:3125/57344 train_time:1508784ms step_avg:482.81ms +step:3126/57344 train_time:1509011ms step_avg:482.73ms +step:3127/57344 train_time:1509494ms step_avg:482.73ms +grad accum step:782/14336 +step:3128/57344 train_time:1510629ms step_avg:482.94ms +step:3129/57344 train_time:1510646ms step_avg:482.79ms +step:3130/57344 train_time:1510873ms step_avg:482.71ms +step:3131/57344 train_time:1511354ms step_avg:482.71ms +grad accum step:783/14336 +step:3132/57344 train_time:1512490ms step_avg:482.91ms +step:3133/57344 train_time:1512507ms step_avg:482.77ms +step:3134/57344 train_time:1512734ms step_avg:482.68ms +step:3135/57344 train_time:1513217ms step_avg:482.68ms +grad accum step:784/14336 +step:3136/57344 train_time:1514354ms step_avg:482.89ms +step:3136/57344 val_loss:7.658408 train_time:1514355ms step_avg:482.89ms +step:3137/57344 train_time:1514367ms step_avg:482.74ms +step:3138/57344 train_time:1514571ms step_avg:482.65ms +step:3139/57344 train_time:1515054ms step_avg:482.66ms +grad accum step:785/14336 +step:3140/57344 train_time:1516191ms step_avg:482.86ms +step:3141/57344 train_time:1516208ms step_avg:482.72ms +step:3142/57344 train_time:1516436ms step_avg:482.63ms +step:3143/57344 train_time:1516919ms step_avg:482.63ms +grad accum step:786/14336 +step:3144/57344 train_time:1518053ms step_avg:482.84ms +step:3145/57344 train_time:1518071ms step_avg:482.69ms +step:3146/57344 train_time:1518299ms step_avg:482.61ms +step:3147/57344 train_time:1518781ms step_avg:482.61ms +grad accum step:787/14336 +step:3148/57344 train_time:1519921ms step_avg:482.82ms +step:3149/57344 train_time:1519938ms step_avg:482.67ms +step:3150/57344 train_time:1520164ms step_avg:482.59ms +step:3151/57344 train_time:1520645ms step_avg:482.59ms +grad accum step:788/14336 +step:3152/57344 train_time:1521784ms step_avg:482.80ms +step:3153/57344 train_time:1521802ms step_avg:482.65ms +step:3154/57344 train_time:1522028ms step_avg:482.57ms +step:3155/57344 train_time:1522510ms step_avg:482.57ms +grad accum step:789/14336 +step:3156/57344 train_time:1523648ms step_avg:482.78ms +step:3157/57344 train_time:1523665ms step_avg:482.63ms +step:3158/57344 train_time:1523892ms step_avg:482.55ms +step:3159/57344 train_time:1524373ms step_avg:482.55ms +grad accum step:790/14336 +step:3160/57344 train_time:1525510ms step_avg:482.76ms +step:3161/57344 train_time:1525527ms step_avg:482.61ms +step:3162/57344 train_time:1525754ms step_avg:482.53ms +step:3163/57344 train_time:1526237ms step_avg:482.53ms +grad accum step:791/14336 +step:3164/57344 train_time:1527376ms step_avg:482.74ms +step:3165/57344 train_time:1527391ms step_avg:482.59ms +step:3166/57344 train_time:1527618ms step_avg:482.51ms +step:3167/57344 train_time:1528101ms step_avg:482.51ms +grad accum step:792/14336 +step:3168/57344 train_time:1529238ms step_avg:482.71ms +step:3169/57344 train_time:1529255ms step_avg:482.57ms +step:3170/57344 train_time:1529482ms step_avg:482.49ms +step:3171/57344 train_time:1529964ms step_avg:482.49ms +grad accum step:793/14336 +step:3172/57344 train_time:1531101ms step_avg:482.69ms +step:3173/57344 train_time:1531118ms step_avg:482.55ms +step:3174/57344 train_time:1531348ms step_avg:482.47ms +step:3175/57344 train_time:1531833ms step_avg:482.47ms +grad accum step:794/14336 +step:3176/57344 train_time:1532978ms step_avg:482.68ms +step:3177/57344 train_time:1532996ms step_avg:482.53ms +step:3178/57344 train_time:1533224ms step_avg:482.45ms +step:3179/57344 train_time:1533709ms step_avg:482.45ms +grad accum step:795/14336 +step:3180/57344 train_time:1534853ms step_avg:482.66ms +step:3181/57344 train_time:1534870ms step_avg:482.51ms +step:3182/57344 train_time:1535099ms step_avg:482.43ms +step:3183/57344 train_time:1535584ms step_avg:482.43ms +grad accum step:796/14336 +step:3184/57344 train_time:1536728ms step_avg:482.64ms +step:3185/57344 train_time:1536746ms step_avg:482.49ms +step:3186/57344 train_time:1536975ms step_avg:482.42ms +step:3187/57344 train_time:1537460ms step_avg:482.42ms +grad accum step:797/14336 +step:3188/57344 train_time:1538612ms step_avg:482.63ms +step:3189/57344 train_time:1538629ms step_avg:482.48ms +step:3190/57344 train_time:1538858ms step_avg:482.40ms +step:3191/57344 train_time:1539347ms step_avg:482.40ms +grad accum step:798/14336 +step:3192/57344 train_time:1540493ms step_avg:482.61ms +step:3193/57344 train_time:1540510ms step_avg:482.46ms +step:3194/57344 train_time:1540738ms step_avg:482.39ms +step:3195/57344 train_time:1541224ms step_avg:482.39ms +grad accum step:799/14336 +step:3196/57344 train_time:1542367ms step_avg:482.59ms +step:3197/57344 train_time:1542384ms step_avg:482.45ms +step:3198/57344 train_time:1542613ms step_avg:482.37ms +step:3199/57344 train_time:1543098ms step_avg:482.37ms +grad accum step:800/14336 +step:3200/57344 train_time:1544241ms step_avg:482.58ms +step:3200/57344 val_loss:7.672349 train_time:1544242ms step_avg:482.58ms +step:3201/57344 train_time:1544254ms step_avg:482.43ms +step:3202/57344 train_time:1544459ms step_avg:482.34ms +step:3203/57344 train_time:1544945ms step_avg:482.34ms +grad accum step:801/14336 +step:3204/57344 train_time:1546090ms step_avg:482.55ms +step:3205/57344 train_time:1562627ms step_avg:487.56ms +step:3206/57344 train_time:1566025ms step_avg:488.47ms +step:3207/57344 train_time:1575422ms step_avg:491.24ms +grad accum step:802/14336 +step:3208/57344 train_time:1588507ms step_avg:495.17ms +step:3209/57344 train_time:1624439ms step_avg:506.21ms +step:3210/57344 train_time:1672576ms step_avg:521.05ms +step:3211/57344 train_time:1672887ms step_avg:520.99ms +grad accum step:803/14336 +step:3212/57344 train_time:1674006ms step_avg:521.17ms +step:3213/57344 train_time:1674023ms step_avg:521.02ms +step:3214/57344 train_time:1674246ms step_avg:520.92ms +step:3215/57344 train_time:1674725ms step_avg:520.91ms +grad accum step:804/14336 +step:3216/57344 train_time:1675849ms step_avg:521.10ms +step:3217/57344 train_time:1675866ms step_avg:520.94ms +step:3218/57344 train_time:1676088ms step_avg:520.85ms +step:3219/57344 train_time:1676564ms step_avg:520.83ms +grad accum step:805/14336 +step:3220/57344 train_time:1677683ms step_avg:521.02ms +step:3221/57344 train_time:1677700ms step_avg:520.86ms +step:3222/57344 train_time:1677922ms step_avg:520.77ms +step:3223/57344 train_time:1678398ms step_avg:520.76ms +grad accum step:806/14336 +step:3224/57344 train_time:1679519ms step_avg:520.94ms +step:3225/57344 train_time:1679536ms step_avg:520.79ms +step:3226/57344 train_time:1679758ms step_avg:520.69ms +step:3227/57344 train_time:1680232ms step_avg:520.68ms +grad accum step:807/14336 +step:3228/57344 train_time:1681355ms step_avg:520.87ms +step:3229/57344 train_time:1681372ms step_avg:520.71ms +step:3230/57344 train_time:1681596ms step_avg:520.62ms +step:3231/57344 train_time:1682072ms step_avg:520.60ms +grad accum step:808/14336 +step:3232/57344 train_time:1683199ms step_avg:520.79ms +step:3233/57344 train_time:1683216ms step_avg:520.64ms +step:3234/57344 train_time:1683439ms step_avg:520.54ms +step:3235/57344 train_time:1683914ms step_avg:520.53ms +grad accum step:809/14336 +step:3236/57344 train_time:1685035ms step_avg:520.72ms +step:3237/57344 train_time:1685052ms step_avg:520.56ms +step:3238/57344 train_time:1685275ms step_avg:520.47ms +step:3239/57344 train_time:1685752ms step_avg:520.45ms +grad accum step:810/14336 +step:3240/57344 train_time:1686874ms step_avg:520.64ms +step:3241/57344 train_time:1686891ms step_avg:520.48ms +step:3242/57344 train_time:1687115ms step_avg:520.39ms +step:3243/57344 train_time:1687591ms step_avg:520.38ms +grad accum step:811/14336 +step:3244/57344 train_time:1688714ms step_avg:520.57ms +step:3245/57344 train_time:1688731ms step_avg:520.41ms +step:3246/57344 train_time:1688955ms step_avg:520.32ms +step:3247/57344 train_time:1689432ms step_avg:520.31ms +grad accum step:812/14336 +step:3248/57344 train_time:1690555ms step_avg:520.49ms +step:3249/57344 train_time:1690571ms step_avg:520.34ms +step:3250/57344 train_time:1690795ms step_avg:520.24ms +step:3251/57344 train_time:1691272ms step_avg:520.23ms +grad accum step:813/14336 +step:3252/57344 train_time:1692394ms step_avg:520.42ms +step:3253/57344 train_time:1692411ms step_avg:520.26ms +step:3254/57344 train_time:1692636ms step_avg:520.17ms +step:3255/57344 train_time:1744129ms step_avg:535.83ms +grad accum step:814/14336 +step:3256/57344 train_time:1745118ms step_avg:535.97ms +step:3257/57344 train_time:1745135ms step_avg:535.81ms +step:3258/57344 train_time:1745359ms step_avg:535.71ms +step:3259/57344 train_time:1745835ms step_avg:535.70ms +grad accum step:815/14336 +step:3260/57344 train_time:1746955ms step_avg:535.88ms +step:3261/57344 train_time:1746972ms step_avg:535.72ms +step:3262/57344 train_time:1747195ms step_avg:535.62ms +step:3263/57344 train_time:1747673ms step_avg:535.60ms +grad accum step:816/14336 +step:3264/57344 train_time:1748797ms step_avg:535.78ms +step:3264/57344 val_loss:7.672679 train_time:1748797ms step_avg:535.78ms +step:3265/57344 train_time:1748809ms step_avg:535.62ms +step:3266/57344 train_time:1749011ms step_avg:535.52ms +step:3267/57344 train_time:1749490ms step_avg:535.50ms +grad accum step:817/14336 +step:3268/57344 train_time:1750618ms step_avg:535.68ms +step:3269/57344 train_time:1750635ms step_avg:535.53ms +step:3270/57344 train_time:1750861ms step_avg:535.43ms +step:3271/57344 train_time:1751341ms step_avg:535.41ms +grad accum step:818/14336 +step:3272/57344 train_time:1752474ms step_avg:535.60ms +step:3273/57344 train_time:1752490ms step_avg:535.44ms +step:3274/57344 train_time:1752715ms step_avg:535.34ms +step:3275/57344 train_time:1753194ms step_avg:535.33ms +grad accum step:819/14336 +step:3276/57344 train_time:1754324ms step_avg:535.51ms +step:3277/57344 train_time:1754341ms step_avg:535.35ms +step:3278/57344 train_time:1754568ms step_avg:535.26ms +step:3279/57344 train_time:1755046ms step_avg:535.24ms +grad accum step:820/14336 +step:3280/57344 train_time:1756179ms step_avg:535.42ms +step:3281/57344 train_time:1756196ms step_avg:535.26ms +step:3282/57344 train_time:1756422ms step_avg:535.17ms +step:3283/57344 train_time:1756903ms step_avg:535.15ms +grad accum step:821/14336 +step:3284/57344 train_time:1758034ms step_avg:535.33ms +step:3285/57344 train_time:1758051ms step_avg:535.18ms +step:3286/57344 train_time:1758275ms step_avg:535.08ms +step:3287/57344 train_time:1758754ms step_avg:535.06ms +grad accum step:822/14336 +step:3288/57344 train_time:1759884ms step_avg:535.24ms +step:3289/57344 train_time:1759901ms step_avg:535.09ms +step:3290/57344 train_time:1760127ms step_avg:534.99ms +step:3291/57344 train_time:1760606ms step_avg:534.98ms +grad accum step:823/14336 +step:3292/57344 train_time:1761737ms step_avg:535.16ms +step:3293/57344 train_time:1761754ms step_avg:535.00ms +step:3294/57344 train_time:1761980ms step_avg:534.91ms +step:3295/57344 train_time:1762461ms step_avg:534.89ms +grad accum step:824/14336 +step:3296/57344 train_time:1763595ms step_avg:535.07ms +step:3297/57344 train_time:1763612ms step_avg:534.91ms +step:3298/57344 train_time:1763838ms step_avg:534.82ms +step:3299/57344 train_time:1764317ms step_avg:534.80ms +grad accum step:825/14336 +step:3300/57344 train_time:1765449ms step_avg:534.98ms +step:3301/57344 train_time:1765466ms step_avg:534.83ms +step:3302/57344 train_time:1765691ms step_avg:534.73ms +step:3303/57344 train_time:1766172ms step_avg:534.72ms +grad accum step:826/14336 +step:3304/57344 train_time:1767302ms step_avg:534.90ms +step:3305/57344 train_time:1767319ms step_avg:534.74ms +step:3306/57344 train_time:1767545ms step_avg:534.65ms +step:3307/57344 train_time:1768025ms step_avg:534.63ms +grad accum step:827/14336 +step:3308/57344 train_time:1769155ms step_avg:534.81ms +step:3309/57344 train_time:1769172ms step_avg:534.65ms +step:3310/57344 train_time:1769399ms step_avg:534.56ms +step:3311/57344 train_time:1769883ms step_avg:534.55ms +grad accum step:828/14336 +step:3312/57344 train_time:1771019ms step_avg:534.73ms +step:3313/57344 train_time:1771036ms step_avg:534.57ms +step:3314/57344 train_time:1771262ms step_avg:534.48ms +step:3315/57344 train_time:1771745ms step_avg:534.46ms +grad accum step:829/14336 +step:3316/57344 train_time:1772884ms step_avg:534.65ms +step:3317/57344 train_time:1772901ms step_avg:534.49ms +step:3318/57344 train_time:1773127ms step_avg:534.40ms +step:3319/57344 train_time:1773609ms step_avg:534.38ms +grad accum step:830/14336 +step:3320/57344 train_time:1774742ms step_avg:534.56ms +step:3321/57344 train_time:1774759ms step_avg:534.41ms +step:3322/57344 train_time:1774986ms step_avg:534.31ms +step:3323/57344 train_time:1775469ms step_avg:534.30ms +grad accum step:831/14336 +step:3324/57344 train_time:1776604ms step_avg:534.48ms +step:3325/57344 train_time:1776621ms step_avg:534.32ms +step:3326/57344 train_time:1776848ms step_avg:534.23ms +step:3327/57344 train_time:1777331ms step_avg:534.21ms +grad accum step:832/14336 +step:3328/57344 train_time:1778461ms step_avg:534.39ms +step:3328/57344 val_loss:7.652864 train_time:1778462ms step_avg:534.39ms +step:3329/57344 train_time:1778474ms step_avg:534.24ms +step:3330/57344 train_time:1778679ms step_avg:534.14ms +step:3331/57344 train_time:1779163ms step_avg:534.12ms +grad accum step:833/14336 +step:3332/57344 train_time:1780303ms step_avg:534.30ms +step:3333/57344 train_time:1780320ms step_avg:534.15ms +step:3334/57344 train_time:1780548ms step_avg:534.06ms +step:3335/57344 train_time:1781032ms step_avg:534.04ms +grad accum step:834/14336 +step:3336/57344 train_time:1782172ms step_avg:534.22ms +step:3337/57344 train_time:1782189ms step_avg:534.07ms +step:3338/57344 train_time:1782416ms step_avg:533.98ms +step:3339/57344 train_time:1782900ms step_avg:533.96ms +grad accum step:835/14336 +step:3340/57344 train_time:1784041ms step_avg:534.14ms +step:3341/57344 train_time:1784059ms step_avg:533.99ms +step:3342/57344 train_time:1784286ms step_avg:533.90ms +step:3343/57344 train_time:1784770ms step_avg:533.88ms +grad accum step:836/14336 +step:3344/57344 train_time:1785909ms step_avg:534.06ms +step:3345/57344 train_time:1785926ms step_avg:533.91ms +step:3346/57344 train_time:1786154ms step_avg:533.82ms +step:3347/57344 train_time:1786639ms step_avg:533.80ms +grad accum step:837/14336 +step:3348/57344 train_time:1787782ms step_avg:533.99ms +step:3349/57344 train_time:1787799ms step_avg:533.83ms +step:3350/57344 train_time:1788027ms step_avg:533.74ms +step:3351/57344 train_time:1788510ms step_avg:533.72ms +grad accum step:838/14336 +step:3352/57344 train_time:1789652ms step_avg:533.91ms +step:3353/57344 train_time:1789669ms step_avg:533.75ms +step:3354/57344 train_time:1789897ms step_avg:533.66ms +step:3355/57344 train_time:1790383ms step_avg:533.65ms +grad accum step:839/14336 +step:3356/57344 train_time:1791523ms step_avg:533.83ms +step:3357/57344 train_time:1791540ms step_avg:533.67ms +step:3358/57344 train_time:1791766ms step_avg:533.58ms +step:3359/57344 train_time:1792250ms step_avg:533.57ms +grad accum step:840/14336 +step:3360/57344 train_time:1793391ms step_avg:533.75ms +step:3361/57344 train_time:1793408ms step_avg:533.59ms +step:3362/57344 train_time:1793636ms step_avg:533.50ms +step:3363/57344 train_time:1794120ms step_avg:533.49ms +grad accum step:841/14336 +step:3364/57344 train_time:1795259ms step_avg:533.67ms +step:3365/57344 train_time:1795276ms step_avg:533.51ms +step:3366/57344 train_time:1795505ms step_avg:533.42ms +step:3367/57344 train_time:1795990ms step_avg:533.41ms +grad accum step:842/14336 +step:3368/57344 train_time:1797131ms step_avg:533.59ms +step:3369/57344 train_time:1797148ms step_avg:533.44ms +step:3370/57344 train_time:1797376ms step_avg:533.35ms +step:3371/57344 train_time:1797859ms step_avg:533.33ms +grad accum step:843/14336 +step:3372/57344 train_time:1799004ms step_avg:533.51ms +step:3373/57344 train_time:1799021ms step_avg:533.36ms +step:3374/57344 train_time:1799249ms step_avg:533.27ms +step:3375/57344 train_time:1799733ms step_avg:533.25ms +grad accum step:844/14336 +step:3376/57344 train_time:1800873ms step_avg:533.43ms +step:3377/57344 train_time:1800890ms step_avg:533.28ms +step:3378/57344 train_time:1801118ms step_avg:533.19ms +step:3379/57344 train_time:1801602ms step_avg:533.18ms +grad accum step:845/14336 +step:3380/57344 train_time:1802745ms step_avg:533.36ms +step:3381/57344 train_time:1802762ms step_avg:533.20ms +step:3382/57344 train_time:1802991ms step_avg:533.11ms +step:3383/57344 train_time:1803477ms step_avg:533.10ms +grad accum step:846/14336 +step:3384/57344 train_time:1804620ms step_avg:533.28ms +step:3385/57344 train_time:1804637ms step_avg:533.13ms +step:3386/57344 train_time:1804864ms step_avg:533.04ms +step:3387/57344 train_time:1805348ms step_avg:533.02ms +grad accum step:847/14336 +step:3388/57344 train_time:1806491ms step_avg:533.20ms +step:3389/57344 train_time:1806508ms step_avg:533.05ms +step:3390/57344 train_time:1806736ms step_avg:532.96ms +step:3391/57344 train_time:1807220ms step_avg:532.95ms +grad accum step:848/14336 +step:3392/57344 train_time:1808363ms step_avg:533.13ms +step:3392/57344 val_loss:7.665180 train_time:1808364ms step_avg:533.13ms +step:3393/57344 train_time:1808375ms step_avg:532.97ms +step:3394/57344 train_time:1808582ms step_avg:532.88ms +step:3395/57344 train_time:1809068ms step_avg:532.86ms +grad accum step:849/14336 +step:3396/57344 train_time:1810214ms step_avg:533.04ms +step:3397/57344 train_time:1810231ms step_avg:532.89ms +step:3398/57344 train_time:1810460ms step_avg:532.80ms +step:3399/57344 train_time:1810949ms step_avg:532.79ms +grad accum step:850/14336 +step:3400/57344 train_time:1812098ms step_avg:532.97ms +step:3401/57344 train_time:1812115ms step_avg:532.82ms +step:3402/57344 train_time:1812344ms step_avg:532.73ms +step:3403/57344 train_time:1812834ms step_avg:532.72ms +grad accum step:851/14336 +step:3404/57344 train_time:1813981ms step_avg:532.90ms +step:3405/57344 train_time:1813998ms step_avg:532.75ms +step:3406/57344 train_time:1814228ms step_avg:532.66ms +step:3407/57344 train_time:1814716ms step_avg:532.64ms +grad accum step:852/14336 +step:3408/57344 train_time:1815864ms step_avg:532.82ms +step:3409/57344 train_time:1815881ms step_avg:532.67ms +step:3410/57344 train_time:1816110ms step_avg:532.58ms +step:3411/57344 train_time:1816598ms step_avg:532.57ms +grad accum step:853/14336 +step:3412/57344 train_time:1817754ms step_avg:532.75ms +step:3413/57344 train_time:1817771ms step_avg:532.60ms +step:3414/57344 train_time:1818001ms step_avg:532.51ms +step:3415/57344 train_time:1818486ms step_avg:532.50ms +grad accum step:854/14336 +step:3416/57344 train_time:1819633ms step_avg:532.68ms +step:3417/57344 train_time:1819650ms step_avg:532.53ms +step:3418/57344 train_time:1819879ms step_avg:532.44ms +step:3419/57344 train_time:1820365ms step_avg:532.43ms +grad accum step:855/14336 +step:3420/57344 train_time:1821517ms step_avg:532.61ms +step:3421/57344 train_time:1821535ms step_avg:532.46ms +step:3422/57344 train_time:1821763ms step_avg:532.37ms +step:3423/57344 train_time:1822250ms step_avg:532.35ms +grad accum step:856/14336 +step:3424/57344 train_time:1823398ms step_avg:532.53ms +step:3425/57344 train_time:1823415ms step_avg:532.38ms +step:3426/57344 train_time:1823645ms step_avg:532.30ms +step:3427/57344 train_time:1824133ms step_avg:532.28ms +grad accum step:857/14336 +step:3428/57344 train_time:1825279ms step_avg:532.46ms +step:3429/57344 train_time:1825296ms step_avg:532.31ms +step:3430/57344 train_time:1825526ms step_avg:532.22ms +step:3431/57344 train_time:1826012ms step_avg:532.21ms +grad accum step:858/14336 +step:3432/57344 train_time:1827161ms step_avg:532.39ms +step:3433/57344 train_time:1827178ms step_avg:532.24ms +step:3434/57344 train_time:1827409ms step_avg:532.15ms +step:3435/57344 train_time:1827900ms step_avg:532.14ms +grad accum step:859/14336 +step:3436/57344 train_time:1829059ms step_avg:532.32ms +step:3437/57344 train_time:1829077ms step_avg:532.17ms +step:3438/57344 train_time:1829307ms step_avg:532.08ms +step:3439/57344 train_time:1829795ms step_avg:532.07ms +grad accum step:860/14336 +step:3440/57344 train_time:1830943ms step_avg:532.25ms +step:3441/57344 train_time:1830961ms step_avg:532.10ms +step:3442/57344 train_time:1831191ms step_avg:532.01ms +step:3443/57344 train_time:1831680ms step_avg:532.00ms +grad accum step:861/14336 +step:3444/57344 train_time:1832828ms step_avg:532.18ms +step:3445/57344 train_time:1832846ms step_avg:532.03ms +step:3446/57344 train_time:1833076ms step_avg:531.94ms +step:3447/57344 train_time:1833562ms step_avg:531.93ms +grad accum step:862/14336 +step:3448/57344 train_time:1834711ms step_avg:532.11ms +step:3449/57344 train_time:1834728ms step_avg:531.96ms +step:3450/57344 train_time:1834958ms step_avg:531.87ms +step:3451/57344 train_time:1835444ms step_avg:531.86ms +grad accum step:863/14336 +step:3452/57344 train_time:1836592ms step_avg:532.04ms +step:3453/57344 train_time:1836609ms step_avg:531.89ms +step:3454/57344 train_time:1836839ms step_avg:531.80ms +step:3455/57344 train_time:1837324ms step_avg:531.79ms +grad accum step:864/14336 +step:3456/57344 train_time:1838476ms step_avg:531.97ms +step:3456/57344 val_loss:7.667527 train_time:1838477ms step_avg:531.97ms +step:3457/57344 train_time:1838489ms step_avg:531.82ms +step:3458/57344 train_time:1838694ms step_avg:531.72ms +step:3459/57344 train_time:1839180ms step_avg:531.71ms +grad accum step:865/14336 +step:3460/57344 train_time:1840332ms step_avg:531.89ms +step:3461/57344 train_time:1840347ms step_avg:531.74ms +step:3462/57344 train_time:1840578ms step_avg:531.65ms +step:3463/57344 train_time:1841065ms step_avg:531.64ms +grad accum step:866/14336 +step:3464/57344 train_time:1842213ms step_avg:531.82ms +step:3465/57344 train_time:1842231ms step_avg:531.67ms +step:3466/57344 train_time:1842458ms step_avg:531.58ms +step:3467/57344 train_time:1842944ms step_avg:531.57ms +grad accum step:867/14336 +step:3468/57344 train_time:1844089ms step_avg:531.74ms +step:3469/57344 train_time:1844106ms step_avg:531.60ms +step:3470/57344 train_time:1844335ms step_avg:531.51ms +step:3471/57344 train_time:1844823ms step_avg:531.50ms +grad accum step:868/14336 +step:3472/57344 train_time:1845971ms step_avg:531.67ms +step:3473/57344 train_time:1845988ms step_avg:531.53ms +step:3474/57344 train_time:1846216ms step_avg:531.44ms +step:3475/57344 train_time:1846704ms step_avg:531.43ms +grad accum step:869/14336 +step:3476/57344 train_time:1847853ms step_avg:531.60ms +step:3477/57344 train_time:1847870ms step_avg:531.46ms +step:3478/57344 train_time:1848100ms step_avg:531.37ms +step:3479/57344 train_time:1848588ms step_avg:531.36ms +grad accum step:870/14336 +step:3480/57344 train_time:1849737ms step_avg:531.53ms +step:3481/57344 train_time:1849754ms step_avg:531.39ms +step:3482/57344 train_time:1849984ms step_avg:531.30ms +step:3483/57344 train_time:1850470ms step_avg:531.29ms +grad accum step:871/14336 +step:3484/57344 train_time:1851616ms step_avg:531.46ms +step:3485/57344 train_time:1851633ms step_avg:531.32ms +step:3486/57344 train_time:1851862ms step_avg:531.23ms +step:3487/57344 train_time:1852350ms step_avg:531.22ms +grad accum step:872/14336 +step:3488/57344 train_time:1853500ms step_avg:531.39ms +step:3489/57344 train_time:1853518ms step_avg:531.25ms +step:3490/57344 train_time:1853747ms step_avg:531.16ms +step:3491/57344 train_time:1854233ms step_avg:531.15ms +grad accum step:873/14336 +step:3492/57344 train_time:1855380ms step_avg:531.32ms +step:3493/57344 train_time:1855397ms step_avg:531.18ms +step:3494/57344 train_time:1855626ms step_avg:531.09ms +step:3495/57344 train_time:1856113ms step_avg:531.08ms +grad accum step:874/14336 +step:3496/57344 train_time:1857261ms step_avg:531.25ms +step:3497/57344 train_time:1857278ms step_avg:531.11ms +step:3498/57344 train_time:1857506ms step_avg:531.02ms +step:3499/57344 train_time:1857992ms step_avg:531.01ms +grad accum step:875/14336 +step:3500/57344 train_time:1859142ms step_avg:531.18ms +step:3501/57344 train_time:1859159ms step_avg:531.04ms +step:3502/57344 train_time:1859388ms step_avg:530.95ms +step:3503/57344 train_time:1859876ms step_avg:530.94ms +grad accum step:876/14336 +step:3504/57344 train_time:1861024ms step_avg:531.11ms +step:3505/57344 train_time:1861042ms step_avg:530.97ms +step:3506/57344 train_time:1861271ms step_avg:530.88ms +step:3507/57344 train_time:1861757ms step_avg:530.87ms +grad accum step:877/14336 +step:3508/57344 train_time:1862903ms step_avg:531.04ms +step:3509/57344 train_time:1862920ms step_avg:530.90ms +step:3510/57344 train_time:1863149ms step_avg:530.81ms +step:3511/57344 train_time:1863635ms step_avg:530.80ms +grad accum step:878/14336 +step:3512/57344 train_time:1864780ms step_avg:530.97ms +step:3513/57344 train_time:1864797ms step_avg:530.83ms +step:3514/57344 train_time:1865026ms step_avg:530.74ms +step:3515/57344 train_time:1865511ms step_avg:530.73ms +grad accum step:879/14336 +step:3516/57344 train_time:1866659ms step_avg:530.90ms +step:3517/57344 train_time:1866676ms step_avg:530.76ms +step:3518/57344 train_time:1866906ms step_avg:530.67ms +step:3519/57344 train_time:1867393ms step_avg:530.66ms +grad accum step:880/14336 +step:3520/57344 train_time:1868545ms step_avg:530.84ms +step:3520/57344 val_loss:7.654069 train_time:1868547ms step_avg:530.84ms +step:3521/57344 train_time:1868559ms step_avg:530.69ms +step:3522/57344 train_time:1868764ms step_avg:530.60ms +step:3523/57344 train_time:1869252ms step_avg:530.59ms +grad accum step:881/14336 +step:3524/57344 train_time:1870404ms step_avg:530.76ms +step:3525/57344 train_time:1870421ms step_avg:530.62ms +step:3526/57344 train_time:1870650ms step_avg:530.53ms +step:3527/57344 train_time:1871137ms step_avg:530.52ms +grad accum step:882/14336 +step:3528/57344 train_time:1872285ms step_avg:530.69ms +step:3529/57344 train_time:1872302ms step_avg:530.55ms +step:3530/57344 train_time:1872530ms step_avg:530.46ms +step:3531/57344 train_time:1873015ms step_avg:530.45ms +grad accum step:883/14336 +step:3532/57344 train_time:1874157ms step_avg:530.62ms +step:3533/57344 train_time:1874174ms step_avg:530.48ms +step:3534/57344 train_time:1874403ms step_avg:530.39ms +step:3535/57344 train_time:1874890ms step_avg:530.38ms +grad accum step:884/14336 +step:3536/57344 train_time:1876038ms step_avg:530.55ms +step:3537/57344 train_time:1876055ms step_avg:530.41ms +step:3538/57344 train_time:1876284ms step_avg:530.32ms +step:3539/57344 train_time:1876772ms step_avg:530.31ms +grad accum step:885/14336 +step:3540/57344 train_time:1877920ms step_avg:530.49ms +step:3541/57344 train_time:1877937ms step_avg:530.34ms +step:3542/57344 train_time:1878166ms step_avg:530.26ms +step:3543/57344 train_time:1878653ms step_avg:530.24ms +grad accum step:886/14336 +step:3544/57344 train_time:1879796ms step_avg:530.42ms +step:3545/57344 train_time:1879814ms step_avg:530.27ms +step:3546/57344 train_time:1880042ms step_avg:530.19ms +step:3547/57344 train_time:1880530ms step_avg:530.17ms +grad accum step:887/14336 +step:3548/57344 train_time:1881677ms step_avg:530.35ms +step:3549/57344 train_time:1881694ms step_avg:530.20ms +step:3550/57344 train_time:1881922ms step_avg:530.12ms +step:3551/57344 train_time:1882408ms step_avg:530.11ms +grad accum step:888/14336 +step:3552/57344 train_time:1883554ms step_avg:530.28ms +step:3553/57344 train_time:1883571ms step_avg:530.14ms +step:3554/57344 train_time:1883799ms step_avg:530.05ms +step:3555/57344 train_time:1884839ms step_avg:530.19ms +grad accum step:889/14336 +step:3556/57344 train_time:1885574ms step_avg:530.25ms +step:3557/57344 train_time:1885591ms step_avg:530.11ms +step:3558/57344 train_time:1885819ms step_avg:530.02ms +step:3559/57344 train_time:1886307ms step_avg:530.01ms +grad accum step:890/14336 +step:3560/57344 train_time:1887452ms step_avg:530.18ms +step:3561/57344 train_time:1887469ms step_avg:530.04ms +step:3562/57344 train_time:1887697ms step_avg:529.95ms +step:3563/57344 train_time:1888183ms step_avg:529.94ms +grad accum step:891/14336 +step:3564/57344 train_time:1890031ms step_avg:530.31ms +step:3565/57344 train_time:1890049ms step_avg:530.17ms +step:3566/57344 train_time:1890247ms step_avg:530.07ms +step:3567/57344 train_time:1890731ms step_avg:530.06ms +grad accum step:892/14336 +step:3568/57344 train_time:1891875ms step_avg:530.23ms +step:3569/57344 train_time:1891892ms step_avg:530.09ms +step:3570/57344 train_time:1892121ms step_avg:530.01ms +step:3571/57344 train_time:1892607ms step_avg:529.99ms +grad accum step:893/14336 +step:3572/57344 train_time:1893755ms step_avg:530.17ms +step:3573/57344 train_time:1893772ms step_avg:530.02ms +step:3574/57344 train_time:1894002ms step_avg:529.94ms +step:3575/57344 train_time:1894488ms step_avg:529.93ms +grad accum step:894/14336 +step:3576/57344 train_time:1895632ms step_avg:530.10ms +step:3577/57344 train_time:1895650ms step_avg:529.96ms +step:3578/57344 train_time:1895879ms step_avg:529.87ms +step:3579/57344 train_time:1896365ms step_avg:529.86ms +grad accum step:895/14336 +step:3580/57344 train_time:1897537ms step_avg:530.04ms +step:3581/57344 train_time:1897568ms step_avg:529.90ms +step:3582/57344 train_time:1897769ms step_avg:529.81ms +step:3583/57344 train_time:1898255ms step_avg:529.79ms +grad accum step:896/14336 +step:3584/57344 train_time:1899400ms step_avg:529.97ms +step:3584/57344 val_loss:7.655974 train_time:1899400ms step_avg:529.97ms +step:3585/57344 train_time:1899602ms step_avg:529.88ms +step:3586/57344 train_time:1899886ms step_avg:529.81ms +step:3587/57344 train_time:1900366ms step_avg:529.79ms +grad accum step:897/14336 +step:3588/57344 train_time:1901494ms step_avg:529.96ms +step:3589/57344 train_time:1901511ms step_avg:529.82ms +step:3590/57344 train_time:1901736ms step_avg:529.73ms +step:3591/57344 train_time:1902216ms step_avg:529.72ms +grad accum step:898/14336 +step:3592/57344 train_time:1903344ms step_avg:529.88ms +step:3593/57344 train_time:1903361ms step_avg:529.74ms +step:3594/57344 train_time:1903587ms step_avg:529.66ms +step:3595/57344 train_time:1904066ms step_avg:529.64ms +grad accum step:899/14336 +step:3596/57344 train_time:1905200ms step_avg:529.81ms +step:3597/57344 train_time:1905217ms step_avg:529.67ms +step:3598/57344 train_time:1905443ms step_avg:529.58ms +step:3599/57344 train_time:1905925ms step_avg:529.57ms +grad accum step:900/14336 +step:3600/57344 train_time:1907062ms step_avg:529.74ms +step:3601/57344 train_time:1907079ms step_avg:529.60ms +step:3602/57344 train_time:1907305ms step_avg:529.51ms +step:3603/57344 train_time:1907784ms step_avg:529.50ms +grad accum step:901/14336 +step:3604/57344 train_time:1908920ms step_avg:529.67ms +step:3605/57344 train_time:1908937ms step_avg:529.52ms +step:3606/57344 train_time:1909163ms step_avg:529.44ms +step:3607/57344 train_time:1909645ms step_avg:529.43ms +grad accum step:902/14336 +step:3608/57344 train_time:1910782ms step_avg:529.60ms +step:3609/57344 train_time:1910799ms step_avg:529.45ms +step:3610/57344 train_time:1911027ms step_avg:529.37ms +step:3611/57344 train_time:1911509ms step_avg:529.36ms +grad accum step:903/14336 +step:3612/57344 train_time:1912646ms step_avg:529.53ms +step:3613/57344 train_time:1912663ms step_avg:529.38ms +step:3614/57344 train_time:1912886ms step_avg:529.30ms +step:3615/57344 train_time:1913369ms step_avg:529.29ms +grad accum step:904/14336 +step:3616/57344 train_time:1914507ms step_avg:529.45ms +step:3617/57344 train_time:1914525ms step_avg:529.31ms +step:3618/57344 train_time:1914753ms step_avg:529.23ms +step:3619/57344 train_time:1915235ms step_avg:529.22ms +grad accum step:905/14336 +step:3620/57344 train_time:1916376ms step_avg:529.39ms +step:3621/57344 train_time:1916393ms step_avg:529.24ms +step:3622/57344 train_time:1916621ms step_avg:529.16ms +step:3623/57344 train_time:1917104ms step_avg:529.15ms +grad accum step:906/14336 +step:3624/57344 train_time:1918243ms step_avg:529.32ms +step:3625/57344 train_time:1918260ms step_avg:529.18ms +step:3626/57344 train_time:1918487ms step_avg:529.09ms +step:3627/57344 train_time:1918971ms step_avg:529.08ms +grad accum step:907/14336 +step:3628/57344 train_time:1920107ms step_avg:529.25ms +step:3629/57344 train_time:1920124ms step_avg:529.11ms +step:3630/57344 train_time:1920351ms step_avg:529.02ms +step:3631/57344 train_time:1920834ms step_avg:529.01ms +grad accum step:908/14336 +step:3632/57344 train_time:1921971ms step_avg:529.18ms +step:3633/57344 train_time:1921989ms step_avg:529.04ms +step:3634/57344 train_time:1922217ms step_avg:528.95ms +step:3635/57344 train_time:1922699ms step_avg:528.94ms +grad accum step:909/14336 +step:3636/57344 train_time:1923842ms step_avg:529.11ms +step:3637/57344 train_time:1923859ms step_avg:528.97ms +step:3638/57344 train_time:1924087ms step_avg:528.89ms +step:3639/57344 train_time:1924568ms step_avg:528.87ms +grad accum step:910/14336 +step:3640/57344 train_time:1925710ms step_avg:529.04ms +step:3641/57344 train_time:1925728ms step_avg:528.90ms +step:3642/57344 train_time:1925956ms step_avg:528.82ms +step:3643/57344 train_time:1926441ms step_avg:528.81ms +grad accum step:911/14336 +step:3644/57344 train_time:1927583ms step_avg:528.97ms +step:3645/57344 train_time:1927600ms step_avg:528.83ms +step:3646/57344 train_time:1927828ms step_avg:528.75ms +step:3647/57344 train_time:1928311ms step_avg:528.74ms +grad accum step:912/14336 +step:3648/57344 train_time:1929456ms step_avg:528.91ms +step:3648/57344 val_loss:7.667745 train_time:1929456ms step_avg:528.91ms +step:3649/57344 train_time:1929468ms step_avg:528.77ms +step:3650/57344 train_time:1929760ms step_avg:528.70ms +step:3651/57344 train_time:1930250ms step_avg:528.69ms +grad accum step:913/14336 +step:3652/57344 train_time:1931382ms step_avg:528.86ms +step:3653/57344 train_time:1931399ms step_avg:528.72ms +step:3654/57344 train_time:1931625ms step_avg:528.63ms +step:3655/57344 train_time:1932105ms step_avg:528.62ms +grad accum step:914/14336 +step:3656/57344 train_time:1933239ms step_avg:528.79ms +step:3657/57344 train_time:1933256ms step_avg:528.65ms +step:3658/57344 train_time:1933480ms step_avg:528.56ms +step:3659/57344 train_time:1933960ms step_avg:528.55ms +grad accum step:915/14336 +step:3660/57344 train_time:1935095ms step_avg:528.71ms +step:3661/57344 train_time:1935112ms step_avg:528.57ms +step:3662/57344 train_time:1935339ms step_avg:528.49ms +step:3663/57344 train_time:1935819ms step_avg:528.48ms +grad accum step:916/14336 +step:3664/57344 train_time:1936956ms step_avg:528.65ms +step:3665/57344 train_time:1936973ms step_avg:528.51ms +step:3666/57344 train_time:1937200ms step_avg:528.42ms +step:3667/57344 train_time:1937680ms step_avg:528.41ms +grad accum step:917/14336 +step:3668/57344 train_time:1938816ms step_avg:528.58ms +step:3669/57344 train_time:1938833ms step_avg:528.44ms +step:3670/57344 train_time:1939060ms step_avg:528.35ms +step:3671/57344 train_time:1939542ms step_avg:528.34ms +grad accum step:918/14336 +step:3672/57344 train_time:1940680ms step_avg:528.51ms +step:3673/57344 train_time:1940697ms step_avg:528.37ms +step:3674/57344 train_time:1940923ms step_avg:528.29ms +step:3675/57344 train_time:1941404ms step_avg:528.27ms +grad accum step:919/14336 +step:3676/57344 train_time:1942539ms step_avg:528.44ms +step:3677/57344 train_time:1942556ms step_avg:528.30ms +step:3678/57344 train_time:1942784ms step_avg:528.22ms +step:3679/57344 train_time:1943267ms step_avg:528.21ms +grad accum step:920/14336 +step:3680/57344 train_time:1944404ms step_avg:528.37ms +step:3681/57344 train_time:1944421ms step_avg:528.23ms +step:3682/57344 train_time:1944650ms step_avg:528.15ms +step:3683/57344 train_time:1945135ms step_avg:528.14ms +grad accum step:921/14336 +step:3684/57344 train_time:1946271ms step_avg:528.30ms +step:3685/57344 train_time:1946289ms step_avg:528.17ms +step:3686/57344 train_time:1946517ms step_avg:528.08ms +step:3687/57344 train_time:1946999ms step_avg:528.07ms +grad accum step:922/14336 +step:3688/57344 train_time:1948140ms step_avg:528.24ms +step:3689/57344 train_time:1948157ms step_avg:528.10ms +step:3690/57344 train_time:1948384ms step_avg:528.02ms +step:3691/57344 train_time:1948866ms step_avg:528.00ms +grad accum step:923/14336 +step:3692/57344 train_time:1950005ms step_avg:528.17ms +step:3693/57344 train_time:1950022ms step_avg:528.03ms +step:3694/57344 train_time:1950249ms step_avg:527.95ms +step:3695/57344 train_time:1950732ms step_avg:527.94ms +grad accum step:924/14336 +step:3696/57344 train_time:1951873ms step_avg:528.10ms +step:3697/57344 train_time:1951890ms step_avg:527.97ms +step:3698/57344 train_time:1952118ms step_avg:527.88ms +step:3699/57344 train_time:1952604ms step_avg:527.87ms +grad accum step:925/14336 +step:3700/57344 train_time:1953743ms step_avg:528.04ms +step:3701/57344 train_time:1953760ms step_avg:527.90ms +step:3702/57344 train_time:1953988ms step_avg:527.82ms +step:3703/57344 train_time:1954477ms step_avg:527.81ms +grad accum step:926/14336 +step:3704/57344 train_time:1955616ms step_avg:527.97ms +step:3705/57344 train_time:1955634ms step_avg:527.84ms +step:3706/57344 train_time:1955861ms step_avg:527.76ms +step:3707/57344 train_time:1956345ms step_avg:527.74ms +grad accum step:927/14336 +step:3708/57344 train_time:1957484ms step_avg:527.91ms +step:3709/57344 train_time:1957501ms step_avg:527.77ms +step:3710/57344 train_time:1957729ms step_avg:527.69ms +step:3711/57344 train_time:1958213ms step_avg:527.68ms +grad accum step:928/14336 +step:3712/57344 train_time:1959354ms step_avg:527.84ms +step:3712/57344 val_loss:7.656830 train_time:1959355ms step_avg:527.84ms +step:3713/57344 train_time:1959367ms step_avg:527.70ms +step:3714/57344 train_time:1959573ms step_avg:527.62ms +step:3715/57344 train_time:1960060ms step_avg:527.61ms +grad accum step:929/14336 +step:3716/57344 train_time:1961206ms step_avg:527.77ms +step:3717/57344 train_time:1961223ms step_avg:527.64ms +step:3718/57344 train_time:1961450ms step_avg:527.56ms +step:3719/57344 train_time:1961936ms step_avg:527.54ms +grad accum step:930/14336 +step:3720/57344 train_time:1963082ms step_avg:527.71ms +step:3721/57344 train_time:1963099ms step_avg:527.57ms +step:3722/57344 train_time:1963328ms step_avg:527.49ms +step:3723/57344 train_time:1963811ms step_avg:527.48ms +grad accum step:931/14336 +step:3724/57344 train_time:1964958ms step_avg:527.65ms +step:3725/57344 train_time:1964975ms step_avg:527.51ms +step:3726/57344 train_time:1965204ms step_avg:527.43ms +step:3727/57344 train_time:1965690ms step_avg:527.42ms +grad accum step:932/14336 +step:3728/57344 train_time:1966836ms step_avg:527.58ms +step:3729/57344 train_time:1966854ms step_avg:527.45ms +step:3730/57344 train_time:1967083ms step_avg:527.37ms +step:3731/57344 train_time:1967571ms step_avg:527.36ms +grad accum step:933/14336 +step:3732/57344 train_time:1968719ms step_avg:527.52ms +step:3733/57344 train_time:1968736ms step_avg:527.39ms +step:3734/57344 train_time:1968965ms step_avg:527.31ms +step:3735/57344 train_time:1969451ms step_avg:527.30ms +grad accum step:934/14336 +step:3736/57344 train_time:1970595ms step_avg:527.46ms +step:3737/57344 train_time:1970613ms step_avg:527.32ms +step:3738/57344 train_time:1970842ms step_avg:527.25ms +step:3739/57344 train_time:1971328ms step_avg:527.23ms +grad accum step:935/14336 +step:3740/57344 train_time:1972473ms step_avg:527.40ms +step:3741/57344 train_time:1972491ms step_avg:527.26ms +step:3742/57344 train_time:1972720ms step_avg:527.18ms +step:3743/57344 train_time:1973208ms step_avg:527.17ms +grad accum step:936/14336 +step:3744/57344 train_time:1974358ms step_avg:527.34ms +step:3745/57344 train_time:1974376ms step_avg:527.20ms +step:3746/57344 train_time:1974605ms step_avg:527.12ms +step:3747/57344 train_time:1975091ms step_avg:527.11ms +grad accum step:937/14336 +step:3748/57344 train_time:1976239ms step_avg:527.28ms +step:3749/57344 train_time:1976256ms step_avg:527.14ms +step:3750/57344 train_time:1976486ms step_avg:527.06ms +step:3751/57344 train_time:1976972ms step_avg:527.05ms +grad accum step:938/14336 +step:3752/57344 train_time:1978120ms step_avg:527.22ms +step:3753/57344 train_time:1978137ms step_avg:527.08ms +step:3754/57344 train_time:1978368ms step_avg:527.00ms +step:3755/57344 train_time:1978854ms step_avg:526.99ms +grad accum step:939/14336 +step:3756/57344 train_time:1980002ms step_avg:527.16ms +step:3757/57344 train_time:1980019ms step_avg:527.02ms +step:3758/57344 train_time:1980248ms step_avg:526.94ms +step:3759/57344 train_time:1980734ms step_avg:526.93ms +grad accum step:940/14336 +step:3760/57344 train_time:1981882ms step_avg:527.10ms +step:3761/57344 train_time:1981899ms step_avg:526.96ms +step:3762/57344 train_time:1982128ms step_avg:526.88ms +step:3763/57344 train_time:1982615ms step_avg:526.87ms +grad accum step:941/14336 +step:3764/57344 train_time:1983763ms step_avg:527.04ms +step:3765/57344 train_time:1983781ms step_avg:526.90ms +step:3766/57344 train_time:1984011ms step_avg:526.82ms +step:3767/57344 train_time:1984496ms step_avg:526.81ms +grad accum step:942/14336 +step:3768/57344 train_time:1985641ms step_avg:526.97ms +step:3769/57344 train_time:1985658ms step_avg:526.84ms +step:3770/57344 train_time:1985887ms step_avg:526.76ms +step:3771/57344 train_time:1986375ms step_avg:526.75ms +grad accum step:943/14336 +step:3772/57344 train_time:1987522ms step_avg:526.91ms +step:3773/57344 train_time:1987539ms step_avg:526.78ms +step:3774/57344 train_time:1987768ms step_avg:526.70ms +step:3775/57344 train_time:1988257ms step_avg:526.69ms +grad accum step:944/14336 +step:3776/57344 train_time:1989405ms step_avg:526.86ms +step:3776/57344 val_loss:7.658441 train_time:1989406ms step_avg:526.86ms +step:3777/57344 train_time:1989417ms step_avg:526.72ms +step:3778/57344 train_time:1989674ms step_avg:526.65ms +step:3779/57344 train_time:1990157ms step_avg:526.64ms +grad accum step:945/14336 +step:3780/57344 train_time:1991286ms step_avg:526.80ms +step:3781/57344 train_time:1991303ms step_avg:526.66ms +step:3782/57344 train_time:1991528ms step_avg:526.58ms +step:3783/57344 train_time:1992013ms step_avg:526.57ms +grad accum step:946/14336 +step:3784/57344 train_time:1993144ms step_avg:526.73ms +step:3785/57344 train_time:1993161ms step_avg:526.59ms +step:3786/57344 train_time:1993386ms step_avg:526.52ms +step:3787/57344 train_time:1993868ms step_avg:526.50ms +grad accum step:947/14336 +step:3788/57344 train_time:1995000ms step_avg:526.66ms +step:3789/57344 train_time:1995017ms step_avg:526.53ms +step:3790/57344 train_time:1995245ms step_avg:526.45ms +step:3791/57344 train_time:1995726ms step_avg:526.44ms +grad accum step:948/14336 +step:3792/57344 train_time:1996856ms step_avg:526.60ms +step:3793/57344 train_time:1996872ms step_avg:526.46ms +step:3794/57344 train_time:1997098ms step_avg:526.38ms +step:3795/57344 train_time:1997578ms step_avg:526.37ms +grad accum step:949/14336 +step:3796/57344 train_time:1998710ms step_avg:526.53ms +step:3797/57344 train_time:1998727ms step_avg:526.40ms +step:3798/57344 train_time:1998953ms step_avg:526.32ms +step:3799/57344 train_time:1999434ms step_avg:526.31ms +grad accum step:950/14336 +step:3800/57344 train_time:2000569ms step_avg:526.47ms +step:3801/57344 train_time:2000586ms step_avg:526.33ms +step:3802/57344 train_time:2000812ms step_avg:526.25ms +step:3803/57344 train_time:2001292ms step_avg:526.24ms +grad accum step:951/14336 +step:3804/57344 train_time:2002429ms step_avg:526.40ms +step:3805/57344 train_time:2002447ms step_avg:526.27ms +step:3806/57344 train_time:2002674ms step_avg:526.19ms +step:3807/57344 train_time:2003154ms step_avg:526.18ms +grad accum step:952/14336 +step:3808/57344 train_time:2004291ms step_avg:526.34ms +step:3809/57344 train_time:2004308ms step_avg:526.20ms +step:3810/57344 train_time:2004537ms step_avg:526.13ms +step:3811/57344 train_time:2005022ms step_avg:526.11ms +grad accum step:953/14336 +step:3812/57344 train_time:2006158ms step_avg:526.27ms +step:3813/57344 train_time:2006175ms step_avg:526.14ms +step:3814/57344 train_time:2006402ms step_avg:526.06ms +step:3815/57344 train_time:2006886ms step_avg:526.05ms +grad accum step:954/14336 +step:3816/57344 train_time:2008023ms step_avg:526.21ms +step:3817/57344 train_time:2008040ms step_avg:526.08ms +step:3818/57344 train_time:2008267ms step_avg:526.00ms +step:3819/57344 train_time:2008749ms step_avg:525.99ms +grad accum step:955/14336 +step:3820/57344 train_time:2009889ms step_avg:526.15ms +step:3821/57344 train_time:2009906ms step_avg:526.02ms +step:3822/57344 train_time:2010132ms step_avg:525.94ms +step:3823/57344 train_time:2010615ms step_avg:525.93ms +grad accum step:956/14336 +step:3824/57344 train_time:2011753ms step_avg:526.09ms +step:3825/57344 train_time:2011771ms step_avg:525.95ms +step:3826/57344 train_time:2011998ms step_avg:525.88ms +step:3827/57344 train_time:2012481ms step_avg:525.86ms +grad accum step:957/14336 +step:3828/57344 train_time:2013618ms step_avg:526.02ms +step:3829/57344 train_time:2013635ms step_avg:525.89ms +step:3830/57344 train_time:2013862ms step_avg:525.81ms +step:3831/57344 train_time:2014345ms step_avg:525.80ms +grad accum step:958/14336 +step:3832/57344 train_time:2015484ms step_avg:525.96ms +step:3833/57344 train_time:2015501ms step_avg:525.83ms +step:3834/57344 train_time:2015729ms step_avg:525.75ms +step:3835/57344 train_time:2016212ms step_avg:525.74ms +grad accum step:959/14336 +step:3836/57344 train_time:2017354ms step_avg:525.90ms +step:3837/57344 train_time:2017371ms step_avg:525.77ms +step:3838/57344 train_time:2017599ms step_avg:525.69ms +step:3839/57344 train_time:2018084ms step_avg:525.68ms +grad accum step:960/14336 +step:3840/57344 train_time:2019221ms step_avg:525.84ms +step:3840/57344 val_loss:7.663338 train_time:2019221ms step_avg:525.84ms +step:3841/57344 train_time:2019233ms step_avg:525.71ms +step:3842/57344 train_time:2019439ms step_avg:525.62ms +step:3843/57344 train_time:2019926ms step_avg:525.61ms +grad accum step:961/14336 +step:3844/57344 train_time:2021074ms step_avg:525.77ms +step:3845/57344 train_time:2021091ms step_avg:525.64ms +step:3846/57344 train_time:2021320ms step_avg:525.56ms +step:3847/57344 train_time:2021807ms step_avg:525.55ms +grad accum step:962/14336 +step:3848/57344 train_time:2022955ms step_avg:525.72ms +step:3849/57344 train_time:2022972ms step_avg:525.58ms +step:3850/57344 train_time:2023201ms step_avg:525.51ms +step:3851/57344 train_time:2023687ms step_avg:525.50ms +grad accum step:963/14336 +step:3852/57344 train_time:2024837ms step_avg:525.66ms +step:3853/57344 train_time:2024854ms step_avg:525.53ms +step:3854/57344 train_time:2025082ms step_avg:525.45ms +step:3855/57344 train_time:2025569ms step_avg:525.44ms +grad accum step:964/14336 +step:3856/57344 train_time:2026719ms step_avg:525.60ms +step:3857/57344 train_time:2026736ms step_avg:525.47ms +step:3858/57344 train_time:2026967ms step_avg:525.39ms +step:3859/57344 train_time:2027455ms step_avg:525.38ms +grad accum step:965/14336 +step:3860/57344 train_time:2028604ms step_avg:525.55ms +step:3861/57344 train_time:2028622ms step_avg:525.41ms +step:3862/57344 train_time:2028851ms step_avg:525.34ms +step:3863/57344 train_time:2029339ms step_avg:525.33ms +grad accum step:966/14336 +step:3864/57344 train_time:2030487ms step_avg:525.49ms +step:3865/57344 train_time:2030504ms step_avg:525.36ms +step:3866/57344 train_time:2030734ms step_avg:525.28ms +step:3867/57344 train_time:2031221ms step_avg:525.27ms +grad accum step:967/14336 +step:3868/57344 train_time:2032370ms step_avg:525.43ms +step:3869/57344 train_time:2032387ms step_avg:525.30ms +step:3870/57344 train_time:2032616ms step_avg:525.22ms +step:3871/57344 train_time:2033103ms step_avg:525.21ms +grad accum step:968/14336 +step:3872/57344 train_time:2034252ms step_avg:525.38ms +step:3873/57344 train_time:2034270ms step_avg:525.24ms +step:3874/57344 train_time:2034499ms step_avg:525.17ms +step:3875/57344 train_time:2034987ms step_avg:525.16ms +grad accum step:969/14336 +step:3876/57344 train_time:2036136ms step_avg:525.32ms +step:3877/57344 train_time:2036153ms step_avg:525.19ms +step:3878/57344 train_time:2036384ms step_avg:525.11ms +step:3879/57344 train_time:2036872ms step_avg:525.10ms +grad accum step:970/14336 +step:3880/57344 train_time:2038021ms step_avg:525.26ms +step:3881/57344 train_time:2038038ms step_avg:525.13ms +step:3882/57344 train_time:2038267ms step_avg:525.06ms +step:3883/57344 train_time:2038754ms step_avg:525.05ms +grad accum step:971/14336 +step:3884/57344 train_time:2039904ms step_avg:525.21ms +step:3885/57344 train_time:2039921ms step_avg:525.08ms +step:3886/57344 train_time:2040150ms step_avg:525.00ms +step:3887/57344 train_time:2040636ms step_avg:524.99ms +grad accum step:972/14336 +step:3888/57344 train_time:2041785ms step_avg:525.15ms +step:3889/57344 train_time:2041802ms step_avg:525.02ms +step:3890/57344 train_time:2042032ms step_avg:524.94ms +step:3891/57344 train_time:2042520ms step_avg:524.93ms +grad accum step:973/14336 +step:3892/57344 train_time:2043670ms step_avg:525.09ms +step:3893/57344 train_time:2043687ms step_avg:524.96ms +step:3894/57344 train_time:2043917ms step_avg:524.89ms +step:3895/57344 train_time:2044403ms step_avg:524.88ms +grad accum step:974/14336 +step:3896/57344 train_time:2045554ms step_avg:525.04ms +step:3897/57344 train_time:2045571ms step_avg:524.91ms +step:3898/57344 train_time:2045799ms step_avg:524.83ms +step:3899/57344 train_time:2046287ms step_avg:524.82ms +grad accum step:975/14336 +step:3900/57344 train_time:2047439ms step_avg:524.98ms +step:3901/57344 train_time:2047456ms step_avg:524.85ms +step:3902/57344 train_time:2047685ms step_avg:524.78ms +step:3903/57344 train_time:2048174ms step_avg:524.77ms +grad accum step:976/14336 +step:3904/57344 train_time:2049322ms step_avg:524.93ms +step:3904/57344 val_loss:7.677653 train_time:2049323ms step_avg:524.93ms +step:3905/57344 train_time:2049335ms step_avg:524.80ms +step:3906/57344 train_time:2049541ms step_avg:524.72ms +step:3907/57344 train_time:2050028ms step_avg:524.71ms +grad accum step:977/14336 +step:3908/57344 train_time:2051178ms step_avg:524.87ms +step:3909/57344 train_time:2051196ms step_avg:524.74ms +step:3910/57344 train_time:2051425ms step_avg:524.66ms +step:3911/57344 train_time:2051913ms step_avg:524.65ms +grad accum step:978/14336 +step:3912/57344 train_time:2053064ms step_avg:524.81ms +step:3913/57344 train_time:2053081ms step_avg:524.68ms +step:3914/57344 train_time:2053310ms step_avg:524.61ms +step:3915/57344 train_time:2053795ms step_avg:524.60ms +grad accum step:979/14336 +step:3916/57344 train_time:2054944ms step_avg:524.76ms +step:3917/57344 train_time:2054962ms step_avg:524.63ms +step:3918/57344 train_time:2055192ms step_avg:524.55ms +step:3919/57344 train_time:2055681ms step_avg:524.54ms +grad accum step:980/14336 +step:3920/57344 train_time:2056830ms step_avg:524.70ms +step:3921/57344 train_time:2056847ms step_avg:524.57ms +step:3922/57344 train_time:2057077ms step_avg:524.50ms +step:3923/57344 train_time:2057565ms step_avg:524.49ms +grad accum step:981/14336 +step:3924/57344 train_time:2058719ms step_avg:524.65ms +step:3925/57344 train_time:2058737ms step_avg:524.52ms +step:3926/57344 train_time:2058965ms step_avg:524.44ms +step:3927/57344 train_time:2059452ms step_avg:524.43ms +grad accum step:982/14336 +step:3928/57344 train_time:2060605ms step_avg:524.59ms +step:3929/57344 train_time:2060622ms step_avg:524.46ms +step:3930/57344 train_time:2060851ms step_avg:524.39ms +step:3931/57344 train_time:2061339ms step_avg:524.38ms +grad accum step:983/14336 +step:3932/57344 train_time:2062489ms step_avg:524.54ms +step:3933/57344 train_time:2062507ms step_avg:524.41ms +step:3934/57344 train_time:2062737ms step_avg:524.34ms +step:3935/57344 train_time:2063227ms step_avg:524.33ms +grad accum step:984/14336 +step:3936/57344 train_time:2064377ms step_avg:524.49ms +step:3937/57344 train_time:2064395ms step_avg:524.36ms +step:3938/57344 train_time:2064624ms step_avg:524.28ms +step:3939/57344 train_time:2065112ms step_avg:524.27ms +grad accum step:985/14336 +step:3940/57344 train_time:2066264ms step_avg:524.43ms +step:3941/57344 train_time:2066282ms step_avg:524.30ms +step:3942/57344 train_time:2066512ms step_avg:524.23ms +step:3943/57344 train_time:2066999ms step_avg:524.22ms +grad accum step:986/14336 +step:3944/57344 train_time:2068151ms step_avg:524.38ms +step:3945/57344 train_time:2068168ms step_avg:524.25ms +step:3946/57344 train_time:2068398ms step_avg:524.18ms +step:3947/57344 train_time:2068885ms step_avg:524.17ms +grad accum step:987/14336 +step:3948/57344 train_time:2070039ms step_avg:524.33ms +step:3949/57344 train_time:2070056ms step_avg:524.20ms +step:3950/57344 train_time:2070286ms step_avg:524.12ms +step:3951/57344 train_time:2070773ms step_avg:524.11ms +grad accum step:988/14336 +step:3952/57344 train_time:2071926ms step_avg:524.27ms +step:3953/57344 train_time:2071943ms step_avg:524.14ms +step:3954/57344 train_time:2072172ms step_avg:524.07ms +step:3955/57344 train_time:2072661ms step_avg:524.06ms +grad accum step:989/14336 +step:3956/57344 train_time:2073813ms step_avg:524.22ms +step:3957/57344 train_time:2073830ms step_avg:524.09ms +step:3958/57344 train_time:2074059ms step_avg:524.02ms +step:3959/57344 train_time:2074549ms step_avg:524.01ms +grad accum step:990/14336 +step:3960/57344 train_time:2075701ms step_avg:524.17ms +step:3961/57344 train_time:2075718ms step_avg:524.04ms +step:3962/57344 train_time:2075948ms step_avg:523.96ms +step:3963/57344 train_time:2076436ms step_avg:523.96ms +grad accum step:991/14336 +step:3964/57344 train_time:2077587ms step_avg:524.11ms +step:3965/57344 train_time:2077605ms step_avg:523.99ms +step:3966/57344 train_time:2077835ms step_avg:523.91ms +step:3967/57344 train_time:2078323ms step_avg:523.90ms +grad accum step:992/14336 +step:3968/57344 train_time:2079473ms step_avg:524.06ms +step:3968/57344 val_loss:7.659332 train_time:2079474ms step_avg:524.06ms +step:3969/57344 train_time:2079486ms step_avg:523.93ms +step:3970/57344 train_time:2079692ms step_avg:523.85ms +step:3971/57344 train_time:2080181ms step_avg:523.84ms +grad accum step:993/14336 +step:3972/57344 train_time:2081331ms step_avg:524.00ms +step:3973/57344 train_time:2081348ms step_avg:523.87ms +step:3974/57344 train_time:2081578ms step_avg:523.80ms +step:3975/57344 train_time:2082065ms step_avg:523.79ms +grad accum step:994/14336 +step:3976/57344 train_time:2083213ms step_avg:523.95ms +step:3977/57344 train_time:2083231ms step_avg:523.82ms +step:3978/57344 train_time:2083460ms step_avg:523.75ms +step:3979/57344 train_time:2083947ms step_avg:523.74ms +grad accum step:995/14336 +step:3980/57344 train_time:2085097ms step_avg:523.89ms +step:3981/57344 train_time:2085114ms step_avg:523.77ms +step:3982/57344 train_time:2085344ms step_avg:523.69ms +step:3983/57344 train_time:2085832ms step_avg:523.68ms +grad accum step:996/14336 +step:3984/57344 train_time:2086985ms step_avg:523.84ms +step:3985/57344 train_time:2087003ms step_avg:523.71ms +step:3986/57344 train_time:2087231ms step_avg:523.64ms +step:3987/57344 train_time:2087718ms step_avg:523.63ms +grad accum step:997/14336 +step:3988/57344 train_time:2088865ms step_avg:523.79ms +step:3989/57344 train_time:2088882ms step_avg:523.66ms +step:3990/57344 train_time:2089113ms step_avg:523.59ms +step:3991/57344 train_time:2089600ms step_avg:523.58ms +grad accum step:998/14336 +step:3992/57344 train_time:2090753ms step_avg:523.74ms +step:3993/57344 train_time:2090771ms step_avg:523.61ms +step:3994/57344 train_time:2091001ms step_avg:523.54ms +step:3995/57344 train_time:2091488ms step_avg:523.53ms +grad accum step:999/14336 +step:3996/57344 train_time:2092636ms step_avg:523.68ms +step:3997/57344 train_time:2092654ms step_avg:523.56ms +step:3998/57344 train_time:2092883ms step_avg:523.48ms +step:3999/57344 train_time:2093370ms step_avg:523.47ms +grad accum step:1000/14336 +step:4000/57344 train_time:2094519ms step_avg:523.63ms +step:4001/57344 train_time:2094535ms step_avg:523.50ms +step:4002/57344 train_time:2094764ms step_avg:523.43ms +step:4003/57344 train_time:2095252ms step_avg:523.42ms +grad accum step:1001/14336 +step:4004/57344 train_time:2096407ms step_avg:523.58ms +step:4005/57344 train_time:2096425ms step_avg:523.45ms +step:4006/57344 train_time:2096654ms step_avg:523.38ms +step:4007/57344 train_time:2097142ms step_avg:523.37ms +grad accum step:1002/14336 +step:4008/57344 train_time:2098293ms step_avg:523.53ms +step:4009/57344 train_time:2098310ms step_avg:523.40ms +step:4010/57344 train_time:2098539ms step_avg:523.33ms +step:4011/57344 train_time:2099024ms step_avg:523.32ms +grad accum step:1003/14336 +step:4012/57344 train_time:2100173ms step_avg:523.47ms +step:4013/57344 train_time:2100190ms step_avg:523.35ms +step:4014/57344 train_time:2100419ms step_avg:523.27ms +step:4015/57344 train_time:2100905ms step_avg:523.26ms +grad accum step:1004/14336 +step:4016/57344 train_time:2102051ms step_avg:523.42ms +step:4017/57344 train_time:2102068ms step_avg:523.29ms +step:4018/57344 train_time:2102297ms step_avg:523.22ms +step:4019/57344 train_time:2102783ms step_avg:523.21ms +grad accum step:1005/14336 +step:4020/57344 train_time:2103928ms step_avg:523.37ms +step:4021/57344 train_time:2103945ms step_avg:523.24ms +step:4022/57344 train_time:2104175ms step_avg:523.17ms +step:4023/57344 train_time:2104664ms step_avg:523.16ms +grad accum step:1006/14336 +step:4024/57344 train_time:2105810ms step_avg:523.31ms +step:4025/57344 train_time:2105827ms step_avg:523.19ms +step:4026/57344 train_time:2106056ms step_avg:523.11ms +step:4027/57344 train_time:2106544ms step_avg:523.10ms +grad accum step:1007/14336 +step:4028/57344 train_time:2107691ms step_avg:523.26ms +step:4029/57344 train_time:2107708ms step_avg:523.13ms +step:4030/57344 train_time:2107937ms step_avg:523.06ms +step:4031/57344 train_time:2108423ms step_avg:523.05ms +grad accum step:1008/14336 +step:4032/57344 train_time:2109571ms step_avg:523.21ms +step:4032/57344 val_loss:7.704954 train_time:2109571ms step_avg:523.21ms +step:4033/57344 train_time:2109583ms step_avg:523.08ms +step:4034/57344 train_time:2109788ms step_avg:523.00ms +step:4035/57344 train_time:2110276ms step_avg:522.99ms +grad accum step:1009/14336 +step:4036/57344 train_time:2111421ms step_avg:523.15ms +step:4037/57344 train_time:2111439ms step_avg:523.02ms +step:4038/57344 train_time:2111667ms step_avg:522.95ms +step:4039/57344 train_time:2112152ms step_avg:522.94ms +grad accum step:1010/14336 +step:4040/57344 train_time:2113299ms step_avg:523.09ms +step:4041/57344 train_time:2113316ms step_avg:522.97ms +step:4042/57344 train_time:2113543ms step_avg:522.90ms +step:4043/57344 train_time:2114027ms step_avg:522.89ms +grad accum step:1011/14336 +step:4044/57344 train_time:2115174ms step_avg:523.04ms +step:4045/57344 train_time:2115191ms step_avg:522.92ms +step:4046/57344 train_time:2115420ms step_avg:522.84ms +step:4047/57344 train_time:2115906ms step_avg:522.83ms +grad accum step:1012/14336 +step:4048/57344 train_time:2117050ms step_avg:522.99ms +step:4049/57344 train_time:2117067ms step_avg:522.86ms +step:4050/57344 train_time:2117295ms step_avg:522.79ms +step:4051/57344 train_time:2117782ms step_avg:522.78ms +grad accum step:1013/14336 +step:4052/57344 train_time:2118927ms step_avg:522.93ms +step:4053/57344 train_time:2118944ms step_avg:522.81ms +step:4054/57344 train_time:2119173ms step_avg:522.74ms +step:4055/57344 train_time:2119658ms step_avg:522.73ms +grad accum step:1014/14336 +step:4056/57344 train_time:2120802ms step_avg:522.88ms +step:4057/57344 train_time:2120819ms step_avg:522.76ms +step:4058/57344 train_time:2121049ms step_avg:522.68ms +step:4059/57344 train_time:2121535ms step_avg:522.67ms +grad accum step:1015/14336 +step:4060/57344 train_time:2122680ms step_avg:522.83ms +step:4061/57344 train_time:2122697ms step_avg:522.70ms +step:4062/57344 train_time:2122925ms step_avg:522.63ms +step:4063/57344 train_time:2123409ms step_avg:522.62ms +grad accum step:1016/14336 +step:4064/57344 train_time:2124556ms step_avg:522.77ms +step:4065/57344 train_time:2124573ms step_avg:522.65ms +step:4066/57344 train_time:2124801ms step_avg:522.58ms +step:4067/57344 train_time:2125285ms step_avg:522.57ms +grad accum step:1017/14336 +step:4068/57344 train_time:2126431ms step_avg:522.72ms +step:4069/57344 train_time:2126448ms step_avg:522.60ms +step:4070/57344 train_time:2126677ms step_avg:522.53ms +step:4071/57344 train_time:2127162ms step_avg:522.52ms +grad accum step:1018/14336 +step:4072/57344 train_time:2128308ms step_avg:522.67ms +step:4073/57344 train_time:2128325ms step_avg:522.54ms +step:4074/57344 train_time:2128554ms step_avg:522.47ms +step:4075/57344 train_time:2129040ms step_avg:522.46ms +grad accum step:1019/14336 +step:4076/57344 train_time:2130186ms step_avg:522.62ms +step:4077/57344 train_time:2130203ms step_avg:522.49ms +step:4078/57344 train_time:2130432ms step_avg:522.42ms +step:4079/57344 train_time:2130916ms step_avg:522.41ms +grad accum step:1020/14336 +step:4080/57344 train_time:2132060ms step_avg:522.56ms +step:4081/57344 train_time:2132077ms step_avg:522.44ms +step:4082/57344 train_time:2132306ms step_avg:522.37ms +step:4083/57344 train_time:2132791ms step_avg:522.36ms +grad accum step:1021/14336 +step:4084/57344 train_time:2133938ms step_avg:522.51ms +step:4085/57344 train_time:2133955ms step_avg:522.39ms +step:4086/57344 train_time:2134183ms step_avg:522.32ms +step:4087/57344 train_time:2134667ms step_avg:522.31ms +grad accum step:1022/14336 +step:4088/57344 train_time:2135809ms step_avg:522.46ms +step:4089/57344 train_time:2135827ms step_avg:522.33ms +step:4090/57344 train_time:2136056ms step_avg:522.26ms +step:4091/57344 train_time:2136542ms step_avg:522.25ms +grad accum step:1023/14336 +step:4092/57344 train_time:2137685ms step_avg:522.41ms +step:4093/57344 train_time:2137701ms step_avg:522.28ms +step:4094/57344 train_time:2137931ms step_avg:522.21ms +step:4095/57344 train_time:2138416ms step_avg:522.20ms +grad accum step:1024/14336 +step:4096/57344 train_time:2139562ms step_avg:522.35ms +step:4096/57344 val_loss:7.691890 train_time:2139562ms step_avg:522.35ms +step:4097/57344 train_time:2139574ms step_avg:522.23ms +step:4098/57344 train_time:2139780ms step_avg:522.15ms +step:4099/57344 train_time:2140266ms step_avg:522.14ms +grad accum step:1025/14336 +step:4100/57344 train_time:2141413ms step_avg:522.30ms +step:4101/57344 train_time:2141430ms step_avg:522.17ms +step:4102/57344 train_time:2141661ms step_avg:522.10ms +step:4103/57344 train_time:2142161ms step_avg:522.10ms +grad accum step:1026/14336 +step:4104/57344 train_time:2143344ms step_avg:522.26ms +step:4105/57344 train_time:2143361ms step_avg:522.13ms +step:4106/57344 train_time:2143593ms step_avg:522.06ms +step:4107/57344 train_time:2144097ms step_avg:522.06ms +grad accum step:1027/14336 +step:4108/57344 train_time:2145289ms step_avg:522.22ms +step:4109/57344 train_time:2145306ms step_avg:522.10ms +step:4110/57344 train_time:2145540ms step_avg:522.03ms +step:4111/57344 train_time:2146046ms step_avg:522.03ms +grad accum step:1028/14336 +step:4112/57344 train_time:2147228ms step_avg:522.19ms +step:4113/57344 train_time:2147245ms step_avg:522.06ms +step:4114/57344 train_time:2147476ms step_avg:521.99ms +step:4115/57344 train_time:2147978ms step_avg:521.99ms +grad accum step:1029/14336 +step:4116/57344 train_time:2149169ms step_avg:522.15ms +step:4117/57344 train_time:2149186ms step_avg:522.03ms +step:4118/57344 train_time:2149417ms step_avg:521.96ms +step:4119/57344 train_time:2149917ms step_avg:521.95ms +grad accum step:1030/14336 +step:4120/57344 train_time:2151097ms step_avg:522.11ms +step:4121/57344 train_time:2151114ms step_avg:521.99ms +step:4122/57344 train_time:2151346ms step_avg:521.92ms +step:4123/57344 train_time:2151846ms step_avg:521.91ms +grad accum step:1031/14336 +step:4124/57344 train_time:2153028ms step_avg:522.07ms +step:4125/57344 train_time:2153045ms step_avg:521.95ms +step:4126/57344 train_time:2153279ms step_avg:521.88ms +step:4127/57344 train_time:2153784ms step_avg:521.88ms +grad accum step:1032/14336 +step:4128/57344 train_time:2154971ms step_avg:522.04ms +step:4129/57344 train_time:2154988ms step_avg:521.92ms +step:4130/57344 train_time:2155219ms step_avg:521.84ms +step:4131/57344 train_time:2155717ms step_avg:521.84ms +grad accum step:1033/14336 +step:4132/57344 train_time:2156903ms step_avg:522.00ms +step:4133/57344 train_time:2156920ms step_avg:521.88ms +step:4134/57344 train_time:2157152ms step_avg:521.81ms +step:4135/57344 train_time:2157654ms step_avg:521.80ms +grad accum step:1034/14336 +step:4136/57344 train_time:2158846ms step_avg:521.96ms +step:4137/57344 train_time:2158863ms step_avg:521.84ms +step:4138/57344 train_time:2159093ms step_avg:521.77ms +step:4139/57344 train_time:2159593ms step_avg:521.77ms +grad accum step:1035/14336 +step:4140/57344 train_time:2160779ms step_avg:521.93ms +step:4141/57344 train_time:2160796ms step_avg:521.81ms +step:4142/57344 train_time:2161027ms step_avg:521.74ms +step:4143/57344 train_time:2161528ms step_avg:521.73ms +grad accum step:1036/14336 +step:4144/57344 train_time:2162712ms step_avg:521.89ms +step:4145/57344 train_time:2162730ms step_avg:521.77ms +step:4146/57344 train_time:2162962ms step_avg:521.70ms +step:4147/57344 train_time:2163465ms step_avg:521.69ms +grad accum step:1037/14336 +step:4148/57344 train_time:2164644ms step_avg:521.85ms +step:4149/57344 train_time:2164661ms step_avg:521.73ms +step:4150/57344 train_time:2164894ms step_avg:521.66ms +step:4151/57344 train_time:2165398ms step_avg:521.66ms +grad accum step:1038/14336 +step:4152/57344 train_time:2166584ms step_avg:521.82ms +step:4153/57344 train_time:2166601ms step_avg:521.70ms +step:4154/57344 train_time:2166833ms step_avg:521.63ms +step:4155/57344 train_time:2167336ms step_avg:521.62ms +grad accum step:1039/14336 +step:4156/57344 train_time:2168522ms step_avg:521.78ms +step:4157/57344 train_time:2168539ms step_avg:521.66ms +step:4158/57344 train_time:2168770ms step_avg:521.59ms +step:4159/57344 train_time:2169269ms step_avg:521.58ms +grad accum step:1040/14336 +step:4160/57344 train_time:2170456ms step_avg:521.74ms +step:4160/57344 val_loss:7.703646 train_time:2170457ms step_avg:521.74ms +step:4161/57344 train_time:2170468ms step_avg:521.62ms +step:4162/57344 train_time:2170677ms step_avg:521.55ms +step:4163/57344 train_time:2171176ms step_avg:521.54ms +grad accum step:1041/14336 +step:4164/57344 train_time:2172358ms step_avg:521.70ms +step:4165/57344 train_time:2172375ms step_avg:521.58ms +step:4166/57344 train_time:2172607ms step_avg:521.51ms +step:4167/57344 train_time:2173108ms step_avg:521.50ms +grad accum step:1042/14336 +step:4168/57344 train_time:2174288ms step_avg:521.66ms +step:4169/57344 train_time:2174305ms step_avg:521.54ms +step:4170/57344 train_time:2174537ms step_avg:521.47ms +step:4171/57344 train_time:2175038ms step_avg:521.47ms +grad accum step:1043/14336 +step:4172/57344 train_time:2176218ms step_avg:521.62ms +step:4173/57344 train_time:2176235ms step_avg:521.50ms +step:4174/57344 train_time:2176466ms step_avg:521.43ms +step:4175/57344 train_time:2176966ms step_avg:521.43ms +grad accum step:1044/14336 +step:4176/57344 train_time:2178150ms step_avg:521.59ms +step:4177/57344 train_time:2178167ms step_avg:521.47ms +step:4178/57344 train_time:2178400ms step_avg:521.40ms +step:4179/57344 train_time:2178902ms step_avg:521.39ms +grad accum step:1045/14336 +step:4180/57344 train_time:2180091ms step_avg:521.55ms +step:4181/57344 train_time:2180108ms step_avg:521.43ms +step:4182/57344 train_time:2180341ms step_avg:521.36ms +step:4183/57344 train_time:2180847ms step_avg:521.36ms +grad accum step:1046/14336 +step:4184/57344 train_time:2182030ms step_avg:521.52ms +step:4185/57344 train_time:2182047ms step_avg:521.40ms +step:4186/57344 train_time:2182278ms step_avg:521.33ms +step:4187/57344 train_time:2182780ms step_avg:521.32ms +grad accum step:1047/14336 +step:4188/57344 train_time:2183964ms step_avg:521.48ms +step:4189/57344 train_time:2183981ms step_avg:521.36ms +step:4190/57344 train_time:2184212ms step_avg:521.29ms +step:4191/57344 train_time:2184713ms step_avg:521.29ms +grad accum step:1048/14336 +step:4192/57344 train_time:2185894ms step_avg:521.44ms +step:4193/57344 train_time:2185912ms step_avg:521.32ms +step:4194/57344 train_time:2186144ms step_avg:521.26ms +step:4195/57344 train_time:2186646ms step_avg:521.25ms +grad accum step:1049/14336 +step:4196/57344 train_time:2187832ms step_avg:521.41ms +step:4197/57344 train_time:2187850ms step_avg:521.29ms +step:4198/57344 train_time:2188081ms step_avg:521.22ms +step:4199/57344 train_time:2188580ms step_avg:521.21ms +grad accum step:1050/14336 +step:4200/57344 train_time:2189760ms step_avg:521.37ms +step:4201/57344 train_time:2189777ms step_avg:521.25ms +step:4202/57344 train_time:2190010ms step_avg:521.18ms +step:4203/57344 train_time:2190510ms step_avg:521.18ms +grad accum step:1051/14336 +step:4204/57344 train_time:2191703ms step_avg:521.34ms +step:4205/57344 train_time:2191721ms step_avg:521.22ms +step:4206/57344 train_time:2191952ms step_avg:521.15ms +step:4207/57344 train_time:2192453ms step_avg:521.14ms +grad accum step:1052/14336 +step:4208/57344 train_time:2193632ms step_avg:521.30ms +step:4209/57344 train_time:2193649ms step_avg:521.18ms +step:4210/57344 train_time:2193879ms step_avg:521.11ms +step:4211/57344 train_time:2194378ms step_avg:521.11ms +grad accum step:1053/14336 +step:4212/57344 train_time:2195563ms step_avg:521.26ms +step:4213/57344 train_time:2195580ms step_avg:521.14ms +step:4214/57344 train_time:2195811ms step_avg:521.08ms +step:4215/57344 train_time:2196313ms step_avg:521.07ms +grad accum step:1054/14336 +step:4216/57344 train_time:2197500ms step_avg:521.23ms +step:4217/57344 train_time:2197517ms step_avg:521.11ms +step:4218/57344 train_time:2197750ms step_avg:521.04ms +step:4219/57344 train_time:2198252ms step_avg:521.04ms +grad accum step:1055/14336 +step:4220/57344 train_time:2199430ms step_avg:521.19ms +step:4221/57344 train_time:2199447ms step_avg:521.07ms +step:4222/57344 train_time:2199679ms step_avg:521.00ms +step:4223/57344 train_time:2200183ms step_avg:521.00ms +grad accum step:1056/14336 +step:4224/57344 train_time:2201368ms step_avg:521.16ms +step:4224/57344 val_loss:7.705266 train_time:2201369ms step_avg:521.16ms +step:4225/57344 train_time:2201381ms step_avg:521.04ms +step:4226/57344 train_time:2201589ms step_avg:520.96ms +step:4227/57344 train_time:2202091ms step_avg:520.96ms +grad accum step:1057/14336 +step:4228/57344 train_time:2203277ms step_avg:521.12ms +step:4229/57344 train_time:2203294ms step_avg:521.00ms +step:4230/57344 train_time:2203526ms step_avg:520.93ms +step:4231/57344 train_time:2204026ms step_avg:520.92ms +grad accum step:1058/14336 +step:4232/57344 train_time:2205212ms step_avg:521.08ms +step:4233/57344 train_time:2205229ms step_avg:520.96ms +step:4234/57344 train_time:2205461ms step_avg:520.89ms +step:4235/57344 train_time:2205962ms step_avg:520.89ms +grad accum step:1059/14336 +step:4236/57344 train_time:2207146ms step_avg:521.04ms +step:4237/57344 train_time:2207164ms step_avg:520.93ms +step:4238/57344 train_time:2207395ms step_avg:520.86ms +step:4239/57344 train_time:2207896ms step_avg:520.85ms +grad accum step:1060/14336 +step:4240/57344 train_time:2220493ms step_avg:523.70ms +step:4241/57344 train_time:2220508ms step_avg:523.58ms +step:4242/57344 train_time:2220769ms step_avg:523.52ms +step:4243/57344 train_time:2230572ms step_avg:525.71ms +grad accum step:1061/14336 +step:4244/57344 train_time:2235969ms step_avg:526.85ms +step:4245/57344 train_time:2235984ms step_avg:526.73ms +step:4246/57344 train_time:2236187ms step_avg:526.66ms +step:4247/57344 train_time:2236681ms step_avg:526.65ms +grad accum step:1062/14336 +step:4248/57344 train_time:2237850ms step_avg:526.80ms +step:4249/57344 train_time:2260546ms step_avg:532.02ms +step:4250/57344 train_time:2270418ms step_avg:534.22ms +step:4251/57344 train_time:2290499ms step_avg:538.81ms +grad accum step:1063/14336 +step:4252/57344 train_time:2312239ms step_avg:543.80ms +step:4253/57344 train_time:2312256ms step_avg:543.68ms +step:4254/57344 train_time:2312483ms step_avg:543.60ms +step:4255/57344 train_time:2312973ms step_avg:543.59ms +grad accum step:1064/14336 +step:4256/57344 train_time:2314130ms step_avg:543.73ms +step:4257/57344 train_time:2314147ms step_avg:543.61ms +step:4258/57344 train_time:2314374ms step_avg:543.54ms +step:4259/57344 train_time:2314862ms step_avg:543.52ms +grad accum step:1065/14336 +step:4260/57344 train_time:2316027ms step_avg:543.67ms +step:4261/57344 train_time:2316044ms step_avg:543.54ms +step:4262/57344 train_time:2316271ms step_avg:543.47ms +step:4263/57344 train_time:2316762ms step_avg:543.46ms +grad accum step:1066/14336 +step:4264/57344 train_time:2317934ms step_avg:543.61ms +step:4265/57344 train_time:2317951ms step_avg:543.48ms +step:4266/57344 train_time:2318179ms step_avg:543.41ms +step:4267/57344 train_time:2318670ms step_avg:543.40ms +grad accum step:1067/14336 +step:4268/57344 train_time:2319833ms step_avg:543.54ms +step:4269/57344 train_time:2319850ms step_avg:543.42ms +step:4270/57344 train_time:2320078ms step_avg:543.34ms +step:4271/57344 train_time:2320569ms step_avg:543.33ms +grad accum step:1068/14336 +step:4272/57344 train_time:2321736ms step_avg:543.48ms +step:4273/57344 train_time:2321753ms step_avg:543.35ms +step:4274/57344 train_time:2321981ms step_avg:543.28ms +step:4275/57344 train_time:2322472ms step_avg:543.27ms +grad accum step:1069/14336 +step:4276/57344 train_time:2323639ms step_avg:543.41ms +step:4277/57344 train_time:2323656ms step_avg:543.29ms +step:4278/57344 train_time:2323885ms step_avg:543.22ms +step:4279/57344 train_time:2324377ms step_avg:543.21ms +grad accum step:1070/14336 +step:4280/57344 train_time:2325539ms step_avg:543.35ms +step:4281/57344 train_time:2325556ms step_avg:543.23ms +step:4282/57344 train_time:2325784ms step_avg:543.15ms +step:4283/57344 train_time:2326274ms step_avg:543.14ms +grad accum step:1071/14336 +step:4284/57344 train_time:2327438ms step_avg:543.29ms +step:4285/57344 train_time:2327455ms step_avg:543.16ms +step:4286/57344 train_time:2327683ms step_avg:543.09ms +step:4287/57344 train_time:2328174ms step_avg:543.08ms +grad accum step:1072/14336 +step:4288/57344 train_time:2329338ms step_avg:543.22ms +step:4288/57344 val_loss:7.716187 train_time:2329339ms step_avg:543.22ms +step:4289/57344 train_time:2329351ms step_avg:543.10ms +step:4290/57344 train_time:2329558ms step_avg:543.02ms +step:4291/57344 train_time:2330058ms step_avg:543.01ms +grad accum step:1073/14336 +step:4292/57344 train_time:2331231ms step_avg:543.16ms +step:4293/57344 train_time:2331248ms step_avg:543.03ms +step:4294/57344 train_time:2331479ms step_avg:542.96ms +step:4295/57344 train_time:2331979ms step_avg:542.95ms +grad accum step:1074/14336 +step:4296/57344 train_time:2333157ms step_avg:543.10ms +step:4297/57344 train_time:2333174ms step_avg:542.98ms +step:4298/57344 train_time:2333406ms step_avg:542.91ms +step:4299/57344 train_time:2333905ms step_avg:542.89ms +grad accum step:1075/14336 +step:4300/57344 train_time:2335080ms step_avg:543.04ms +step:4301/57344 train_time:2335097ms step_avg:542.92ms +step:4302/57344 train_time:2335328ms step_avg:542.85ms +step:4303/57344 train_time:2335827ms step_avg:542.84ms +grad accum step:1076/14336 +step:4304/57344 train_time:2337005ms step_avg:542.98ms +step:4305/57344 train_time:2337022ms step_avg:542.86ms +step:4306/57344 train_time:2337252ms step_avg:542.79ms +step:4307/57344 train_time:2337750ms step_avg:542.78ms +grad accum step:1077/14336 +step:4308/57344 train_time:2338925ms step_avg:542.93ms +step:4309/57344 train_time:2338942ms step_avg:542.80ms +step:4310/57344 train_time:2339173ms step_avg:542.73ms +step:4311/57344 train_time:2339674ms step_avg:542.72ms +grad accum step:1078/14336 +step:4312/57344 train_time:2340848ms step_avg:542.87ms +step:4313/57344 train_time:2340865ms step_avg:542.75ms +step:4314/57344 train_time:2341094ms step_avg:542.67ms +step:4315/57344 train_time:2341586ms step_avg:542.66ms +grad accum step:1079/14336 +step:4316/57344 train_time:2342768ms step_avg:542.81ms +step:4317/57344 train_time:2342785ms step_avg:542.69ms +step:4318/57344 train_time:2343016ms step_avg:542.62ms +step:4319/57344 train_time:2343514ms step_avg:542.61ms +grad accum step:1080/14336 +step:4320/57344 train_time:2344694ms step_avg:542.75ms +step:4321/57344 train_time:2344711ms step_avg:542.63ms +step:4322/57344 train_time:2344941ms step_avg:542.56ms +step:4323/57344 train_time:2345440ms step_avg:542.55ms +grad accum step:1081/14336 +step:4324/57344 train_time:2346621ms step_avg:542.70ms +step:4325/57344 train_time:2346638ms step_avg:542.58ms +step:4326/57344 train_time:2346869ms step_avg:542.50ms +step:4327/57344 train_time:2347369ms step_avg:542.49ms +grad accum step:1082/14336 +step:4328/57344 train_time:2348558ms step_avg:542.64ms +step:4329/57344 train_time:2348575ms step_avg:542.52ms +step:4330/57344 train_time:2348805ms step_avg:542.45ms +step:4331/57344 train_time:2349303ms step_avg:542.44ms +grad accum step:1083/14336 +step:4332/57344 train_time:2350482ms step_avg:542.59ms +step:4333/57344 train_time:2350499ms step_avg:542.46ms +step:4334/57344 train_time:2350729ms step_avg:542.39ms +step:4335/57344 train_time:2351227ms step_avg:542.38ms +grad accum step:1084/14336 +step:4336/57344 train_time:2352406ms step_avg:542.53ms +step:4337/57344 train_time:2352423ms step_avg:542.41ms +step:4338/57344 train_time:2352653ms step_avg:542.34ms +step:4339/57344 train_time:2353150ms step_avg:542.33ms +grad accum step:1085/14336 +step:4340/57344 train_time:2354331ms step_avg:542.47ms +step:4341/57344 train_time:2354349ms step_avg:542.35ms +step:4342/57344 train_time:2354579ms step_avg:542.28ms +step:4343/57344 train_time:2355075ms step_avg:542.27ms +grad accum step:1086/14336 +step:4344/57344 train_time:2356251ms step_avg:542.42ms +step:4345/57344 train_time:2356268ms step_avg:542.29ms +step:4346/57344 train_time:2356499ms step_avg:542.22ms +step:4347/57344 train_time:2356997ms step_avg:542.21ms +grad accum step:1087/14336 +step:4348/57344 train_time:2358172ms step_avg:542.36ms +step:4349/57344 train_time:2358188ms step_avg:542.24ms +step:4350/57344 train_time:2358419ms step_avg:542.17ms +step:4351/57344 train_time:2358918ms step_avg:542.16ms +grad accum step:1088/14336 +step:4352/57344 train_time:2360102ms step_avg:542.30ms +step:4352/57344 val_loss:7.718926 train_time:2360103ms step_avg:542.30ms +step:4353/57344 train_time:2360115ms step_avg:542.18ms +step:4354/57344 train_time:2360322ms step_avg:542.10ms +step:4355/57344 train_time:2360825ms step_avg:542.10ms +grad accum step:1089/14336 +step:4356/57344 train_time:2362007ms step_avg:542.24ms +step:4357/57344 train_time:2362024ms step_avg:542.12ms +step:4358/57344 train_time:2362255ms step_avg:542.05ms +step:4359/57344 train_time:2362759ms step_avg:542.04ms +grad accum step:1090/14336 +step:4360/57344 train_time:2363947ms step_avg:542.19ms +step:4361/57344 train_time:2363964ms step_avg:542.07ms +step:4362/57344 train_time:2364194ms step_avg:542.00ms +step:4363/57344 train_time:2364696ms step_avg:541.99ms +grad accum step:1091/14336 +step:4364/57344 train_time:2365885ms step_avg:542.14ms +step:4365/57344 train_time:2365903ms step_avg:542.02ms +step:4366/57344 train_time:2366135ms step_avg:541.95ms +step:4367/57344 train_time:2366635ms step_avg:541.94ms +grad accum step:1092/14336 +step:4368/57344 train_time:2367822ms step_avg:542.08ms +step:4369/57344 train_time:2367839ms step_avg:541.96ms +step:4370/57344 train_time:2368072ms step_avg:541.89ms +step:4371/57344 train_time:2368575ms step_avg:541.88ms +grad accum step:1093/14336 +step:4372/57344 train_time:2369759ms step_avg:542.03ms +step:4373/57344 train_time:2369776ms step_avg:541.91ms +step:4374/57344 train_time:2370009ms step_avg:541.84ms +step:4375/57344 train_time:2370518ms step_avg:541.83ms +grad accum step:1094/14336 +step:4376/57344 train_time:2371708ms step_avg:541.98ms +step:4377/57344 train_time:2371725ms step_avg:541.86ms +step:4378/57344 train_time:2371958ms step_avg:541.79ms +step:4379/57344 train_time:2372461ms step_avg:541.78ms +grad accum step:1095/14336 +step:4380/57344 train_time:2373649ms step_avg:541.93ms +step:4381/57344 train_time:2373667ms step_avg:541.81ms +step:4382/57344 train_time:2373899ms step_avg:541.74ms +step:4383/57344 train_time:2374400ms step_avg:541.73ms +grad accum step:1096/14336 +step:4384/57344 train_time:2375582ms step_avg:541.88ms +step:4385/57344 train_time:2375599ms step_avg:541.76ms +step:4386/57344 train_time:2375833ms step_avg:541.69ms +step:4387/57344 train_time:2376342ms step_avg:541.68ms +grad accum step:1097/14336 +step:4388/57344 train_time:2377536ms step_avg:541.83ms +step:4389/57344 train_time:2377554ms step_avg:541.71ms +step:4390/57344 train_time:2377786ms step_avg:541.64ms +step:4391/57344 train_time:2378289ms step_avg:541.63ms +grad accum step:1098/14336 +step:4392/57344 train_time:2379479ms step_avg:541.78ms +step:4393/57344 train_time:2379496ms step_avg:541.66ms +step:4394/57344 train_time:2379728ms step_avg:541.59ms +step:4395/57344 train_time:2380231ms step_avg:541.58ms +grad accum step:1099/14336 +step:4396/57344 train_time:2381415ms step_avg:541.72ms +step:4397/57344 train_time:2381432ms step_avg:541.60ms +step:4398/57344 train_time:2381664ms step_avg:541.53ms +step:4399/57344 train_time:2382169ms step_avg:541.53ms +grad accum step:1100/14336 +step:4400/57344 train_time:2383359ms step_avg:541.67ms +step:4401/57344 train_time:2383377ms step_avg:541.55ms +step:4402/57344 train_time:2383609ms step_avg:541.48ms +step:4403/57344 train_time:2384110ms step_avg:541.47ms +grad accum step:1101/14336 +step:4404/57344 train_time:2385300ms step_avg:541.62ms +step:4405/57344 train_time:2385317ms step_avg:541.50ms +step:4406/57344 train_time:2385548ms step_avg:541.43ms +step:4407/57344 train_time:2386052ms step_avg:541.42ms +grad accum step:1102/14336 +step:4408/57344 train_time:2387240ms step_avg:541.57ms +step:4409/57344 train_time:2387257ms step_avg:541.45ms +step:4410/57344 train_time:2387489ms step_avg:541.38ms +step:4411/57344 train_time:2387992ms step_avg:541.37ms +grad accum step:1103/14336 +step:4412/57344 train_time:2389178ms step_avg:541.52ms +step:4413/57344 train_time:2389195ms step_avg:541.40ms +step:4414/57344 train_time:2389427ms step_avg:541.33ms +step:4415/57344 train_time:2389928ms step_avg:541.32ms +grad accum step:1104/14336 +step:4416/57344 train_time:2391116ms step_avg:541.47ms +step:4416/57344 val_loss:7.717360 train_time:2391116ms step_avg:541.47ms +step:4417/57344 train_time:2391513ms step_avg:541.43ms +step:4418/57344 train_time:2398577ms step_avg:542.91ms +step:4419/57344 train_time:2398808ms step_avg:542.84ms +grad accum step:1105/14336 +step:4420/57344 train_time:2400328ms step_avg:543.06ms +step:4421/57344 train_time:2400340ms step_avg:542.94ms +step:4422/57344 train_time:2400549ms step_avg:542.87ms +step:4423/57344 train_time:2401047ms step_avg:542.85ms +grad accum step:1106/14336 +step:4424/57344 train_time:2402228ms step_avg:543.00ms +step:4425/57344 train_time:2402245ms step_avg:542.88ms +step:4426/57344 train_time:2402477ms step_avg:542.81ms +step:4427/57344 train_time:2402976ms step_avg:542.80ms +grad accum step:1107/14336 +step:4428/57344 train_time:2404152ms step_avg:542.94ms +step:4429/57344 train_time:2404169ms step_avg:542.82ms +step:4430/57344 train_time:2404400ms step_avg:542.75ms +step:4431/57344 train_time:2404896ms step_avg:542.74ms +grad accum step:1108/14336 +step:4432/57344 train_time:2406079ms step_avg:542.89ms +step:4433/57344 train_time:2406096ms step_avg:542.77ms +step:4434/57344 train_time:2406327ms step_avg:542.70ms +step:4435/57344 train_time:2406827ms step_avg:542.69ms +grad accum step:1109/14336 +step:4436/57344 train_time:2408012ms step_avg:542.83ms +step:4437/57344 train_time:2408029ms step_avg:542.72ms +step:4438/57344 train_time:2408260ms step_avg:542.65ms +step:4439/57344 train_time:2408760ms step_avg:542.64ms +grad accum step:1110/14336 +step:4440/57344 train_time:2409947ms step_avg:542.78ms +step:4441/57344 train_time:2409964ms step_avg:542.66ms +step:4442/57344 train_time:2410196ms step_avg:542.59ms +step:4443/57344 train_time:2410697ms step_avg:542.58ms +grad accum step:1111/14336 +step:4444/57344 train_time:2411875ms step_avg:542.73ms +step:4445/57344 train_time:2411892ms step_avg:542.61ms +step:4446/57344 train_time:2412123ms step_avg:542.54ms +step:4447/57344 train_time:2412621ms step_avg:542.53ms +grad accum step:1112/14336 +step:4448/57344 train_time:2413799ms step_avg:542.67ms +step:4449/57344 train_time:2413817ms step_avg:542.55ms +step:4450/57344 train_time:2414047ms step_avg:542.48ms +step:4451/57344 train_time:2414547ms step_avg:542.47ms +grad accum step:1113/14336 +step:4452/57344 train_time:2415734ms step_avg:542.62ms +step:4453/57344 train_time:2415751ms step_avg:542.50ms +step:4454/57344 train_time:2415984ms step_avg:542.43ms +step:4455/57344 train_time:2416487ms step_avg:542.42ms +grad accum step:1114/14336 +step:4456/57344 train_time:2417671ms step_avg:542.57ms +step:4457/57344 train_time:2417689ms step_avg:542.45ms +step:4458/57344 train_time:2417920ms step_avg:542.38ms +step:4459/57344 train_time:2418425ms step_avg:542.37ms +grad accum step:1115/14336 +step:4460/57344 train_time:2419610ms step_avg:542.51ms +step:4461/57344 train_time:2419627ms step_avg:542.40ms +step:4462/57344 train_time:2419858ms step_avg:542.33ms +step:4463/57344 train_time:2420357ms step_avg:542.32ms +grad accum step:1116/14336 +step:4464/57344 train_time:2421541ms step_avg:542.46ms +step:4465/57344 train_time:2421558ms step_avg:542.34ms +step:4466/57344 train_time:2421789ms step_avg:542.27ms +step:4467/57344 train_time:2422288ms step_avg:542.26ms +grad accum step:1117/14336 +step:4468/57344 train_time:2850033ms step_avg:637.88ms +step:4469/57344 train_time:2850068ms step_avg:637.74ms +step:4470/57344 train_time:2850275ms step_avg:637.65ms +step:4471/57344 train_time:2850767ms step_avg:637.61ms +grad accum step:1118/14336 +step:4472/57344 train_time:2851926ms step_avg:637.73ms +step:4473/57344 train_time:2851943ms step_avg:637.59ms +step:4474/57344 train_time:2852171ms step_avg:637.50ms +step:4475/57344 train_time:2852663ms step_avg:637.47ms +grad accum step:1119/14336 +step:4476/57344 train_time:2853829ms step_avg:637.58ms +step:4477/57344 train_time:2853846ms step_avg:637.45ms +step:4478/57344 train_time:2854073ms step_avg:637.35ms +step:4479/57344 train_time:2854563ms step_avg:637.32ms +grad accum step:1120/14336 +step:4480/57344 train_time:2855734ms step_avg:637.44ms +step:4480/57344 val_loss:7.713251 train_time:2855734ms step_avg:637.44ms +step:4481/57344 train_time:2855746ms step_avg:637.30ms +step:4482/57344 train_time:2855952ms step_avg:637.20ms +step:4483/57344 train_time:2856448ms step_avg:637.17ms +grad accum step:1121/14336 +step:4484/57344 train_time:2857611ms step_avg:637.29ms +step:4485/57344 train_time:2857628ms step_avg:637.15ms +step:4486/57344 train_time:2857855ms step_avg:637.06ms +step:4487/57344 train_time:2858348ms step_avg:637.03ms +grad accum step:1122/14336 +step:4488/57344 train_time:2859515ms step_avg:637.15ms +step:4489/57344 train_time:2859532ms step_avg:637.01ms +step:4490/57344 train_time:2859759ms step_avg:636.92ms +step:4491/57344 train_time:2860251ms step_avg:636.89ms +grad accum step:1123/14336 +step:4492/57344 train_time:2861417ms step_avg:637.00ms +step:4493/57344 train_time:2861434ms step_avg:636.86ms +step:4494/57344 train_time:2861662ms step_avg:636.77ms +step:4495/57344 train_time:2862155ms step_avg:636.74ms +grad accum step:1124/14336 +step:4496/57344 train_time:2864271ms step_avg:637.07ms +step:4497/57344 train_time:2864312ms step_avg:636.94ms +step:4498/57344 train_time:2864509ms step_avg:636.84ms +step:4499/57344 train_time:2865000ms step_avg:636.81ms +grad accum step:1125/14336 +step:4500/57344 train_time:2866163ms step_avg:636.93ms +step:4501/57344 train_time:2866180ms step_avg:636.79ms +step:4502/57344 train_time:2866409ms step_avg:636.70ms +step:4503/57344 train_time:2866904ms step_avg:636.67ms +grad accum step:1126/14336 +step:4504/57344 train_time:2868069ms step_avg:636.78ms +step:4505/57344 train_time:2868086ms step_avg:636.65ms +step:4506/57344 train_time:2868315ms step_avg:636.55ms +step:4507/57344 train_time:2868808ms step_avg:636.52ms +grad accum step:1127/14336 +step:4508/57344 train_time:2869976ms step_avg:636.64ms +step:4509/57344 train_time:2869993ms step_avg:636.50ms +step:4510/57344 train_time:2870221ms step_avg:636.41ms +step:4511/57344 train_time:2870717ms step_avg:636.38ms +grad accum step:1128/14336 +step:4512/57344 train_time:2871887ms step_avg:636.50ms +step:4513/57344 train_time:2871904ms step_avg:636.36ms +step:4514/57344 train_time:2872133ms step_avg:636.27ms +step:4515/57344 train_time:2872627ms step_avg:636.24ms +grad accum step:1129/14336 +step:4516/57344 train_time:2873797ms step_avg:636.36ms +step:4517/57344 train_time:2873814ms step_avg:636.22ms +step:4518/57344 train_time:2874042ms step_avg:636.13ms +step:4519/57344 train_time:2874540ms step_avg:636.10ms +grad accum step:1130/14336 +step:4520/57344 train_time:2875715ms step_avg:636.22ms +step:4521/57344 train_time:2875732ms step_avg:636.08ms +step:4522/57344 train_time:2875960ms step_avg:635.99ms +step:4523/57344 train_time:2876454ms step_avg:635.96ms +grad accum step:1131/14336 +step:4524/57344 train_time:2877620ms step_avg:636.08ms +step:4525/57344 train_time:2877637ms step_avg:635.94ms +step:4526/57344 train_time:2877865ms step_avg:635.85ms +step:4527/57344 train_time:2878359ms step_avg:635.82ms +grad accum step:1132/14336 +step:4528/57344 train_time:2879531ms step_avg:635.94ms +step:4529/57344 train_time:2879548ms step_avg:635.80ms +step:4530/57344 train_time:2879776ms step_avg:635.71ms +step:4531/57344 train_time:2880271ms step_avg:635.68ms +grad accum step:1133/14336 +step:4532/57344 train_time:2881442ms step_avg:635.80ms +step:4533/57344 train_time:2881459ms step_avg:635.66ms +step:4534/57344 train_time:2881690ms step_avg:635.57ms +step:4535/57344 train_time:2882187ms step_avg:635.54ms +grad accum step:1134/14336 +step:4536/57344 train_time:2883361ms step_avg:635.66ms +step:4537/57344 train_time:2883378ms step_avg:635.53ms +step:4538/57344 train_time:2883606ms step_avg:635.44ms +step:4539/57344 train_time:2884100ms step_avg:635.40ms +grad accum step:1135/14336 +step:4540/57344 train_time:2885278ms step_avg:635.52ms +step:4541/57344 train_time:2885295ms step_avg:635.39ms +step:4542/57344 train_time:2885522ms step_avg:635.30ms +step:4543/57344 train_time:2886018ms step_avg:635.27ms +grad accum step:1136/14336 +step:4544/57344 train_time:2887192ms step_avg:635.39ms +step:4544/57344 val_loss:7.724630 train_time:2887193ms step_avg:635.39ms +step:4545/57344 train_time:2887205ms step_avg:635.25ms +step:4546/57344 train_time:2887411ms step_avg:635.15ms +step:4547/57344 train_time:2887913ms step_avg:635.12ms +grad accum step:1137/14336 +step:4548/57344 train_time:2889094ms step_avg:635.25ms +step:4549/57344 train_time:2889111ms step_avg:635.11ms +step:4550/57344 train_time:2889341ms step_avg:635.02ms +step:4551/57344 train_time:2889840ms step_avg:634.99ms +grad accum step:1138/14336 +step:4552/57344 train_time:2891019ms step_avg:635.11ms +step:4553/57344 train_time:2891036ms step_avg:634.97ms +step:4554/57344 train_time:2891268ms step_avg:634.89ms +step:4555/57344 train_time:2891768ms step_avg:634.86ms +grad accum step:1139/14336 +step:4556/57344 train_time:2892951ms step_avg:634.98ms +step:4557/57344 train_time:2892968ms step_avg:634.84ms +step:4558/57344 train_time:2893200ms step_avg:634.75ms +step:4559/57344 train_time:2893700ms step_avg:634.72ms +grad accum step:1140/14336 +step:4560/57344 train_time:2894884ms step_avg:634.84ms +step:4561/57344 train_time:2894901ms step_avg:634.71ms +step:4562/57344 train_time:2895132ms step_avg:634.62ms +step:4563/57344 train_time:2895632ms step_avg:634.59ms +grad accum step:1141/14336 +step:4564/57344 train_time:2896812ms step_avg:634.71ms +step:4565/57344 train_time:2896829ms step_avg:634.57ms +step:4566/57344 train_time:2897061ms step_avg:634.49ms +step:4567/57344 train_time:2897560ms step_avg:634.46ms +grad accum step:1142/14336 +step:4568/57344 train_time:2898742ms step_avg:634.58ms +step:4569/57344 train_time:2898759ms step_avg:634.44ms +step:4570/57344 train_time:2898990ms step_avg:634.35ms +step:4571/57344 train_time:2899489ms step_avg:634.32ms +grad accum step:1143/14336 +step:4572/57344 train_time:2900671ms step_avg:634.44ms +step:4573/57344 train_time:2900689ms step_avg:634.31ms +step:4574/57344 train_time:2900922ms step_avg:634.22ms +step:4575/57344 train_time:2901424ms step_avg:634.19ms +grad accum step:1144/14336 +step:4576/57344 train_time:2902607ms step_avg:634.31ms +step:4577/57344 train_time:2902624ms step_avg:634.18ms +step:4578/57344 train_time:2902855ms step_avg:634.09ms +step:4579/57344 train_time:2903354ms step_avg:634.06ms +grad accum step:1145/14336 +step:4580/57344 train_time:2904540ms step_avg:634.18ms +step:4581/57344 train_time:2904558ms step_avg:634.04ms +step:4582/57344 train_time:2904790ms step_avg:633.96ms +step:4583/57344 train_time:2905293ms step_avg:633.93ms +grad accum step:1146/14336 +step:4584/57344 train_time:2906479ms step_avg:634.05ms +step:4585/57344 train_time:2906496ms step_avg:633.91ms +step:4586/57344 train_time:2906728ms step_avg:633.83ms +step:4587/57344 train_time:2907226ms step_avg:633.80ms +grad accum step:1147/14336 +step:4588/57344 train_time:2908411ms step_avg:633.92ms +step:4589/57344 train_time:2908425ms step_avg:633.78ms +step:4590/57344 train_time:2908660ms step_avg:633.70ms +step:4591/57344 train_time:2909169ms step_avg:633.67ms +grad accum step:1148/14336 +step:4592/57344 train_time:2910361ms step_avg:633.79ms +step:4593/57344 train_time:2910377ms step_avg:633.66ms +step:4594/57344 train_time:2910610ms step_avg:633.57ms +step:4595/57344 train_time:2911114ms step_avg:633.54ms +grad accum step:1149/14336 +step:4596/57344 train_time:2912302ms step_avg:633.66ms +step:4597/57344 train_time:2912319ms step_avg:633.53ms +step:4598/57344 train_time:2912553ms step_avg:633.44ms +step:4599/57344 train_time:2913055ms step_avg:633.41ms +grad accum step:1150/14336 +step:4600/57344 train_time:2914251ms step_avg:633.53ms +step:4601/57344 train_time:2914268ms step_avg:633.40ms +step:4602/57344 train_time:2914499ms step_avg:633.31ms +step:4603/57344 train_time:2915000ms step_avg:633.28ms +grad accum step:1151/14336 +step:4604/57344 train_time:2916191ms step_avg:633.40ms +step:4605/57344 train_time:2916208ms step_avg:633.27ms +step:4606/57344 train_time:2916439ms step_avg:633.18ms +step:4607/57344 train_time:2917495ms step_avg:633.27ms +grad accum step:1152/14336 +step:4608/57344 train_time:2983377ms step_avg:647.43ms +step:4608/57344 val_loss:7.735681 train_time:2983562ms step_avg:647.47ms +step:4609/57344 train_time:2983573ms step_avg:647.34ms +step:4610/57344 train_time:2983849ms step_avg:647.26ms +step:4611/57344 train_time:2984342ms step_avg:647.22ms +grad accum step:1153/14336 +step:4612/57344 train_time:2985520ms step_avg:647.34ms +step:4613/57344 train_time:2985537ms step_avg:647.20ms +step:4614/57344 train_time:2985767ms step_avg:647.11ms +step:4615/57344 train_time:2986263ms step_avg:647.08ms +grad accum step:1154/14336 +step:4616/57344 train_time:2987433ms step_avg:647.19ms +step:4617/57344 train_time:2987450ms step_avg:647.05ms +step:4618/57344 train_time:2987676ms step_avg:646.96ms +step:4619/57344 train_time:2988173ms step_avg:646.93ms +grad accum step:1155/14336 +step:4620/57344 train_time:2989347ms step_avg:647.04ms +step:4621/57344 train_time:2989364ms step_avg:646.91ms +step:4622/57344 train_time:2989594ms step_avg:646.82ms +step:4623/57344 train_time:2990090ms step_avg:646.79ms +grad accum step:1156/14336 +step:4624/57344 train_time:2991269ms step_avg:646.90ms +step:4625/57344 train_time:2991286ms step_avg:646.76ms +step:4626/57344 train_time:2991517ms step_avg:646.67ms +step:4627/57344 train_time:2992013ms step_avg:646.64ms +grad accum step:1157/14336 +step:4628/57344 train_time:2993189ms step_avg:646.76ms +step:4629/57344 train_time:2993206ms step_avg:646.62ms +step:4630/57344 train_time:2993437ms step_avg:646.53ms +step:4631/57344 train_time:2993935ms step_avg:646.50ms +grad accum step:1158/14336 +step:4632/57344 train_time:2995111ms step_avg:646.61ms +step:4633/57344 train_time:2995128ms step_avg:646.48ms +step:4634/57344 train_time:2995359ms step_avg:646.39ms +step:4635/57344 train_time:2995858ms step_avg:646.36ms +grad accum step:1159/14336 +step:4636/57344 train_time:2997033ms step_avg:646.47ms +step:4637/57344 train_time:2997051ms step_avg:646.33ms +step:4638/57344 train_time:2997281ms step_avg:646.24ms +step:4639/57344 train_time:2997782ms step_avg:646.21ms +grad accum step:1160/14336 +step:4640/57344 train_time:2998959ms step_avg:646.33ms +step:4641/57344 train_time:2998976ms step_avg:646.19ms +step:4642/57344 train_time:2999208ms step_avg:646.10ms +step:4643/57344 train_time:2999707ms step_avg:646.07ms +grad accum step:1161/14336 +step:4644/57344 train_time:3000889ms step_avg:646.19ms +step:4645/57344 train_time:3000906ms step_avg:646.05ms +step:4646/57344 train_time:3001136ms step_avg:645.96ms +step:4647/57344 train_time:3001634ms step_avg:645.93ms +grad accum step:1162/14336 +step:4648/57344 train_time:3002809ms step_avg:646.04ms +step:4649/57344 train_time:3002826ms step_avg:645.91ms +step:4650/57344 train_time:3003056ms step_avg:645.82ms +step:4651/57344 train_time:3003561ms step_avg:645.79ms +grad accum step:1163/14336 +step:4652/57344 train_time:3004733ms step_avg:645.90ms +step:4653/57344 train_time:3004750ms step_avg:645.77ms +step:4654/57344 train_time:3004980ms step_avg:645.68ms +step:4655/57344 train_time:3005479ms step_avg:645.65ms +grad accum step:1164/14336 +step:4656/57344 train_time:3006664ms step_avg:645.76ms +step:4657/57344 train_time:3006681ms step_avg:645.63ms +step:4658/57344 train_time:3006912ms step_avg:645.54ms +step:4659/57344 train_time:3007411ms step_avg:645.51ms +grad accum step:1165/14336 +step:4660/57344 train_time:3008613ms step_avg:645.63ms +step:4661/57344 train_time:3008630ms step_avg:645.49ms +step:4662/57344 train_time:3008862ms step_avg:645.40ms +step:4663/57344 train_time:3009364ms step_avg:645.37ms +grad accum step:1166/14336 +step:4664/57344 train_time:3010552ms step_avg:645.49ms +step:4665/57344 train_time:3010569ms step_avg:645.35ms +step:4666/57344 train_time:3010801ms step_avg:645.26ms +step:4667/57344 train_time:3011304ms step_avg:645.23ms +grad accum step:1167/14336 +step:4668/57344 train_time:3012487ms step_avg:645.35ms +step:4669/57344 train_time:3012504ms step_avg:645.21ms +step:4670/57344 train_time:3012735ms step_avg:645.13ms +step:4671/57344 train_time:3013235ms step_avg:645.09ms +grad accum step:1168/14336 +step:4672/57344 train_time:3014412ms step_avg:645.21ms +step:4672/57344 val_loss:7.736219 train_time:3014413ms step_avg:645.21ms +step:4673/57344 train_time:3014424ms step_avg:645.07ms +step:4674/57344 train_time:3014634ms step_avg:644.98ms +step:4675/57344 train_time:3015135ms step_avg:644.95ms +grad accum step:1169/14336 +step:4676/57344 train_time:3016325ms step_avg:645.07ms +step:4677/57344 train_time:3016342ms step_avg:644.93ms +step:4678/57344 train_time:3016575ms step_avg:644.84ms +step:4679/57344 train_time:3017077ms step_avg:644.81ms +grad accum step:1170/14336 +step:4680/57344 train_time:3018264ms step_avg:644.93ms +step:4681/57344 train_time:3018281ms step_avg:644.79ms +step:4682/57344 train_time:3018514ms step_avg:644.71ms +step:4683/57344 train_time:3019016ms step_avg:644.68ms +grad accum step:1171/14336 +step:4684/57344 train_time:3020203ms step_avg:644.79ms +step:4685/57344 train_time:3020220ms step_avg:644.66ms +step:4686/57344 train_time:3020453ms step_avg:644.57ms +step:4687/57344 train_time:3020960ms step_avg:644.54ms +grad accum step:1172/14336 +step:4688/57344 train_time:3022148ms step_avg:644.66ms +step:4689/57344 train_time:3022166ms step_avg:644.52ms +step:4690/57344 train_time:3022400ms step_avg:644.43ms +step:4691/57344 train_time:3022903ms step_avg:644.40ms +grad accum step:1173/14336 +step:4692/57344 train_time:3024091ms step_avg:644.52ms +step:4693/57344 train_time:3024108ms step_avg:644.39ms +step:4694/57344 train_time:3024341ms step_avg:644.30ms +step:4695/57344 train_time:3024847ms step_avg:644.27ms +grad accum step:1174/14336 +step:4696/57344 train_time:3026036ms step_avg:644.39ms +step:4697/57344 train_time:3026053ms step_avg:644.25ms +step:4698/57344 train_time:3026285ms step_avg:644.16ms +step:4699/57344 train_time:3026790ms step_avg:644.13ms +grad accum step:1175/14336 +step:4700/57344 train_time:3027983ms step_avg:644.25ms +step:4701/57344 train_time:3028000ms step_avg:644.12ms +step:4702/57344 train_time:3028234ms step_avg:644.03ms +step:4703/57344 train_time:3028738ms step_avg:644.00ms +grad accum step:1176/14336 +step:4704/57344 train_time:3029928ms step_avg:644.12ms +step:4705/57344 train_time:3029945ms step_avg:643.98ms +step:4706/57344 train_time:3030177ms step_avg:643.90ms +step:4707/57344 train_time:3030682ms step_avg:643.87ms +grad accum step:1177/14336 +step:4708/57344 train_time:3031870ms step_avg:643.98ms +step:4709/57344 train_time:3031887ms step_avg:643.85ms +step:4710/57344 train_time:3032120ms step_avg:643.76ms +step:4711/57344 train_time:3032622ms step_avg:643.73ms +grad accum step:1178/14336 +step:4712/57344 train_time:3033809ms step_avg:643.85ms +step:4713/57344 train_time:3033826ms step_avg:643.71ms +step:4714/57344 train_time:3034059ms step_avg:643.63ms +step:4715/57344 train_time:3034562ms step_avg:643.60ms +grad accum step:1179/14336 +step:4716/57344 train_time:3035750ms step_avg:643.71ms +step:4717/57344 train_time:3035767ms step_avg:643.58ms +step:4718/57344 train_time:3036001ms step_avg:643.49ms +step:4719/57344 train_time:3036503ms step_avg:643.46ms +grad accum step:1180/14336 +step:4720/57344 train_time:3037693ms step_avg:643.58ms +step:4721/57344 train_time:3037710ms step_avg:643.45ms +step:4722/57344 train_time:3037945ms step_avg:643.36ms +step:4723/57344 train_time:3038449ms step_avg:643.33ms +grad accum step:1181/14336 +step:4724/57344 train_time:3039647ms step_avg:643.45ms +step:4725/57344 train_time:3039664ms step_avg:643.32ms +step:4726/57344 train_time:3039896ms step_avg:643.23ms +step:4727/57344 train_time:3040398ms step_avg:643.20ms +grad accum step:1182/14336 +step:4728/57344 train_time:3041586ms step_avg:643.31ms +step:4729/57344 train_time:3041604ms step_avg:643.18ms +step:4730/57344 train_time:3041837ms step_avg:643.09ms +step:4731/57344 train_time:3042343ms step_avg:643.07ms +grad accum step:1183/14336 +step:4732/57344 train_time:3043537ms step_avg:643.18ms +step:4733/57344 train_time:3043554ms step_avg:643.05ms +step:4734/57344 train_time:3043789ms step_avg:642.96ms +step:4735/57344 train_time:3044294ms step_avg:642.93ms +grad accum step:1184/14336 +step:4736/57344 train_time:3045482ms step_avg:643.05ms +step:4736/57344 val_loss:7.716004 train_time:3045483ms step_avg:643.05ms +step:4737/57344 train_time:3045495ms step_avg:642.92ms +step:4738/57344 train_time:3096803ms step_avg:653.61ms +step:4739/57344 train_time:3103547ms step_avg:654.89ms +grad accum step:1185/14336 +step:4740/57344 train_time:3104565ms step_avg:654.97ms +step:4741/57344 train_time:3104582ms step_avg:654.84ms +step:4742/57344 train_time:3104808ms step_avg:654.75ms +step:4743/57344 train_time:3105302ms step_avg:654.71ms +grad accum step:1186/14336 +step:4744/57344 train_time:3106468ms step_avg:654.82ms +step:4745/57344 train_time:3106485ms step_avg:654.69ms +step:4746/57344 train_time:3106714ms step_avg:654.60ms +step:4747/57344 train_time:3107206ms step_avg:654.56ms +grad accum step:1187/14336 +step:4748/57344 train_time:3108371ms step_avg:654.67ms +step:4749/57344 train_time:3108389ms step_avg:654.54ms +step:4750/57344 train_time:3108618ms step_avg:654.45ms +step:4751/57344 train_time:3109112ms step_avg:654.41ms +grad accum step:1188/14336 +step:4752/57344 train_time:3110274ms step_avg:654.52ms +step:4753/57344 train_time:3110291ms step_avg:654.38ms +step:4754/57344 train_time:3110519ms step_avg:654.30ms +step:4755/57344 train_time:3111013ms step_avg:654.26ms +grad accum step:1189/14336 +step:4756/57344 train_time:3112186ms step_avg:654.37ms +step:4757/57344 train_time:3112203ms step_avg:654.24ms +step:4758/57344 train_time:3112432ms step_avg:654.15ms +step:4759/57344 train_time:3112925ms step_avg:654.11ms +grad accum step:1190/14336 +step:4760/57344 train_time:3114098ms step_avg:654.22ms +step:4761/57344 train_time:3114115ms step_avg:654.09ms +step:4762/57344 train_time:3114344ms step_avg:654.00ms +step:4763/57344 train_time:3114845ms step_avg:653.97ms +grad accum step:1191/14336 +step:4764/57344 train_time:3116013ms step_avg:654.07ms +step:4765/57344 train_time:3116030ms step_avg:653.94ms +step:4766/57344 train_time:3116261ms step_avg:653.85ms +step:4767/57344 train_time:3116760ms step_avg:653.82ms +grad accum step:1192/14336 +step:4768/57344 train_time:3117934ms step_avg:653.93ms +step:4769/57344 train_time:3117951ms step_avg:653.80ms +step:4770/57344 train_time:3118181ms step_avg:653.71ms +step:4771/57344 train_time:3118677ms step_avg:653.67ms +grad accum step:1193/14336 +step:4772/57344 train_time:3119853ms step_avg:653.78ms +step:4773/57344 train_time:3119870ms step_avg:653.65ms +step:4774/57344 train_time:3120099ms step_avg:653.56ms +step:4775/57344 train_time:3120595ms step_avg:653.53ms +grad accum step:1194/14336 +step:4776/57344 train_time:3121765ms step_avg:653.64ms +step:4777/57344 train_time:3121782ms step_avg:653.50ms +step:4778/57344 train_time:3122012ms step_avg:653.41ms +step:4779/57344 train_time:3122507ms step_avg:653.38ms +grad accum step:1195/14336 +step:4780/57344 train_time:3123685ms step_avg:653.49ms +step:4781/57344 train_time:3123702ms step_avg:653.36ms +step:4782/57344 train_time:3123932ms step_avg:653.27ms +step:4783/57344 train_time:3124428ms step_avg:653.24ms +grad accum step:1196/14336 +step:4784/57344 train_time:3125603ms step_avg:653.35ms +step:4785/57344 train_time:3125620ms step_avg:653.21ms +step:4786/57344 train_time:3125849ms step_avg:653.12ms +step:4787/57344 train_time:3126345ms step_avg:653.09ms +grad accum step:1197/14336 +step:4788/57344 train_time:3127518ms step_avg:653.20ms +step:4789/57344 train_time:3127535ms step_avg:653.07ms +step:4790/57344 train_time:3127766ms step_avg:652.98ms +step:4791/57344 train_time:3128265ms step_avg:652.95ms +grad accum step:1198/14336 +step:4792/57344 train_time:3168934ms step_avg:661.30ms +step:4793/57344 train_time:3168951ms step_avg:661.16ms +step:4794/57344 train_time:3169178ms step_avg:661.07ms +step:4795/57344 train_time:3169667ms step_avg:661.04ms +grad accum step:1199/14336 +step:4796/57344 train_time:3170837ms step_avg:661.14ms +step:4797/57344 train_time:3170854ms step_avg:661.01ms +step:4798/57344 train_time:3171084ms step_avg:660.92ms +step:4799/57344 train_time:3171578ms step_avg:660.88ms +grad accum step:1200/14336 +step:4800/57344 train_time:3172743ms step_avg:660.99ms +step:4800/57344 val_loss:7.705821 train_time:3172744ms step_avg:660.99ms +step:4801/57344 train_time:3172756ms step_avg:660.85ms +step:4802/57344 train_time:3173022ms step_avg:660.77ms +step:4803/57344 train_time:3173517ms step_avg:660.74ms +grad accum step:1201/14336 +step:4804/57344 train_time:3174691ms step_avg:660.84ms +step:4805/57344 train_time:3174708ms step_avg:660.71ms +step:4806/57344 train_time:3174938ms step_avg:660.62ms +step:4807/57344 train_time:3175432ms step_avg:660.59ms +grad accum step:1202/14336 +step:4808/57344 train_time:3176604ms step_avg:660.69ms +step:4809/57344 train_time:3176621ms step_avg:660.56ms +step:4810/57344 train_time:3176853ms step_avg:660.47ms +step:4811/57344 train_time:3177348ms step_avg:660.43ms +grad accum step:1203/14336 +step:4812/57344 train_time:3178520ms step_avg:660.54ms +step:4813/57344 train_time:3178537ms step_avg:660.41ms +step:4814/57344 train_time:3178768ms step_avg:660.32ms +step:4815/57344 train_time:3179266ms step_avg:660.28ms +grad accum step:1204/14336 +step:4816/57344 train_time:3180441ms step_avg:660.39ms +step:4817/57344 train_time:3180458ms step_avg:660.26ms +step:4818/57344 train_time:3180689ms step_avg:660.17ms +step:4819/57344 train_time:3181185ms step_avg:660.13ms +grad accum step:1205/14336 +step:4820/57344 train_time:3182360ms step_avg:660.24ms +step:4821/57344 train_time:3182377ms step_avg:660.11ms +step:4822/57344 train_time:3182607ms step_avg:660.02ms +step:4823/57344 train_time:3183103ms step_avg:659.98ms +grad accum step:1206/14336 +step:4824/57344 train_time:3184271ms step_avg:660.09ms +step:4825/57344 train_time:3184288ms step_avg:659.96ms +step:4826/57344 train_time:3184519ms step_avg:659.87ms +step:4827/57344 train_time:3185018ms step_avg:659.83ms +grad accum step:1207/14336 +step:4828/57344 train_time:3186196ms step_avg:659.94ms +step:4829/57344 train_time:3186213ms step_avg:659.81ms +step:4830/57344 train_time:3186445ms step_avg:659.72ms +step:4831/57344 train_time:3186944ms step_avg:659.69ms +grad accum step:1208/14336 +step:4832/57344 train_time:3188119ms step_avg:659.79ms +step:4833/57344 train_time:3188136ms step_avg:659.66ms +step:4834/57344 train_time:3188367ms step_avg:659.57ms +step:4835/57344 train_time:3188865ms step_avg:659.54ms +grad accum step:1209/14336 +step:4836/57344 train_time:3190044ms step_avg:659.65ms +step:4837/57344 train_time:3190061ms step_avg:659.51ms +step:4838/57344 train_time:3190292ms step_avg:659.42ms +step:4839/57344 train_time:3190792ms step_avg:659.39ms +grad accum step:1210/14336 +step:4840/57344 train_time:3191978ms step_avg:659.50ms +step:4841/57344 train_time:3191995ms step_avg:659.37ms +step:4842/57344 train_time:3192228ms step_avg:659.28ms +step:4843/57344 train_time:3192729ms step_avg:659.25ms +grad accum step:1211/14336 +step:4844/57344 train_time:3193906ms step_avg:659.35ms +step:4845/57344 train_time:3193924ms step_avg:659.22ms +step:4846/57344 train_time:3194157ms step_avg:659.13ms +step:4847/57344 train_time:3194658ms step_avg:659.10ms +grad accum step:1212/14336 +step:4848/57344 train_time:3195836ms step_avg:659.21ms +step:4849/57344 train_time:3195853ms step_avg:659.07ms +step:4850/57344 train_time:3196085ms step_avg:658.99ms +step:4851/57344 train_time:3196584ms step_avg:658.95ms +grad accum step:1213/14336 +step:4852/57344 train_time:3197767ms step_avg:659.06ms +step:4853/57344 train_time:3197784ms step_avg:658.93ms +step:4854/57344 train_time:3198017ms step_avg:658.84ms +step:4855/57344 train_time:3198518ms step_avg:658.81ms +grad accum step:1214/14336 +step:4856/57344 train_time:3199698ms step_avg:658.92ms +step:4857/57344 train_time:3199715ms step_avg:658.78ms +step:4858/57344 train_time:3199945ms step_avg:658.70ms +step:4859/57344 train_time:3200445ms step_avg:658.66ms +grad accum step:1215/14336 +step:4860/57344 train_time:3219649ms step_avg:662.48ms +step:4861/57344 train_time:3223069ms step_avg:663.05ms +step:4862/57344 train_time:3223319ms step_avg:662.96ms +step:4863/57344 train_time:3223813ms step_avg:662.93ms +grad accum step:1216/14336 +step:4864/57344 train_time:3224982ms step_avg:663.03ms +step:4864/57344 val_loss:7.705976 train_time:3224982ms step_avg:663.03ms +step:4865/57344 train_time:3224994ms step_avg:662.90ms +step:4866/57344 train_time:3225203ms step_avg:662.80ms +step:4867/57344 train_time:3225704ms step_avg:662.77ms +grad accum step:1217/14336 +step:4868/57344 train_time:3226884ms step_avg:662.88ms +step:4869/57344 train_time:3226901ms step_avg:662.74ms +step:4870/57344 train_time:3227134ms step_avg:662.66ms +step:4871/57344 train_time:3227634ms step_avg:662.62ms +grad accum step:1218/14336 +step:4872/57344 train_time:3228810ms step_avg:662.73ms +step:4873/57344 train_time:3228827ms step_avg:662.60ms +step:4874/57344 train_time:3229060ms step_avg:662.51ms +step:4875/57344 train_time:3229561ms step_avg:662.47ms +grad accum step:1219/14336 +step:4876/57344 train_time:3230748ms step_avg:662.58ms +step:4877/57344 train_time:3230765ms step_avg:662.45ms +step:4878/57344 train_time:3230996ms step_avg:662.36ms +step:4879/57344 train_time:3231496ms step_avg:662.33ms +grad accum step:1220/14336 +step:4880/57344 train_time:3232680ms step_avg:662.43ms +step:4881/57344 train_time:3232697ms step_avg:662.30ms +step:4882/57344 train_time:3232929ms step_avg:662.21ms +step:4883/57344 train_time:3233433ms step_avg:662.18ms +grad accum step:1221/14336 +step:4884/57344 train_time:3234620ms step_avg:662.29ms +step:4885/57344 train_time:3234636ms step_avg:662.16ms +step:4886/57344 train_time:3234868ms step_avg:662.07ms +step:4887/57344 train_time:3235369ms step_avg:662.04ms +grad accum step:1222/14336 +step:4888/57344 train_time:3236553ms step_avg:662.14ms +step:4889/57344 train_time:3236571ms step_avg:662.01ms +step:4890/57344 train_time:3236801ms step_avg:661.92ms +step:4891/57344 train_time:3237301ms step_avg:661.89ms +grad accum step:1223/14336 +step:4892/57344 train_time:3238489ms step_avg:662.00ms +step:4893/57344 train_time:3238506ms step_avg:661.87ms +step:4894/57344 train_time:3238737ms step_avg:661.78ms +step:4895/57344 train_time:3239240ms step_avg:661.74ms +grad accum step:1224/14336 +step:4896/57344 train_time:3240427ms step_avg:661.85ms +step:4897/57344 train_time:3240444ms step_avg:661.72ms +step:4898/57344 train_time:3240675ms step_avg:661.63ms +step:4899/57344 train_time:3241176ms step_avg:661.60ms +grad accum step:1225/14336 +step:4900/57344 train_time:3242364ms step_avg:661.71ms +step:4901/57344 train_time:3242381ms step_avg:661.58ms +step:4902/57344 train_time:3242615ms step_avg:661.49ms +step:4903/57344 train_time:3243116ms step_avg:661.46ms +grad accum step:1226/14336 +step:4904/57344 train_time:3244300ms step_avg:661.56ms +step:4905/57344 train_time:3244317ms step_avg:661.43ms +step:4906/57344 train_time:3244550ms step_avg:661.34ms +step:4907/57344 train_time:3245051ms step_avg:661.31ms +grad accum step:1227/14336 +step:4908/57344 train_time:3246238ms step_avg:661.42ms +step:4909/57344 train_time:3246255ms step_avg:661.29ms +step:4910/57344 train_time:3246488ms step_avg:661.20ms +step:4911/57344 train_time:3246991ms step_avg:661.17ms +grad accum step:1228/14336 +step:4912/57344 train_time:3248180ms step_avg:661.27ms +step:4913/57344 train_time:3248197ms step_avg:661.14ms +step:4914/57344 train_time:3248431ms step_avg:661.06ms +step:4915/57344 train_time:3248935ms step_avg:661.02ms +grad accum step:1229/14336 +step:4916/57344 train_time:3250122ms step_avg:661.13ms +step:4917/57344 train_time:3250139ms step_avg:661.00ms +step:4918/57344 train_time:3250371ms step_avg:660.91ms +step:4919/57344 train_time:3250873ms step_avg:660.88ms +grad accum step:1230/14336 +step:4920/57344 train_time:3252067ms step_avg:660.99ms +step:4921/57344 train_time:3252084ms step_avg:660.86ms +step:4922/57344 train_time:3252317ms step_avg:660.77ms +step:4923/57344 train_time:3252820ms step_avg:660.74ms +grad accum step:1231/14336 +step:4924/57344 train_time:3254010ms step_avg:660.85ms +step:4925/57344 train_time:3254027ms step_avg:660.72ms +step:4926/57344 train_time:3254261ms step_avg:660.63ms +step:4927/57344 train_time:3254765ms step_avg:660.60ms +grad accum step:1232/14336 +step:4928/57344 train_time:3255952ms step_avg:660.70ms +step:4928/57344 val_loss:7.694653 train_time:3255953ms step_avg:660.70ms +step:4929/57344 train_time:3255965ms step_avg:660.57ms +step:4930/57344 train_time:3256174ms step_avg:660.48ms +step:4931/57344 train_time:3256680ms step_avg:660.45ms +grad accum step:1233/14336 +step:4932/57344 train_time:3257879ms step_avg:660.56ms +step:4933/57344 train_time:3257896ms step_avg:660.43ms +step:4934/57344 train_time:3258132ms step_avg:660.34ms +step:4935/57344 train_time:3258637ms step_avg:660.31ms +grad accum step:1234/14336 +step:4936/57344 train_time:3259828ms step_avg:660.42ms +step:4937/57344 train_time:3259846ms step_avg:660.29ms +step:4938/57344 train_time:3260078ms step_avg:660.20ms +step:4939/57344 train_time:3260579ms step_avg:660.17ms +grad accum step:1235/14336 +step:4940/57344 train_time:3261768ms step_avg:660.28ms +step:4941/57344 train_time:3261786ms step_avg:660.15ms +step:4942/57344 train_time:3262019ms step_avg:660.06ms +step:4943/57344 train_time:3262521ms step_avg:660.03ms +grad accum step:1236/14336 +step:4944/57344 train_time:3263714ms step_avg:660.14ms +step:4945/57344 train_time:3263731ms step_avg:660.01ms +step:4946/57344 train_time:3263965ms step_avg:659.92ms +step:4947/57344 train_time:3264471ms step_avg:659.89ms +grad accum step:1237/14336 +step:4948/57344 train_time:3265666ms step_avg:660.00ms +step:4949/57344 train_time:3265683ms step_avg:659.87ms +step:4950/57344 train_time:3265916ms step_avg:659.78ms +step:4951/57344 train_time:3266420ms step_avg:659.75ms +grad accum step:1238/14336 +step:4952/57344 train_time:3267612ms step_avg:659.86ms +step:4953/57344 train_time:3267629ms step_avg:659.73ms +step:4954/57344 train_time:3267862ms step_avg:659.64ms +step:4955/57344 train_time:3268368ms step_avg:659.61ms +grad accum step:1239/14336 +step:4956/57344 train_time:3269560ms step_avg:659.72ms +step:4957/57344 train_time:3269577ms step_avg:659.59ms +step:4958/57344 train_time:3269813ms step_avg:659.50ms +step:4959/57344 train_time:3270316ms step_avg:659.47ms +grad accum step:1240/14336 +step:4960/57344 train_time:3271505ms step_avg:659.58ms +step:4961/57344 train_time:3271522ms step_avg:659.45ms +step:4962/57344 train_time:3271755ms step_avg:659.36ms +step:4963/57344 train_time:3272257ms step_avg:659.33ms +grad accum step:1241/14336 +step:4964/57344 train_time:3273449ms step_avg:659.44ms +step:4965/57344 train_time:3273466ms step_avg:659.31ms +step:4966/57344 train_time:3273700ms step_avg:659.22ms +step:4967/57344 train_time:3274203ms step_avg:659.19ms +grad accum step:1242/14336 +step:4968/57344 train_time:3275398ms step_avg:659.30ms +step:4969/57344 train_time:3275415ms step_avg:659.17ms +step:4970/57344 train_time:3275648ms step_avg:659.08ms +step:4971/57344 train_time:3276152ms step_avg:659.05ms +grad accum step:1243/14336 +step:4972/57344 train_time:3277348ms step_avg:659.16ms +step:4973/57344 train_time:3277365ms step_avg:659.03ms +step:4974/57344 train_time:3277598ms step_avg:658.95ms +step:4975/57344 train_time:3278102ms step_avg:658.91ms +grad accum step:1244/14336 +step:4976/57344 train_time:3279302ms step_avg:659.02ms +step:4977/57344 train_time:3279320ms step_avg:658.89ms +step:4978/57344 train_time:3279556ms step_avg:658.81ms +step:4979/57344 train_time:3280061ms step_avg:658.78ms +grad accum step:1245/14336 +step:4980/57344 train_time:3281253ms step_avg:658.89ms +step:4981/57344 train_time:3281270ms step_avg:658.76ms +step:4982/57344 train_time:3281502ms step_avg:658.67ms +step:4983/57344 train_time:3282003ms step_avg:658.64ms +grad accum step:1246/14336 +step:4984/57344 train_time:3283198ms step_avg:658.75ms +step:4985/57344 train_time:3283215ms step_avg:658.62ms +step:4986/57344 train_time:3283449ms step_avg:658.53ms +step:4987/57344 train_time:3283951ms step_avg:658.50ms +grad accum step:1247/14336 +step:4988/57344 train_time:3285142ms step_avg:658.61ms +step:4989/57344 train_time:3285159ms step_avg:658.48ms +step:4990/57344 train_time:3285393ms step_avg:658.40ms +step:4991/57344 train_time:3285899ms step_avg:658.36ms +grad accum step:1248/14336 +step:4992/57344 train_time:3287097ms step_avg:658.47ms +step:4992/57344 val_loss:7.678105 train_time:3287098ms step_avg:658.47ms +step:4993/57344 train_time:3287110ms step_avg:658.34ms +step:4994/57344 train_time:3287319ms step_avg:658.25ms +step:4995/57344 train_time:3287822ms step_avg:658.22ms +grad accum step:1249/14336 +step:4996/57344 train_time:3289007ms step_avg:658.33ms +step:4997/57344 train_time:3289024ms step_avg:658.20ms +step:4998/57344 train_time:3289258ms step_avg:658.11ms +step:4999/57344 train_time:3289764ms step_avg:658.08ms +grad accum step:1250/14336 +step:5000/57344 train_time:3290953ms step_avg:658.19ms +step:5001/57344 train_time:3290970ms step_avg:658.06ms +step:5002/57344 train_time:3291204ms step_avg:657.98ms +step:5003/57344 train_time:3291705ms step_avg:657.95ms +grad accum step:1251/14336 +step:5004/57344 train_time:3292896ms step_avg:658.05ms +step:5005/57344 train_time:3292914ms step_avg:657.92ms +step:5006/57344 train_time:3293148ms step_avg:657.84ms +step:5007/57344 train_time:3293651ms step_avg:657.81ms +grad accum step:1252/14336 +step:5008/57344 train_time:3294849ms step_avg:657.92ms +step:5009/57344 train_time:3294866ms step_avg:657.79ms +step:5010/57344 train_time:3295098ms step_avg:657.70ms +step:5011/57344 train_time:3295603ms step_avg:657.67ms +grad accum step:1253/14336 +step:5012/57344 train_time:3296790ms step_avg:657.78ms +step:5013/57344 train_time:3296807ms step_avg:657.65ms +step:5014/57344 train_time:3297039ms step_avg:657.57ms +step:5015/57344 train_time:3297542ms step_avg:657.54ms +grad accum step:1254/14336 +step:5016/57344 train_time:3298736ms step_avg:657.64ms +step:5017/57344 train_time:3298753ms step_avg:657.52ms +step:5018/57344 train_time:3298987ms step_avg:657.43ms +step:5019/57344 train_time:3299492ms step_avg:657.40ms +grad accum step:1255/14336 +step:5020/57344 train_time:3300682ms step_avg:657.51ms +step:5021/57344 train_time:3300699ms step_avg:657.38ms +step:5022/57344 train_time:3300933ms step_avg:657.29ms +step:5023/57344 train_time:3301436ms step_avg:657.26ms +grad accum step:1256/14336 +step:5024/57344 train_time:3302626ms step_avg:657.37ms +step:5025/57344 train_time:3302643ms step_avg:657.24ms +step:5026/57344 train_time:3302876ms step_avg:657.16ms +step:5027/57344 train_time:3303378ms step_avg:657.13ms +grad accum step:1257/14336 +step:5028/57344 train_time:3304570ms step_avg:657.23ms +step:5029/57344 train_time:3304588ms step_avg:657.11ms +step:5030/57344 train_time:3304822ms step_avg:657.02ms +step:5031/57344 train_time:3305326ms step_avg:656.99ms +grad accum step:1258/14336 +step:5032/57344 train_time:3306514ms step_avg:657.10ms +step:5033/57344 train_time:3306532ms step_avg:656.97ms +step:5034/57344 train_time:3306764ms step_avg:656.89ms +step:5035/57344 train_time:3307266ms step_avg:656.86ms +grad accum step:1259/14336 +step:5036/57344 train_time:3308459ms step_avg:656.96ms +step:5037/57344 train_time:3308476ms step_avg:656.83ms +step:5038/57344 train_time:3308710ms step_avg:656.75ms +step:5039/57344 train_time:3309213ms step_avg:656.72ms +grad accum step:1260/14336 +step:5040/57344 train_time:3310403ms step_avg:656.83ms +step:5041/57344 train_time:3310420ms step_avg:656.70ms +step:5042/57344 train_time:3310653ms step_avg:656.62ms +step:5043/57344 train_time:3311157ms step_avg:656.58ms +grad accum step:1261/14336 +step:5044/57344 train_time:3312353ms step_avg:656.69ms +step:5045/57344 train_time:3312370ms step_avg:656.56ms +step:5046/57344 train_time:3312604ms step_avg:656.48ms +step:5047/57344 train_time:3313107ms step_avg:656.45ms +grad accum step:1262/14336 +step:5048/57344 train_time:3314297ms step_avg:656.56ms +step:5049/57344 train_time:3314314ms step_avg:656.43ms +step:5050/57344 train_time:3314547ms step_avg:656.35ms +step:5051/57344 train_time:3315051ms step_avg:656.32ms +grad accum step:1263/14336 +step:5052/57344 train_time:3316245ms step_avg:656.42ms +step:5053/57344 train_time:3316262ms step_avg:656.30ms +step:5054/57344 train_time:3316495ms step_avg:656.21ms +step:5055/57344 train_time:3316997ms step_avg:656.18ms +grad accum step:1264/14336 +step:5056/57344 train_time:3318193ms step_avg:656.29ms +step:5056/57344 val_loss:7.682966 train_time:3318193ms step_avg:656.29ms +step:5057/57344 train_time:3318205ms step_avg:656.16ms +step:5058/57344 train_time:3318414ms step_avg:656.07ms +step:5059/57344 train_time:3318917ms step_avg:656.04ms +grad accum step:1265/14336 +step:5060/57344 train_time:3320107ms step_avg:656.15ms +step:5061/57344 train_time:3320124ms step_avg:656.02ms +step:5062/57344 train_time:3320358ms step_avg:655.94ms +step:5063/57344 train_time:3320863ms step_avg:655.91ms +grad accum step:1266/14336 +step:5064/57344 train_time:3322054ms step_avg:656.01ms +step:5065/57344 train_time:3322070ms step_avg:655.89ms +step:5066/57344 train_time:3322303ms step_avg:655.80ms +step:5067/57344 train_time:3322807ms step_avg:655.77ms +grad accum step:1267/14336 +step:5068/57344 train_time:3323996ms step_avg:655.88ms +step:5069/57344 train_time:3324013ms step_avg:655.75ms +step:5070/57344 train_time:3324245ms step_avg:655.67ms +step:5071/57344 train_time:3324750ms step_avg:655.64ms +grad accum step:1268/14336 +step:5072/57344 train_time:3325935ms step_avg:655.74ms +step:5073/57344 train_time:3325952ms step_avg:655.62ms +step:5074/57344 train_time:3326185ms step_avg:655.54ms +step:5075/57344 train_time:3326686ms step_avg:655.50ms +grad accum step:1269/14336 +step:5076/57344 train_time:3327872ms step_avg:655.61ms +step:5077/57344 train_time:3327889ms step_avg:655.48ms +step:5078/57344 train_time:3328122ms step_avg:655.40ms +step:5079/57344 train_time:3328625ms step_avg:655.37ms +grad accum step:1270/14336 +step:5080/57344 train_time:3329810ms step_avg:655.47ms +step:5081/57344 train_time:3329827ms step_avg:655.35ms +step:5082/57344 train_time:3330064ms step_avg:655.27ms +step:5083/57344 train_time:3330573ms step_avg:655.24ms +grad accum step:1271/14336 +step:5084/57344 train_time:3331772ms step_avg:655.34ms +step:5085/57344 train_time:3331789ms step_avg:655.22ms +step:5086/57344 train_time:3332022ms step_avg:655.14ms +step:5087/57344 train_time:3332523ms step_avg:655.11ms +grad accum step:1272/14336 +step:5088/57344 train_time:3333708ms step_avg:655.21ms +step:5089/57344 train_time:3333725ms step_avg:655.08ms +step:5090/57344 train_time:3333960ms step_avg:655.00ms +step:5091/57344 train_time:3334465ms step_avg:654.97ms +grad accum step:1273/14336 +step:5092/57344 train_time:3335659ms step_avg:655.08ms +step:5093/57344 train_time:3335676ms step_avg:654.95ms +step:5094/57344 train_time:3335909ms step_avg:654.87ms +step:5095/57344 train_time:3336410ms step_avg:654.84ms +grad accum step:1274/14336 +step:5096/57344 train_time:3337598ms step_avg:654.94ms +step:5097/57344 train_time:3337615ms step_avg:654.82ms +step:5098/57344 train_time:3337848ms step_avg:654.74ms +step:5099/57344 train_time:3338353ms step_avg:654.71ms +grad accum step:1275/14336 +step:5100/57344 train_time:3339544ms step_avg:654.81ms +step:5101/57344 train_time:3339561ms step_avg:654.69ms +step:5102/57344 train_time:3339795ms step_avg:654.60ms +step:5103/57344 train_time:3340298ms step_avg:654.58ms +grad accum step:1276/14336 +step:5104/57344 train_time:3341488ms step_avg:654.68ms +step:5105/57344 train_time:3341676ms step_avg:654.59ms +step:5106/57344 train_time:3341881ms step_avg:654.50ms +step:5107/57344 train_time:3342387ms step_avg:654.47ms +grad accum step:1277/14336 +step:5108/57344 train_time:3343582ms step_avg:654.58ms +step:5109/57344 train_time:3343599ms step_avg:654.45ms +step:5110/57344 train_time:3343833ms step_avg:654.37ms +step:5111/57344 train_time:3344337ms step_avg:654.34ms +grad accum step:1278/14336 +step:5112/57344 train_time:3345536ms step_avg:654.45ms +step:5113/57344 train_time:3345554ms step_avg:654.32ms +step:5114/57344 train_time:3345787ms step_avg:654.24ms +step:5115/57344 train_time:3346290ms step_avg:654.21ms +grad accum step:1279/14336 +step:5116/57344 train_time:3347485ms step_avg:654.32ms +step:5117/57344 train_time:3347502ms step_avg:654.19ms +step:5118/57344 train_time:3347736ms step_avg:654.11ms +step:5119/57344 train_time:3348241ms step_avg:654.08ms +grad accum step:1280/14336 +step:5120/57344 train_time:3349461ms step_avg:654.19ms +step:5120/57344 val_loss:7.662866 train_time:3349483ms step_avg:654.20ms +step:5121/57344 train_time:3349494ms step_avg:654.07ms +step:5122/57344 train_time:3349706ms step_avg:653.98ms +step:5123/57344 train_time:3350212ms step_avg:653.96ms +grad accum step:1281/14336 +step:5124/57344 train_time:3351409ms step_avg:654.06ms +step:5125/57344 train_time:3351426ms step_avg:653.94ms +step:5126/57344 train_time:3351662ms step_avg:653.86ms +step:5127/57344 train_time:3352168ms step_avg:653.83ms +grad accum step:1282/14336 +step:5128/57344 train_time:3353363ms step_avg:653.93ms +step:5129/57344 train_time:3353380ms step_avg:653.81ms +step:5130/57344 train_time:3353615ms step_avg:653.73ms +step:5131/57344 train_time:3354118ms step_avg:653.70ms +grad accum step:1283/14336 +step:5132/57344 train_time:3355313ms step_avg:653.80ms +step:5133/57344 train_time:3355331ms step_avg:653.68ms +step:5134/57344 train_time:3355564ms step_avg:653.60ms +step:5135/57344 train_time:3356069ms step_avg:653.57ms +grad accum step:1284/14336 +step:5136/57344 train_time:3357267ms step_avg:653.67ms +step:5137/57344 train_time:3357284ms step_avg:653.55ms +step:5138/57344 train_time:3357516ms step_avg:653.47ms +step:5139/57344 train_time:3358018ms step_avg:653.44ms +grad accum step:1285/14336 +step:5140/57344 train_time:3359212ms step_avg:653.54ms +step:5141/57344 train_time:3359229ms step_avg:653.42ms +step:5142/57344 train_time:3359463ms step_avg:653.34ms +step:5143/57344 train_time:3359968ms step_avg:653.31ms +grad accum step:1286/14336 +step:5144/57344 train_time:3361157ms step_avg:653.41ms +step:5145/57344 train_time:3361174ms step_avg:653.29ms +step:5146/57344 train_time:3361409ms step_avg:653.21ms +step:5147/57344 train_time:3361916ms step_avg:653.18ms +grad accum step:1287/14336 +step:5148/57344 train_time:3363111ms step_avg:653.28ms +step:5149/57344 train_time:3363128ms step_avg:653.16ms +step:5150/57344 train_time:3363362ms step_avg:653.08ms +step:5151/57344 train_time:3363867ms step_avg:653.05ms +grad accum step:1288/14336 +step:5152/57344 train_time:3365061ms step_avg:653.16ms +step:5153/57344 train_time:3365079ms step_avg:653.03ms +step:5154/57344 train_time:3365312ms step_avg:652.95ms +step:5155/57344 train_time:3365815ms step_avg:652.92ms +grad accum step:1289/14336 +step:5156/57344 train_time:3367006ms step_avg:653.03ms +step:5157/57344 train_time:3367023ms step_avg:652.90ms +step:5158/57344 train_time:3367258ms step_avg:652.82ms +step:5159/57344 train_time:3367764ms step_avg:652.79ms +grad accum step:1290/14336 +step:5160/57344 train_time:3368967ms step_avg:652.90ms +step:5161/57344 train_time:3368984ms step_avg:652.78ms +step:5162/57344 train_time:3369218ms step_avg:652.70ms +step:5163/57344 train_time:3369727ms step_avg:652.67ms +grad accum step:1291/14336 +step:5164/57344 train_time:3370926ms step_avg:652.77ms +step:5165/57344 train_time:3370943ms step_avg:652.65ms +step:5166/57344 train_time:3371177ms step_avg:652.57ms +step:5167/57344 train_time:3371683ms step_avg:652.54ms +grad accum step:1292/14336 +step:5168/57344 train_time:3372877ms step_avg:652.65ms +step:5169/57344 train_time:3372894ms step_avg:652.52ms +step:5170/57344 train_time:3373127ms step_avg:652.44ms +step:5171/57344 train_time:3373633ms step_avg:652.41ms +grad accum step:1293/14336 +step:5172/57344 train_time:3374826ms step_avg:652.52ms +step:5173/57344 train_time:3374843ms step_avg:652.40ms +step:5174/57344 train_time:3375076ms step_avg:652.31ms +step:5175/57344 train_time:3375580ms step_avg:652.29ms +grad accum step:1294/14336 +step:5176/57344 train_time:3376775ms step_avg:652.39ms +step:5177/57344 train_time:3376792ms step_avg:652.27ms +step:5178/57344 train_time:3377025ms step_avg:652.19ms +step:5179/57344 train_time:3377530ms step_avg:652.16ms +grad accum step:1295/14336 +step:5180/57344 train_time:3378721ms step_avg:652.26ms +step:5181/57344 train_time:3442048ms step_avg:664.36ms +step:5182/57344 train_time:3442320ms step_avg:664.28ms +step:5183/57344 train_time:3442817ms step_avg:664.25ms +grad accum step:1296/14336 +step:5184/57344 train_time:3443991ms step_avg:664.35ms +step:5184/57344 val_loss:7.657860 train_time:3443992ms step_avg:664.35ms +step:5185/57344 train_time:3444004ms step_avg:664.22ms +step:5186/57344 train_time:3444214ms step_avg:664.14ms +step:5187/57344 train_time:3444717ms step_avg:664.11ms +grad accum step:1297/14336 +step:5188/57344 train_time:3445916ms step_avg:664.21ms +step:5189/57344 train_time:3445933ms step_avg:664.08ms +step:5190/57344 train_time:3446164ms step_avg:664.00ms +step:5191/57344 train_time:3446665ms step_avg:663.97ms +grad accum step:1298/14336 +step:5192/57344 train_time:3447849ms step_avg:664.07ms +step:5193/57344 train_time:3447866ms step_avg:663.94ms +step:5194/57344 train_time:3448100ms step_avg:663.86ms +step:5195/57344 train_time:3448603ms step_avg:663.83ms +grad accum step:1299/14336 +step:5196/57344 train_time:3449791ms step_avg:663.93ms +step:5197/57344 train_time:3449808ms step_avg:663.81ms +step:5198/57344 train_time:3450041ms step_avg:663.72ms +step:5199/57344 train_time:3450543ms step_avg:663.69ms +grad accum step:1300/14336 +step:5200/57344 train_time:3451738ms step_avg:663.80ms +step:5201/57344 train_time:3451755ms step_avg:663.67ms +step:5202/57344 train_time:3451987ms step_avg:663.59ms +step:5203/57344 train_time:3452491ms step_avg:663.56ms +grad accum step:1301/14336 +step:5204/57344 train_time:3453681ms step_avg:663.66ms +step:5205/57344 train_time:3453699ms step_avg:663.53ms +step:5206/57344 train_time:3453931ms step_avg:663.45ms +step:5207/57344 train_time:3454432ms step_avg:663.42ms +grad accum step:1302/14336 +step:5208/57344 train_time:3455620ms step_avg:663.52ms +step:5209/57344 train_time:3455637ms step_avg:663.40ms +step:5210/57344 train_time:3455872ms step_avg:663.32ms +step:5211/57344 train_time:3456376ms step_avg:663.28ms +grad accum step:1303/14336 +step:5212/57344 train_time:3457559ms step_avg:663.38ms +step:5213/57344 train_time:3457576ms step_avg:663.26ms +step:5214/57344 train_time:3457810ms step_avg:663.18ms +step:5215/57344 train_time:3458312ms step_avg:663.15ms +grad accum step:1304/14336 +step:5216/57344 train_time:3459498ms step_avg:663.25ms +step:5217/57344 train_time:3459515ms step_avg:663.12ms +step:5218/57344 train_time:3459751ms step_avg:663.04ms +step:5219/57344 train_time:3460260ms step_avg:663.01ms +grad accum step:1305/14336 +step:5220/57344 train_time:3461461ms step_avg:663.12ms +step:5221/57344 train_time:3461478ms step_avg:662.99ms +step:5222/57344 train_time:3461713ms step_avg:662.91ms +step:5223/57344 train_time:3462222ms step_avg:662.88ms +grad accum step:1306/14336 +step:5224/57344 train_time:3463410ms step_avg:662.98ms +step:5225/57344 train_time:3463427ms step_avg:662.86ms +step:5226/57344 train_time:3463661ms step_avg:662.77ms +step:5227/57344 train_time:3464164ms step_avg:662.74ms +grad accum step:1307/14336 +step:5228/57344 train_time:3465355ms step_avg:662.85ms +step:5229/57344 train_time:3465372ms step_avg:662.72ms +step:5230/57344 train_time:3465604ms step_avg:662.64ms +step:5231/57344 train_time:3466108ms step_avg:662.61ms +grad accum step:1308/14336 +step:5232/57344 train_time:3467296ms step_avg:662.71ms +step:5233/57344 train_time:3467313ms step_avg:662.59ms +step:5234/57344 train_time:3467548ms step_avg:662.50ms +step:5235/57344 train_time:3468054ms step_avg:662.47ms +grad accum step:1309/14336 +step:5236/57344 train_time:3469245ms step_avg:662.58ms +step:5237/57344 train_time:3469261ms step_avg:662.45ms +step:5238/57344 train_time:3469497ms step_avg:662.37ms +step:5239/57344 train_time:3470001ms step_avg:662.34ms +grad accum step:1310/14336 +step:5240/57344 train_time:3471198ms step_avg:662.44ms +step:5241/57344 train_time:3471215ms step_avg:662.32ms +step:5242/57344 train_time:3471449ms step_avg:662.24ms +step:5243/57344 train_time:3471953ms step_avg:662.21ms +grad accum step:1311/14336 +step:5244/57344 train_time:3473148ms step_avg:662.31ms +step:5245/57344 train_time:3473165ms step_avg:662.19ms +step:5246/57344 train_time:3473400ms step_avg:662.10ms +step:5247/57344 train_time:3473909ms step_avg:662.08ms +grad accum step:1312/14336 +step:5248/57344 train_time:3475104ms step_avg:662.18ms +step:5248/57344 val_loss:7.645894 train_time:3475104ms step_avg:662.18ms +step:5249/57344 train_time:3475116ms step_avg:662.05ms +step:5250/57344 train_time:3475327ms step_avg:661.97ms +step:5251/57344 train_time:3475832ms step_avg:661.94ms +grad accum step:1313/14336 +step:5252/57344 train_time:3477037ms step_avg:662.04ms +step:5253/57344 train_time:3477054ms step_avg:661.92ms +step:5254/57344 train_time:3477288ms step_avg:661.84ms +step:5255/57344 train_time:3477796ms step_avg:661.81ms +grad accum step:1314/14336 +step:5256/57344 train_time:3479002ms step_avg:661.91ms +step:5257/57344 train_time:3479019ms step_avg:661.79ms +step:5258/57344 train_time:3479255ms step_avg:661.71ms +step:5259/57344 train_time:3479762ms step_avg:661.68ms +grad accum step:1315/14336 +step:5260/57344 train_time:3480962ms step_avg:661.78ms +step:5261/57344 train_time:3480979ms step_avg:661.66ms +step:5262/57344 train_time:3481217ms step_avg:661.58ms +step:5263/57344 train_time:3481726ms step_avg:661.55ms +grad accum step:1316/14336 +step:5264/57344 train_time:3482936ms step_avg:661.65ms +step:5265/57344 train_time:3482953ms step_avg:661.53ms +step:5266/57344 train_time:3483190ms step_avg:661.45ms +step:5267/57344 train_time:3483700ms step_avg:661.42ms +grad accum step:1317/14336 +step:5268/57344 train_time:3484899ms step_avg:661.52ms +step:5269/57344 train_time:3484916ms step_avg:661.40ms +step:5270/57344 train_time:3485153ms step_avg:661.32ms +step:5271/57344 train_time:3485660ms step_avg:661.29ms +grad accum step:1318/14336 +step:5272/57344 train_time:3486860ms step_avg:661.39ms +step:5273/57344 train_time:3486878ms step_avg:661.27ms +step:5274/57344 train_time:3487112ms step_avg:661.19ms +step:5275/57344 train_time:3487619ms step_avg:661.16ms +grad accum step:1319/14336 +step:5276/57344 train_time:3488815ms step_avg:661.26ms +step:5277/57344 train_time:3488832ms step_avg:661.14ms +step:5278/57344 train_time:3489068ms step_avg:661.06ms +step:5279/57344 train_time:3489573ms step_avg:661.03ms +grad accum step:1320/14336 +step:5280/57344 train_time:3490772ms step_avg:661.13ms +step:5281/57344 train_time:3490789ms step_avg:661.01ms +step:5282/57344 train_time:3491023ms step_avg:660.93ms +step:5283/57344 train_time:3491530ms step_avg:660.90ms +grad accum step:1321/14336 +step:5284/57344 train_time:3492730ms step_avg:661.00ms +step:5285/57344 train_time:3492748ms step_avg:660.88ms +step:5286/57344 train_time:3492982ms step_avg:660.80ms +step:5287/57344 train_time:3493489ms step_avg:660.77ms +grad accum step:1322/14336 +step:5288/57344 train_time:3494694ms step_avg:660.87ms +step:5289/57344 train_time:3494711ms step_avg:660.75ms +step:5290/57344 train_time:3494945ms step_avg:660.67ms +step:5291/57344 train_time:3495454ms step_avg:660.64ms +grad accum step:1323/14336 +step:5292/57344 train_time:3496651ms step_avg:660.74ms +step:5293/57344 train_time:3496668ms step_avg:660.62ms +step:5294/57344 train_time:3496903ms step_avg:660.54ms +step:5295/57344 train_time:3497412ms step_avg:660.51ms +grad accum step:1324/14336 +step:5296/57344 train_time:3498616ms step_avg:660.61ms +step:5297/57344 train_time:3498634ms step_avg:660.49ms +step:5298/57344 train_time:3498870ms step_avg:660.41ms +step:5299/57344 train_time:3499377ms step_avg:660.38ms +grad accum step:1325/14336 +step:5300/57344 train_time:3500583ms step_avg:660.49ms +step:5301/57344 train_time:3500600ms step_avg:660.37ms +step:5302/57344 train_time:3500835ms step_avg:660.29ms +step:5303/57344 train_time:3501342ms step_avg:660.26ms +grad accum step:1326/14336 +step:5304/57344 train_time:3502550ms step_avg:660.36ms +step:5305/57344 train_time:3502567ms step_avg:660.24ms +step:5306/57344 train_time:3502803ms step_avg:660.16ms +step:5307/57344 train_time:3503315ms step_avg:660.13ms +grad accum step:1327/14336 +step:5308/57344 train_time:3504517ms step_avg:660.23ms +step:5309/57344 train_time:3504534ms step_avg:660.11ms +step:5310/57344 train_time:3504770ms step_avg:660.03ms +step:5311/57344 train_time:3505276ms step_avg:660.00ms +grad accum step:1328/14336 +step:5312/57344 train_time:3506477ms step_avg:660.10ms +step:5312/57344 val_loss:7.632289 train_time:3506478ms step_avg:660.11ms +step:5313/57344 train_time:3506490ms step_avg:659.98ms +step:5314/57344 train_time:3520052ms step_avg:662.41ms +step:5315/57344 train_time:3520349ms step_avg:662.34ms +grad accum step:1329/14336 +step:5316/57344 train_time:3521530ms step_avg:662.44ms +step:5317/57344 train_time:3521547ms step_avg:662.32ms +step:5318/57344 train_time:3521779ms step_avg:662.24ms +step:5319/57344 train_time:3522278ms step_avg:662.21ms +grad accum step:1330/14336 +step:5320/57344 train_time:3523467ms step_avg:662.31ms +step:5321/57344 train_time:3523484ms step_avg:662.18ms +step:5322/57344 train_time:3523716ms step_avg:662.10ms +step:5323/57344 train_time:3524221ms step_avg:662.07ms +grad accum step:1331/14336 +step:5324/57344 train_time:3525411ms step_avg:662.17ms +step:5325/57344 train_time:3525428ms step_avg:662.05ms +step:5326/57344 train_time:3525661ms step_avg:661.97ms +step:5327/57344 train_time:3526164ms step_avg:661.94ms +grad accum step:1332/14336 +step:5328/57344 train_time:3527356ms step_avg:662.04ms +step:5329/57344 train_time:3527373ms step_avg:661.92ms +step:5330/57344 train_time:3527607ms step_avg:661.84ms +step:5331/57344 train_time:3528108ms step_avg:661.81ms +grad accum step:1333/14336 +step:5332/57344 train_time:3529292ms step_avg:661.91ms +step:5333/57344 train_time:3529308ms step_avg:661.79ms +step:5334/57344 train_time:3529541ms step_avg:661.71ms +step:5335/57344 train_time:3530042ms step_avg:661.68ms +grad accum step:1334/14336 +step:5336/57344 train_time:3531235ms step_avg:661.78ms +step:5337/57344 train_time:3531252ms step_avg:661.65ms +step:5338/57344 train_time:3531485ms step_avg:661.57ms +step:5339/57344 train_time:3531988ms step_avg:661.54ms +grad accum step:1335/14336 +step:5340/57344 train_time:3533181ms step_avg:661.64ms +step:5341/57344 train_time:3533198ms step_avg:661.52ms +step:5342/57344 train_time:3533431ms step_avg:661.44ms +step:5343/57344 train_time:3533933ms step_avg:661.41ms +grad accum step:1336/14336 +step:5344/57344 train_time:3535127ms step_avg:661.51ms +step:5345/57344 train_time:3535144ms step_avg:661.39ms +step:5346/57344 train_time:3535378ms step_avg:661.31ms +step:5347/57344 train_time:3535881ms step_avg:661.28ms +grad accum step:1337/14336 +step:5348/57344 train_time:3556633ms step_avg:665.04ms +step:5349/57344 train_time:3556645ms step_avg:664.92ms +step:5350/57344 train_time:3556895ms step_avg:664.84ms +step:5351/57344 train_time:3557392ms step_avg:664.81ms +grad accum step:1338/14336 +step:5352/57344 train_time:3558577ms step_avg:664.91ms +step:5353/57344 train_time:3558594ms step_avg:664.79ms +step:5354/57344 train_time:3558825ms step_avg:664.70ms +step:5355/57344 train_time:3559324ms step_avg:664.67ms +grad accum step:1339/14336 +step:5356/57344 train_time:3560503ms step_avg:664.77ms +step:5357/57344 train_time:3560520ms step_avg:664.65ms +step:5358/57344 train_time:3560751ms step_avg:664.57ms +step:5359/57344 train_time:3561252ms step_avg:664.54ms +grad accum step:1340/14336 +step:5360/57344 train_time:3562440ms step_avg:664.63ms +step:5361/57344 train_time:3562457ms step_avg:664.51ms +step:5362/57344 train_time:3562690ms step_avg:664.43ms +step:5363/57344 train_time:3563191ms step_avg:664.40ms +grad accum step:1341/14336 +step:5364/57344 train_time:3564374ms step_avg:664.50ms +step:5365/57344 train_time:3564391ms step_avg:664.38ms +step:5366/57344 train_time:3564623ms step_avg:664.30ms +step:5367/57344 train_time:3565128ms step_avg:664.27ms +grad accum step:1342/14336 +step:5368/57344 train_time:3566319ms step_avg:664.37ms +step:5369/57344 train_time:3566336ms step_avg:664.25ms +step:5370/57344 train_time:3566568ms step_avg:664.17ms +step:5371/57344 train_time:3567069ms step_avg:664.14ms +grad accum step:1343/14336 +step:5372/57344 train_time:3611697ms step_avg:672.32ms +step:5373/57344 train_time:3620789ms step_avg:673.89ms +step:5374/57344 train_time:3621083ms step_avg:673.82ms +step:5375/57344 train_time:3621593ms step_avg:673.78ms +grad accum step:1344/14336 +step:5376/57344 train_time:3623034ms step_avg:673.93ms +step:5376/57344 val_loss:7.624333 train_time:3624432ms step_avg:674.19ms +step:5377/57344 train_time:3624444ms step_avg:674.06ms +step:5378/57344 train_time:3624655ms step_avg:673.98ms +step:5379/57344 train_time:3625157ms step_avg:673.95ms +grad accum step:1345/14336 +step:5380/57344 train_time:3626348ms step_avg:674.04ms +step:5381/57344 train_time:3626365ms step_avg:673.92ms +step:5382/57344 train_time:3626598ms step_avg:673.84ms +step:5383/57344 train_time:3627102ms step_avg:673.81ms +grad accum step:1346/14336 +step:5384/57344 train_time:3628292ms step_avg:673.90ms +step:5385/57344 train_time:3628308ms step_avg:673.78ms +step:5386/57344 train_time:3628542ms step_avg:673.70ms +step:5387/57344 train_time:3629047ms step_avg:673.67ms +grad accum step:1347/14336 +step:5388/57344 train_time:3630236ms step_avg:673.76ms +step:5389/57344 train_time:3630253ms step_avg:673.64ms +step:5390/57344 train_time:3630486ms step_avg:673.56ms +step:5391/57344 train_time:3630988ms step_avg:673.53ms +grad accum step:1348/14336 +step:5392/57344 train_time:3632178ms step_avg:673.62ms +step:5393/57344 train_time:3632195ms step_avg:673.50ms +step:5394/57344 train_time:3632429ms step_avg:673.42ms +step:5395/57344 train_time:3632933ms step_avg:673.39ms +grad accum step:1349/14336 +step:5396/57344 train_time:3634125ms step_avg:673.48ms +step:5397/57344 train_time:3634142ms step_avg:673.36ms +step:5398/57344 train_time:3634375ms step_avg:673.28ms +step:5399/57344 train_time:3634879ms step_avg:673.25ms +grad accum step:1350/14336 +step:5400/57344 train_time:3636068ms step_avg:673.35ms +step:5401/57344 train_time:3636085ms step_avg:673.22ms +step:5402/57344 train_time:3636319ms step_avg:673.14ms +step:5403/57344 train_time:3636823ms step_avg:673.11ms +grad accum step:1351/14336 +step:5404/57344 train_time:3638018ms step_avg:673.21ms +step:5405/57344 train_time:3638035ms step_avg:673.09ms +step:5406/57344 train_time:3638271ms step_avg:673.01ms +step:5407/57344 train_time:3638777ms step_avg:672.98ms +grad accum step:1352/14336 +step:5408/57344 train_time:3639967ms step_avg:673.07ms +step:5409/57344 train_time:3639984ms step_avg:672.95ms +step:5410/57344 train_time:3640215ms step_avg:672.87ms +step:5411/57344 train_time:3640718ms step_avg:672.84ms +grad accum step:1353/14336 +step:5412/57344 train_time:3641908ms step_avg:672.93ms +step:5413/57344 train_time:3641926ms step_avg:672.81ms +step:5414/57344 train_time:3642160ms step_avg:672.73ms +step:5415/57344 train_time:3642664ms step_avg:672.70ms +grad accum step:1354/14336 +step:5416/57344 train_time:3643853ms step_avg:672.79ms +step:5417/57344 train_time:3643870ms step_avg:672.67ms +step:5418/57344 train_time:3644103ms step_avg:672.59ms +step:5419/57344 train_time:3644606ms step_avg:672.56ms +grad accum step:1355/14336 +step:5420/57344 train_time:3645798ms step_avg:672.66ms +step:5421/57344 train_time:3645815ms step_avg:672.54ms +step:5422/57344 train_time:3646049ms step_avg:672.45ms +step:5423/57344 train_time:3646553ms step_avg:672.42ms +grad accum step:1356/14336 +step:5424/57344 train_time:3647741ms step_avg:672.52ms +step:5425/57344 train_time:3647758ms step_avg:672.40ms +step:5426/57344 train_time:3647994ms step_avg:672.32ms +step:5427/57344 train_time:3648509ms step_avg:672.29ms +grad accum step:1357/14336 +step:5428/57344 train_time:3649702ms step_avg:672.38ms +step:5429/57344 train_time:3649719ms step_avg:672.26ms +step:5430/57344 train_time:3649954ms step_avg:672.18ms +step:5431/57344 train_time:3650460ms step_avg:672.15ms +grad accum step:1358/14336 +step:5432/57344 train_time:3651647ms step_avg:672.25ms +step:5433/57344 train_time:3651664ms step_avg:672.13ms +step:5434/57344 train_time:3651897ms step_avg:672.05ms +step:5435/57344 train_time:3652398ms step_avg:672.01ms +grad accum step:1359/14336 +step:5436/57344 train_time:3653593ms step_avg:672.11ms +step:5437/57344 train_time:3653610ms step_avg:671.99ms +step:5438/57344 train_time:3653843ms step_avg:671.91ms +step:5439/57344 train_time:3654349ms step_avg:671.88ms +grad accum step:1360/14336 +step:5440/57344 train_time:3655543ms step_avg:671.97ms +step:5440/57344 val_loss:7.623236 train_time:3655544ms step_avg:671.97ms +step:5441/57344 train_time:3655556ms step_avg:671.85ms +step:5442/57344 train_time:3655765ms step_avg:671.77ms +step:5443/57344 train_time:3656270ms step_avg:671.74ms +grad accum step:1361/14336 +step:5444/57344 train_time:3657467ms step_avg:671.83ms +step:5445/57344 train_time:3657484ms step_avg:671.71ms +step:5446/57344 train_time:3657719ms step_avg:671.63ms +step:5447/57344 train_time:3658222ms step_avg:671.60ms +grad accum step:1362/14336 +step:5448/57344 train_time:3659423ms step_avg:671.70ms +step:5449/57344 train_time:3659440ms step_avg:671.58ms +step:5450/57344 train_time:3659674ms step_avg:671.50ms +step:5451/57344 train_time:3660179ms step_avg:671.47ms +grad accum step:1363/14336 +step:5452/57344 train_time:3661380ms step_avg:671.57ms +step:5453/57344 train_time:3661397ms step_avg:671.45ms +step:5454/57344 train_time:3661630ms step_avg:671.37ms +step:5455/57344 train_time:3662135ms step_avg:671.34ms +grad accum step:1364/14336 +step:5456/57344 train_time:3663327ms step_avg:671.43ms +step:5457/57344 train_time:3663345ms step_avg:671.31ms +step:5458/57344 train_time:3663581ms step_avg:671.23ms +step:5459/57344 train_time:3664087ms step_avg:671.20ms +grad accum step:1365/14336 +step:5460/57344 train_time:3665291ms step_avg:671.30ms +step:5461/57344 train_time:3665308ms step_avg:671.18ms +step:5462/57344 train_time:3665544ms step_avg:671.10ms +step:5463/57344 train_time:3666051ms step_avg:671.07ms +grad accum step:1366/14336 +step:5464/57344 train_time:3667248ms step_avg:671.17ms +step:5465/57344 train_time:3668234ms step_avg:671.22ms +step:5466/57344 train_time:3668398ms step_avg:671.13ms +step:5467/57344 train_time:3668904ms step_avg:671.10ms +grad accum step:1367/14336 +step:5468/57344 train_time:3670134ms step_avg:671.20ms +step:5469/57344 train_time:3670146ms step_avg:671.08ms +step:5470/57344 train_time:3670363ms step_avg:671.00ms +step:5471/57344 train_time:3670873ms step_avg:670.97ms +grad accum step:1368/14336 +step:5472/57344 train_time:3672080ms step_avg:671.07ms +step:5473/57344 train_time:3672097ms step_avg:670.95ms +step:5474/57344 train_time:3672334ms step_avg:670.87ms +step:5475/57344 train_time:3672843ms step_avg:670.84ms +grad accum step:1369/14336 +step:5476/57344 train_time:3674040ms step_avg:670.94ms +step:5477/57344 train_time:3674057ms step_avg:670.82ms +step:5478/57344 train_time:3674293ms step_avg:670.74ms +step:5479/57344 train_time:3674804ms step_avg:670.71ms +grad accum step:1370/14336 +step:5480/57344 train_time:3676010ms step_avg:670.80ms +step:5481/57344 train_time:3676027ms step_avg:670.69ms +step:5482/57344 train_time:3676263ms step_avg:670.61ms +step:5483/57344 train_time:3676771ms step_avg:670.58ms +grad accum step:1371/14336 +step:5484/57344 train_time:3677973ms step_avg:670.67ms +step:5485/57344 train_time:3677990ms step_avg:670.55ms +step:5486/57344 train_time:3678227ms step_avg:670.48ms +step:5487/57344 train_time:3678736ms step_avg:670.45ms +grad accum step:1372/14336 +step:5488/57344 train_time:3679934ms step_avg:670.54ms +step:5489/57344 train_time:3679951ms step_avg:670.42ms +step:5490/57344 train_time:3680186ms step_avg:670.34ms +step:5491/57344 train_time:3680692ms step_avg:670.31ms +grad accum step:1373/14336 +step:5492/57344 train_time:3681897ms step_avg:670.41ms +step:5493/57344 train_time:3681914ms step_avg:670.29ms +step:5494/57344 train_time:3682153ms step_avg:670.21ms +step:5495/57344 train_time:3682668ms step_avg:670.19ms +grad accum step:1374/14336 +step:5496/57344 train_time:3683867ms step_avg:670.28ms +step:5497/57344 train_time:3683884ms step_avg:670.16ms +step:5498/57344 train_time:3684122ms step_avg:670.08ms +step:5499/57344 train_time:3684635ms step_avg:670.06ms +grad accum step:1375/14336 +step:5500/57344 train_time:3685837ms step_avg:670.15ms +step:5501/57344 train_time:3685854ms step_avg:670.03ms +step:5502/57344 train_time:3686090ms step_avg:669.95ms +step:5503/57344 train_time:3686597ms step_avg:669.92ms +grad accum step:1376/14336 +step:5504/57344 train_time:3687799ms step_avg:670.02ms +step:5504/57344 val_loss:7.609012 train_time:3687800ms step_avg:670.02ms +step:5505/57344 train_time:3688059ms step_avg:669.95ms +step:5506/57344 train_time:3688072ms step_avg:669.83ms +step:5507/57344 train_time:3688575ms step_avg:669.80ms +grad accum step:1377/14336 +step:5508/57344 train_time:3689783ms step_avg:669.90ms +step:5509/57344 train_time:3689801ms step_avg:669.78ms +step:5510/57344 train_time:3690036ms step_avg:669.70ms +step:5511/57344 train_time:3690546ms step_avg:669.67ms +grad accum step:1378/14336 +step:5512/57344 train_time:3691761ms step_avg:669.77ms +step:5513/57344 train_time:3691778ms step_avg:669.65ms +step:5514/57344 train_time:3692012ms step_avg:669.57ms +step:5515/57344 train_time:3692517ms step_avg:669.54ms +grad accum step:1379/14336 +step:5516/57344 train_time:3693715ms step_avg:669.64ms +step:5517/57344 train_time:3693732ms step_avg:669.52ms +step:5518/57344 train_time:3693967ms step_avg:669.44ms +step:5519/57344 train_time:3694472ms step_avg:669.41ms +grad accum step:1380/14336 +step:5520/57344 train_time:3695680ms step_avg:669.51ms +step:5521/57344 train_time:3695698ms step_avg:669.39ms +step:5522/57344 train_time:3695932ms step_avg:669.31ms +step:5523/57344 train_time:3696441ms step_avg:669.28ms +grad accum step:1381/14336 +step:5524/57344 train_time:3697644ms step_avg:669.38ms +step:5525/57344 train_time:3697662ms step_avg:669.26ms +step:5526/57344 train_time:3697898ms step_avg:669.18ms +step:5527/57344 train_time:3698408ms step_avg:669.15ms +grad accum step:1382/14336 +step:5528/57344 train_time:3699620ms step_avg:669.25ms +step:5529/57344 train_time:3699637ms step_avg:669.13ms +step:5530/57344 train_time:3699872ms step_avg:669.05ms +step:5531/57344 train_time:3700377ms step_avg:669.02ms +grad accum step:1383/14336 +step:5532/57344 train_time:3701576ms step_avg:669.12ms +step:5533/57344 train_time:3701593ms step_avg:669.00ms +step:5534/57344 train_time:3701830ms step_avg:668.92ms +step:5535/57344 train_time:3702339ms step_avg:668.90ms +grad accum step:1384/14336 +step:5536/57344 train_time:3703547ms step_avg:668.99ms +step:5537/57344 train_time:3703564ms step_avg:668.88ms +step:5538/57344 train_time:3703798ms step_avg:668.80ms +step:5539/57344 train_time:3704304ms step_avg:668.77ms +grad accum step:1385/14336 +step:5540/57344 train_time:3705513ms step_avg:668.87ms +step:5541/57344 train_time:3705531ms step_avg:668.75ms +step:5542/57344 train_time:3705766ms step_avg:668.67ms +step:5543/57344 train_time:3706277ms step_avg:668.64ms +grad accum step:1386/14336 +step:5544/57344 train_time:3707478ms step_avg:668.74ms +step:5545/57344 train_time:3707495ms step_avg:668.62ms +step:5546/57344 train_time:3707734ms step_avg:668.54ms +step:5547/57344 train_time:3708250ms step_avg:668.51ms +grad accum step:1387/14336 +step:5548/57344 train_time:3709461ms step_avg:668.61ms +step:5549/57344 train_time:3709478ms step_avg:668.49ms +step:5550/57344 train_time:3709715ms step_avg:668.42ms +step:5551/57344 train_time:3710227ms step_avg:668.39ms +grad accum step:1388/14336 +step:5552/57344 train_time:3711431ms step_avg:668.49ms +step:5553/57344 train_time:3711449ms step_avg:668.37ms +step:5554/57344 train_time:3711684ms step_avg:668.29ms +step:5555/57344 train_time:3712192ms step_avg:668.26ms +grad accum step:1389/14336 +step:5556/57344 train_time:3713395ms step_avg:668.36ms +step:5557/57344 train_time:3713412ms step_avg:668.24ms +step:5558/57344 train_time:3713647ms step_avg:668.16ms +step:5559/57344 train_time:3714155ms step_avg:668.13ms +grad accum step:1390/14336 +step:5560/57344 train_time:3715356ms step_avg:668.23ms +step:5561/57344 train_time:3715373ms step_avg:668.11ms +step:5562/57344 train_time:3715610ms step_avg:668.03ms +step:5563/57344 train_time:3716121ms step_avg:668.01ms +grad accum step:1391/14336 +step:5564/57344 train_time:3717325ms step_avg:668.10ms +step:5565/57344 train_time:3717342ms step_avg:667.99ms +step:5566/57344 train_time:3717578ms step_avg:667.91ms +step:5567/57344 train_time:3718083ms step_avg:667.88ms +grad accum step:1392/14336 +step:5568/57344 train_time:3719284ms step_avg:667.97ms +step:5568/57344 val_loss:7.585613 train_time:3719285ms step_avg:667.97ms +step:5569/57344 train_time:3719296ms step_avg:667.86ms +step:5570/57344 train_time:3719510ms step_avg:667.78ms +step:5571/57344 train_time:3720018ms step_avg:667.75ms +grad accum step:1393/14336 +step:5572/57344 train_time:3721218ms step_avg:667.84ms +step:5573/57344 train_time:3721235ms step_avg:667.73ms +step:5574/57344 train_time:3721471ms step_avg:667.65ms +step:5575/57344 train_time:3721976ms step_avg:667.62ms +grad accum step:1394/14336 +step:5576/57344 train_time:3723183ms step_avg:667.72ms +step:5577/57344 train_time:3723200ms step_avg:667.60ms +step:5578/57344 train_time:3723435ms step_avg:667.52ms +step:5579/57344 train_time:3723945ms step_avg:667.49ms +grad accum step:1395/14336 +step:5580/57344 train_time:3725140ms step_avg:667.59ms +step:5581/57344 train_time:3725158ms step_avg:667.47ms +step:5582/57344 train_time:3725394ms step_avg:667.39ms +step:5583/57344 train_time:3725900ms step_avg:667.37ms +grad accum step:1396/14336 +step:5584/57344 train_time:3727103ms step_avg:667.46ms +step:5585/57344 train_time:3727121ms step_avg:667.34ms +step:5586/57344 train_time:3727357ms step_avg:667.27ms +step:5587/57344 train_time:3727863ms step_avg:667.24ms +grad accum step:1397/14336 +step:5588/57344 train_time:3729063ms step_avg:667.33ms +step:5589/57344 train_time:3729080ms step_avg:667.22ms +step:5590/57344 train_time:3729316ms step_avg:667.14ms +step:5591/57344 train_time:3729823ms step_avg:667.11ms +grad accum step:1398/14336 +step:5592/57344 train_time:3731025ms step_avg:667.21ms +step:5593/57344 train_time:3731042ms step_avg:667.09ms +step:5594/57344 train_time:3731276ms step_avg:667.01ms +step:5595/57344 train_time:3731781ms step_avg:666.98ms +grad accum step:1399/14336 +step:5596/57344 train_time:3732972ms step_avg:667.08ms +step:5597/57344 train_time:3732990ms step_avg:666.96ms +step:5598/57344 train_time:3733226ms step_avg:666.89ms +step:5599/57344 train_time:3733734ms step_avg:666.86ms +grad accum step:1400/14336 +step:5600/57344 train_time:3734934ms step_avg:666.95ms +step:5601/57344 train_time:3734951ms step_avg:666.84ms +step:5602/57344 train_time:3735187ms step_avg:666.76ms +step:5603/57344 train_time:3735697ms step_avg:666.73ms +grad accum step:1401/14336 +step:5604/57344 train_time:3736903ms step_avg:666.83ms +step:5605/57344 train_time:3736920ms step_avg:666.71ms +step:5606/57344 train_time:3737156ms step_avg:666.64ms +step:5607/57344 train_time:3737663ms step_avg:666.61ms +grad accum step:1402/14336 +step:5608/57344 train_time:3738864ms step_avg:666.70ms +step:5609/57344 train_time:3738881ms step_avg:666.59ms +step:5610/57344 train_time:3739119ms step_avg:666.51ms +step:5611/57344 train_time:3739627ms step_avg:666.48ms +grad accum step:1403/14336 +step:5612/57344 train_time:3740820ms step_avg:666.58ms +step:5613/57344 train_time:3740837ms step_avg:666.46ms +step:5614/57344 train_time:3741072ms step_avg:666.38ms +step:5615/57344 train_time:3741577ms step_avg:666.35ms +grad accum step:1404/14336 +step:5616/57344 train_time:3742785ms step_avg:666.45ms +step:5617/57344 train_time:3742803ms step_avg:666.33ms +step:5618/57344 train_time:3743037ms step_avg:666.26ms +step:5619/57344 train_time:3743546ms step_avg:666.23ms +grad accum step:1405/14336 +step:5620/57344 train_time:3744749ms step_avg:666.33ms +step:5621/57344 train_time:3744766ms step_avg:666.21ms +step:5622/57344 train_time:3745002ms step_avg:666.13ms +step:5623/57344 train_time:3745509ms step_avg:666.11ms +grad accum step:1406/14336 +step:5624/57344 train_time:3746712ms step_avg:666.20ms +step:5625/57344 train_time:3746730ms step_avg:666.09ms +step:5626/57344 train_time:3746968ms step_avg:666.01ms +step:5627/57344 train_time:3747478ms step_avg:665.98ms +grad accum step:1407/14336 +step:5628/57344 train_time:3748676ms step_avg:666.08ms +step:5629/57344 train_time:3748694ms step_avg:665.96ms +step:5630/57344 train_time:3748931ms step_avg:665.88ms +step:5631/57344 train_time:3749439ms step_avg:665.86ms +grad accum step:1408/14336 +step:5632/57344 train_time:3750646ms step_avg:665.95ms +step:5632/57344 val_loss:7.579461 train_time:3750646ms step_avg:665.95ms +step:5633/57344 train_time:3750658ms step_avg:665.84ms +step:5634/57344 train_time:3750870ms step_avg:665.76ms +step:5635/57344 train_time:3751378ms step_avg:665.73ms +grad accum step:1409/14336 +step:5636/57344 train_time:3752573ms step_avg:665.82ms +step:5637/57344 train_time:3752590ms step_avg:665.71ms +step:5638/57344 train_time:3752824ms step_avg:665.63ms +step:5639/57344 train_time:3753330ms step_avg:665.60ms +grad accum step:1410/14336 +step:5640/57344 train_time:3754530ms step_avg:665.70ms +step:5641/57344 train_time:3754547ms step_avg:665.58ms +step:5642/57344 train_time:3754782ms step_avg:665.51ms +step:5643/57344 train_time:3755291ms step_avg:665.48ms +grad accum step:1411/14336 +step:5644/57344 train_time:3756494ms step_avg:665.57ms +step:5645/57344 train_time:3756511ms step_avg:665.46ms +step:5646/57344 train_time:3756745ms step_avg:665.38ms +step:5647/57344 train_time:3757253ms step_avg:665.35ms +grad accum step:1412/14336 +step:5648/57344 train_time:3758457ms step_avg:665.45ms +step:5649/57344 train_time:3758474ms step_avg:665.33ms +step:5650/57344 train_time:3758709ms step_avg:665.26ms +step:5651/57344 train_time:3759213ms step_avg:665.23ms +grad accum step:1413/14336 +step:5652/57344 train_time:3760418ms step_avg:665.33ms +step:5653/57344 train_time:3760436ms step_avg:665.21ms +step:5654/57344 train_time:3760671ms step_avg:665.13ms +step:5655/57344 train_time:3761180ms step_avg:665.11ms +grad accum step:1414/14336 +step:5656/57344 train_time:3762376ms step_avg:665.20ms +step:5657/57344 train_time:3762393ms step_avg:665.09ms +step:5658/57344 train_time:3762628ms step_avg:665.01ms +step:5659/57344 train_time:3763133ms step_avg:664.98ms +grad accum step:1415/14336 +step:5660/57344 train_time:3764333ms step_avg:665.08ms +step:5661/57344 train_time:3764350ms step_avg:664.96ms +step:5662/57344 train_time:3764583ms step_avg:664.89ms +step:5663/57344 train_time:3765086ms step_avg:664.86ms +grad accum step:1416/14336 +step:5664/57344 train_time:3766285ms step_avg:664.95ms +step:5665/57344 train_time:3766302ms step_avg:664.84ms +step:5666/57344 train_time:3766538ms step_avg:664.76ms +step:5667/57344 train_time:3767044ms step_avg:664.73ms +grad accum step:1417/14336 +step:5668/57344 train_time:3768239ms step_avg:664.83ms +step:5669/57344 train_time:3768256ms step_avg:664.71ms +step:5670/57344 train_time:3768491ms step_avg:664.64ms +step:5671/57344 train_time:3769000ms step_avg:664.61ms +grad accum step:1418/14336 +step:5672/57344 train_time:3770196ms step_avg:664.70ms +step:5673/57344 train_time:3770213ms step_avg:664.59ms +step:5674/57344 train_time:3770449ms step_avg:664.51ms +step:5675/57344 train_time:3770955ms step_avg:664.49ms +grad accum step:1419/14336 +step:5676/57344 train_time:3772172ms step_avg:664.58ms +step:5677/57344 train_time:3772184ms step_avg:664.47ms +step:5678/57344 train_time:3772403ms step_avg:664.39ms +step:5679/57344 train_time:3772910ms step_avg:664.36ms +grad accum step:1420/14336 +step:5680/57344 train_time:3774107ms step_avg:664.46ms +step:5681/57344 train_time:3774124ms step_avg:664.34ms +step:5682/57344 train_time:3774362ms step_avg:664.27ms +step:5683/57344 train_time:3774875ms step_avg:664.24ms +grad accum step:1421/14336 +step:5684/57344 train_time:3776076ms step_avg:664.33ms +step:5685/57344 train_time:3776093ms step_avg:664.22ms +step:5686/57344 train_time:3776330ms step_avg:664.15ms +step:5687/57344 train_time:3776836ms step_avg:664.12ms +grad accum step:1422/14336 +step:5688/57344 train_time:3778031ms step_avg:664.21ms +step:5689/57344 train_time:3778048ms step_avg:664.10ms +step:5690/57344 train_time:3778285ms step_avg:664.02ms +step:5691/57344 train_time:3778796ms step_avg:664.00ms +grad accum step:1423/14336 +step:5692/57344 train_time:3779996ms step_avg:664.09ms +step:5693/57344 train_time:3780013ms step_avg:663.98ms +step:5694/57344 train_time:3780248ms step_avg:663.90ms +step:5695/57344 train_time:3780756ms step_avg:663.87ms +grad accum step:1424/14336 +step:5696/57344 train_time:3781955ms step_avg:663.97ms +step:5696/57344 val_loss:7.564924 train_time:3781955ms step_avg:663.97ms +step:5697/57344 train_time:3781967ms step_avg:663.85ms +step:5698/57344 train_time:3782179ms step_avg:663.77ms +step:5699/57344 train_time:3782689ms step_avg:663.75ms +grad accum step:1425/14336 +step:5700/57344 train_time:3783899ms step_avg:663.84ms +step:5701/57344 train_time:3783916ms step_avg:663.73ms +step:5702/57344 train_time:3784152ms step_avg:663.65ms +step:5703/57344 train_time:3784662ms step_avg:663.63ms +grad accum step:1426/14336 +step:5704/57344 train_time:3785865ms step_avg:663.72ms +step:5705/57344 train_time:3785883ms step_avg:663.61ms +step:5706/57344 train_time:3786119ms step_avg:663.53ms +step:5707/57344 train_time:3786625ms step_avg:663.51ms +grad accum step:1427/14336 +step:5708/57344 train_time:3787824ms step_avg:663.60ms +step:5709/57344 train_time:3787842ms step_avg:663.49ms +step:5710/57344 train_time:3788077ms step_avg:663.41ms +step:5711/57344 train_time:3788583ms step_avg:663.38ms +grad accum step:1428/14336 +step:5712/57344 train_time:3789777ms step_avg:663.48ms +step:5713/57344 train_time:3789794ms step_avg:663.36ms +step:5714/57344 train_time:3790029ms step_avg:663.29ms +step:5715/57344 train_time:3790534ms step_avg:663.26ms +grad accum step:1429/14336 +step:5716/57344 train_time:3791737ms step_avg:663.36ms +step:5717/57344 train_time:3791754ms step_avg:663.24ms +step:5718/57344 train_time:3791990ms step_avg:663.17ms +step:5719/57344 train_time:3792495ms step_avg:663.14ms +grad accum step:1430/14336 +step:5720/57344 train_time:3793695ms step_avg:663.23ms +step:5721/57344 train_time:3793712ms step_avg:663.12ms +step:5722/57344 train_time:3793948ms step_avg:663.05ms +step:5723/57344 train_time:3794457ms step_avg:663.02ms +grad accum step:1431/14336 +step:5724/57344 train_time:3795656ms step_avg:663.11ms +step:5725/57344 train_time:3795673ms step_avg:663.00ms +step:5726/57344 train_time:3795908ms step_avg:662.92ms +step:5727/57344 train_time:3796417ms step_avg:662.90ms +grad accum step:1432/14336 +step:5728/57344 train_time:3797617ms step_avg:662.99ms +step:5729/57344 train_time:3797635ms step_avg:662.88ms +step:5730/57344 train_time:3797870ms step_avg:662.80ms +step:5731/57344 train_time:3798376ms step_avg:662.78ms +grad accum step:1433/14336 +step:5732/57344 train_time:3799572ms step_avg:662.87ms +step:5733/57344 train_time:3799589ms step_avg:662.76ms +step:5734/57344 train_time:3799825ms step_avg:662.68ms +step:5735/57344 train_time:3800331ms step_avg:662.66ms +grad accum step:1434/14336 +step:5736/57344 train_time:3801532ms step_avg:662.75ms +step:5737/57344 train_time:3801549ms step_avg:662.64ms +step:5738/57344 train_time:3801785ms step_avg:662.56ms +step:5739/57344 train_time:3802291ms step_avg:662.54ms +grad accum step:1435/14336 +step:5740/57344 train_time:3803487ms step_avg:662.63ms +step:5741/57344 train_time:3803504ms step_avg:662.52ms +step:5742/57344 train_time:3803740ms step_avg:662.44ms +step:5743/57344 train_time:3804248ms step_avg:662.41ms +grad accum step:1436/14336 +step:5744/57344 train_time:3805453ms step_avg:662.51ms +step:5745/57344 train_time:3805471ms step_avg:662.40ms +step:5746/57344 train_time:3805705ms step_avg:662.32ms +step:5747/57344 train_time:3806213ms step_avg:662.30ms +grad accum step:1437/14336 +step:5748/57344 train_time:3807411ms step_avg:662.39ms +step:5749/57344 train_time:3807428ms step_avg:662.28ms +step:5750/57344 train_time:3807663ms step_avg:662.20ms +step:5751/57344 train_time:3808171ms step_avg:662.18ms +grad accum step:1438/14336 +step:5752/57344 train_time:3809389ms step_avg:662.27ms +step:5753/57344 train_time:3809406ms step_avg:662.16ms +step:5754/57344 train_time:3809644ms step_avg:662.09ms +step:5755/57344 train_time:3810157ms step_avg:662.06ms +grad accum step:1439/14336 +step:5756/57344 train_time:3811354ms step_avg:662.15ms +step:5757/57344 train_time:3811371ms step_avg:662.04ms +step:5758/57344 train_time:3811608ms step_avg:661.97ms +step:5759/57344 train_time:3812115ms step_avg:661.94ms +grad accum step:1440/14336 +step:5760/57344 train_time:3814319ms step_avg:662.21ms +step:5760/57344 val_loss:7.561890 train_time:3814320ms step_avg:662.21ms +step:5761/57344 train_time:3814332ms step_avg:662.10ms +step:5762/57344 train_time:3814544ms step_avg:662.02ms +step:5763/57344 train_time:3815051ms step_avg:661.99ms +grad accum step:1441/14336 +step:5764/57344 train_time:3816245ms step_avg:662.08ms +step:5765/57344 train_time:3816262ms step_avg:661.97ms +step:5766/57344 train_time:3816500ms step_avg:661.90ms +step:5767/57344 train_time:3817006ms step_avg:661.87ms +grad accum step:1442/14336 +step:5768/57344 train_time:3818196ms step_avg:661.96ms +step:5769/57344 train_time:3818213ms step_avg:661.85ms +step:5770/57344 train_time:3818447ms step_avg:661.78ms +step:5771/57344 train_time:3818954ms step_avg:661.75ms +grad accum step:1443/14336 +step:5772/57344 train_time:3820151ms step_avg:661.84ms +step:5773/57344 train_time:3820169ms step_avg:661.73ms +step:5774/57344 train_time:3820405ms step_avg:661.66ms +step:5775/57344 train_time:3820914ms step_avg:661.63ms +grad accum step:1444/14336 +step:5776/57344 train_time:3822111ms step_avg:661.72ms +step:5777/57344 train_time:3822128ms step_avg:661.61ms +step:5778/57344 train_time:3822364ms step_avg:661.54ms +step:5779/57344 train_time:3822871ms step_avg:661.51ms +grad accum step:1445/14336 +step:5780/57344 train_time:3824077ms step_avg:661.60ms +step:5781/57344 train_time:3824094ms step_avg:661.49ms +step:5782/57344 train_time:3824328ms step_avg:661.42ms +step:5783/57344 train_time:3824833ms step_avg:661.39ms +grad accum step:1446/14336 +step:5784/57344 train_time:3826028ms step_avg:661.48ms +step:5785/57344 train_time:3826045ms step_avg:661.37ms +step:5786/57344 train_time:3826282ms step_avg:661.30ms +step:5787/57344 train_time:3826790ms step_avg:661.27ms +grad accum step:1447/14336 +step:5788/57344 train_time:3827989ms step_avg:661.37ms +step:5789/57344 train_time:3828006ms step_avg:661.26ms +step:5790/57344 train_time:3828240ms step_avg:661.18ms +step:5791/57344 train_time:3828743ms step_avg:661.15ms +grad accum step:1448/14336 +step:5792/57344 train_time:3829934ms step_avg:661.25ms +step:5793/57344 train_time:3829951ms step_avg:661.13ms +step:5794/57344 train_time:3830187ms step_avg:661.06ms +step:5795/57344 train_time:3830696ms step_avg:661.03ms +grad accum step:1449/14336 +step:5796/57344 train_time:3831890ms step_avg:661.13ms +step:5797/57344 train_time:3831907ms step_avg:661.02ms +step:5798/57344 train_time:3832143ms step_avg:660.94ms +step:5799/57344 train_time:3832651ms step_avg:660.92ms +grad accum step:1450/14336 +step:5800/57344 train_time:3833854ms step_avg:661.01ms +step:5801/57344 train_time:3833871ms step_avg:660.90ms +step:5802/57344 train_time:3834109ms step_avg:660.83ms +step:5803/57344 train_time:3834619ms step_avg:660.80ms +grad accum step:1451/14336 +step:5804/57344 train_time:3835825ms step_avg:660.89ms +step:5805/57344 train_time:3835843ms step_avg:660.78ms +step:5806/57344 train_time:3836079ms step_avg:660.71ms +step:5807/57344 train_time:3836586ms step_avg:660.68ms +grad accum step:1452/14336 +step:5808/57344 train_time:3837779ms step_avg:660.77ms +step:5809/57344 train_time:3837796ms step_avg:660.66ms +step:5810/57344 train_time:3838032ms step_avg:660.59ms +step:5811/57344 train_time:3838540ms step_avg:660.56ms +grad accum step:1453/14336 +step:5812/57344 train_time:3839738ms step_avg:660.66ms +step:5813/57344 train_time:3839754ms step_avg:660.55ms +step:5814/57344 train_time:3839990ms step_avg:660.47ms +step:5815/57344 train_time:3840498ms step_avg:660.45ms +grad accum step:1454/14336 +step:5816/57344 train_time:3841697ms step_avg:660.54ms +step:5817/57344 train_time:3841714ms step_avg:660.43ms +step:5818/57344 train_time:3841950ms step_avg:660.36ms +step:5819/57344 train_time:3842457ms step_avg:660.33ms +grad accum step:1455/14336 +step:5820/57344 train_time:3843655ms step_avg:660.42ms +step:5821/57344 train_time:3843672ms step_avg:660.31ms +step:5822/57344 train_time:3843907ms step_avg:660.24ms +step:5823/57344 train_time:3844417ms step_avg:660.21ms +grad accum step:1456/14336 +step:5824/57344 train_time:3845618ms step_avg:660.31ms +step:5824/57344 val_loss:7.537563 train_time:3845619ms step_avg:660.31ms +step:5825/57344 train_time:3845631ms step_avg:660.19ms +step:5826/57344 train_time:3845845ms step_avg:660.12ms +step:5827/57344 train_time:3846352ms step_avg:660.09ms +grad accum step:1457/14336 +step:5828/57344 train_time:3847555ms step_avg:660.18ms +step:5829/57344 train_time:3847572ms step_avg:660.07ms +step:5830/57344 train_time:3847807ms step_avg:660.00ms +step:5831/57344 train_time:3848314ms step_avg:659.97ms +grad accum step:1458/14336 +step:5832/57344 train_time:3849511ms step_avg:660.07ms +step:5833/57344 train_time:3849529ms step_avg:659.96ms +step:5834/57344 train_time:3849764ms step_avg:659.88ms +step:5835/57344 train_time:3850269ms step_avg:659.86ms +grad accum step:1459/14336 +step:5836/57344 train_time:3851469ms step_avg:659.95ms +step:5837/57344 train_time:3851486ms step_avg:659.84ms +step:5838/57344 train_time:3851722ms step_avg:659.77ms +step:5839/57344 train_time:3852230ms step_avg:659.74ms +grad accum step:1460/14336 +step:5840/57344 train_time:3853434ms step_avg:659.83ms +step:5841/57344 train_time:3853450ms step_avg:659.72ms +step:5842/57344 train_time:3853686ms step_avg:659.65ms +step:5843/57344 train_time:3854192ms step_avg:659.63ms +grad accum step:1461/14336 +step:5844/57344 train_time:3855395ms step_avg:659.72ms +step:5845/57344 train_time:3855413ms step_avg:659.61ms +step:5846/57344 train_time:3855649ms step_avg:659.54ms +step:5847/57344 train_time:3856155ms step_avg:659.51ms +grad accum step:1462/14336 +step:5848/57344 train_time:3857357ms step_avg:659.60ms +step:5849/57344 train_time:3857374ms step_avg:659.49ms +step:5850/57344 train_time:3857610ms step_avg:659.42ms +step:5851/57344 train_time:3858116ms step_avg:659.39ms +grad accum step:1463/14336 +step:5852/57344 train_time:3859316ms step_avg:659.49ms +step:5853/57344 train_time:3859333ms step_avg:659.38ms +step:5854/57344 train_time:3859571ms step_avg:659.30ms +step:5855/57344 train_time:3860080ms step_avg:659.28ms +grad accum step:1464/14336 +step:5856/57344 train_time:3861281ms step_avg:659.37ms +step:5857/57344 train_time:3861298ms step_avg:659.26ms +step:5858/57344 train_time:3861534ms step_avg:659.19ms +step:5859/57344 train_time:3862043ms step_avg:659.16ms +grad accum step:1465/14336 +step:5860/57344 train_time:3863248ms step_avg:659.26ms +step:5861/57344 train_time:3863266ms step_avg:659.15ms +step:5862/57344 train_time:3863500ms step_avg:659.08ms +step:5863/57344 train_time:3864006ms step_avg:659.05ms +grad accum step:1466/14336 +step:5864/57344 train_time:3865207ms step_avg:659.14ms +step:5865/57344 train_time:3865224ms step_avg:659.03ms +step:5866/57344 train_time:3865458ms step_avg:658.96ms +step:5867/57344 train_time:3865962ms step_avg:658.93ms +grad accum step:1467/14336 +step:5868/57344 train_time:3867171ms step_avg:659.03ms +step:5869/57344 train_time:3867188ms step_avg:658.92ms +step:5870/57344 train_time:3867426ms step_avg:658.85ms +step:5871/57344 train_time:3867935ms step_avg:658.82ms +grad accum step:1468/14336 +step:5872/57344 train_time:3869132ms step_avg:658.91ms +step:5873/57344 train_time:3869149ms step_avg:658.80ms +step:5874/57344 train_time:3869386ms step_avg:658.73ms +step:5875/57344 train_time:3869894ms step_avg:658.71ms +grad accum step:1469/14336 +step:5876/57344 train_time:3871094ms step_avg:658.80ms +step:5877/57344 train_time:3871111ms step_avg:658.69ms +step:5878/57344 train_time:3871347ms step_avg:658.62ms +step:5879/57344 train_time:3871854ms step_avg:658.59ms +grad accum step:1470/14336 +step:5880/57344 train_time:3873056ms step_avg:658.68ms +step:5881/57344 train_time:3873073ms step_avg:658.57ms +step:5882/57344 train_time:3873309ms step_avg:658.50ms +step:5883/57344 train_time:3873817ms step_avg:658.48ms +grad accum step:1471/14336 +step:5884/57344 train_time:3875016ms step_avg:658.57ms +step:5885/57344 train_time:3875033ms step_avg:658.46ms +step:5886/57344 train_time:3875268ms step_avg:658.39ms +step:5887/57344 train_time:3875774ms step_avg:658.36ms +grad accum step:1472/14336 +step:5888/57344 train_time:3876971ms step_avg:658.45ms +step:5888/57344 val_loss:7.519125 train_time:3876972ms step_avg:658.45ms +step:5889/57344 train_time:3876983ms step_avg:658.34ms +step:5890/57344 train_time:3877196ms step_avg:658.27ms +step:5891/57344 train_time:3877701ms step_avg:658.24ms +grad accum step:1473/14336 +step:5892/57344 train_time:3878907ms step_avg:658.33ms +step:5893/57344 train_time:3878924ms step_avg:658.23ms +step:5894/57344 train_time:3879160ms step_avg:658.15ms +step:5895/57344 train_time:3879665ms step_avg:658.13ms +grad accum step:1474/14336 +step:5896/57344 train_time:3880865ms step_avg:658.22ms +step:5897/57344 train_time:3880883ms step_avg:658.11ms +step:5898/57344 train_time:3881118ms step_avg:658.04ms +step:5899/57344 train_time:3881627ms step_avg:658.01ms +grad accum step:1475/14336 +step:5900/57344 train_time:3882832ms step_avg:658.11ms +step:5901/57344 train_time:3882849ms step_avg:658.00ms +step:5902/57344 train_time:3883085ms step_avg:657.93ms +step:5903/57344 train_time:3883592ms step_avg:657.90ms +grad accum step:1476/14336 +step:5904/57344 train_time:3884797ms step_avg:657.99ms +step:5905/57344 train_time:3884814ms step_avg:657.89ms +step:5906/57344 train_time:3885050ms step_avg:657.81ms +step:5907/57344 train_time:3885560ms step_avg:657.79ms +grad accum step:1477/14336 +step:5908/57344 train_time:3886763ms step_avg:657.88ms +step:5909/57344 train_time:3886780ms step_avg:657.77ms +step:5910/57344 train_time:3887018ms step_avg:657.70ms +step:5911/57344 train_time:3887530ms step_avg:657.68ms +grad accum step:1478/14336 +step:5912/57344 train_time:3888738ms step_avg:657.77ms +step:5913/57344 train_time:3888755ms step_avg:657.66ms +step:5914/57344 train_time:3888991ms step_avg:657.59ms +step:5915/57344 train_time:3889493ms step_avg:657.56ms +grad accum step:1479/14336 +step:5916/57344 train_time:3890695ms step_avg:657.66ms +step:5917/57344 train_time:3890712ms step_avg:657.55ms +step:5918/57344 train_time:3890947ms step_avg:657.48ms +step:5919/57344 train_time:3891454ms step_avg:657.45ms +grad accum step:1480/14336 +step:5920/57344 train_time:3892655ms step_avg:657.54ms +step:5921/57344 train_time:3892672ms step_avg:657.43ms +step:5922/57344 train_time:3892909ms step_avg:657.36ms +step:5923/57344 train_time:3893417ms step_avg:657.34ms +grad accum step:1481/14336 +step:5924/57344 train_time:3894619ms step_avg:657.43ms +step:5925/57344 train_time:3894636ms step_avg:657.32ms +step:5926/57344 train_time:3894874ms step_avg:657.25ms +step:5927/57344 train_time:3895383ms step_avg:657.23ms +grad accum step:1482/14336 +step:5928/57344 train_time:3896582ms step_avg:657.32ms +step:5929/57344 train_time:3896599ms step_avg:657.21ms +step:5930/57344 train_time:3896835ms step_avg:657.14ms +step:5931/57344 train_time:3897346ms step_avg:657.11ms +grad accum step:1483/14336 +step:5932/57344 train_time:3898553ms step_avg:657.21ms +step:5933/57344 train_time:3898570ms step_avg:657.10ms +step:5934/57344 train_time:3898806ms step_avg:657.03ms +step:5935/57344 train_time:3899315ms step_avg:657.00ms +grad accum step:1484/14336 +step:5936/57344 train_time:3900511ms step_avg:657.09ms +step:5937/57344 train_time:3900528ms step_avg:656.99ms +step:5938/57344 train_time:3900765ms step_avg:656.92ms +step:5939/57344 train_time:3901274ms step_avg:656.89ms +grad accum step:1485/14336 +step:5940/57344 train_time:3902469ms step_avg:656.98ms +step:5941/57344 train_time:3902487ms step_avg:656.87ms +step:5942/57344 train_time:3902723ms step_avg:656.80ms +step:5943/57344 train_time:3903230ms step_avg:656.78ms +grad accum step:1486/14336 +step:5944/57344 train_time:3904432ms step_avg:656.87ms +step:5945/57344 train_time:3904450ms step_avg:656.76ms +step:5946/57344 train_time:3904687ms step_avg:656.69ms +step:5947/57344 train_time:3905198ms step_avg:656.67ms +grad accum step:1487/14336 +step:5948/57344 train_time:3906393ms step_avg:656.76ms +step:5949/57344 train_time:3906410ms step_avg:656.65ms +step:5950/57344 train_time:3906646ms step_avg:656.58ms +step:5951/57344 train_time:3907155ms step_avg:656.55ms +grad accum step:1488/14336 +step:5952/57344 train_time:3908362ms step_avg:656.65ms +step:5952/57344 val_loss:7.499354 train_time:3908362ms step_avg:656.65ms +step:5953/57344 train_time:3908374ms step_avg:656.54ms +step:5954/57344 train_time:3908588ms step_avg:656.46ms +step:5955/57344 train_time:3909094ms step_avg:656.44ms +grad accum step:1489/14336 +step:5956/57344 train_time:3910290ms step_avg:656.53ms +step:5957/57344 train_time:3910307ms step_avg:656.42ms +step:5958/57344 train_time:3910543ms step_avg:656.35ms +step:5959/57344 train_time:3911047ms step_avg:656.33ms +grad accum step:1490/14336 +step:5960/57344 train_time:3912246ms step_avg:656.42ms +step:5961/57344 train_time:3912263ms step_avg:656.31ms +step:5962/57344 train_time:3912499ms step_avg:656.24ms +step:5963/57344 train_time:3913004ms step_avg:656.21ms +grad accum step:1491/14336 +step:5964/57344 train_time:3914199ms step_avg:656.30ms +step:5965/57344 train_time:3914216ms step_avg:656.20ms +step:5966/57344 train_time:3914451ms step_avg:656.13ms +step:5967/57344 train_time:3914960ms step_avg:656.10ms +grad accum step:1492/14336 +step:5968/57344 train_time:3916159ms step_avg:656.19ms +step:5969/57344 train_time:3916176ms step_avg:656.09ms +step:5970/57344 train_time:3916411ms step_avg:656.02ms +step:5971/57344 train_time:3916917ms step_avg:655.99ms +grad accum step:1493/14336 +step:5972/57344 train_time:3918112ms step_avg:656.08ms +step:5973/57344 train_time:3918130ms step_avg:655.97ms +step:5974/57344 train_time:3918367ms step_avg:655.90ms +step:5975/57344 train_time:3918881ms step_avg:655.88ms +grad accum step:1494/14336 +step:5976/57344 train_time:3920075ms step_avg:655.97ms +step:5977/57344 train_time:3920093ms step_avg:655.86ms +step:5978/57344 train_time:3920329ms step_avg:655.79ms +step:5979/57344 train_time:3920837ms step_avg:655.77ms +grad accum step:1495/14336 +step:5980/57344 train_time:3922035ms step_avg:655.86ms +step:5981/57344 train_time:3922052ms step_avg:655.75ms +step:5982/57344 train_time:3922289ms step_avg:655.68ms +step:5983/57344 train_time:3922798ms step_avg:655.66ms +grad accum step:1496/14336 +step:5984/57344 train_time:3924002ms step_avg:655.75ms +step:5985/57344 train_time:3924019ms step_avg:655.64ms +step:5986/57344 train_time:3924253ms step_avg:655.57ms +step:5987/57344 train_time:3924759ms step_avg:655.55ms +grad accum step:1497/14336 +step:5988/57344 train_time:3925958ms step_avg:655.64ms +step:5989/57344 train_time:3925975ms step_avg:655.53ms +step:5990/57344 train_time:3926212ms step_avg:655.46ms +step:5991/57344 train_time:3926724ms step_avg:655.44ms +grad accum step:1498/14336 +step:5992/57344 train_time:3927925ms step_avg:655.53ms +step:5993/57344 train_time:3927942ms step_avg:655.42ms +step:5994/57344 train_time:3928178ms step_avg:655.35ms +step:5995/57344 train_time:3928685ms step_avg:655.33ms +grad accum step:1499/14336 +step:5996/57344 train_time:3929883ms step_avg:655.42ms +step:5997/57344 train_time:3929900ms step_avg:655.31ms +step:5998/57344 train_time:3930136ms step_avg:655.24ms +step:5999/57344 train_time:3930643ms step_avg:655.22ms +grad accum step:1500/14336 +step:6000/57344 train_time:3931841ms step_avg:655.31ms +step:6001/57344 train_time:3931858ms step_avg:655.20ms +step:6002/57344 train_time:3932093ms step_avg:655.13ms +step:6003/57344 train_time:3932601ms step_avg:655.11ms +grad accum step:1501/14336 +step:6004/57344 train_time:3933804ms step_avg:655.20ms +step:6005/57344 train_time:3933821ms step_avg:655.09ms +step:6006/57344 train_time:3934057ms step_avg:655.02ms +step:6007/57344 train_time:3934563ms step_avg:655.00ms +grad accum step:1502/14336 +step:6008/57344 train_time:3935758ms step_avg:655.09ms +step:6009/57344 train_time:3935775ms step_avg:654.98ms +step:6010/57344 train_time:3936011ms step_avg:654.91ms +step:6011/57344 train_time:3936520ms step_avg:654.89ms +grad accum step:1503/14336 +step:6012/57344 train_time:3937715ms step_avg:654.98ms +step:6013/57344 train_time:3937732ms step_avg:654.87ms +step:6014/57344 train_time:3937967ms step_avg:654.80ms +step:6015/57344 train_time:3938474ms step_avg:654.78ms +grad accum step:1504/14336 +step:6016/57344 train_time:3939672ms step_avg:654.87ms +step:6016/57344 val_loss:7.477836 train_time:3939672ms step_avg:654.87ms +step:6017/57344 train_time:3939684ms step_avg:654.76ms +step:6018/57344 train_time:3939898ms step_avg:654.69ms +step:6019/57344 train_time:3940406ms step_avg:654.66ms +grad accum step:1505/14336 +step:6020/57344 train_time:3941610ms step_avg:654.75ms +step:6021/57344 train_time:3941627ms step_avg:654.65ms +step:6022/57344 train_time:3941864ms step_avg:654.58ms +step:6023/57344 train_time:3942370ms step_avg:654.55ms +grad accum step:1506/14336 +step:6024/57344 train_time:3943571ms step_avg:654.64ms +step:6025/57344 train_time:3943589ms step_avg:654.54ms +step:6026/57344 train_time:3943825ms step_avg:654.47ms +step:6027/57344 train_time:3944335ms step_avg:654.44ms +grad accum step:1507/14336 +step:6028/57344 train_time:3945535ms step_avg:654.53ms +step:6029/57344 train_time:3945552ms step_avg:654.43ms +step:6030/57344 train_time:3945787ms step_avg:654.36ms +step:6031/57344 train_time:3946293ms step_avg:654.33ms +grad accum step:1508/14336 +step:6032/57344 train_time:3947495ms step_avg:654.43ms +step:6033/57344 train_time:3947511ms step_avg:654.32ms +step:6034/57344 train_time:3947746ms step_avg:654.25ms +step:6035/57344 train_time:3948255ms step_avg:654.23ms +grad accum step:1509/14336 +step:6036/57344 train_time:3949453ms step_avg:654.32ms +step:6037/57344 train_time:3949470ms step_avg:654.21ms +step:6038/57344 train_time:3949707ms step_avg:654.14ms +step:6039/57344 train_time:3950218ms step_avg:654.12ms +grad accum step:1510/14336 +step:6040/57344 train_time:3951430ms step_avg:654.21ms +step:6041/57344 train_time:3951447ms step_avg:654.10ms +step:6042/57344 train_time:3951684ms step_avg:654.04ms +step:6043/57344 train_time:3952193ms step_avg:654.01ms +grad accum step:1511/14336 +step:6044/57344 train_time:3953394ms step_avg:654.10ms +step:6045/57344 train_time:3953411ms step_avg:654.00ms +step:6046/57344 train_time:3953647ms step_avg:653.93ms +step:6047/57344 train_time:3954156ms step_avg:653.90ms +grad accum step:1512/14336 +step:6048/57344 train_time:3955364ms step_avg:654.00ms +step:6049/57344 train_time:3955380ms step_avg:653.89ms +step:6050/57344 train_time:3955617ms step_avg:653.82ms +step:6051/57344 train_time:3956126ms step_avg:653.80ms +grad accum step:1513/14336 +step:6052/57344 train_time:3957335ms step_avg:653.89ms +step:6053/57344 train_time:3957352ms step_avg:653.78ms +step:6054/57344 train_time:3957591ms step_avg:653.72ms +step:6055/57344 train_time:3958104ms step_avg:653.69ms +grad accum step:1514/14336 +step:6056/57344 train_time:3959297ms step_avg:653.78ms +step:6057/57344 train_time:3959314ms step_avg:653.68ms +step:6058/57344 train_time:3959550ms step_avg:653.61ms +step:6059/57344 train_time:3960053ms step_avg:653.58ms +grad accum step:1515/14336 +step:6060/57344 train_time:3961249ms step_avg:653.67ms +step:6061/57344 train_time:3961266ms step_avg:653.57ms +step:6062/57344 train_time:3961501ms step_avg:653.50ms +step:6063/57344 train_time:3962009ms step_avg:653.47ms +grad accum step:1516/14336 +step:6064/57344 train_time:3963213ms step_avg:653.56ms +step:6065/57344 train_time:3963230ms step_avg:653.46ms +step:6066/57344 train_time:3963464ms step_avg:653.39ms +step:6067/57344 train_time:3963967ms step_avg:653.37ms +grad accum step:1517/14336 +step:6068/57344 train_time:3965172ms step_avg:653.46ms +step:6069/57344 train_time:3965190ms step_avg:653.35ms +step:6070/57344 train_time:3965426ms step_avg:653.28ms +step:6071/57344 train_time:3965932ms step_avg:653.26ms +grad accum step:1518/14336 +step:6072/57344 train_time:3967132ms step_avg:653.35ms +step:6073/57344 train_time:3967149ms step_avg:653.24ms +step:6074/57344 train_time:3967384ms step_avg:653.17ms +step:6075/57344 train_time:3967891ms step_avg:653.15ms +grad accum step:1519/14336 +step:6076/57344 train_time:3969095ms step_avg:653.24ms +step:6077/57344 train_time:3969112ms step_avg:653.14ms +step:6078/57344 train_time:3969350ms step_avg:653.07ms +step:6079/57344 train_time:3969858ms step_avg:653.04ms +grad accum step:1520/14336 +step:6080/57344 train_time:3971061ms step_avg:653.13ms +step:6080/57344 val_loss:7.459615 train_time:3971061ms step_avg:653.14ms +step:6081/57344 train_time:3971094ms step_avg:653.03ms +step:6082/57344 train_time:3971285ms step_avg:652.96ms +step:6083/57344 train_time:3971795ms step_avg:652.93ms +grad accum step:1521/14336 +step:6084/57344 train_time:3972991ms step_avg:653.02ms +step:6085/57344 train_time:3973008ms step_avg:652.92ms +step:6086/57344 train_time:3973242ms step_avg:652.85ms +step:6087/57344 train_time:3973750ms step_avg:652.83ms +grad accum step:1522/14336 +step:6088/57344 train_time:3974945ms step_avg:652.91ms +step:6089/57344 train_time:3974962ms step_avg:652.81ms +step:6090/57344 train_time:3975199ms step_avg:652.74ms +step:6091/57344 train_time:3975711ms step_avg:652.72ms +grad accum step:1523/14336 +step:6092/57344 train_time:3976912ms step_avg:652.81ms +step:6093/57344 train_time:3976930ms step_avg:652.70ms +step:6094/57344 train_time:3977165ms step_avg:652.64ms +step:6095/57344 train_time:3977672ms step_avg:652.61ms +grad accum step:1524/14336 +step:6096/57344 train_time:3978867ms step_avg:652.70ms +step:6097/57344 train_time:3978884ms step_avg:652.60ms +step:6098/57344 train_time:3979119ms step_avg:652.53ms +step:6099/57344 train_time:3979626ms step_avg:652.50ms +grad accum step:1525/14336 +step:6100/57344 train_time:3980822ms step_avg:652.59ms +step:6101/57344 train_time:3980839ms step_avg:652.49ms +step:6102/57344 train_time:3981074ms step_avg:652.42ms +step:6103/57344 train_time:3981583ms step_avg:652.40ms +grad accum step:1526/14336 +step:6104/57344 train_time:3982785ms step_avg:652.49ms +step:6105/57344 train_time:3982802ms step_avg:652.38ms +step:6106/57344 train_time:3983040ms step_avg:652.32ms +step:6107/57344 train_time:3983551ms step_avg:652.29ms +grad accum step:1527/14336 +step:6108/57344 train_time:3984759ms step_avg:652.38ms +step:6109/57344 train_time:3984776ms step_avg:652.28ms +step:6110/57344 train_time:3985012ms step_avg:652.21ms +step:6111/57344 train_time:3985522ms step_avg:652.19ms +grad accum step:1528/14336 +step:6112/57344 train_time:3986720ms step_avg:652.28ms +step:6113/57344 train_time:3986737ms step_avg:652.17ms +step:6114/57344 train_time:3986973ms step_avg:652.11ms +step:6115/57344 train_time:3987481ms step_avg:652.08ms +grad accum step:1529/14336 +step:6116/57344 train_time:3988681ms step_avg:652.17ms +step:6117/57344 train_time:3988698ms step_avg:652.07ms +step:6118/57344 train_time:3988936ms step_avg:652.00ms +step:6119/57344 train_time:3989445ms step_avg:651.98ms +grad accum step:1530/14336 +step:6120/57344 train_time:3990643ms step_avg:652.07ms +step:6121/57344 train_time:3990660ms step_avg:651.96ms +step:6122/57344 train_time:3990896ms step_avg:651.89ms +step:6123/57344 train_time:3991402ms step_avg:651.87ms +grad accum step:1531/14336 +step:6124/57344 train_time:3992610ms step_avg:651.96ms +step:6125/57344 train_time:3992628ms step_avg:651.86ms +step:6126/57344 train_time:3992862ms step_avg:651.79ms +step:6127/57344 train_time:3993369ms step_avg:651.77ms +grad accum step:1532/14336 +step:6128/57344 train_time:3994568ms step_avg:651.86ms +step:6129/57344 train_time:3994585ms step_avg:651.75ms +step:6130/57344 train_time:3994820ms step_avg:651.68ms +step:6131/57344 train_time:3995329ms step_avg:651.66ms +grad accum step:1533/14336 +step:6132/57344 train_time:3996542ms step_avg:651.75ms +step:6133/57344 train_time:3996559ms step_avg:651.65ms +step:6134/57344 train_time:3996795ms step_avg:651.58ms +step:6135/57344 train_time:3997302ms step_avg:651.56ms +grad accum step:1534/14336 +step:6136/57344 train_time:3998501ms step_avg:651.65ms +step:6137/57344 train_time:3998518ms step_avg:651.54ms +step:6138/57344 train_time:3998755ms step_avg:651.48ms +step:6139/57344 train_time:3999263ms step_avg:651.45ms +grad accum step:1535/14336 +step:6140/57344 train_time:4000471ms step_avg:651.54ms +step:6141/57344 train_time:4000488ms step_avg:651.44ms +step:6142/57344 train_time:4000723ms step_avg:651.37ms +step:6143/57344 train_time:4001228ms step_avg:651.35ms +grad accum step:1536/14336 +step:6144/57344 train_time:4002430ms step_avg:651.44ms +step:6144/57344 val_loss:7.435397 train_time:4002430ms step_avg:651.44ms +step:6145/57344 train_time:4002442ms step_avg:651.33ms +step:6146/57344 train_time:4002654ms step_avg:651.26ms +step:6147/57344 train_time:4003162ms step_avg:651.24ms +grad accum step:1537/14336 +step:6148/57344 train_time:4004366ms step_avg:651.33ms +step:6149/57344 train_time:4004383ms step_avg:651.23ms +step:6150/57344 train_time:4004619ms step_avg:651.16ms +step:6151/57344 train_time:4005126ms step_avg:651.13ms +grad accum step:1538/14336 +step:6152/57344 train_time:4006321ms step_avg:651.22ms +step:6153/57344 train_time:4006339ms step_avg:651.12ms +step:6154/57344 train_time:4006573ms step_avg:651.05ms +step:6155/57344 train_time:4007078ms step_avg:651.03ms +grad accum step:1539/14336 +step:6156/57344 train_time:4008280ms step_avg:651.12ms +step:6157/57344 train_time:4008297ms step_avg:651.01ms +step:6158/57344 train_time:4008533ms step_avg:650.95ms +step:6159/57344 train_time:4009044ms step_avg:650.92ms +grad accum step:1540/14336 +step:6160/57344 train_time:4010249ms step_avg:651.01ms +step:6161/57344 train_time:4010267ms step_avg:650.91ms +step:6162/57344 train_time:4010502ms step_avg:650.84ms +step:6163/57344 train_time:4011010ms step_avg:650.82ms +grad accum step:1541/14336 +step:6164/57344 train_time:4012215ms step_avg:650.91ms +step:6165/57344 train_time:4012232ms step_avg:650.81ms +step:6166/57344 train_time:4012469ms step_avg:650.74ms +step:6167/57344 train_time:4012980ms step_avg:650.72ms +grad accum step:1542/14336 +step:6168/57344 train_time:4014172ms step_avg:650.81ms +step:6169/57344 train_time:4014189ms step_avg:650.70ms +step:6170/57344 train_time:4014425ms step_avg:650.64ms +step:6171/57344 train_time:4014930ms step_avg:650.61ms +grad accum step:1543/14336 +step:6172/57344 train_time:4016132ms step_avg:650.70ms +step:6173/57344 train_time:4016150ms step_avg:650.60ms +step:6174/57344 train_time:4016384ms step_avg:650.53ms +step:6175/57344 train_time:4016888ms step_avg:650.51ms +grad accum step:1544/14336 +step:6176/57344 train_time:4018090ms step_avg:650.60ms +step:6177/57344 train_time:4018107ms step_avg:650.49ms +step:6178/57344 train_time:4018341ms step_avg:650.43ms +step:6179/57344 train_time:4018849ms step_avg:650.40ms +grad accum step:1545/14336 +step:6180/57344 train_time:4020050ms step_avg:650.49ms +step:6181/57344 train_time:4020067ms step_avg:650.39ms +step:6182/57344 train_time:4020304ms step_avg:650.32ms +step:6183/57344 train_time:4020815ms step_avg:650.30ms +grad accum step:1546/14336 +step:6184/57344 train_time:4022013ms step_avg:650.39ms +step:6185/57344 train_time:4022031ms step_avg:650.29ms +step:6186/57344 train_time:4022267ms step_avg:650.22ms +step:6187/57344 train_time:4022778ms step_avg:650.20ms +grad accum step:1547/14336 +step:6188/57344 train_time:4023974ms step_avg:650.29ms +step:6189/57344 train_time:4023991ms step_avg:650.18ms +step:6190/57344 train_time:4024227ms step_avg:650.12ms +step:6191/57344 train_time:4024733ms step_avg:650.09ms +grad accum step:1548/14336 +step:6192/57344 train_time:4025940ms step_avg:650.18ms +step:6193/57344 train_time:4025957ms step_avg:650.08ms +step:6194/57344 train_time:4026194ms step_avg:650.02ms +step:6195/57344 train_time:4026707ms step_avg:649.99ms +grad accum step:1549/14336 +step:6196/57344 train_time:4027908ms step_avg:650.08ms +step:6197/57344 train_time:4027926ms step_avg:649.98ms +step:6198/57344 train_time:4028161ms step_avg:649.91ms +step:6199/57344 train_time:4028669ms step_avg:649.89ms +grad accum step:1550/14336 +step:6200/57344 train_time:4029876ms step_avg:649.98ms +step:6201/57344 train_time:4029893ms step_avg:649.88ms +step:6202/57344 train_time:4030132ms step_avg:649.81ms +step:6203/57344 train_time:4030651ms step_avg:649.79ms +grad accum step:1551/14336 +step:6204/57344 train_time:4031878ms step_avg:649.88ms +step:6205/57344 train_time:4031895ms step_avg:649.78ms +step:6206/57344 train_time:4032132ms step_avg:649.72ms +step:6207/57344 train_time:4032652ms step_avg:649.69ms +grad accum step:1552/14336 +step:6208/57344 train_time:4033894ms step_avg:649.79ms +step:6208/57344 val_loss:7.421387 train_time:4033895ms step_avg:649.79ms +step:6209/57344 train_time:4033906ms step_avg:649.69ms +step:6210/57344 train_time:4034123ms step_avg:649.62ms +step:6211/57344 train_time:4034642ms step_avg:649.60ms +grad accum step:1553/14336 +step:6212/57344 train_time:4035866ms step_avg:649.69ms +step:6213/57344 train_time:4035883ms step_avg:649.59ms +step:6214/57344 train_time:4036119ms step_avg:649.52ms +step:6215/57344 train_time:4036635ms step_avg:649.50ms +grad accum step:1554/14336 +step:6216/57344 train_time:4037866ms step_avg:649.59ms +step:6217/57344 train_time:4037883ms step_avg:649.49ms +step:6218/57344 train_time:4038121ms step_avg:649.42ms +step:6219/57344 train_time:4038640ms step_avg:649.40ms +grad accum step:1555/14336 +step:6220/57344 train_time:4039877ms step_avg:649.50ms +step:6221/57344 train_time:4039894ms step_avg:649.40ms +step:6222/57344 train_time:4040133ms step_avg:649.33ms +step:6223/57344 train_time:4040650ms step_avg:649.31ms +grad accum step:1556/14336 +step:6224/57344 train_time:4041886ms step_avg:649.40ms +step:6225/57344 train_time:4041903ms step_avg:649.30ms +step:6226/57344 train_time:4042142ms step_avg:649.24ms +step:6227/57344 train_time:4042661ms step_avg:649.21ms +grad accum step:1557/14336 +step:6228/57344 train_time:4043900ms step_avg:649.31ms +step:6229/57344 train_time:4043917ms step_avg:649.21ms +step:6230/57344 train_time:4044154ms step_avg:649.14ms +step:6231/57344 train_time:4044672ms step_avg:649.12ms +grad accum step:1558/14336 +step:6232/57344 train_time:4045909ms step_avg:649.22ms +step:6233/57344 train_time:4045926ms step_avg:649.11ms +step:6234/57344 train_time:4046164ms step_avg:649.05ms +step:6235/57344 train_time:4046682ms step_avg:649.03ms +grad accum step:1559/14336 +step:6236/57344 train_time:4047901ms step_avg:649.12ms +step:6237/57344 train_time:4047918ms step_avg:649.02ms +step:6238/57344 train_time:4048157ms step_avg:648.95ms +step:6239/57344 train_time:4048680ms step_avg:648.93ms +grad accum step:1560/14336 +step:6240/57344 train_time:4049938ms step_avg:649.03ms +step:6241/57344 train_time:4049955ms step_avg:648.93ms +step:6242/57344 train_time:4050195ms step_avg:648.86ms +step:6243/57344 train_time:4050719ms step_avg:648.84ms +grad accum step:1561/14336 +step:6244/57344 train_time:4051960ms step_avg:648.94ms +step:6245/57344 train_time:4051977ms step_avg:648.84ms +step:6246/57344 train_time:4052214ms step_avg:648.77ms +step:6247/57344 train_time:4052734ms step_avg:648.75ms +grad accum step:1562/14336 +step:6248/57344 train_time:4053972ms step_avg:648.84ms +step:6249/57344 train_time:4053989ms step_avg:648.74ms +step:6250/57344 train_time:4054226ms step_avg:648.68ms +step:6251/57344 train_time:4054750ms step_avg:648.66ms +grad accum step:1563/14336 +step:6252/57344 train_time:4055975ms step_avg:648.75ms +step:6253/57344 train_time:4055992ms step_avg:648.65ms +step:6254/57344 train_time:4056231ms step_avg:648.58ms +step:6255/57344 train_time:4056757ms step_avg:648.56ms +grad accum step:1564/14336 +step:6256/57344 train_time:4058004ms step_avg:648.66ms +step:6257/57344 train_time:4058021ms step_avg:648.56ms +step:6258/57344 train_time:4058260ms step_avg:648.49ms +step:6259/57344 train_time:4058782ms step_avg:648.47ms +grad accum step:1565/14336 +step:6260/57344 train_time:4060007ms step_avg:648.56ms +step:6261/57344 train_time:4060025ms step_avg:648.46ms +step:6262/57344 train_time:4060262ms step_avg:648.40ms +step:6263/57344 train_time:4060781ms step_avg:648.38ms +grad accum step:1566/14336 +step:6264/57344 train_time:4062009ms step_avg:648.47ms +step:6265/57344 train_time:4062026ms step_avg:648.37ms +step:6266/57344 train_time:4062265ms step_avg:648.30ms +step:6267/57344 train_time:4062789ms step_avg:648.28ms +grad accum step:1567/14336 +step:6268/57344 train_time:4064015ms step_avg:648.38ms +step:6269/57344 train_time:4064031ms step_avg:648.27ms +step:6270/57344 train_time:4064272ms step_avg:648.21ms +step:6271/57344 train_time:4064795ms step_avg:648.19ms +grad accum step:1568/14336 +step:6272/57344 train_time:4066062ms step_avg:648.29ms +step:6272/57344 val_loss:7.422082 train_time:4066063ms step_avg:648.29ms +step:6273/57344 train_time:4066075ms step_avg:648.19ms +step:6274/57344 train_time:4066291ms step_avg:648.12ms +step:6275/57344 train_time:4066816ms step_avg:648.10ms +grad accum step:1569/14336 +step:6276/57344 train_time:4068045ms step_avg:648.19ms +step:6277/57344 train_time:4068062ms step_avg:648.09ms +step:6278/57344 train_time:4068299ms step_avg:648.02ms +step:6279/57344 train_time:4068821ms step_avg:648.00ms +grad accum step:1570/14336 +step:6280/57344 train_time:4070059ms step_avg:648.10ms +step:6281/57344 train_time:4070076ms step_avg:648.00ms +step:6282/57344 train_time:4070317ms step_avg:647.93ms +step:6283/57344 train_time:4070847ms step_avg:647.91ms +grad accum step:1571/14336 +step:6284/57344 train_time:4072076ms step_avg:648.01ms +step:6285/57344 train_time:4072093ms step_avg:647.91ms +step:6286/57344 train_time:4072333ms step_avg:647.84ms +step:6287/57344 train_time:4072855ms step_avg:647.82ms +grad accum step:1572/14336 +step:6288/57344 train_time:4074103ms step_avg:647.92ms +step:6289/57344 train_time:4074120ms step_avg:647.82ms +step:6290/57344 train_time:4074361ms step_avg:647.75ms +step:6291/57344 train_time:4074884ms step_avg:647.73ms +grad accum step:1573/14336 +step:6292/57344 train_time:4076110ms step_avg:647.82ms +step:6293/57344 train_time:4076128ms step_avg:647.72ms +step:6294/57344 train_time:4076366ms step_avg:647.66ms +step:6295/57344 train_time:4076891ms step_avg:647.64ms +grad accum step:1574/14336 +step:6296/57344 train_time:4078136ms step_avg:647.73ms +step:6297/57344 train_time:4078154ms step_avg:647.63ms +step:6298/57344 train_time:4078397ms step_avg:647.57ms +step:6299/57344 train_time:4078923ms step_avg:647.55ms +grad accum step:1575/14336 +step:6300/57344 train_time:4080166ms step_avg:647.65ms +step:6301/57344 train_time:4080183ms step_avg:647.55ms +step:6302/57344 train_time:4080422ms step_avg:647.48ms +step:6303/57344 train_time:4080942ms step_avg:647.46ms +grad accum step:1576/14336 +step:6304/57344 train_time:4082177ms step_avg:647.55ms +step:6305/57344 train_time:4082194ms step_avg:647.45ms +step:6306/57344 train_time:4082435ms step_avg:647.39ms +step:6307/57344 train_time:4082953ms step_avg:647.37ms +grad accum step:1577/14336 +step:6308/57344 train_time:4084186ms step_avg:647.46ms +step:6309/57344 train_time:4084203ms step_avg:647.36ms +step:6310/57344 train_time:4084442ms step_avg:647.30ms +step:6311/57344 train_time:4084962ms step_avg:647.28ms +grad accum step:1578/14336 +step:6312/57344 train_time:4086195ms step_avg:647.37ms +step:6313/57344 train_time:4086212ms step_avg:647.27ms +step:6314/57344 train_time:4086451ms step_avg:647.20ms +step:6315/57344 train_time:4086974ms step_avg:647.19ms +grad accum step:1579/14336 +step:6316/57344 train_time:4088199ms step_avg:647.28ms +step:6317/57344 train_time:4088216ms step_avg:647.18ms +step:6318/57344 train_time:4088453ms step_avg:647.11ms +step:6319/57344 train_time:4088972ms step_avg:647.09ms +grad accum step:1580/14336 +step:6320/57344 train_time:4090203ms step_avg:647.18ms +step:6321/57344 train_time:4090221ms step_avg:647.08ms +step:6322/57344 train_time:4090460ms step_avg:647.02ms +step:6323/57344 train_time:4090980ms step_avg:647.00ms +grad accum step:1581/14336 +step:6324/57344 train_time:4092206ms step_avg:647.09ms +step:6325/57344 train_time:4092223ms step_avg:646.99ms +step:6326/57344 train_time:4092461ms step_avg:646.93ms +step:6327/57344 train_time:4092981ms step_avg:646.91ms +grad accum step:1582/14336 +step:6328/57344 train_time:4094207ms step_avg:647.00ms +step:6329/57344 train_time:4094224ms step_avg:646.90ms +step:6330/57344 train_time:4094462ms step_avg:646.83ms +step:6331/57344 train_time:4094987ms step_avg:646.82ms +grad accum step:1583/14336 +step:6332/57344 train_time:4096223ms step_avg:646.91ms +step:6333/57344 train_time:4096240ms step_avg:646.81ms +step:6334/57344 train_time:4096480ms step_avg:646.74ms +step:6335/57344 train_time:4097005ms step_avg:646.73ms +grad accum step:1584/14336 +step:6336/57344 train_time:4098254ms step_avg:646.82ms +step:6336/57344 val_loss:7.390254 train_time:4098255ms step_avg:646.82ms +step:6337/57344 train_time:4098266ms step_avg:646.72ms +step:6338/57344 train_time:4098484ms step_avg:646.65ms +step:6339/57344 train_time:4099008ms step_avg:646.63ms +grad accum step:1585/14336 +step:6340/57344 train_time:4100243ms step_avg:646.73ms +step:6341/57344 train_time:4100260ms step_avg:646.63ms +step:6342/57344 train_time:4100498ms step_avg:646.56ms +step:6343/57344 train_time:4101015ms step_avg:646.54ms +grad accum step:1586/14336 +step:6344/57344 train_time:4102241ms step_avg:646.63ms +step:6345/57344 train_time:4102259ms step_avg:646.53ms +step:6346/57344 train_time:4102497ms step_avg:646.47ms +step:6347/57344 train_time:4103021ms step_avg:646.45ms +grad accum step:1587/14336 +step:6348/57344 train_time:4104251ms step_avg:646.54ms +step:6349/57344 train_time:4104268ms step_avg:646.44ms +step:6350/57344 train_time:4104505ms step_avg:646.38ms +step:6351/57344 train_time:4105031ms step_avg:646.36ms +grad accum step:1588/14336 +step:6352/57344 train_time:4106256ms step_avg:646.45ms +step:6353/57344 train_time:4106273ms step_avg:646.35ms +step:6354/57344 train_time:4106514ms step_avg:646.29ms +step:6355/57344 train_time:4107038ms step_avg:646.27ms +grad accum step:1589/14336 +step:6356/57344 train_time:4108266ms step_avg:646.36ms +step:6357/57344 train_time:4108283ms step_avg:646.26ms +step:6358/57344 train_time:4108521ms step_avg:646.20ms +step:6359/57344 train_time:4109045ms step_avg:646.18ms +grad accum step:1590/14336 +step:6360/57344 train_time:4110277ms step_avg:646.27ms +step:6361/57344 train_time:4110294ms step_avg:646.17ms +step:6362/57344 train_time:4110535ms step_avg:646.11ms +step:6363/57344 train_time:4111056ms step_avg:646.09ms +grad accum step:1591/14336 +step:6364/57344 train_time:4112282ms step_avg:646.18ms +step:6365/57344 train_time:4112300ms step_avg:646.08ms +step:6366/57344 train_time:4112537ms step_avg:646.02ms +step:6367/57344 train_time:4113060ms step_avg:646.00ms +grad accum step:1592/14336 +step:6368/57344 train_time:4118320ms step_avg:646.72ms +step:6369/57344 train_time:4118332ms step_avg:646.62ms +step:6370/57344 train_time:4118602ms step_avg:646.56ms +step:6371/57344 train_time:4119127ms step_avg:646.54ms +grad accum step:1593/14336 +step:6372/57344 train_time:4120354ms step_avg:646.63ms +step:6373/57344 train_time:4120371ms step_avg:646.54ms +step:6374/57344 train_time:4120608ms step_avg:646.47ms +step:6375/57344 train_time:4121127ms step_avg:646.45ms +grad accum step:1594/14336 +step:6376/57344 train_time:4122356ms step_avg:646.54ms +step:6377/57344 train_time:4122373ms step_avg:646.44ms +step:6378/57344 train_time:4122611ms step_avg:646.38ms +step:6379/57344 train_time:4123126ms step_avg:646.36ms +grad accum step:1595/14336 +step:6380/57344 train_time:4124362ms step_avg:646.45ms +step:6381/57344 train_time:4124379ms step_avg:646.35ms +step:6382/57344 train_time:4124617ms step_avg:646.29ms +step:6383/57344 train_time:4125142ms step_avg:646.27ms +grad accum step:1596/14336 +step:6384/57344 train_time:4126378ms step_avg:646.36ms +step:6385/57344 train_time:4126395ms step_avg:646.26ms +step:6386/57344 train_time:4126634ms step_avg:646.20ms +step:6387/57344 train_time:4127159ms step_avg:646.18ms +grad accum step:1597/14336 +step:6388/57344 train_time:4128388ms step_avg:646.27ms +step:6389/57344 train_time:4128405ms step_avg:646.17ms +step:6390/57344 train_time:4128642ms step_avg:646.11ms +step:6391/57344 train_time:4129162ms step_avg:646.09ms +grad accum step:1598/14336 +step:6392/57344 train_time:4130403ms step_avg:646.18ms +step:6393/57344 train_time:4130420ms step_avg:646.08ms +step:6394/57344 train_time:4130659ms step_avg:646.02ms +step:6395/57344 train_time:4131186ms step_avg:646.00ms +grad accum step:1599/14336 +step:6396/57344 train_time:4132414ms step_avg:646.09ms +step:6397/57344 train_time:4132431ms step_avg:646.00ms +step:6398/57344 train_time:4132669ms step_avg:645.93ms +step:6399/57344 train_time:4133187ms step_avg:645.91ms +grad accum step:1600/14336 +step:6400/57344 train_time:4134418ms step_avg:646.00ms +step:6400/57344 val_loss:7.379295 train_time:4134419ms step_avg:646.00ms +step:6401/57344 train_time:4134430ms step_avg:645.90ms +step:6402/57344 train_time:4134644ms step_avg:645.84ms +step:6403/57344 train_time:4135158ms step_avg:645.82ms +grad accum step:1601/14336 +step:6404/57344 train_time:4136384ms step_avg:645.91ms +step:6405/57344 train_time:4136401ms step_avg:645.81ms +step:6406/57344 train_time:4136639ms step_avg:645.74ms +step:6407/57344 train_time:4137160ms step_avg:645.73ms +grad accum step:1602/14336 +step:6408/57344 train_time:4138398ms step_avg:645.82ms +step:6409/57344 train_time:4138415ms step_avg:645.72ms +step:6410/57344 train_time:4138654ms step_avg:645.66ms +step:6411/57344 train_time:4139177ms step_avg:645.64ms +grad accum step:1603/14336 +step:6412/57344 train_time:4140425ms step_avg:645.73ms +step:6413/57344 train_time:4140442ms step_avg:645.63ms +step:6414/57344 train_time:4140681ms step_avg:645.57ms +step:6415/57344 train_time:4141204ms step_avg:645.55ms +grad accum step:1604/14336 +step:6416/57344 train_time:4142435ms step_avg:645.64ms +step:6417/57344 train_time:4142452ms step_avg:645.54ms +step:6418/57344 train_time:4142689ms step_avg:645.48ms +step:6419/57344 train_time:4143206ms step_avg:645.46ms +grad accum step:1605/14336 +step:6420/57344 train_time:4144435ms step_avg:645.55ms +step:6421/57344 train_time:4144452ms step_avg:645.45ms +step:6422/57344 train_time:4144691ms step_avg:645.39ms +step:6423/57344 train_time:4145209ms step_avg:645.37ms +grad accum step:1606/14336 +step:6424/57344 train_time:4146433ms step_avg:645.46ms +step:6425/57344 train_time:4146450ms step_avg:645.36ms +step:6426/57344 train_time:4146691ms step_avg:645.30ms +step:6427/57344 train_time:4147213ms step_avg:645.28ms +grad accum step:1607/14336 +step:6428/57344 train_time:4148437ms step_avg:645.37ms +step:6429/57344 train_time:4148454ms step_avg:645.27ms +step:6430/57344 train_time:4148694ms step_avg:645.21ms +step:6431/57344 train_time:4149219ms step_avg:645.19ms +grad accum step:1608/14336 +step:6432/57344 train_time:4150445ms step_avg:645.28ms +step:6433/57344 train_time:4150462ms step_avg:645.18ms +step:6434/57344 train_time:4150701ms step_avg:645.12ms +step:6435/57344 train_time:4151223ms step_avg:645.10ms +grad accum step:1609/14336 +step:6436/57344 train_time:4152446ms step_avg:645.19ms +step:6437/57344 train_time:4152463ms step_avg:645.09ms +step:6438/57344 train_time:4152702ms step_avg:645.03ms +step:6439/57344 train_time:4153225ms step_avg:645.01ms +grad accum step:1610/14336 +step:6440/57344 train_time:4154449ms step_avg:645.10ms +step:6441/57344 train_time:4154466ms step_avg:645.00ms +step:6442/57344 train_time:4154706ms step_avg:644.94ms +step:6443/57344 train_time:4155228ms step_avg:644.92ms +grad accum step:1611/14336 +step:6444/57344 train_time:4156454ms step_avg:645.01ms +step:6445/57344 train_time:4156471ms step_avg:644.91ms +step:6446/57344 train_time:4156709ms step_avg:644.85ms +step:6447/57344 train_time:4157230ms step_avg:644.83ms +grad accum step:1612/14336 +step:6448/57344 train_time:4158453ms step_avg:644.92ms +step:6449/57344 train_time:4158471ms step_avg:644.82ms +step:6450/57344 train_time:4158708ms step_avg:644.76ms +step:6451/57344 train_time:4159228ms step_avg:644.74ms +grad accum step:1613/14336 +step:6452/57344 train_time:4160460ms step_avg:644.83ms +step:6453/57344 train_time:4160477ms step_avg:644.74ms +step:6454/57344 train_time:4160713ms step_avg:644.67ms +step:6455/57344 train_time:4161230ms step_avg:644.65ms +grad accum step:1614/14336 +step:6456/57344 train_time:4162462ms step_avg:644.74ms +step:6457/57344 train_time:4162479ms step_avg:644.65ms +step:6458/57344 train_time:4162716ms step_avg:644.58ms +step:6459/57344 train_time:4163234ms step_avg:644.56ms +grad accum step:1615/14336 +step:6460/57344 train_time:4164464ms step_avg:644.65ms +step:6461/57344 train_time:4164481ms step_avg:644.56ms +step:6462/57344 train_time:4164718ms step_avg:644.49ms +step:6463/57344 train_time:4165240ms step_avg:644.47ms +grad accum step:1616/14336 +step:6464/57344 train_time:4166470ms step_avg:644.57ms +step:6464/57344 val_loss:7.352916 train_time:4166470ms step_avg:644.57ms +step:6465/57344 train_time:4166482ms step_avg:644.47ms +step:6466/57344 train_time:4166736ms step_avg:644.41ms +step:6467/57344 train_time:4167246ms step_avg:644.39ms +grad accum step:1617/14336 +step:6468/57344 train_time:4168482ms step_avg:644.48ms +step:6469/57344 train_time:4168499ms step_avg:644.38ms +step:6470/57344 train_time:4168735ms step_avg:644.32ms +step:6471/57344 train_time:4169256ms step_avg:644.30ms +grad accum step:1618/14336 +step:6472/57344 train_time:4170487ms step_avg:644.39ms +step:6473/57344 train_time:4170504ms step_avg:644.29ms +step:6474/57344 train_time:4170737ms step_avg:644.23ms +step:6475/57344 train_time:4171253ms step_avg:644.21ms +grad accum step:1619/14336 +step:6476/57344 train_time:4172479ms step_avg:644.30ms +step:6477/57344 train_time:4172496ms step_avg:644.20ms +step:6478/57344 train_time:4172732ms step_avg:644.14ms +step:6479/57344 train_time:4173245ms step_avg:644.12ms +grad accum step:1620/14336 +step:6480/57344 train_time:4174468ms step_avg:644.21ms +step:6481/57344 train_time:4174485ms step_avg:644.11ms +step:6482/57344 train_time:4174723ms step_avg:644.05ms +step:6483/57344 train_time:4175243ms step_avg:644.03ms +grad accum step:1621/14336 +step:6484/57344 train_time:4176474ms step_avg:644.12ms +step:6485/57344 train_time:4176491ms step_avg:644.02ms +step:6486/57344 train_time:4176727ms step_avg:643.96ms +step:6487/57344 train_time:4177250ms step_avg:643.94ms +grad accum step:1622/14336 +step:6488/57344 train_time:4178477ms step_avg:644.03ms +step:6489/57344 train_time:4178495ms step_avg:643.94ms +step:6490/57344 train_time:4178730ms step_avg:643.87ms +step:6491/57344 train_time:4179246ms step_avg:643.85ms +grad accum step:1623/14336 +step:6492/57344 train_time:4204677ms step_avg:647.67ms +step:6493/57344 train_time:4206453ms step_avg:647.84ms +step:6494/57344 train_time:4206714ms step_avg:647.78ms +step:6495/57344 train_time:4207230ms step_avg:647.76ms +grad accum step:1624/14336 +step:6496/57344 train_time:4208435ms step_avg:647.85ms +step:6497/57344 train_time:4208452ms step_avg:647.75ms +step:6498/57344 train_time:4208686ms step_avg:647.69ms +step:6499/57344 train_time:4209196ms step_avg:647.67ms +grad accum step:1625/14336 +step:6500/57344 train_time:4210407ms step_avg:647.75ms +step:6501/57344 train_time:4210423ms step_avg:647.66ms +step:6502/57344 train_time:4210663ms step_avg:647.59ms +step:6503/57344 train_time:4211190ms step_avg:647.58ms +grad accum step:1626/14336 +step:6504/57344 train_time:4212415ms step_avg:647.67ms +step:6505/57344 train_time:4212432ms step_avg:647.57ms +step:6506/57344 train_time:4212667ms step_avg:647.50ms +step:6507/57344 train_time:4213181ms step_avg:647.48ms +grad accum step:1627/14336 +step:6508/57344 train_time:4214401ms step_avg:647.57ms +step:6509/57344 train_time:4214418ms step_avg:647.48ms +step:6510/57344 train_time:4214651ms step_avg:647.41ms +step:6511/57344 train_time:4215161ms step_avg:647.39ms +grad accum step:1628/14336 +step:6512/57344 train_time:4216381ms step_avg:647.48ms +step:6513/57344 train_time:4216397ms step_avg:647.38ms +step:6514/57344 train_time:4216632ms step_avg:647.32ms +step:6515/57344 train_time:4217147ms step_avg:647.30ms +grad accum step:1629/14336 +step:6516/57344 train_time:4218371ms step_avg:647.39ms +step:6517/57344 train_time:4218388ms step_avg:647.29ms +step:6518/57344 train_time:4218623ms step_avg:647.23ms +step:6519/57344 train_time:4219134ms step_avg:647.21ms +grad accum step:1630/14336 +step:6520/57344 train_time:4220351ms step_avg:647.29ms +step:6521/57344 train_time:4220368ms step_avg:647.20ms +step:6522/57344 train_time:4220606ms step_avg:647.13ms +step:6523/57344 train_time:4221125ms step_avg:647.11ms +grad accum step:1631/14336 +step:6524/57344 train_time:4222354ms step_avg:647.20ms +step:6525/57344 train_time:4222371ms step_avg:647.11ms +step:6526/57344 train_time:4222606ms step_avg:647.04ms +step:6527/57344 train_time:4223120ms step_avg:647.02ms +grad accum step:1632/14336 +step:6528/57344 train_time:4224342ms step_avg:647.11ms +step:6528/57344 val_loss:7.353693 train_time:4224343ms step_avg:647.11ms +step:6529/57344 train_time:4224355ms step_avg:647.01ms +step:6530/57344 train_time:4224571ms step_avg:646.95ms +step:6531/57344 train_time:4225094ms step_avg:646.93ms +grad accum step:1633/14336 +step:6532/57344 train_time:4226353ms step_avg:647.02ms +step:6533/57344 train_time:4226370ms step_avg:646.93ms +step:6534/57344 train_time:4226610ms step_avg:646.86ms +step:6535/57344 train_time:4227125ms step_avg:646.84ms +grad accum step:1634/14336 +step:6536/57344 train_time:4228356ms step_avg:646.93ms +step:6537/57344 train_time:4228373ms step_avg:646.84ms +step:6538/57344 train_time:4228614ms step_avg:646.77ms +step:6539/57344 train_time:4229134ms step_avg:646.76ms +grad accum step:1635/14336 +step:6540/57344 train_time:4230361ms step_avg:646.84ms +step:6541/57344 train_time:4230378ms step_avg:646.75ms +step:6542/57344 train_time:4230619ms step_avg:646.69ms +step:6543/57344 train_time:4231139ms step_avg:646.67ms +grad accum step:1636/14336 +step:6544/57344 train_time:4232369ms step_avg:646.76ms +step:6545/57344 train_time:4232387ms step_avg:646.66ms +step:6546/57344 train_time:4232626ms step_avg:646.60ms +step:6547/57344 train_time:4233146ms step_avg:646.58ms +grad accum step:1637/14336 +step:6548/57344 train_time:4234377ms step_avg:646.67ms +step:6549/57344 train_time:4234394ms step_avg:646.57ms +step:6550/57344 train_time:4234632ms step_avg:646.51ms +step:6551/57344 train_time:4235149ms step_avg:646.49ms +grad accum step:1638/14336 +step:6552/57344 train_time:4236379ms step_avg:646.58ms +step:6553/57344 train_time:4236396ms step_avg:646.48ms +step:6554/57344 train_time:4236636ms step_avg:646.42ms +step:6555/57344 train_time:4237161ms step_avg:646.40ms +grad accum step:1639/14336 +step:6556/57344 train_time:4238404ms step_avg:646.49ms +step:6557/57344 train_time:4238421ms step_avg:646.40ms +step:6558/57344 train_time:4238661ms step_avg:646.33ms +step:6559/57344 train_time:4239181ms step_avg:646.32ms +grad accum step:1640/14336 +step:6560/57344 train_time:4240404ms step_avg:646.40ms +step:6561/57344 train_time:4240421ms step_avg:646.31ms +step:6562/57344 train_time:4240660ms step_avg:646.25ms +step:6563/57344 train_time:4241180ms step_avg:646.23ms +grad accum step:1641/14336 +step:6564/57344 train_time:4242430ms step_avg:646.32ms +step:6565/57344 train_time:4242448ms step_avg:646.22ms +step:6566/57344 train_time:4242688ms step_avg:646.16ms +step:6567/57344 train_time:4243211ms step_avg:646.14ms +grad accum step:1642/14336 +step:6568/57344 train_time:4244442ms step_avg:646.23ms +step:6569/57344 train_time:4244459ms step_avg:646.13ms +step:6570/57344 train_time:4244698ms step_avg:646.07ms +step:6571/57344 train_time:4245219ms step_avg:646.05ms +grad accum step:1643/14336 +step:6572/57344 train_time:4246453ms step_avg:646.14ms +step:6573/57344 train_time:4246470ms step_avg:646.05ms +step:6574/57344 train_time:4246708ms step_avg:645.99ms +step:6575/57344 train_time:4247229ms step_avg:645.97ms +grad accum step:1644/14336 +step:6576/57344 train_time:4248469ms step_avg:646.06ms +step:6577/57344 train_time:4248486ms step_avg:645.96ms +step:6578/57344 train_time:4248728ms step_avg:645.90ms +step:6579/57344 train_time:4249264ms step_avg:645.88ms +grad accum step:1645/14336 +step:6580/57344 train_time:4250500ms step_avg:645.97ms +step:6581/57344 train_time:4250517ms step_avg:645.88ms +step:6582/57344 train_time:4250755ms step_avg:645.82ms +step:6583/57344 train_time:4251273ms step_avg:645.80ms +grad accum step:1646/14336 +step:6584/57344 train_time:4252511ms step_avg:645.89ms +step:6585/57344 train_time:4252528ms step_avg:645.79ms +step:6586/57344 train_time:4252768ms step_avg:645.73ms +step:6587/57344 train_time:4253292ms step_avg:645.71ms +grad accum step:1647/14336 +step:6588/57344 train_time:4254548ms step_avg:645.80ms +step:6589/57344 train_time:4254565ms step_avg:645.71ms +step:6590/57344 train_time:4254806ms step_avg:645.65ms +step:6591/57344 train_time:4255328ms step_avg:645.63ms +grad accum step:1648/14336 +step:6592/57344 train_time:4256555ms step_avg:645.72ms +step:6592/57344 val_loss:7.355770 train_time:4256555ms step_avg:645.72ms +step:6593/57344 train_time:4256567ms step_avg:645.62ms +step:6594/57344 train_time:4256785ms step_avg:645.55ms +step:6595/57344 train_time:4257310ms step_avg:645.54ms +grad accum step:1649/14336 +step:6596/57344 train_time:4258542ms step_avg:645.62ms +step:6597/57344 train_time:4258559ms step_avg:645.53ms +step:6598/57344 train_time:4258798ms step_avg:645.47ms +step:6599/57344 train_time:4259318ms step_avg:645.45ms +grad accum step:1650/14336 +step:6600/57344 train_time:4260573ms step_avg:645.54ms +step:6601/57344 train_time:4260590ms step_avg:645.45ms +step:6602/57344 train_time:4260830ms step_avg:645.38ms +step:6603/57344 train_time:4261353ms step_avg:645.37ms +grad accum step:1651/14336 +step:6604/57344 train_time:4262588ms step_avg:645.46ms +step:6605/57344 train_time:4262606ms step_avg:645.36ms +step:6606/57344 train_time:4262849ms step_avg:645.30ms +step:6607/57344 train_time:4263374ms step_avg:645.28ms +grad accum step:1652/14336 +step:6608/57344 train_time:4264612ms step_avg:645.37ms +step:6609/57344 train_time:4264629ms step_avg:645.28ms +step:6610/57344 train_time:4264870ms step_avg:645.21ms +step:6611/57344 train_time:4265396ms step_avg:645.20ms +grad accum step:1653/14336 +step:6612/57344 train_time:4266631ms step_avg:645.29ms +step:6613/57344 train_time:4266648ms step_avg:645.19ms +step:6614/57344 train_time:4266887ms step_avg:645.13ms +step:6615/57344 train_time:4267408ms step_avg:645.11ms +grad accum step:1654/14336 +step:6616/57344 train_time:4268650ms step_avg:645.20ms +step:6617/57344 train_time:4268667ms step_avg:645.11ms +step:6618/57344 train_time:4268904ms step_avg:645.04ms +step:6619/57344 train_time:4269429ms step_avg:645.03ms +grad accum step:1655/14336 +step:6620/57344 train_time:4270673ms step_avg:645.12ms +step:6621/57344 train_time:4270690ms step_avg:645.02ms +step:6622/57344 train_time:4270932ms step_avg:644.96ms +step:6623/57344 train_time:4271455ms step_avg:644.94ms +grad accum step:1656/14336 +step:6624/57344 train_time:4272694ms step_avg:645.03ms +step:6625/57344 train_time:4272711ms step_avg:644.94ms +step:6626/57344 train_time:4272951ms step_avg:644.88ms +step:6627/57344 train_time:4273473ms step_avg:644.86ms +grad accum step:1657/14336 +step:6628/57344 train_time:4274706ms step_avg:644.95ms +step:6629/57344 train_time:4274723ms step_avg:644.85ms +step:6630/57344 train_time:4274962ms step_avg:644.79ms +step:6631/57344 train_time:4275482ms step_avg:644.77ms +grad accum step:1658/14336 +step:6632/57344 train_time:4276707ms step_avg:644.86ms +step:6633/57344 train_time:4276724ms step_avg:644.76ms +step:6634/57344 train_time:4276963ms step_avg:644.70ms +step:6635/57344 train_time:4277483ms step_avg:644.68ms +grad accum step:1659/14336 +step:6636/57344 train_time:4278714ms step_avg:644.77ms +step:6637/57344 train_time:4278732ms step_avg:644.68ms +step:6638/57344 train_time:4278973ms step_avg:644.62ms +step:6639/57344 train_time:4279499ms step_avg:644.60ms +grad accum step:1660/14336 +step:6640/57344 train_time:4280732ms step_avg:644.69ms +step:6641/57344 train_time:4280750ms step_avg:644.59ms +step:6642/57344 train_time:4280990ms step_avg:644.53ms +step:6643/57344 train_time:4281511ms step_avg:644.51ms +grad accum step:1661/14336 +step:6644/57344 train_time:4282748ms step_avg:644.60ms +step:6645/57344 train_time:4282765ms step_avg:644.51ms +step:6646/57344 train_time:4283006ms step_avg:644.45ms +step:6647/57344 train_time:4283532ms step_avg:644.43ms +grad accum step:1662/14336 +step:6648/57344 train_time:4284777ms step_avg:644.52ms +step:6649/57344 train_time:4284794ms step_avg:644.43ms +step:6650/57344 train_time:4285034ms step_avg:644.37ms +step:6651/57344 train_time:4285558ms step_avg:644.35ms +grad accum step:1663/14336 +step:6652/57344 train_time:4286795ms step_avg:644.44ms +step:6653/57344 train_time:4286812ms step_avg:644.34ms +step:6654/57344 train_time:4287052ms step_avg:644.28ms +step:6655/57344 train_time:4287578ms step_avg:644.26ms +grad accum step:1664/14336 +step:6656/57344 train_time:4288830ms step_avg:644.36ms +step:6656/57344 val_loss:7.331154 train_time:4288830ms step_avg:644.36ms +step:6657/57344 train_time:4288842ms step_avg:644.26ms +step:6658/57344 train_time:4289056ms step_avg:644.20ms +step:6659/57344 train_time:4289572ms step_avg:644.18ms +grad accum step:1665/14336 +step:6660/57344 train_time:4290806ms step_avg:644.27ms +step:6661/57344 train_time:4290823ms step_avg:644.17ms +step:6662/57344 train_time:4291061ms step_avg:644.11ms +step:6663/57344 train_time:4291581ms step_avg:644.09ms +grad accum step:1666/14336 +step:6664/57344 train_time:4292820ms step_avg:644.18ms +step:6665/57344 train_time:4292837ms step_avg:644.09ms +step:6666/57344 train_time:4293076ms step_avg:644.03ms +step:6667/57344 train_time:4293598ms step_avg:644.01ms +grad accum step:1667/14336 +step:6668/57344 train_time:4294838ms step_avg:644.10ms +step:6669/57344 train_time:4294855ms step_avg:644.00ms +step:6670/57344 train_time:4295092ms step_avg:643.94ms +step:6671/57344 train_time:4295611ms step_avg:643.92ms +grad accum step:1668/14336 +step:6672/57344 train_time:4296836ms step_avg:644.01ms +step:6673/57344 train_time:4296854ms step_avg:643.92ms +step:6674/57344 train_time:4297092ms step_avg:643.86ms +step:6675/57344 train_time:4297619ms step_avg:643.84ms +grad accum step:1669/14336 +step:6676/57344 train_time:4298858ms step_avg:643.93ms +step:6677/57344 train_time:4298875ms step_avg:643.83ms +step:6678/57344 train_time:4299120ms step_avg:643.77ms +step:6679/57344 train_time:4299647ms step_avg:643.76ms +grad accum step:1670/14336 +step:6680/57344 train_time:4300886ms step_avg:643.85ms +step:6681/57344 train_time:4300904ms step_avg:643.75ms +step:6682/57344 train_time:4301142ms step_avg:643.69ms +step:6683/57344 train_time:4301663ms step_avg:643.67ms +grad accum step:1671/14336 +step:6684/57344 train_time:4302915ms step_avg:643.76ms +step:6685/57344 train_time:4302932ms step_avg:643.67ms +step:6686/57344 train_time:4303169ms step_avg:643.61ms +step:6687/57344 train_time:4303692ms step_avg:643.59ms +grad accum step:1672/14336 +step:6688/57344 train_time:4304936ms step_avg:643.68ms +step:6689/57344 train_time:4304953ms step_avg:643.59ms +step:6690/57344 train_time:4305194ms step_avg:643.53ms +step:6691/57344 train_time:4305719ms step_avg:643.51ms +grad accum step:1673/14336 +step:6692/57344 train_time:4306966ms step_avg:643.60ms +step:6693/57344 train_time:4306984ms step_avg:643.51ms +step:6694/57344 train_time:4307227ms step_avg:643.45ms +step:6695/57344 train_time:4307752ms step_avg:643.43ms +grad accum step:1674/14336 +step:6696/57344 train_time:4308974ms step_avg:643.51ms +step:6697/57344 train_time:4308991ms step_avg:643.42ms +step:6698/57344 train_time:4309231ms step_avg:643.36ms +step:6699/57344 train_time:4309757ms step_avg:643.34ms +grad accum step:1675/14336 +step:6700/57344 train_time:4310979ms step_avg:643.43ms +step:6701/57344 train_time:4310996ms step_avg:643.34ms +step:6702/57344 train_time:4311237ms step_avg:643.28ms +step:6703/57344 train_time:4311758ms step_avg:643.26ms +grad accum step:1676/14336 +step:6704/57344 train_time:4312987ms step_avg:643.35ms +step:6705/57344 train_time:4313005ms step_avg:643.25ms +step:6706/57344 train_time:4313245ms step_avg:643.19ms +step:6707/57344 train_time:4313769ms step_avg:643.17ms +grad accum step:1677/14336 +step:6708/57344 train_time:4315024ms step_avg:643.27ms +step:6709/57344 train_time:4315041ms step_avg:643.17ms +step:6710/57344 train_time:4315280ms step_avg:643.11ms +step:6711/57344 train_time:4315804ms step_avg:643.09ms +grad accum step:1678/14336 +step:6712/57344 train_time:4317041ms step_avg:643.18ms +step:6713/57344 train_time:4317059ms step_avg:643.09ms +step:6714/57344 train_time:4317297ms step_avg:643.03ms +step:6715/57344 train_time:4317817ms step_avg:643.01ms +grad accum step:1679/14336 +step:6716/57344 train_time:4319050ms step_avg:643.10ms +step:6717/57344 train_time:4319067ms step_avg:643.01ms +step:6718/57344 train_time:4319309ms step_avg:642.95ms +step:6719/57344 train_time:4319829ms step_avg:642.93ms +grad accum step:1680/14336 +step:6720/57344 train_time:4321225ms step_avg:643.04ms +step:6720/57344 val_loss:7.320140 train_time:4321226ms step_avg:643.04ms +step:6721/57344 train_time:4321238ms step_avg:642.95ms +step:6722/57344 train_time:4321454ms step_avg:642.88ms +step:6723/57344 train_time:4321977ms step_avg:642.86ms +grad accum step:1681/14336 +step:6724/57344 train_time:4323211ms step_avg:642.95ms +step:6725/57344 train_time:4323226ms step_avg:642.86ms +step:6726/57344 train_time:4323464ms step_avg:642.80ms +step:6727/57344 train_time:4323983ms step_avg:642.78ms +grad accum step:1682/14336 +step:6728/57344 train_time:4325216ms step_avg:642.87ms +step:6729/57344 train_time:4325232ms step_avg:642.77ms +step:6730/57344 train_time:4325472ms step_avg:642.72ms +step:6731/57344 train_time:4325995ms step_avg:642.70ms +grad accum step:1683/14336 +step:6732/57344 train_time:4327239ms step_avg:642.79ms +step:6733/57344 train_time:4327313ms step_avg:642.70ms +step:6734/57344 train_time:4327524ms step_avg:642.64ms +step:6735/57344 train_time:4328043ms step_avg:642.62ms +grad accum step:1684/14336 +step:6736/57344 train_time:4329292ms step_avg:642.71ms +step:6737/57344 train_time:4329306ms step_avg:642.62ms +step:6738/57344 train_time:4329546ms step_avg:642.56ms +step:6739/57344 train_time:4330067ms step_avg:642.54ms +grad accum step:1685/14336 +step:6740/57344 train_time:4333176ms step_avg:642.90ms +step:6741/57344 train_time:4333363ms step_avg:642.84ms +step:6742/57344 train_time:4333617ms step_avg:642.78ms +step:6743/57344 train_time:4334134ms step_avg:642.76ms +grad accum step:1686/14336 +step:6744/57344 train_time:4335378ms step_avg:642.85ms +step:6745/57344 train_time:4335395ms step_avg:642.76ms +step:6746/57344 train_time:4335633ms step_avg:642.70ms +step:6747/57344 train_time:4336155ms step_avg:642.68ms +grad accum step:1687/14336 +step:6748/57344 train_time:4337390ms step_avg:642.77ms +step:6749/57344 train_time:4337408ms step_avg:642.67ms +step:6750/57344 train_time:4337649ms step_avg:642.61ms +step:6751/57344 train_time:4338173ms step_avg:642.60ms +grad accum step:1688/14336 +step:6752/57344 train_time:4339403ms step_avg:642.68ms +step:6753/57344 train_time:4339420ms step_avg:642.59ms +step:6754/57344 train_time:4339658ms step_avg:642.53ms +step:6755/57344 train_time:4340180ms step_avg:642.51ms +grad accum step:1689/14336 +step:6756/57344 train_time:4341410ms step_avg:642.60ms +step:6757/57344 train_time:4341427ms step_avg:642.51ms +step:6758/57344 train_time:4341666ms step_avg:642.45ms +step:6759/57344 train_time:4342186ms step_avg:642.43ms +grad accum step:1690/14336 +step:6760/57344 train_time:4343416ms step_avg:642.52ms +step:6761/57344 train_time:4343433ms step_avg:642.42ms +step:6762/57344 train_time:4343673ms step_avg:642.37ms +step:6763/57344 train_time:4344199ms step_avg:642.35ms +grad accum step:1691/14336 +step:6764/57344 train_time:4345425ms step_avg:642.43ms +step:6765/57344 train_time:4345442ms step_avg:642.34ms +step:6766/57344 train_time:4345681ms step_avg:642.28ms +step:6767/57344 train_time:4346205ms step_avg:642.26ms +grad accum step:1692/14336 +step:6768/57344 train_time:4347435ms step_avg:642.35ms +step:6769/57344 train_time:4347452ms step_avg:642.26ms +step:6770/57344 train_time:4347692ms step_avg:642.20ms +step:6771/57344 train_time:4348212ms step_avg:642.18ms +grad accum step:1693/14336 +step:6772/57344 train_time:4349448ms step_avg:642.27ms +step:6773/57344 train_time:4349466ms step_avg:642.18ms +step:6774/57344 train_time:4349705ms step_avg:642.12ms +step:6775/57344 train_time:4350225ms step_avg:642.10ms +grad accum step:1694/14336 +step:6776/57344 train_time:4351458ms step_avg:642.19ms +step:6777/57344 train_time:4351475ms step_avg:642.09ms +step:6778/57344 train_time:4351712ms step_avg:642.03ms +step:6779/57344 train_time:4352231ms step_avg:642.02ms +grad accum step:1695/14336 +step:6780/57344 train_time:4353468ms step_avg:642.10ms +step:6781/57344 train_time:4353485ms step_avg:642.01ms +step:6782/57344 train_time:4353724ms step_avg:641.95ms +step:6783/57344 train_time:4354249ms step_avg:641.94ms +grad accum step:1696/14336 +step:6784/57344 train_time:4355476ms step_avg:642.02ms +step:6784/57344 val_loss:7.303020 train_time:4355477ms step_avg:642.02ms +step:6785/57344 train_time:4355489ms step_avg:641.93ms +step:6786/57344 train_time:4355704ms step_avg:641.87ms +step:6787/57344 train_time:4356224ms step_avg:641.85ms +grad accum step:1697/14336 +step:6788/57344 train_time:4357464ms step_avg:641.94ms +step:6789/57344 train_time:4357481ms step_avg:641.84ms +step:6790/57344 train_time:4357721ms step_avg:641.79ms +step:6791/57344 train_time:4358244ms step_avg:641.77ms +grad accum step:1698/14336 +step:6792/57344 train_time:4359478ms step_avg:641.85ms +step:6793/57344 train_time:4359495ms step_avg:641.76ms +step:6794/57344 train_time:4359736ms step_avg:641.70ms +step:6795/57344 train_time:4360257ms step_avg:641.69ms +grad accum step:1699/14336 +step:6796/57344 train_time:4361484ms step_avg:641.77ms +step:6797/57344 train_time:4361501ms step_avg:641.68ms +step:6798/57344 train_time:4361742ms step_avg:641.62ms +step:6799/57344 train_time:4362260ms step_avg:641.60ms +grad accum step:1700/14336 +step:6800/57344 train_time:4363486ms step_avg:641.69ms +step:6801/57344 train_time:4363503ms step_avg:641.60ms +step:6802/57344 train_time:4363742ms step_avg:641.54ms +step:6803/57344 train_time:4364260ms step_avg:641.52ms +grad accum step:1701/14336 +step:6804/57344 train_time:4365494ms step_avg:641.61ms +step:6805/57344 train_time:4365511ms step_avg:641.52ms +step:6806/57344 train_time:4365750ms step_avg:641.46ms +step:6807/57344 train_time:4366271ms step_avg:641.44ms +grad accum step:1702/14336 +step:6808/57344 train_time:4367498ms step_avg:641.52ms +step:6809/57344 train_time:4367516ms step_avg:641.43ms +step:6810/57344 train_time:4367757ms step_avg:641.37ms +step:6811/57344 train_time:4368280ms step_avg:641.36ms +grad accum step:1703/14336 +step:6812/57344 train_time:4369512ms step_avg:641.44ms +step:6813/57344 train_time:4369529ms step_avg:641.35ms +step:6814/57344 train_time:4369769ms step_avg:641.29ms +step:6815/57344 train_time:4370288ms step_avg:641.27ms +grad accum step:1704/14336 +step:6816/57344 train_time:4371514ms step_avg:641.36ms +step:6817/57344 train_time:4371531ms step_avg:641.27ms +step:6818/57344 train_time:4371769ms step_avg:641.21ms +step:6819/57344 train_time:4372290ms step_avg:641.19ms +grad accum step:1705/14336 +step:6820/57344 train_time:4373527ms step_avg:641.28ms +step:6821/57344 train_time:4373545ms step_avg:641.19ms +step:6822/57344 train_time:4373784ms step_avg:641.13ms +step:6823/57344 train_time:4374304ms step_avg:641.11ms +grad accum step:1706/14336 +step:6824/57344 train_time:4375539ms step_avg:641.20ms +step:6825/57344 train_time:4375557ms step_avg:641.11ms +step:6826/57344 train_time:4375796ms step_avg:641.05ms +step:6827/57344 train_time:4376317ms step_avg:641.03ms +grad accum step:1707/14336 +step:6828/57344 train_time:4377556ms step_avg:641.12ms +step:6829/57344 train_time:4377573ms step_avg:641.03ms +step:6830/57344 train_time:4377810ms step_avg:640.97ms +step:6831/57344 train_time:4378326ms step_avg:640.95ms +grad accum step:1708/14336 +step:6832/57344 train_time:4379562ms step_avg:641.04ms +step:6833/57344 train_time:4379579ms step_avg:640.95ms +step:6834/57344 train_time:4379817ms step_avg:640.89ms +step:6835/57344 train_time:4380338ms step_avg:640.87ms +grad accum step:1709/14336 +step:6836/57344 train_time:4381557ms step_avg:640.95ms +step:6837/57344 train_time:4381574ms step_avg:640.86ms +step:6838/57344 train_time:4381812ms step_avg:640.80ms +step:6839/57344 train_time:4382335ms step_avg:640.79ms +grad accum step:1710/14336 +step:6840/57344 train_time:4383566ms step_avg:640.87ms +step:6841/57344 train_time:4383583ms step_avg:640.78ms +step:6842/57344 train_time:4383823ms step_avg:640.72ms +step:6843/57344 train_time:4384346ms step_avg:640.71ms +grad accum step:1711/14336 +step:6844/57344 train_time:4385575ms step_avg:640.79ms +step:6845/57344 train_time:4385592ms step_avg:640.70ms +step:6846/57344 train_time:4385830ms step_avg:640.64ms +step:6847/57344 train_time:4386346ms step_avg:640.62ms +grad accum step:1712/14336 +step:6848/57344 train_time:4387578ms step_avg:640.71ms +step:6848/57344 val_loss:7.333101 train_time:4387579ms step_avg:640.71ms +step:6849/57344 train_time:4387591ms step_avg:640.62ms +step:6850/57344 train_time:4387805ms step_avg:640.56ms +step:6851/57344 train_time:4388322ms step_avg:640.54ms +grad accum step:1713/14336 +step:6852/57344 train_time:4389550ms step_avg:640.62ms +step:6853/57344 train_time:4389567ms step_avg:640.53ms +step:6854/57344 train_time:4389806ms step_avg:640.47ms +step:6855/57344 train_time:4390323ms step_avg:640.46ms +grad accum step:1714/14336 +step:6856/57344 train_time:4391548ms step_avg:640.54ms +step:6857/57344 train_time:4391565ms step_avg:640.45ms +step:6858/57344 train_time:4391803ms step_avg:640.39ms +step:6859/57344 train_time:4392326ms step_avg:640.37ms +grad accum step:1715/14336 +step:6860/57344 train_time:4393555ms step_avg:640.46ms +step:6861/57344 train_time:4393572ms step_avg:640.37ms +step:6862/57344 train_time:4393810ms step_avg:640.31ms +step:6863/57344 train_time:4394331ms step_avg:640.29ms +grad accum step:1716/14336 +step:6864/57344 train_time:4395566ms step_avg:640.38ms +step:6865/57344 train_time:4395583ms step_avg:640.29ms +step:6866/57344 train_time:4395821ms step_avg:640.23ms +step:6867/57344 train_time:4396341ms step_avg:640.21ms +grad accum step:1717/14336 +step:6868/57344 train_time:4397574ms step_avg:640.30ms +step:6869/57344 train_time:4397591ms step_avg:640.21ms +step:6870/57344 train_time:4397829ms step_avg:640.15ms +step:6871/57344 train_time:4398347ms step_avg:640.13ms +grad accum step:1718/14336 +step:6872/57344 train_time:4399573ms step_avg:640.22ms +step:6873/57344 train_time:4399591ms step_avg:640.13ms +step:6874/57344 train_time:4399829ms step_avg:640.07ms +step:6875/57344 train_time:4400350ms step_avg:640.05ms +grad accum step:1719/14336 +step:6876/57344 train_time:4401580ms step_avg:640.14ms +step:6877/57344 train_time:4401598ms step_avg:640.05ms +step:6878/57344 train_time:4401838ms step_avg:639.99ms +step:6879/57344 train_time:4402362ms step_avg:639.97ms +grad accum step:1720/14336 +step:6880/57344 train_time:4403610ms step_avg:640.06ms +step:6881/57344 train_time:4403627ms step_avg:639.97ms +step:6882/57344 train_time:4403866ms step_avg:639.91ms +step:6883/57344 train_time:4404393ms step_avg:639.89ms +grad accum step:1721/14336 +step:6884/57344 train_time:4405620ms step_avg:639.98ms +step:6885/57344 train_time:4405637ms step_avg:639.89ms +step:6886/57344 train_time:4405877ms step_avg:639.83ms +step:6887/57344 train_time:4406400ms step_avg:639.81ms +grad accum step:1722/14336 +step:6888/57344 train_time:4407632ms step_avg:639.90ms +step:6889/57344 train_time:4407649ms step_avg:639.81ms +step:6890/57344 train_time:4407889ms step_avg:639.75ms +step:6891/57344 train_time:4408410ms step_avg:639.73ms +grad accum step:1723/14336 +step:6892/57344 train_time:4409642ms step_avg:639.82ms +step:6893/57344 train_time:4409659ms step_avg:639.73ms +step:6894/57344 train_time:4409896ms step_avg:639.67ms +step:6895/57344 train_time:4410416ms step_avg:639.65ms +grad accum step:1724/14336 +step:6896/57344 train_time:4411648ms step_avg:639.74ms +step:6897/57344 train_time:4411665ms step_avg:639.65ms +step:6898/57344 train_time:4411904ms step_avg:639.59ms +step:6899/57344 train_time:4412424ms step_avg:639.57ms +grad accum step:1725/14336 +step:6900/57344 train_time:4413656ms step_avg:639.66ms +step:6901/57344 train_time:4413673ms step_avg:639.57ms +step:6902/57344 train_time:4413911ms step_avg:639.51ms +step:6903/57344 train_time:4414433ms step_avg:639.49ms +grad accum step:1726/14336 +step:6904/57344 train_time:4415684ms step_avg:639.58ms +step:6905/57344 train_time:4415701ms step_avg:639.49ms +step:6906/57344 train_time:4415944ms step_avg:639.44ms +step:6907/57344 train_time:4416480ms step_avg:639.42ms +grad accum step:1727/14336 +step:6908/57344 train_time:4417720ms step_avg:639.51ms +step:6909/57344 train_time:4417737ms step_avg:639.42ms +step:6910/57344 train_time:4417974ms step_avg:639.36ms +step:6911/57344 train_time:4418495ms step_avg:639.34ms +grad accum step:1728/14336 +step:6912/57344 train_time:4419724ms step_avg:639.43ms +step:6912/57344 val_loss:7.345509 train_time:4419724ms step_avg:639.43ms +step:6913/57344 train_time:4419809ms step_avg:639.35ms +step:6914/57344 train_time:4419952ms step_avg:639.28ms +step:6915/57344 train_time:4420473ms step_avg:639.26ms +grad accum step:1729/14336 +step:6916/57344 train_time:4421753ms step_avg:639.35ms +step:6917/57344 train_time:4421765ms step_avg:639.26ms +step:6918/57344 train_time:4421975ms step_avg:639.20ms +step:6919/57344 train_time:4422500ms step_avg:639.18ms +grad accum step:1730/14336 +step:6920/57344 train_time:4423732ms step_avg:639.27ms +step:6921/57344 train_time:4423749ms step_avg:639.18ms +step:6922/57344 train_time:4423989ms step_avg:639.12ms +step:6923/57344 train_time:4424510ms step_avg:639.10ms +grad accum step:1731/14336 +step:6924/57344 train_time:4425755ms step_avg:639.19ms +step:6925/57344 train_time:4425772ms step_avg:639.10ms +step:6926/57344 train_time:4426012ms step_avg:639.04ms +step:6927/57344 train_time:4426536ms step_avg:639.03ms +grad accum step:1732/14336 +step:6928/57344 train_time:4427779ms step_avg:639.11ms +step:6929/57344 train_time:4427796ms step_avg:639.02ms +step:6930/57344 train_time:4428034ms step_avg:638.97ms +step:6931/57344 train_time:4428554ms step_avg:638.95ms +grad accum step:1733/14336 +step:6932/57344 train_time:4429779ms step_avg:639.03ms +step:6933/57344 train_time:4429796ms step_avg:638.94ms +step:6934/57344 train_time:4430034ms step_avg:638.89ms +step:6935/57344 train_time:4430552ms step_avg:638.87ms +grad accum step:1734/14336 +step:6936/57344 train_time:4431787ms step_avg:638.95ms +step:6937/57344 train_time:4431804ms step_avg:638.86ms +step:6938/57344 train_time:4432044ms step_avg:638.81ms +step:6939/57344 train_time:4432563ms step_avg:638.79ms +grad accum step:1735/14336 +step:6940/57344 train_time:4433806ms step_avg:638.88ms +step:6941/57344 train_time:4433823ms step_avg:638.79ms +step:6942/57344 train_time:4434062ms step_avg:638.73ms +step:6943/57344 train_time:4434582ms step_avg:638.71ms +grad accum step:1736/14336 +step:6944/57344 train_time:4435817ms step_avg:638.80ms +step:6945/57344 train_time:4435834ms step_avg:638.71ms +step:6946/57344 train_time:4436071ms step_avg:638.65ms +step:6947/57344 train_time:4436590ms step_avg:638.63ms +grad accum step:1737/14336 +step:6948/57344 train_time:4437812ms step_avg:638.72ms +step:6949/57344 train_time:4437829ms step_avg:638.63ms +step:6950/57344 train_time:4438068ms step_avg:638.57ms +step:6951/57344 train_time:4438586ms step_avg:638.55ms +grad accum step:1738/14336 +step:6952/57344 train_time:4439820ms step_avg:638.64ms +step:6953/57344 train_time:4439837ms step_avg:638.55ms +step:6954/57344 train_time:4440078ms step_avg:638.49ms +step:6955/57344 train_time:4440600ms step_avg:638.48ms +grad accum step:1739/14336 +step:6956/57344 train_time:4441844ms step_avg:638.56ms +step:6957/57344 train_time:4441861ms step_avg:638.47ms +step:6958/57344 train_time:4442102ms step_avg:638.42ms +step:6959/57344 train_time:4442626ms step_avg:638.40ms +grad accum step:1740/14336 +step:6960/57344 train_time:4443854ms step_avg:638.48ms +step:6961/57344 train_time:4443872ms step_avg:638.40ms +step:6962/57344 train_time:4444111ms step_avg:638.34ms +step:6963/57344 train_time:4444631ms step_avg:638.32ms +grad accum step:1741/14336 +step:6964/57344 train_time:4445875ms step_avg:638.41ms +step:6965/57344 train_time:4445892ms step_avg:638.32ms +step:6966/57344 train_time:4446132ms step_avg:638.26ms +step:6967/57344 train_time:4446655ms step_avg:638.25ms +grad accum step:1742/14336 +step:6968/57344 train_time:4447880ms step_avg:638.33ms +step:6969/57344 train_time:4447898ms step_avg:638.24ms +step:6970/57344 train_time:4448134ms step_avg:638.18ms +step:6971/57344 train_time:4448651ms step_avg:638.17ms +grad accum step:1743/14336 +step:6972/57344 train_time:4449898ms step_avg:638.25ms +step:6973/57344 train_time:4449915ms step_avg:638.16ms +step:6974/57344 train_time:4450155ms step_avg:638.11ms +step:6975/57344 train_time:4450678ms step_avg:638.09ms +grad accum step:1744/14336 +step:6976/57344 train_time:4451902ms step_avg:638.17ms +step:6976/57344 val_loss:7.340341 train_time:4451903ms step_avg:638.17ms +step:6977/57344 train_time:4451915ms step_avg:638.08ms +step:6978/57344 train_time:4452130ms step_avg:638.02ms +step:6979/57344 train_time:4452654ms step_avg:638.01ms +grad accum step:1745/14336 +step:6980/57344 train_time:4453887ms step_avg:638.09ms +step:6981/57344 train_time:4453904ms step_avg:638.00ms +step:6982/57344 train_time:4454142ms step_avg:637.95ms +step:6983/57344 train_time:4454665ms step_avg:637.93ms +grad accum step:1746/14336 +step:6984/57344 train_time:4455900ms step_avg:638.02ms +step:6985/57344 train_time:4455917ms step_avg:637.93ms +step:6986/57344 train_time:4456155ms step_avg:637.87ms +step:6987/57344 train_time:4456668ms step_avg:637.85ms +grad accum step:1747/14336 +step:6988/57344 train_time:4457914ms step_avg:637.94ms +step:6989/57344 train_time:4457931ms step_avg:637.85ms +step:6990/57344 train_time:4458171ms step_avg:637.79ms +step:6991/57344 train_time:4458695ms step_avg:637.78ms +grad accum step:1748/14336 +step:6992/57344 train_time:4459940ms step_avg:637.86ms +step:6993/57344 train_time:4459958ms step_avg:637.77ms +step:6994/57344 train_time:4460196ms step_avg:637.72ms +step:6995/57344 train_time:4460721ms step_avg:637.70ms +grad accum step:1749/14336 +step:6996/57344 train_time:4461958ms step_avg:637.79ms +step:6997/57344 train_time:4461975ms step_avg:637.70ms +step:6998/57344 train_time:4462213ms step_avg:637.64ms +step:6999/57344 train_time:4462731ms step_avg:637.62ms +grad accum step:1750/14336 +step:7000/57344 train_time:4463966ms step_avg:637.71ms +step:7001/57344 train_time:4463983ms step_avg:637.62ms +step:7002/57344 train_time:4464222ms step_avg:637.56ms +step:7003/57344 train_time:4464739ms step_avg:637.55ms +grad accum step:1751/14336 +step:7004/57344 train_time:4465986ms step_avg:637.63ms +step:7005/57344 train_time:4466003ms step_avg:637.55ms +step:7006/57344 train_time:4466243ms step_avg:637.49ms +step:7007/57344 train_time:4466775ms step_avg:637.47ms +grad accum step:1752/14336 +step:7008/57344 train_time:4468027ms step_avg:637.56ms +step:7009/57344 train_time:4468044ms step_avg:637.47ms +step:7010/57344 train_time:4468285ms step_avg:637.42ms +step:7011/57344 train_time:4468808ms step_avg:637.40ms +grad accum step:1753/14336 +step:7012/57344 train_time:4470043ms step_avg:637.48ms +step:7013/57344 train_time:4470060ms step_avg:637.40ms +step:7014/57344 train_time:4470300ms step_avg:637.34ms +step:7015/57344 train_time:4470826ms step_avg:637.32ms +grad accum step:1754/14336 +step:7016/57344 train_time:4472064ms step_avg:637.41ms +step:7017/57344 train_time:4472081ms step_avg:637.32ms +step:7018/57344 train_time:4472319ms step_avg:637.26ms +step:7019/57344 train_time:4472839ms step_avg:637.25ms +grad accum step:1755/14336 +step:7020/57344 train_time:4474079ms step_avg:637.33ms +step:7021/57344 train_time:4474095ms step_avg:637.24ms +step:7022/57344 train_time:4474336ms step_avg:637.19ms +step:7023/57344 train_time:4474863ms step_avg:637.17ms +grad accum step:1756/14336 +step:7024/57344 train_time:4476090ms step_avg:637.26ms +step:7025/57344 train_time:4476107ms step_avg:637.17ms +step:7026/57344 train_time:4476346ms step_avg:637.11ms +step:7027/57344 train_time:4476867ms step_avg:637.10ms +grad accum step:1757/14336 +step:7028/57344 train_time:4478106ms step_avg:637.18ms +step:7029/57344 train_time:4478123ms step_avg:637.09ms +step:7030/57344 train_time:4478364ms step_avg:637.04ms +step:7031/57344 train_time:4478890ms step_avg:637.02ms +grad accum step:1758/14336 +step:7032/57344 train_time:4480115ms step_avg:637.10ms +step:7033/57344 train_time:4480132ms step_avg:637.02ms +step:7034/57344 train_time:4480373ms step_avg:636.96ms +step:7035/57344 train_time:4480894ms step_avg:636.94ms +grad accum step:1759/14336 +step:7036/57344 train_time:4482120ms step_avg:637.03ms +step:7037/57344 train_time:4482137ms step_avg:636.94ms +step:7038/57344 train_time:4482376ms step_avg:636.88ms +step:7039/57344 train_time:4482900ms step_avg:636.87ms +grad accum step:1760/14336 +step:7040/57344 train_time:4484141ms step_avg:636.95ms +step:7040/57344 val_loss:7.340064 train_time:4484141ms step_avg:636.95ms +step:7041/57344 train_time:4484153ms step_avg:636.86ms +step:7042/57344 train_time:4484368ms step_avg:636.80ms +step:7043/57344 train_time:4484889ms step_avg:636.79ms +grad accum step:1761/14336 +step:7044/57344 train_time:4486116ms step_avg:636.87ms +step:7045/57344 train_time:4486134ms step_avg:636.78ms +step:7046/57344 train_time:4486371ms step_avg:636.73ms +step:7047/57344 train_time:4486887ms step_avg:636.71ms +grad accum step:1762/14336 +step:7048/57344 train_time:4488124ms step_avg:636.79ms +step:7049/57344 train_time:4488141ms step_avg:636.71ms +step:7050/57344 train_time:4488380ms step_avg:636.65ms +step:7051/57344 train_time:4488902ms step_avg:636.63ms +grad accum step:1763/14336 +step:7052/57344 train_time:4490132ms step_avg:636.72ms +step:7053/57344 train_time:4490150ms step_avg:636.63ms +step:7054/57344 train_time:4490388ms step_avg:636.57ms +step:7055/57344 train_time:4492271ms step_avg:636.75ms +grad accum step:1764/14336 +step:7056/57344 train_time:4492906ms step_avg:636.75ms +step:7057/57344 train_time:4492924ms step_avg:636.66ms +step:7058/57344 train_time:4493164ms step_avg:636.61ms +step:7059/57344 train_time:4493692ms step_avg:636.59ms +grad accum step:1765/14336 +step:7060/57344 train_time:4496641ms step_avg:636.92ms +step:7061/57344 train_time:4497216ms step_avg:636.91ms +step:7062/57344 train_time:4497472ms step_avg:636.86ms +step:7063/57344 train_time:4497994ms step_avg:636.84ms +grad accum step:1766/14336 +step:7064/57344 train_time:4499216ms step_avg:636.92ms +step:7065/57344 train_time:4499233ms step_avg:636.83ms +step:7066/57344 train_time:4499471ms step_avg:636.78ms +step:7067/57344 train_time:4499992ms step_avg:636.76ms +grad accum step:1767/14336 +step:7068/57344 train_time:4501221ms step_avg:636.85ms +step:7069/57344 train_time:4501237ms step_avg:636.76ms +step:7070/57344 train_time:4501476ms step_avg:636.70ms +step:7071/57344 train_time:4501995ms step_avg:636.68ms +grad accum step:1768/14336 +step:7072/57344 train_time:4503232ms step_avg:636.77ms +step:7073/57344 train_time:4503249ms step_avg:636.68ms +step:7074/57344 train_time:4503488ms step_avg:636.63ms +step:7075/57344 train_time:4504009ms step_avg:636.61ms +grad accum step:1769/14336 +step:7076/57344 train_time:4505239ms step_avg:636.69ms +step:7077/57344 train_time:4505257ms step_avg:636.61ms +step:7078/57344 train_time:4505494ms step_avg:636.55ms +step:7079/57344 train_time:4506015ms step_avg:636.53ms +grad accum step:1770/14336 +step:7080/57344 train_time:4507260ms step_avg:636.62ms +step:7081/57344 train_time:4507277ms step_avg:636.53ms +step:7082/57344 train_time:4507516ms step_avg:636.47ms +step:7083/57344 train_time:4508035ms step_avg:636.46ms +grad accum step:1771/14336 +step:7084/57344 train_time:4509264ms step_avg:636.54ms +step:7085/57344 train_time:4509281ms step_avg:636.45ms +step:7086/57344 train_time:4509521ms step_avg:636.40ms +step:7087/57344 train_time:4510037ms step_avg:636.38ms +grad accum step:1772/14336 +step:7088/57344 train_time:4511271ms step_avg:636.47ms +step:7089/57344 train_time:4511288ms step_avg:636.38ms +step:7090/57344 train_time:4511527ms step_avg:636.32ms +step:7091/57344 train_time:4512046ms step_avg:636.31ms +grad accum step:1773/14336 +step:7092/57344 train_time:4513275ms step_avg:636.39ms +step:7093/57344 train_time:4513292ms step_avg:636.30ms +step:7094/57344 train_time:4513531ms step_avg:636.25ms +step:7095/57344 train_time:4514048ms step_avg:636.23ms +grad accum step:1774/14336 +step:7096/57344 train_time:4515294ms step_avg:636.32ms +step:7097/57344 train_time:4515311ms step_avg:636.23ms +step:7098/57344 train_time:4515552ms step_avg:636.17ms +step:7099/57344 train_time:4516073ms step_avg:636.16ms +grad accum step:1775/14336 +step:7100/57344 train_time:4517315ms step_avg:636.24ms +step:7101/57344 train_time:4517332ms step_avg:636.15ms +step:7102/57344 train_time:4517571ms step_avg:636.10ms +step:7103/57344 train_time:4518094ms step_avg:636.08ms +grad accum step:1776/14336 +step:7104/57344 train_time:4519324ms step_avg:636.17ms +step:7104/57344 val_loss:7.338420 train_time:4519324ms step_avg:636.17ms +step:7105/57344 train_time:4519336ms step_avg:636.08ms +step:7106/57344 train_time:4519551ms step_avg:636.02ms +step:7107/57344 train_time:4520076ms step_avg:636.00ms +grad accum step:1777/14336 +step:7108/57344 train_time:4521306ms step_avg:636.09ms +step:7109/57344 train_time:4521324ms step_avg:636.00ms +step:7110/57344 train_time:4521562ms step_avg:635.94ms +step:7111/57344 train_time:4522082ms step_avg:635.93ms +grad accum step:1778/14336 +step:7112/57344 train_time:4523317ms step_avg:636.01ms +step:7113/57344 train_time:4523335ms step_avg:635.93ms +step:7114/57344 train_time:4523573ms step_avg:635.87ms +step:7115/57344 train_time:4524096ms step_avg:635.85ms +grad accum step:1779/14336 +step:7116/57344 train_time:4525326ms step_avg:635.94ms +step:7117/57344 train_time:4525343ms step_avg:635.85ms +step:7118/57344 train_time:4525581ms step_avg:635.79ms +step:7119/57344 train_time:4526102ms step_avg:635.78ms +grad accum step:1780/14336 +step:7120/57344 train_time:4527338ms step_avg:635.86ms +step:7121/57344 train_time:4527355ms step_avg:635.78ms +step:7122/57344 train_time:4527594ms step_avg:635.72ms +step:7123/57344 train_time:4528116ms step_avg:635.70ms +grad accum step:1781/14336 +step:7124/57344 train_time:4529341ms step_avg:635.79ms +step:7125/57344 train_time:4529359ms step_avg:635.70ms +step:7126/57344 train_time:4529598ms step_avg:635.64ms +step:7127/57344 train_time:4530121ms step_avg:635.63ms +grad accum step:1782/14336 +step:7128/57344 train_time:4531359ms step_avg:635.71ms +step:7129/57344 train_time:4531376ms step_avg:635.63ms +step:7130/57344 train_time:4531614ms step_avg:635.57ms +step:7131/57344 train_time:4532129ms step_avg:635.55ms +grad accum step:1783/14336 +step:7132/57344 train_time:4533357ms step_avg:635.64ms +step:7133/57344 train_time:4533374ms step_avg:635.55ms +step:7134/57344 train_time:4533615ms step_avg:635.49ms +step:7135/57344 train_time:4534141ms step_avg:635.48ms +grad accum step:1784/14336 +step:7136/57344 train_time:4535383ms step_avg:635.56ms +step:7137/57344 train_time:4535401ms step_avg:635.48ms +step:7138/57344 train_time:4535638ms step_avg:635.42ms +step:7139/57344 train_time:4536162ms step_avg:635.41ms +grad accum step:1785/14336 +step:7140/57344 train_time:4537391ms step_avg:635.49ms +step:7141/57344 train_time:4537408ms step_avg:635.40ms +step:7142/57344 train_time:4537649ms step_avg:635.35ms +step:7143/57344 train_time:4538172ms step_avg:635.33ms +grad accum step:1786/14336 +step:7144/57344 train_time:4539391ms step_avg:635.41ms +step:7145/57344 train_time:4539408ms step_avg:635.33ms +step:7146/57344 train_time:4539650ms step_avg:635.27ms +step:7147/57344 train_time:4540176ms step_avg:635.26ms +grad accum step:1787/14336 +step:7148/57344 train_time:4541395ms step_avg:635.34ms +step:7149/57344 train_time:4541412ms step_avg:635.25ms +step:7150/57344 train_time:4541653ms step_avg:635.20ms +step:7151/57344 train_time:4542176ms step_avg:635.18ms +grad accum step:1788/14336 +step:7152/57344 train_time:4543419ms step_avg:635.27ms +step:7153/57344 train_time:4543436ms step_avg:635.18ms +step:7154/57344 train_time:4543674ms step_avg:635.12ms +step:7155/57344 train_time:4544193ms step_avg:635.11ms +grad accum step:1789/14336 +step:7156/57344 train_time:4545431ms step_avg:635.19ms +step:7157/57344 train_time:4545448ms step_avg:635.11ms +step:7158/57344 train_time:4545691ms step_avg:635.05ms +step:7159/57344 train_time:4546221ms step_avg:635.04ms +grad accum step:1790/14336 +step:7160/57344 train_time:4547454ms step_avg:635.12ms +step:7161/57344 train_time:4547471ms step_avg:635.03ms +step:7162/57344 train_time:4547710ms step_avg:634.98ms +step:7163/57344 train_time:4548229ms step_avg:634.96ms +grad accum step:1791/14336 +step:7164/57344 train_time:4549466ms step_avg:635.05ms +step:7165/57344 train_time:4549483ms step_avg:634.96ms +step:7166/57344 train_time:4549724ms step_avg:634.90ms +step:7167/57344 train_time:4550250ms step_avg:634.89ms +grad accum step:1792/14336 +step:7168/57344 train_time:4551476ms step_avg:634.97ms +step:7168/57344 val_loss:7.325290 train_time:4551476ms step_avg:634.97ms +step:7169/57344 train_time:4551488ms step_avg:634.88ms +step:7170/57344 train_time:4551704ms step_avg:634.83ms +step:7171/57344 train_time:4552229ms step_avg:634.81ms +grad accum step:1793/14336 +step:7172/57344 train_time:4553463ms step_avg:634.89ms +step:7173/57344 train_time:4553481ms step_avg:634.81ms +step:7174/57344 train_time:4553721ms step_avg:634.75ms +step:7175/57344 train_time:4554240ms step_avg:634.74ms +grad accum step:1794/14336 +step:7176/57344 train_time:4555478ms step_avg:634.82ms +step:7177/57344 train_time:4555495ms step_avg:634.74ms +step:7178/57344 train_time:4555735ms step_avg:634.68ms +step:7179/57344 train_time:4556256ms step_avg:634.66ms +grad accum step:1795/14336 +step:7180/57344 train_time:4557490ms step_avg:634.75ms +step:7181/57344 train_time:4557507ms step_avg:634.66ms +step:7182/57344 train_time:4557746ms step_avg:634.61ms +step:7183/57344 train_time:4558266ms step_avg:634.59ms +grad accum step:1796/14336 +step:7184/57344 train_time:4559493ms step_avg:634.67ms +step:7185/57344 train_time:4559510ms step_avg:634.59ms +step:7186/57344 train_time:4559750ms step_avg:634.53ms +step:7187/57344 train_time:4560274ms step_avg:634.52ms +grad accum step:1797/14336 +step:7188/57344 train_time:4561508ms step_avg:634.60ms +step:7189/57344 train_time:4561525ms step_avg:634.51ms +step:7190/57344 train_time:4561765ms step_avg:634.46ms +step:7191/57344 train_time:4562285ms step_avg:634.44ms +grad accum step:1798/14336 +step:7192/57344 train_time:4563512ms step_avg:634.53ms +step:7193/57344 train_time:4563529ms step_avg:634.44ms +step:7194/57344 train_time:4563766ms step_avg:634.39ms +step:7195/57344 train_time:4564288ms step_avg:634.37ms +grad accum step:1799/14336 +step:7196/57344 train_time:4565524ms step_avg:634.45ms +step:7197/57344 train_time:4565541ms step_avg:634.37ms +step:7198/57344 train_time:4565781ms step_avg:634.31ms +step:7199/57344 train_time:4566304ms step_avg:634.30ms +grad accum step:1800/14336 +step:7200/57344 train_time:4567563ms step_avg:634.38ms +step:7201/57344 train_time:4567580ms step_avg:634.30ms +step:7202/57344 train_time:4567820ms step_avg:634.24ms +step:7203/57344 train_time:4568339ms step_avg:634.23ms +grad accum step:1801/14336 +step:7204/57344 train_time:4569568ms step_avg:634.31ms +step:7205/57344 train_time:4569585ms step_avg:634.22ms +step:7206/57344 train_time:4569827ms step_avg:634.17ms +step:7207/57344 train_time:4570347ms step_avg:634.15ms +grad accum step:1802/14336 +step:7208/57344 train_time:4571571ms step_avg:634.24ms +step:7209/57344 train_time:4571589ms step_avg:634.15ms +step:7210/57344 train_time:4571826ms step_avg:634.10ms +step:7211/57344 train_time:4572342ms step_avg:634.08ms +grad accum step:1803/14336 +step:7212/57344 train_time:4573587ms step_avg:634.16ms +step:7213/57344 train_time:4573604ms step_avg:634.08ms +step:7214/57344 train_time:4573841ms step_avg:634.02ms +step:7215/57344 train_time:4574360ms step_avg:634.01ms +grad accum step:1804/14336 +step:7216/57344 train_time:4575594ms step_avg:634.09ms +step:7217/57344 train_time:4575611ms step_avg:634.00ms +step:7218/57344 train_time:4575850ms step_avg:633.95ms +step:7219/57344 train_time:4576372ms step_avg:633.93ms +grad accum step:1805/14336 +step:7220/57344 train_time:4577606ms step_avg:634.02ms +step:7221/57344 train_time:4577623ms step_avg:633.93ms +step:7222/57344 train_time:4577861ms step_avg:633.88ms +step:7223/57344 train_time:4578377ms step_avg:633.86ms +grad accum step:1806/14336 +step:7224/57344 train_time:4579620ms step_avg:633.95ms +step:7225/57344 train_time:4579637ms step_avg:633.86ms +step:7226/57344 train_time:4579879ms step_avg:633.81ms +step:7227/57344 train_time:4580404ms step_avg:633.79ms +grad accum step:1807/14336 +step:7228/57344 train_time:4581641ms step_avg:633.87ms +step:7229/57344 train_time:4581658ms step_avg:633.79ms +step:7230/57344 train_time:4581897ms step_avg:633.73ms +step:7231/57344 train_time:4582421ms step_avg:633.72ms +grad accum step:1808/14336 +step:7232/57344 train_time:4583655ms step_avg:633.80ms +step:7232/57344 val_loss:7.317104 train_time:4583656ms step_avg:633.80ms +step:7233/57344 train_time:4583668ms step_avg:633.72ms +step:7234/57344 train_time:4583882ms step_avg:633.66ms +step:7235/57344 train_time:4584403ms step_avg:633.64ms +grad accum step:1809/14336 +step:7236/57344 train_time:4585632ms step_avg:633.72ms +step:7237/57344 train_time:4585649ms step_avg:633.64ms +step:7238/57344 train_time:4585889ms step_avg:633.59ms +step:7239/57344 train_time:4586410ms step_avg:633.57ms +grad accum step:1810/14336 +step:7240/57344 train_time:4587643ms step_avg:633.65ms +step:7241/57344 train_time:4587661ms step_avg:633.57ms +step:7242/57344 train_time:4587903ms step_avg:633.51ms +step:7243/57344 train_time:4588426ms step_avg:633.50ms +grad accum step:1811/14336 +step:7244/57344 train_time:4589658ms step_avg:633.58ms +step:7245/57344 train_time:4589675ms step_avg:633.50ms +step:7246/57344 train_time:4589916ms step_avg:633.44ms +step:7247/57344 train_time:4590441ms step_avg:633.43ms +grad accum step:1812/14336 +step:7248/57344 train_time:4591675ms step_avg:633.51ms +step:7249/57344 train_time:4591692ms step_avg:633.42ms +step:7250/57344 train_time:4591931ms step_avg:633.37ms +step:7251/57344 train_time:4592450ms step_avg:633.35ms +grad accum step:1813/14336 +step:7252/57344 train_time:4593689ms step_avg:633.44ms +step:7253/57344 train_time:4593707ms step_avg:633.35ms +step:7254/57344 train_time:4593947ms step_avg:633.30ms +step:7255/57344 train_time:4594470ms step_avg:633.28ms +grad accum step:1814/14336 +step:7256/57344 train_time:4595707ms step_avg:633.37ms +step:7257/57344 train_time:4595724ms step_avg:633.28ms +step:7258/57344 train_time:4595965ms step_avg:633.23ms +step:7259/57344 train_time:4596493ms step_avg:633.21ms +grad accum step:1815/14336 +step:7260/57344 train_time:4597727ms step_avg:633.30ms +step:7261/57344 train_time:4597744ms step_avg:633.21ms +step:7262/57344 train_time:4597982ms step_avg:633.16ms +step:7263/57344 train_time:4598501ms step_avg:633.14ms +grad accum step:1816/14336 +step:7264/57344 train_time:4599735ms step_avg:633.22ms +step:7265/57344 train_time:4599752ms step_avg:633.14ms +step:7266/57344 train_time:4599993ms step_avg:633.08ms +step:7267/57344 train_time:4600520ms step_avg:633.07ms +grad accum step:1817/14336 +step:7268/57344 train_time:4601782ms step_avg:633.16ms +step:7269/57344 train_time:4601799ms step_avg:633.07ms +step:7270/57344 train_time:4602040ms step_avg:633.02ms +step:7271/57344 train_time:4602572ms step_avg:633.00ms +grad accum step:1818/14336 +step:7272/57344 train_time:4603795ms step_avg:633.09ms +step:7273/57344 train_time:4603812ms step_avg:633.00ms +step:7274/57344 train_time:4604051ms step_avg:632.95ms +step:7275/57344 train_time:4604573ms step_avg:632.93ms +grad accum step:1819/14336 +step:7276/57344 train_time:4605799ms step_avg:633.01ms +step:7277/57344 train_time:4605816ms step_avg:632.93ms +step:7278/57344 train_time:4606055ms step_avg:632.87ms +step:7279/57344 train_time:4606576ms step_avg:632.86ms +grad accum step:1820/14336 +step:7280/57344 train_time:4607825ms step_avg:632.94ms +step:7281/57344 train_time:4607842ms step_avg:632.86ms +step:7282/57344 train_time:4608083ms step_avg:632.80ms +step:7283/57344 train_time:4608606ms step_avg:632.79ms +grad accum step:1821/14336 +step:7284/57344 train_time:4609855ms step_avg:632.87ms +step:7285/57344 train_time:4609872ms step_avg:632.79ms +step:7286/57344 train_time:4610111ms step_avg:632.74ms +step:7287/57344 train_time:4610632ms step_avg:632.72ms +grad accum step:1822/14336 +step:7288/57344 train_time:4611871ms step_avg:632.80ms +step:7289/57344 train_time:4611888ms step_avg:632.72ms +step:7290/57344 train_time:4612125ms step_avg:632.66ms +step:7291/57344 train_time:4612642ms step_avg:632.65ms +grad accum step:1823/14336 +step:7292/57344 train_time:4613872ms step_avg:632.73ms +step:7293/57344 train_time:4613889ms step_avg:632.65ms +step:7294/57344 train_time:4614129ms step_avg:632.59ms +step:7295/57344 train_time:4614646ms step_avg:632.58ms +grad accum step:1824/14336 +step:7296/57344 train_time:4615875ms step_avg:632.66ms +step:7296/57344 val_loss:7.306343 train_time:4615876ms step_avg:632.66ms +step:7297/57344 train_time:4615888ms step_avg:632.57ms +step:7298/57344 train_time:4616106ms step_avg:632.52ms +step:7299/57344 train_time:4616629ms step_avg:632.50ms +grad accum step:1825/14336 +step:7300/57344 train_time:4617867ms step_avg:632.58ms +step:7301/57344 train_time:4617884ms step_avg:632.50ms +step:7302/57344 train_time:4618123ms step_avg:632.45ms +step:7303/57344 train_time:4618648ms step_avg:632.43ms +grad accum step:1826/14336 +step:7304/57344 train_time:4619890ms step_avg:632.52ms +step:7305/57344 train_time:4619906ms step_avg:632.43ms +step:7306/57344 train_time:4620144ms step_avg:632.38ms +step:7307/57344 train_time:4620664ms step_avg:632.36ms +grad accum step:1827/14336 +step:7308/57344 train_time:4621883ms step_avg:632.44ms +step:7309/57344 train_time:4621901ms step_avg:632.36ms +step:7310/57344 train_time:4622141ms step_avg:632.30ms +step:7311/57344 train_time:4622666ms step_avg:632.29ms +grad accum step:1828/14336 +step:7312/57344 train_time:4623900ms step_avg:632.37ms +step:7313/57344 train_time:4623917ms step_avg:632.29ms +step:7314/57344 train_time:4624156ms step_avg:632.23ms +step:7315/57344 train_time:4624675ms step_avg:632.22ms +grad accum step:1829/14336 +step:7316/57344 train_time:4625902ms step_avg:632.30ms +step:7317/57344 train_time:4625919ms step_avg:632.22ms +step:7318/57344 train_time:4626158ms step_avg:632.16ms +step:7319/57344 train_time:4626680ms step_avg:632.15ms +grad accum step:1830/14336 +step:7320/57344 train_time:4627922ms step_avg:632.23ms +step:7321/57344 train_time:4627939ms step_avg:632.15ms +step:7322/57344 train_time:4628178ms step_avg:632.09ms +step:7323/57344 train_time:4628700ms step_avg:632.08ms +grad accum step:1831/14336 +step:7324/57344 train_time:4629928ms step_avg:632.16ms +step:7325/57344 train_time:4629945ms step_avg:632.07ms +step:7326/57344 train_time:4630186ms step_avg:632.02ms +step:7327/57344 train_time:4630714ms step_avg:632.01ms +grad accum step:1832/14336 +step:7328/57344 train_time:4631956ms step_avg:632.09ms +step:7329/57344 train_time:4631973ms step_avg:632.01ms +step:7330/57344 train_time:4632213ms step_avg:631.95ms +step:7331/57344 train_time:4632731ms step_avg:631.94ms +grad accum step:1833/14336 +step:7332/57344 train_time:4633956ms step_avg:632.02ms +step:7333/57344 train_time:4633973ms step_avg:631.93ms +step:7334/57344 train_time:4634214ms step_avg:631.88ms +step:7335/57344 train_time:4634737ms step_avg:631.87ms +grad accum step:1834/14336 +step:7336/57344 train_time:4635978ms step_avg:631.95ms +step:7337/57344 train_time:4635995ms step_avg:631.87ms +step:7338/57344 train_time:4636234ms step_avg:631.81ms +step:7339/57344 train_time:4636761ms step_avg:631.80ms +grad accum step:1835/14336 +step:7340/57344 train_time:4638002ms step_avg:631.88ms +step:7341/57344 train_time:4638020ms step_avg:631.80ms +step:7342/57344 train_time:4638258ms step_avg:631.74ms +step:7343/57344 train_time:4638782ms step_avg:631.73ms +grad accum step:1836/14336 +step:7344/57344 train_time:4640009ms step_avg:631.81ms +step:7345/57344 train_time:4640026ms step_avg:631.73ms +step:7346/57344 train_time:4640267ms step_avg:631.67ms +step:7347/57344 train_time:4640789ms step_avg:631.66ms +grad accum step:1837/14336 +step:7348/57344 train_time:4642012ms step_avg:631.74ms +step:7349/57344 train_time:4642030ms step_avg:631.65ms +step:7350/57344 train_time:4642269ms step_avg:631.60ms +step:7351/57344 train_time:4642788ms step_avg:631.59ms +grad accum step:1838/14336 +step:7352/57344 train_time:4644015ms step_avg:631.67ms +step:7353/57344 train_time:4644033ms step_avg:631.58ms +step:7354/57344 train_time:4644272ms step_avg:631.53ms +step:7355/57344 train_time:4644792ms step_avg:631.51ms +grad accum step:1839/14336 +step:7356/57344 train_time:4646032ms step_avg:631.60ms +step:7357/57344 train_time:4646049ms step_avg:631.51ms +step:7358/57344 train_time:4646290ms step_avg:631.46ms +step:7359/57344 train_time:4646816ms step_avg:631.45ms +grad accum step:1840/14336 +step:7360/57344 train_time:4648062ms step_avg:631.53ms +step:7360/57344 val_loss:7.293196 train_time:4648062ms step_avg:631.53ms +step:7361/57344 train_time:4648074ms step_avg:631.45ms +step:7362/57344 train_time:4648293ms step_avg:631.39ms +step:7363/57344 train_time:4648821ms step_avg:631.38ms +grad accum step:1841/14336 +step:7364/57344 train_time:4650052ms step_avg:631.46ms +step:7365/57344 train_time:4650069ms step_avg:631.37ms +step:7366/57344 train_time:4650309ms step_avg:631.32ms +step:7367/57344 train_time:4650829ms step_avg:631.31ms +grad accum step:1842/14336 +step:7368/57344 train_time:4652060ms step_avg:631.39ms +step:7369/57344 train_time:4652077ms step_avg:631.30ms +step:7370/57344 train_time:4652316ms step_avg:631.25ms +step:7371/57344 train_time:4652838ms step_avg:631.24ms +grad accum step:1843/14336 +step:7372/57344 train_time:4654065ms step_avg:631.32ms +step:7373/57344 train_time:4654082ms step_avg:631.23ms +step:7374/57344 train_time:4654321ms step_avg:631.18ms +step:7375/57344 train_time:4654844ms step_avg:631.17ms +grad accum step:1844/14336 +step:7376/57344 train_time:4656076ms step_avg:631.25ms +step:7377/57344 train_time:4656093ms step_avg:631.16ms +step:7378/57344 train_time:4656337ms step_avg:631.11ms +step:7379/57344 train_time:4656871ms step_avg:631.10ms +grad accum step:1845/14336 +step:7380/57344 train_time:4658099ms step_avg:631.18ms +step:7381/57344 train_time:4658116ms step_avg:631.10ms +step:7382/57344 train_time:4658355ms step_avg:631.04ms +step:7383/57344 train_time:4658877ms step_avg:631.03ms +grad accum step:1846/14336 +step:7384/57344 train_time:4660112ms step_avg:631.11ms +step:7385/57344 train_time:4660129ms step_avg:631.03ms +step:7386/57344 train_time:4660366ms step_avg:630.97ms +step:7387/57344 train_time:4660888ms step_avg:630.96ms +grad accum step:1847/14336 +step:7388/57344 train_time:4662149ms step_avg:631.04ms +step:7389/57344 train_time:4662166ms step_avg:630.96ms +step:7390/57344 train_time:4662406ms step_avg:630.91ms +step:7391/57344 train_time:4662926ms step_avg:630.89ms +grad accum step:1848/14336 +step:7392/57344 train_time:4664169ms step_avg:630.98ms +step:7393/57344 train_time:4664187ms step_avg:630.89ms +step:7394/57344 train_time:4664425ms step_avg:630.84ms +step:7395/57344 train_time:4664946ms step_avg:630.82ms +grad accum step:1849/14336 +step:7396/57344 train_time:4666177ms step_avg:630.91ms +step:7397/57344 train_time:4666194ms step_avg:630.82ms +step:7398/57344 train_time:4666433ms step_avg:630.77ms +step:7399/57344 train_time:4666953ms step_avg:630.75ms +grad accum step:1850/14336 +step:7400/57344 train_time:4668188ms step_avg:630.84ms +step:7401/57344 train_time:4668205ms step_avg:630.75ms +step:7402/57344 train_time:4668444ms step_avg:630.70ms +step:7403/57344 train_time:4668968ms step_avg:630.69ms +grad accum step:1851/14336 +step:7404/57344 train_time:4670227ms step_avg:630.77ms +step:7405/57344 train_time:4670244ms step_avg:630.69ms +step:7406/57344 train_time:4670486ms step_avg:630.64ms +step:7407/57344 train_time:4671015ms step_avg:630.62ms +grad accum step:1852/14336 +step:7408/57344 train_time:4672248ms step_avg:630.70ms +step:7409/57344 train_time:4672265ms step_avg:630.62ms +step:7410/57344 train_time:4672503ms step_avg:630.57ms +step:7411/57344 train_time:4673018ms step_avg:630.55ms +grad accum step:1853/14336 +step:7412/57344 train_time:4674246ms step_avg:630.63ms +step:7413/57344 train_time:4674264ms step_avg:630.55ms +step:7414/57344 train_time:4674503ms step_avg:630.50ms +step:7415/57344 train_time:4675024ms step_avg:630.48ms +grad accum step:1854/14336 +step:7416/57344 train_time:4676251ms step_avg:630.56ms +step:7417/57344 train_time:4676268ms step_avg:630.48ms +step:7418/57344 train_time:4676507ms step_avg:630.43ms +step:7419/57344 train_time:4677026ms step_avg:630.41ms +grad accum step:1855/14336 +step:7420/57344 train_time:4678254ms step_avg:630.49ms +step:7421/57344 train_time:4678304ms step_avg:630.41ms +step:7422/57344 train_time:4678517ms step_avg:630.36ms +step:7423/57344 train_time:4679039ms step_avg:630.34ms +grad accum step:1856/14336 +step:7424/57344 train_time:4680292ms step_avg:630.43ms +step:7424/57344 val_loss:7.285810 train_time:4680293ms step_avg:630.43ms +step:7425/57344 train_time:4680305ms step_avg:630.34ms +step:7426/57344 train_time:4680521ms step_avg:630.29ms +step:7427/57344 train_time:4681047ms step_avg:630.27ms +grad accum step:1857/14336 +step:7428/57344 train_time:4682287ms step_avg:630.36ms +step:7429/57344 train_time:4682304ms step_avg:630.27ms +step:7430/57344 train_time:4682547ms step_avg:630.22ms +step:7431/57344 train_time:4683080ms step_avg:630.21ms +grad accum step:1858/14336 +step:7432/57344 train_time:4684307ms step_avg:630.29ms +step:7433/57344 train_time:4684324ms step_avg:630.21ms +step:7434/57344 train_time:4684565ms step_avg:630.15ms +step:7435/57344 train_time:4685088ms step_avg:630.14ms +grad accum step:1859/14336 +step:7436/57344 train_time:4686316ms step_avg:630.22ms +step:7437/57344 train_time:4686333ms step_avg:630.14ms +step:7438/57344 train_time:4686574ms step_avg:630.09ms +step:7439/57344 train_time:4687100ms step_avg:630.07ms +grad accum step:1860/14336 +step:7440/57344 train_time:4688340ms step_avg:630.15ms +step:7441/57344 train_time:4688357ms step_avg:630.07ms +step:7442/57344 train_time:4688597ms step_avg:630.02ms +step:7443/57344 train_time:4689120ms step_avg:630.00ms +grad accum step:1861/14336 +step:7444/57344 train_time:4690355ms step_avg:630.09ms +step:7445/57344 train_time:4690372ms step_avg:630.00ms +step:7446/57344 train_time:4690613ms step_avg:629.95ms +step:7447/57344 train_time:4691137ms step_avg:629.94ms +grad accum step:1862/14336 +step:7448/57344 train_time:4692372ms step_avg:630.02ms +step:7449/57344 train_time:4692390ms step_avg:629.94ms +step:7450/57344 train_time:4692628ms step_avg:629.88ms +step:7451/57344 train_time:4693145ms step_avg:629.87ms +grad accum step:1863/14336 +step:7452/57344 train_time:4694377ms step_avg:629.95ms +step:7453/57344 train_time:4694395ms step_avg:629.87ms +step:7454/57344 train_time:4694638ms step_avg:629.81ms +step:7455/57344 train_time:4695167ms step_avg:629.80ms +grad accum step:1864/14336 +step:7456/57344 train_time:4696399ms step_avg:629.88ms +step:7457/57344 train_time:4696416ms step_avg:629.80ms +step:7458/57344 train_time:4696653ms step_avg:629.75ms +step:7459/57344 train_time:4697172ms step_avg:629.73ms +grad accum step:1865/14336 +step:7460/57344 train_time:4698413ms step_avg:629.81ms +step:7461/57344 train_time:4698430ms step_avg:629.73ms +step:7462/57344 train_time:4698670ms step_avg:629.68ms +step:7463/57344 train_time:4699191ms step_avg:629.67ms +grad accum step:1866/14336 +step:7464/57344 train_time:4700428ms step_avg:629.75ms +step:7465/57344 train_time:4700446ms step_avg:629.66ms +step:7466/57344 train_time:4700688ms step_avg:629.61ms +step:7467/57344 train_time:4701212ms step_avg:629.60ms +grad accum step:1867/14336 +step:7468/57344 train_time:4702451ms step_avg:629.68ms +step:7469/57344 train_time:4702468ms step_avg:629.60ms +step:7470/57344 train_time:4702709ms step_avg:629.55ms +step:7471/57344 train_time:4703230ms step_avg:629.53ms +grad accum step:1868/14336 +step:7472/57344 train_time:4704455ms step_avg:629.61ms +step:7473/57344 train_time:4704472ms step_avg:629.53ms +step:7474/57344 train_time:4704711ms step_avg:629.48ms +step:7475/57344 train_time:4705233ms step_avg:629.46ms +grad accum step:1869/14336 +step:7476/57344 train_time:4706493ms step_avg:629.55ms +step:7477/57344 train_time:4706510ms step_avg:629.47ms +step:7478/57344 train_time:4706750ms step_avg:629.41ms +step:7479/57344 train_time:4707278ms step_avg:629.40ms +grad accum step:1870/14336 +step:7480/57344 train_time:4708516ms step_avg:629.48ms +step:7481/57344 train_time:4708533ms step_avg:629.40ms +step:7482/57344 train_time:4708774ms step_avg:629.35ms +step:7483/57344 train_time:4709302ms step_avg:629.33ms +grad accum step:1871/14336 +step:7484/57344 train_time:4710563ms step_avg:629.42ms +step:7485/57344 train_time:4710580ms step_avg:629.34ms +step:7486/57344 train_time:4710821ms step_avg:629.28ms +step:7487/57344 train_time:4711344ms step_avg:629.27ms +grad accum step:1872/14336 +step:7488/57344 train_time:4712571ms step_avg:629.35ms +step:7488/57344 val_loss:7.275821 train_time:4712571ms step_avg:629.35ms +step:7489/57344 train_time:4712583ms step_avg:629.27ms +step:7490/57344 train_time:4712801ms step_avg:629.21ms +step:7491/57344 train_time:4713330ms step_avg:629.20ms +grad accum step:1873/14336 +step:7492/57344 train_time:4714559ms step_avg:629.28ms +step:7493/57344 train_time:4714576ms step_avg:629.20ms +step:7494/57344 train_time:4714816ms step_avg:629.15ms +step:7495/57344 train_time:4715339ms step_avg:629.13ms +grad accum step:1874/14336 +step:7496/57344 train_time:4716573ms step_avg:629.21ms +step:7497/57344 train_time:4716590ms step_avg:629.13ms +step:7498/57344 train_time:4716831ms step_avg:629.08ms +step:7499/57344 train_time:4717354ms step_avg:629.06ms +grad accum step:1875/14336 +step:7500/57344 train_time:4718594ms step_avg:629.15ms +step:7501/57344 train_time:4718611ms step_avg:629.06ms +step:7502/57344 train_time:4718851ms step_avg:629.01ms +step:7503/57344 train_time:4719377ms step_avg:629.00ms +grad accum step:1876/14336 +step:7504/57344 train_time:4720608ms step_avg:629.08ms +step:7505/57344 train_time:4720625ms step_avg:629.00ms +step:7506/57344 train_time:4720868ms step_avg:628.95ms +step:7507/57344 train_time:4721399ms step_avg:628.93ms +grad accum step:1877/14336 +step:7508/57344 train_time:4722620ms step_avg:629.01ms +step:7509/57344 train_time:4722637ms step_avg:628.93ms +step:7510/57344 train_time:4722880ms step_avg:628.88ms +step:7511/57344 train_time:4723402ms step_avg:628.86ms +grad accum step:1878/14336 +step:7512/57344 train_time:4724647ms step_avg:628.95ms +step:7513/57344 train_time:4724664ms step_avg:628.87ms +step:7514/57344 train_time:4724903ms step_avg:628.81ms +step:7515/57344 train_time:4725423ms step_avg:628.80ms +grad accum step:1879/14336 +step:7516/57344 train_time:4726658ms step_avg:628.88ms +step:7517/57344 train_time:4726675ms step_avg:628.80ms +step:7518/57344 train_time:4726914ms step_avg:628.75ms +step:7519/57344 train_time:4727436ms step_avg:628.73ms +grad accum step:1880/14336 +step:7520/57344 train_time:4728676ms step_avg:628.81ms +step:7521/57344 train_time:4728693ms step_avg:628.73ms +step:7522/57344 train_time:4728933ms step_avg:628.68ms +step:7523/57344 train_time:4729457ms step_avg:628.67ms +grad accum step:1881/14336 +step:7524/57344 train_time:4730698ms step_avg:628.75ms +step:7525/57344 train_time:4730714ms step_avg:628.67ms +step:7526/57344 train_time:4730952ms step_avg:628.61ms +step:7527/57344 train_time:4731469ms step_avg:628.60ms +grad accum step:1882/14336 +step:7528/57344 train_time:4732701ms step_avg:628.68ms +step:7529/57344 train_time:4732718ms step_avg:628.60ms +step:7530/57344 train_time:4732959ms step_avg:628.55ms +step:7531/57344 train_time:4733478ms step_avg:628.53ms +grad accum step:1883/14336 +step:7532/57344 train_time:4734721ms step_avg:628.61ms +step:7533/57344 train_time:4734738ms step_avg:628.53ms +step:7534/57344 train_time:4734981ms step_avg:628.48ms +step:7535/57344 train_time:4735503ms step_avg:628.47ms +grad accum step:1884/14336 +step:7536/57344 train_time:4736855ms step_avg:628.56ms +step:7537/57344 train_time:4736905ms step_avg:628.49ms +step:7538/57344 train_time:4737117ms step_avg:628.43ms +step:7539/57344 train_time:4737644ms step_avg:628.42ms +grad accum step:1885/14336 +step:7540/57344 train_time:4738889ms step_avg:628.50ms +step:7541/57344 train_time:4738906ms step_avg:628.42ms +step:7542/57344 train_time:4739147ms step_avg:628.37ms +step:7543/57344 train_time:4739668ms step_avg:628.35ms +grad accum step:1886/14336 +step:7544/57344 train_time:4740914ms step_avg:628.44ms +step:7545/57344 train_time:4740931ms step_avg:628.35ms +step:7546/57344 train_time:4741169ms step_avg:628.30ms +step:7547/57344 train_time:4741687ms step_avg:628.29ms +grad accum step:1887/14336 +step:7548/57344 train_time:4742917ms step_avg:628.37ms +step:7549/57344 train_time:4742934ms step_avg:628.29ms +step:7550/57344 train_time:4743172ms step_avg:628.23ms +step:7551/57344 train_time:4743692ms step_avg:628.22ms +grad accum step:1888/14336 +step:7552/57344 train_time:4744934ms step_avg:628.30ms +step:7552/57344 val_loss:7.248770 train_time:4744935ms step_avg:628.30ms +step:7553/57344 train_time:4744947ms step_avg:628.22ms +step:7554/57344 train_time:4745164ms step_avg:628.17ms +step:7555/57344 train_time:4745685ms step_avg:628.15ms +grad accum step:1889/14336 +step:7556/57344 train_time:4746935ms step_avg:628.23ms +step:7557/57344 train_time:4746952ms step_avg:628.15ms +step:7558/57344 train_time:4747194ms step_avg:628.10ms +step:7559/57344 train_time:4747720ms step_avg:628.09ms +grad accum step:1890/14336 +step:7560/57344 train_time:4748962ms step_avg:628.17ms +step:7561/57344 train_time:4748979ms step_avg:628.09ms +step:7562/57344 train_time:4749218ms step_avg:628.04ms +step:7563/57344 train_time:4749739ms step_avg:628.02ms +grad accum step:1891/14336 +step:7564/57344 train_time:4750984ms step_avg:628.10ms +step:7565/57344 train_time:4751001ms step_avg:628.02ms +step:7566/57344 train_time:4751240ms step_avg:627.97ms +step:7567/57344 train_time:4751762ms step_avg:627.96ms +grad accum step:1892/14336 +step:7568/57344 train_time:4753001ms step_avg:628.04ms +step:7569/57344 train_time:4753018ms step_avg:627.96ms +step:7570/57344 train_time:4753261ms step_avg:627.91ms +step:7571/57344 train_time:4753790ms step_avg:627.89ms +grad accum step:1893/14336 +step:7572/57344 train_time:4755048ms step_avg:627.98ms +step:7573/57344 train_time:4755066ms step_avg:627.90ms +step:7574/57344 train_time:4755306ms step_avg:627.85ms +step:7575/57344 train_time:4755828ms step_avg:627.83ms +grad accum step:1894/14336 +step:7576/57344 train_time:4757077ms step_avg:627.91ms +step:7577/57344 train_time:4757094ms step_avg:627.83ms +step:7578/57344 train_time:4757335ms step_avg:627.78ms +step:7579/57344 train_time:4757858ms step_avg:627.77ms +grad accum step:1895/14336 +step:7580/57344 train_time:4759100ms step_avg:627.85ms +step:7581/57344 train_time:4759117ms step_avg:627.77ms +step:7582/57344 train_time:4759358ms step_avg:627.72ms +step:7583/57344 train_time:4759882ms step_avg:627.70ms +grad accum step:1896/14336 +step:7584/57344 train_time:4761125ms step_avg:627.79ms +step:7585/57344 train_time:4761142ms step_avg:627.70ms +step:7586/57344 train_time:4761379ms step_avg:627.65ms +step:7587/57344 train_time:4761900ms step_avg:627.64ms +grad accum step:1897/14336 +step:7588/57344 train_time:4763143ms step_avg:627.72ms +step:7589/57344 train_time:4763161ms step_avg:627.64ms +step:7590/57344 train_time:4763403ms step_avg:627.59ms +step:7591/57344 train_time:4763933ms step_avg:627.58ms +grad accum step:1898/14336 +step:7592/57344 train_time:4765169ms step_avg:627.66ms +step:7593/57344 train_time:4765186ms step_avg:627.58ms +step:7594/57344 train_time:4765428ms step_avg:627.53ms +step:7595/57344 train_time:4765951ms step_avg:627.51ms +grad accum step:1899/14336 +step:7596/57344 train_time:4767192ms step_avg:627.59ms +step:7597/57344 train_time:4767210ms step_avg:627.51ms +step:7598/57344 train_time:4767450ms step_avg:627.46ms +step:7599/57344 train_time:4767974ms step_avg:627.45ms +grad accum step:1900/14336 +step:7600/57344 train_time:4769219ms step_avg:627.53ms +step:7601/57344 train_time:4769236ms step_avg:627.45ms +step:7602/57344 train_time:4769475ms step_avg:627.40ms +step:7603/57344 train_time:4769993ms step_avg:627.38ms +grad accum step:1901/14336 +step:7604/57344 train_time:4771223ms step_avg:627.46ms +step:7605/57344 train_time:4771240ms step_avg:627.38ms +step:7606/57344 train_time:4771483ms step_avg:627.33ms +step:7607/57344 train_time:4772007ms step_avg:627.32ms +grad accum step:1902/14336 +step:7608/57344 train_time:4773241ms step_avg:627.40ms +step:7609/57344 train_time:4773258ms step_avg:627.32ms +step:7610/57344 train_time:4773497ms step_avg:627.27ms +step:7611/57344 train_time:4774020ms step_avg:627.25ms +grad accum step:1903/14336 +step:7612/57344 train_time:4775264ms step_avg:627.33ms +step:7613/57344 train_time:4775282ms step_avg:627.25ms +step:7614/57344 train_time:4775524ms step_avg:627.20ms +step:7615/57344 train_time:4776050ms step_avg:627.19ms +grad accum step:1904/14336 +step:7616/57344 train_time:4777303ms step_avg:627.27ms +step:7616/57344 val_loss:7.234798 train_time:4777304ms step_avg:627.27ms +step:7617/57344 train_time:4777316ms step_avg:627.19ms +step:7618/57344 train_time:4777532ms step_avg:627.14ms +step:7619/57344 train_time:4778054ms step_avg:627.12ms +grad accum step:1905/14336 +step:7620/57344 train_time:4779305ms step_avg:627.21ms +step:7621/57344 train_time:4779322ms step_avg:627.13ms +step:7622/57344 train_time:4779564ms step_avg:627.07ms +step:7623/57344 train_time:4780093ms step_avg:627.06ms +grad accum step:1906/14336 +step:7624/57344 train_time:4781334ms step_avg:627.14ms +step:7625/57344 train_time:4781351ms step_avg:627.06ms +step:7626/57344 train_time:4781591ms step_avg:627.01ms +step:7627/57344 train_time:4782105ms step_avg:627.00ms +grad accum step:1907/14336 +step:7628/57344 train_time:4783330ms step_avg:627.08ms +step:7629/57344 train_time:4783347ms step_avg:627.00ms +step:7630/57344 train_time:4783586ms step_avg:626.94ms +step:7631/57344 train_time:4784107ms step_avg:626.93ms +grad accum step:1908/14336 +step:7632/57344 train_time:4785343ms step_avg:627.01ms +step:7633/57344 train_time:4785360ms step_avg:626.93ms +step:7634/57344 train_time:4785602ms step_avg:626.88ms +step:7635/57344 train_time:4786134ms step_avg:626.87ms +grad accum step:1909/14336 +step:7636/57344 train_time:4787374ms step_avg:626.95ms +step:7637/57344 train_time:4787391ms step_avg:626.87ms +step:7638/57344 train_time:4787632ms step_avg:626.82ms +step:7639/57344 train_time:4788155ms step_avg:626.80ms +grad accum step:1910/14336 +step:7640/57344 train_time:4789410ms step_avg:626.89ms +step:7641/57344 train_time:4789427ms step_avg:626.81ms +step:7642/57344 train_time:4789671ms step_avg:626.76ms +step:7643/57344 train_time:4790203ms step_avg:626.74ms +grad accum step:1911/14336 +step:7644/57344 train_time:4791450ms step_avg:626.83ms +step:7645/57344 train_time:4791467ms step_avg:626.75ms +step:7646/57344 train_time:4791709ms step_avg:626.69ms +step:7647/57344 train_time:4792244ms step_avg:626.68ms +grad accum step:1912/14336 +step:7648/57344 train_time:4793481ms step_avg:626.76ms +step:7649/57344 train_time:4793498ms step_avg:626.68ms +step:7650/57344 train_time:4793736ms step_avg:626.63ms +step:7651/57344 train_time:4794257ms step_avg:626.62ms +grad accum step:1913/14336 +step:7652/57344 train_time:4795487ms step_avg:626.70ms +step:7653/57344 train_time:4795504ms step_avg:626.62ms +step:7654/57344 train_time:4795743ms step_avg:626.57ms +step:7655/57344 train_time:4796265ms step_avg:626.55ms +grad accum step:1914/14336 +step:7656/57344 train_time:4797526ms step_avg:626.64ms +step:7657/57344 train_time:4797543ms step_avg:626.56ms +step:7658/57344 train_time:4797783ms step_avg:626.51ms +step:7659/57344 train_time:4798307ms step_avg:626.49ms +grad accum step:1915/14336 +step:7660/57344 train_time:4799560ms step_avg:626.57ms +step:7661/57344 train_time:4799577ms step_avg:626.49ms +step:7662/57344 train_time:4799816ms step_avg:626.44ms +step:7663/57344 train_time:4800335ms step_avg:626.43ms +grad accum step:1916/14336 +step:7664/57344 train_time:4801576ms step_avg:626.51ms +step:7665/57344 train_time:4801593ms step_avg:626.43ms +step:7666/57344 train_time:4801833ms step_avg:626.38ms +step:7667/57344 train_time:4802355ms step_avg:626.37ms +grad accum step:1917/14336 +step:7668/57344 train_time:4803598ms step_avg:626.45ms +step:7669/57344 train_time:4803615ms step_avg:626.37ms +step:7670/57344 train_time:4803854ms step_avg:626.32ms +step:7671/57344 train_time:4804376ms step_avg:626.30ms +grad accum step:1918/14336 +step:7672/57344 train_time:4805603ms step_avg:626.38ms +step:7673/57344 train_time:4805620ms step_avg:626.30ms +step:7674/57344 train_time:4805859ms step_avg:626.25ms +step:7675/57344 train_time:4806385ms step_avg:626.24ms +grad accum step:1919/14336 +step:7676/57344 train_time:4807625ms step_avg:626.32ms +step:7677/57344 train_time:4807642ms step_avg:626.24ms +step:7678/57344 train_time:4807882ms step_avg:626.19ms +step:7679/57344 train_time:4808408ms step_avg:626.18ms +grad accum step:1920/14336 +step:7680/57344 train_time:4809649ms step_avg:626.26ms +step:7680/57344 val_loss:7.221576 train_time:4809649ms step_avg:626.26ms +step:7681/57344 train_time:4809661ms step_avg:626.18ms +step:7682/57344 train_time:4809879ms step_avg:626.12ms +step:7683/57344 train_time:4810407ms step_avg:626.11ms +grad accum step:1921/14336 +step:7684/57344 train_time:4811656ms step_avg:626.19ms +step:7685/57344 train_time:4811673ms step_avg:626.11ms +step:7686/57344 train_time:4811913ms step_avg:626.06ms +step:7687/57344 train_time:4812437ms step_avg:626.05ms +grad accum step:1922/14336 +step:7688/57344 train_time:4813675ms step_avg:626.13ms +step:7689/57344 train_time:4813693ms step_avg:626.05ms +step:7690/57344 train_time:4813933ms step_avg:626.00ms +step:7691/57344 train_time:4814454ms step_avg:625.99ms +grad accum step:1923/14336 +step:7692/57344 train_time:4815679ms step_avg:626.06ms +step:7693/57344 train_time:4815696ms step_avg:625.98ms +step:7694/57344 train_time:4815934ms step_avg:625.93ms +step:7695/57344 train_time:4816456ms step_avg:625.92ms +grad accum step:1924/14336 +step:7696/57344 train_time:4817696ms step_avg:626.00ms +step:7697/57344 train_time:4817713ms step_avg:625.92ms +step:7698/57344 train_time:4817952ms step_avg:625.87ms +step:7699/57344 train_time:4818473ms step_avg:625.86ms +grad accum step:1925/14336 +step:7700/57344 train_time:4819708ms step_avg:625.94ms +step:7701/57344 train_time:4819725ms step_avg:625.86ms +step:7702/57344 train_time:4819967ms step_avg:625.81ms +step:7703/57344 train_time:4820492ms step_avg:625.79ms +grad accum step:1926/14336 +step:7704/57344 train_time:4821739ms step_avg:625.87ms +step:7705/57344 train_time:4821756ms step_avg:625.80ms +step:7706/57344 train_time:4821995ms step_avg:625.75ms +step:7707/57344 train_time:4822514ms step_avg:625.73ms +grad accum step:1927/14336 +step:7708/57344 train_time:4823746ms step_avg:625.81ms +step:7709/57344 train_time:4823763ms step_avg:625.73ms +step:7710/57344 train_time:4824003ms step_avg:625.68ms +step:7711/57344 train_time:4824523ms step_avg:625.67ms +grad accum step:1928/14336 +step:7712/57344 train_time:4825759ms step_avg:625.75ms +step:7713/57344 train_time:4825776ms step_avg:625.67ms +step:7714/57344 train_time:4826017ms step_avg:625.62ms +step:7715/57344 train_time:4826541ms step_avg:625.60ms +grad accum step:1929/14336 +step:7716/57344 train_time:4827770ms step_avg:625.68ms +step:7717/57344 train_time:4827787ms step_avg:625.60ms +step:7718/57344 train_time:4828027ms step_avg:625.55ms +step:7719/57344 train_time:4828551ms step_avg:625.54ms +grad accum step:1930/14336 +step:7720/57344 train_time:4829782ms step_avg:625.62ms +step:7721/57344 train_time:4829800ms step_avg:625.54ms +step:7722/57344 train_time:4830042ms step_avg:625.49ms +step:7723/57344 train_time:4830566ms step_avg:625.48ms +grad accum step:1931/14336 +step:7724/57344 train_time:4831803ms step_avg:625.56ms +step:7725/57344 train_time:4831821ms step_avg:625.48ms +step:7726/57344 train_time:4832058ms step_avg:625.43ms +step:7727/57344 train_time:4832582ms step_avg:625.42ms +grad accum step:1932/14336 +step:7728/57344 train_time:4833818ms step_avg:625.49ms +step:7729/57344 train_time:4833835ms step_avg:625.42ms +step:7730/57344 train_time:4834074ms step_avg:625.37ms +step:7731/57344 train_time:4834593ms step_avg:625.35ms +grad accum step:1933/14336 +step:7732/57344 train_time:4835836ms step_avg:625.43ms +step:7733/57344 train_time:4835854ms step_avg:625.35ms +step:7734/57344 train_time:4836093ms step_avg:625.30ms +step:7735/57344 train_time:4836617ms step_avg:625.29ms +grad accum step:1934/14336 +step:7736/57344 train_time:4837872ms step_avg:625.37ms +step:7737/57344 train_time:4837888ms step_avg:625.29ms +step:7738/57344 train_time:4838130ms step_avg:625.24ms +step:7739/57344 train_time:4838656ms step_avg:625.23ms +grad accum step:1935/14336 +step:7740/57344 train_time:4839891ms step_avg:625.31ms +step:7741/57344 train_time:4839907ms step_avg:625.23ms +step:7742/57344 train_time:4840144ms step_avg:625.18ms +step:7743/57344 train_time:4840662ms step_avg:625.17ms +grad accum step:1936/14336 +step:7744/57344 train_time:4841891ms step_avg:625.24ms +step:7744/57344 val_loss:7.207185 train_time:4841892ms step_avg:625.24ms +step:7745/57344 train_time:4841904ms step_avg:625.17ms +step:7746/57344 train_time:4842125ms step_avg:625.11ms +step:7747/57344 train_time:4842652ms step_avg:625.10ms +grad accum step:1937/14336 +step:7748/57344 train_time:4843877ms step_avg:625.18ms +step:7749/57344 train_time:4843894ms step_avg:625.10ms +step:7750/57344 train_time:4844132ms step_avg:625.05ms +step:7751/57344 train_time:4844652ms step_avg:625.04ms +grad accum step:1938/14336 +step:7752/57344 train_time:4845880ms step_avg:625.11ms +step:7753/57344 train_time:4845897ms step_avg:625.04ms +step:7754/57344 train_time:4846137ms step_avg:624.99ms +step:7755/57344 train_time:4846665ms step_avg:624.97ms +grad accum step:1939/14336 +step:7756/57344 train_time:4847918ms step_avg:625.05ms +step:7757/57344 train_time:4847935ms step_avg:624.98ms +step:7758/57344 train_time:4848176ms step_avg:624.93ms +step:7759/57344 train_time:4848697ms step_avg:624.91ms +grad accum step:1940/14336 +step:7760/57344 train_time:4849948ms step_avg:624.99ms +step:7761/57344 train_time:4849966ms step_avg:624.92ms +step:7762/57344 train_time:4850205ms step_avg:624.87ms +step:7763/57344 train_time:4850725ms step_avg:624.85ms +grad accum step:1941/14336 +step:7764/57344 train_time:4851960ms step_avg:624.93ms +step:7765/57344 train_time:4851977ms step_avg:624.85ms +step:7766/57344 train_time:4852217ms step_avg:624.80ms +step:7767/57344 train_time:4852742ms step_avg:624.79ms +grad accum step:1942/14336 +step:7768/57344 train_time:4854006ms step_avg:624.87ms +step:7769/57344 train_time:4854023ms step_avg:624.79ms +step:7770/57344 train_time:4854263ms step_avg:624.74ms +step:7771/57344 train_time:4854789ms step_avg:624.73ms +grad accum step:1943/14336 +step:7772/57344 train_time:4856014ms step_avg:624.81ms +step:7773/57344 train_time:4856032ms step_avg:624.73ms +step:7774/57344 train_time:4856273ms step_avg:624.68ms +step:7775/57344 train_time:4856794ms step_avg:624.67ms +grad accum step:1944/14336 +step:7776/57344 train_time:4858032ms step_avg:624.75ms +step:7777/57344 train_time:4858050ms step_avg:624.67ms +step:7778/57344 train_time:4858288ms step_avg:624.62ms +step:7779/57344 train_time:4858810ms step_avg:624.61ms +grad accum step:1945/14336 +step:7780/57344 train_time:4860049ms step_avg:624.68ms +step:7781/57344 train_time:4860066ms step_avg:624.61ms +step:7782/57344 train_time:4860309ms step_avg:624.56ms +step:7783/57344 train_time:4860842ms step_avg:624.55ms +grad accum step:1946/14336 +step:7784/57344 train_time:4862071ms step_avg:624.62ms +step:7785/57344 train_time:4862088ms step_avg:624.55ms +step:7786/57344 train_time:4862327ms step_avg:624.50ms +step:7787/57344 train_time:4862850ms step_avg:624.48ms +grad accum step:1947/14336 +step:7788/57344 train_time:4864095ms step_avg:624.56ms +step:7789/57344 train_time:4864112ms step_avg:624.48ms +step:7790/57344 train_time:4864351ms step_avg:624.44ms +step:7791/57344 train_time:4864870ms step_avg:624.42ms +grad accum step:1948/14336 +step:7792/57344 train_time:4866106ms step_avg:624.50ms +step:7793/57344 train_time:4866124ms step_avg:624.42ms +step:7794/57344 train_time:4866363ms step_avg:624.37ms +step:7795/57344 train_time:4866884ms step_avg:624.36ms +grad accum step:1949/14336 +step:7796/57344 train_time:4868112ms step_avg:624.44ms +step:7797/57344 train_time:4868129ms step_avg:624.36ms +step:7798/57344 train_time:4868369ms step_avg:624.31ms +step:7799/57344 train_time:4868894ms step_avg:624.30ms +grad accum step:1950/14336 +step:7800/57344 train_time:4870131ms step_avg:624.38ms +step:7801/57344 train_time:4870149ms step_avg:624.30ms +step:7802/57344 train_time:4870389ms step_avg:624.25ms +step:7803/57344 train_time:4870913ms step_avg:624.24ms +grad accum step:1951/14336 +step:7804/57344 train_time:4872140ms step_avg:624.31ms +step:7805/57344 train_time:4872157ms step_avg:624.24ms +step:7806/57344 train_time:4872397ms step_avg:624.19ms +step:7807/57344 train_time:4874132ms step_avg:624.33ms +grad accum step:1952/14336 +step:7808/57344 train_time:4875119ms step_avg:624.37ms +step:7808/57344 val_loss:7.189125 train_time:4875120ms step_avg:624.37ms +step:7809/57344 train_time:4875131ms step_avg:624.30ms +step:7810/57344 train_time:4875348ms step_avg:624.24ms +step:7811/57344 train_time:4875869ms step_avg:624.23ms +grad accum step:1953/14336 +step:7812/57344 train_time:4877116ms step_avg:624.31ms +step:7813/57344 train_time:4877133ms step_avg:624.23ms +step:7814/57344 train_time:4877372ms step_avg:624.18ms +step:7815/57344 train_time:4877888ms step_avg:624.17ms +grad accum step:1954/14336 +step:7816/57344 train_time:4879131ms step_avg:624.25ms +step:7817/57344 train_time:4879148ms step_avg:624.17ms +step:7818/57344 train_time:4879387ms step_avg:624.12ms +step:7819/57344 train_time:4879912ms step_avg:624.11ms +grad accum step:1955/14336 +step:7820/57344 train_time:4881157ms step_avg:624.19ms +step:7821/57344 train_time:4881174ms step_avg:624.11ms +step:7822/57344 train_time:4881413ms step_avg:624.06ms +step:7823/57344 train_time:4881939ms step_avg:624.05ms +grad accum step:1956/14336 +step:7824/57344 train_time:4883173ms step_avg:624.13ms +step:7825/57344 train_time:4883190ms step_avg:624.05ms +step:7826/57344 train_time:4883430ms step_avg:624.00ms +step:7827/57344 train_time:4883952ms step_avg:623.99ms +grad accum step:1957/14336 +step:7828/57344 train_time:4885198ms step_avg:624.07ms +step:7829/57344 train_time:4885215ms step_avg:623.99ms +step:7830/57344 train_time:4885453ms step_avg:623.94ms +step:7831/57344 train_time:4885972ms step_avg:623.93ms +grad accum step:1958/14336 +step:7832/57344 train_time:4887205ms step_avg:624.00ms +step:7833/57344 train_time:4887223ms step_avg:623.93ms +step:7834/57344 train_time:4887461ms step_avg:623.88ms +step:7835/57344 train_time:4887982ms step_avg:623.87ms +grad accum step:1959/14336 +step:7836/57344 train_time:4889228ms step_avg:623.94ms +step:7837/57344 train_time:4889245ms step_avg:623.87ms +step:7838/57344 train_time:4889483ms step_avg:623.82ms +step:7839/57344 train_time:4890006ms step_avg:623.80ms +grad accum step:1960/14336 +step:7840/57344 train_time:4891235ms step_avg:623.88ms +step:7841/57344 train_time:4891252ms step_avg:623.80ms +step:7842/57344 train_time:4891491ms step_avg:623.76ms +step:7843/57344 train_time:4892011ms step_avg:623.74ms +grad accum step:1961/14336 +step:7844/57344 train_time:4893257ms step_avg:623.82ms +step:7845/57344 train_time:4893274ms step_avg:623.74ms +step:7846/57344 train_time:4893517ms step_avg:623.70ms +step:7847/57344 train_time:4894039ms step_avg:623.68ms +grad accum step:1962/14336 +step:7848/57344 train_time:4895278ms step_avg:623.76ms +step:7849/57344 train_time:4895295ms step_avg:623.68ms +step:7850/57344 train_time:4895535ms step_avg:623.64ms +step:7851/57344 train_time:4896060ms step_avg:623.62ms +grad accum step:1963/14336 +step:7852/57344 train_time:4897301ms step_avg:623.70ms +step:7853/57344 train_time:4897318ms step_avg:623.62ms +step:7854/57344 train_time:4897561ms step_avg:623.58ms +step:7855/57344 train_time:4898087ms step_avg:623.56ms +grad accum step:1964/14336 +step:7856/57344 train_time:4899318ms step_avg:623.64ms +step:7857/57344 train_time:4899335ms step_avg:623.56ms +step:7858/57344 train_time:4899574ms step_avg:623.51ms +step:7859/57344 train_time:4900091ms step_avg:623.50ms +grad accum step:1965/14336 +step:7860/57344 train_time:4901330ms step_avg:623.58ms +step:7861/57344 train_time:4901347ms step_avg:623.50ms +step:7862/57344 train_time:4901586ms step_avg:623.45ms +step:7863/57344 train_time:4902108ms step_avg:623.44ms +grad accum step:1966/14336 +step:7864/57344 train_time:4903358ms step_avg:623.52ms +step:7865/57344 train_time:4903375ms step_avg:623.44ms +step:7866/57344 train_time:4903616ms step_avg:623.39ms +step:7867/57344 train_time:4904137ms step_avg:623.38ms +grad accum step:1967/14336 +step:7868/57344 train_time:4905365ms step_avg:623.46ms +step:7869/57344 train_time:4905382ms step_avg:623.38ms +step:7870/57344 train_time:4905621ms step_avg:623.33ms +step:7871/57344 train_time:4906142ms step_avg:623.32ms +grad accum step:1968/14336 +step:7872/57344 train_time:4907403ms step_avg:623.40ms +step:7872/57344 val_loss:7.187330 train_time:4907404ms step_avg:623.40ms +step:7873/57344 train_time:4907415ms step_avg:623.32ms +step:7874/57344 train_time:4907634ms step_avg:623.27ms +step:7875/57344 train_time:4908159ms step_avg:623.26ms +grad accum step:1969/14336 +step:7876/57344 train_time:4909395ms step_avg:623.34ms +step:7877/57344 train_time:4909412ms step_avg:623.26ms +step:7878/57344 train_time:4909651ms step_avg:623.21ms +step:7879/57344 train_time:4910168ms step_avg:623.20ms +grad accum step:1970/14336 +step:7880/57344 train_time:4911395ms step_avg:623.27ms +step:7881/57344 train_time:4911412ms step_avg:623.20ms +step:7882/57344 train_time:4911650ms step_avg:623.15ms +step:7883/57344 train_time:4912170ms step_avg:623.13ms +grad accum step:1971/14336 +step:7884/57344 train_time:4913405ms step_avg:623.21ms +step:7885/57344 train_time:4913422ms step_avg:623.14ms +step:7886/57344 train_time:4913662ms step_avg:623.09ms +step:7887/57344 train_time:4914187ms step_avg:623.07ms +grad accum step:1972/14336 +step:7888/57344 train_time:4915426ms step_avg:623.15ms +step:7889/57344 train_time:4915443ms step_avg:623.08ms +step:7890/57344 train_time:4915683ms step_avg:623.03ms +step:7891/57344 train_time:4916203ms step_avg:623.01ms +grad accum step:1973/14336 +step:7892/57344 train_time:4917441ms step_avg:623.09ms +step:7893/57344 train_time:4917458ms step_avg:623.02ms +step:7894/57344 train_time:4917696ms step_avg:622.97ms +step:7895/57344 train_time:4918217ms step_avg:622.95ms +grad accum step:1974/14336 +step:7896/57344 train_time:4919448ms step_avg:623.03ms +step:7897/57344 train_time:4919465ms step_avg:622.95ms +step:7898/57344 train_time:4919705ms step_avg:622.91ms +step:7899/57344 train_time:4920226ms step_avg:622.89ms +grad accum step:1975/14336 +step:7900/57344 train_time:4921451ms step_avg:622.97ms +step:7901/57344 train_time:4921468ms step_avg:622.89ms +step:7902/57344 train_time:4921708ms step_avg:622.84ms +step:7903/57344 train_time:4922230ms step_avg:622.83ms +grad accum step:1976/14336 +step:7904/57344 train_time:4923471ms step_avg:622.91ms +step:7905/57344 train_time:4923488ms step_avg:622.83ms +step:7906/57344 train_time:4923728ms step_avg:622.78ms +step:7907/57344 train_time:4924251ms step_avg:622.77ms +grad accum step:1977/14336 +step:7908/57344 train_time:4925503ms step_avg:622.85ms +step:7909/57344 train_time:4925520ms step_avg:622.77ms +step:7910/57344 train_time:4925766ms step_avg:622.73ms +step:7911/57344 train_time:4926301ms step_avg:622.72ms +grad accum step:1978/14336 +step:7912/57344 train_time:4927554ms step_avg:622.79ms +step:7913/57344 train_time:4927571ms step_avg:622.72ms +step:7914/57344 train_time:4927809ms step_avg:622.67ms +step:7915/57344 train_time:4928337ms step_avg:622.66ms +grad accum step:1979/14336 +step:7916/57344 train_time:4929584ms step_avg:622.74ms +step:7917/57344 train_time:4929601ms step_avg:622.66ms +step:7918/57344 train_time:4929839ms step_avg:622.61ms +step:7919/57344 train_time:4930361ms step_avg:622.60ms +grad accum step:1980/14336 +step:7920/57344 train_time:4931604ms step_avg:622.68ms +step:7921/57344 train_time:4931622ms step_avg:622.60ms +step:7922/57344 train_time:4931862ms step_avg:622.55ms +step:7923/57344 train_time:4932386ms step_avg:622.54ms +grad accum step:1981/14336 +step:7924/57344 train_time:4933622ms step_avg:622.62ms +step:7925/57344 train_time:4933639ms step_avg:622.54ms +step:7926/57344 train_time:4933881ms step_avg:622.49ms +step:7927/57344 train_time:4934412ms step_avg:622.48ms +grad accum step:1982/14336 +step:7928/57344 train_time:4935652ms step_avg:622.56ms +step:7929/57344 train_time:4935669ms step_avg:622.48ms +step:7930/57344 train_time:4935909ms step_avg:622.43ms +step:7931/57344 train_time:4936434ms step_avg:622.42ms +grad accum step:1983/14336 +step:7932/57344 train_time:4937674ms step_avg:622.50ms +step:7933/57344 train_time:4937692ms step_avg:622.42ms +step:7934/57344 train_time:4937935ms step_avg:622.38ms +step:7935/57344 train_time:4938468ms step_avg:622.37ms +grad accum step:1984/14336 +step:7936/57344 train_time:4939728ms step_avg:622.45ms +step:7936/57344 val_loss:7.182116 train_time:4939728ms step_avg:622.45ms +step:7937/57344 train_time:4939740ms step_avg:622.37ms +step:7938/57344 train_time:4939955ms step_avg:622.32ms +step:7939/57344 train_time:4940476ms step_avg:622.30ms +grad accum step:1985/14336 +step:7940/57344 train_time:4941721ms step_avg:622.38ms +step:7941/57344 train_time:4941738ms step_avg:622.31ms +step:7942/57344 train_time:4941976ms step_avg:622.26ms +step:7943/57344 train_time:4942498ms step_avg:622.25ms +grad accum step:1986/14336 +step:7944/57344 train_time:4943736ms step_avg:622.32ms +step:7945/57344 train_time:4943753ms step_avg:622.25ms +step:7946/57344 train_time:4943993ms step_avg:622.20ms +step:7947/57344 train_time:4944517ms step_avg:622.19ms +grad accum step:1987/14336 +step:7948/57344 train_time:4945750ms step_avg:622.26ms +step:7949/57344 train_time:4945767ms step_avg:622.19ms +step:7950/57344 train_time:4946007ms step_avg:622.14ms +step:7951/57344 train_time:4946535ms step_avg:622.13ms +grad accum step:1988/14336 +step:7952/57344 train_time:4947790ms step_avg:622.21ms +step:7953/57344 train_time:4947807ms step_avg:622.13ms +step:7954/57344 train_time:4948048ms step_avg:622.08ms +step:7955/57344 train_time:4948579ms step_avg:622.07ms +grad accum step:1989/14336 +step:7956/57344 train_time:4949829ms step_avg:622.15ms +step:7957/57344 train_time:4949847ms step_avg:622.07ms +step:7958/57344 train_time:4950086ms step_avg:622.03ms +step:7959/57344 train_time:4950602ms step_avg:622.01ms +grad accum step:1990/14336 +step:7960/57344 train_time:4951830ms step_avg:622.09ms +step:7961/57344 train_time:4951847ms step_avg:622.01ms +step:7962/57344 train_time:4952087ms step_avg:621.97ms +step:7963/57344 train_time:4952608ms step_avg:621.95ms +grad accum step:1991/14336 +step:7964/57344 train_time:4953849ms step_avg:622.03ms +step:7965/57344 train_time:4953866ms step_avg:621.95ms +step:7966/57344 train_time:4954105ms step_avg:621.91ms +step:7967/57344 train_time:4954630ms step_avg:621.89ms +grad accum step:1992/14336 +step:7968/57344 train_time:4955878ms step_avg:621.97ms +step:7969/57344 train_time:4955895ms step_avg:621.90ms +step:7970/57344 train_time:4956135ms step_avg:621.85ms +step:7971/57344 train_time:4956658ms step_avg:621.84ms +grad accum step:1993/14336 +step:7972/57344 train_time:4957886ms step_avg:621.91ms +step:7973/57344 train_time:4957903ms step_avg:621.84ms +step:7974/57344 train_time:4958146ms step_avg:621.79ms +step:7975/57344 train_time:4958677ms step_avg:621.78ms +grad accum step:1994/14336 +step:7976/57344 train_time:4959902ms step_avg:621.85ms +step:7977/57344 train_time:4959920ms step_avg:621.78ms +step:7978/57344 train_time:4960157ms step_avg:621.73ms +step:7979/57344 train_time:4960678ms step_avg:621.72ms +grad accum step:1995/14336 +step:7980/57344 train_time:4961920ms step_avg:621.79ms +step:7981/57344 train_time:4961938ms step_avg:621.72ms +step:7982/57344 train_time:4962177ms step_avg:621.67ms +step:7983/57344 train_time:4962704ms step_avg:621.66ms +grad accum step:1996/14336 +step:7984/57344 train_time:4963951ms step_avg:621.74ms +step:7985/57344 train_time:4963968ms step_avg:621.66ms +step:7986/57344 train_time:4964208ms step_avg:621.61ms +step:7987/57344 train_time:4964729ms step_avg:621.60ms +grad accum step:1997/14336 +step:7988/57344 train_time:4965988ms step_avg:621.68ms +step:7989/57344 train_time:4966005ms step_avg:621.61ms +step:7990/57344 train_time:4966245ms step_avg:621.56ms +step:7991/57344 train_time:4966769ms step_avg:621.55ms +grad accum step:1998/14336 +step:7992/57344 train_time:4968006ms step_avg:621.62ms +step:7993/57344 train_time:4968023ms step_avg:621.55ms +step:7994/57344 train_time:4968264ms step_avg:621.50ms +step:7995/57344 train_time:4968783ms step_avg:621.49ms +grad accum step:1999/14336 +step:7996/57344 train_time:4970015ms step_avg:621.56ms +step:7997/57344 train_time:4970032ms step_avg:621.49ms +step:7998/57344 train_time:4970272ms step_avg:621.44ms +step:7999/57344 train_time:4970793ms step_avg:621.43ms +grad accum step:2000/14336 +step:8000/57344 train_time:4972030ms step_avg:621.50ms +step:8000/57344 val_loss:7.149826 train_time:4972031ms step_avg:621.50ms +step:8001/57344 train_time:4972043ms step_avg:621.43ms +step:8002/57344 train_time:4972259ms step_avg:621.38ms +step:8003/57344 train_time:4972782ms step_avg:621.36ms +grad accum step:2001/14336 +step:8004/57344 train_time:4974022ms step_avg:621.44ms +step:8005/57344 train_time:4974040ms step_avg:621.37ms +step:8006/57344 train_time:4974280ms step_avg:621.32ms +step:8007/57344 train_time:4974806ms step_avg:621.31ms +grad accum step:2002/14336 +step:8008/57344 train_time:4976045ms step_avg:621.38ms +step:8009/57344 train_time:4976062ms step_avg:621.31ms +step:8010/57344 train_time:4976303ms step_avg:621.26ms +step:8011/57344 train_time:4976827ms step_avg:621.25ms +grad accum step:2003/14336 +step:8012/57344 train_time:4978057ms step_avg:621.33ms +step:8013/57344 train_time:4978075ms step_avg:621.25ms +step:8014/57344 train_time:4978315ms step_avg:621.20ms +step:8015/57344 train_time:4978835ms step_avg:621.19ms +grad accum step:2004/14336 +step:8016/57344 train_time:4980074ms step_avg:621.27ms +step:8017/57344 train_time:4980091ms step_avg:621.19ms +step:8018/57344 train_time:4980328ms step_avg:621.14ms +step:8019/57344 train_time:4980846ms step_avg:621.13ms +grad accum step:2005/14336 +step:8020/57344 train_time:4982101ms step_avg:621.21ms +step:8021/57344 train_time:4982118ms step_avg:621.13ms +step:8022/57344 train_time:4982356ms step_avg:621.09ms +step:8023/57344 train_time:4982878ms step_avg:621.07ms +grad accum step:2006/14336 +step:8024/57344 train_time:4984120ms step_avg:621.15ms +step:8025/57344 train_time:4984137ms step_avg:621.08ms +step:8026/57344 train_time:4984379ms step_avg:621.03ms +step:8027/57344 train_time:4984902ms step_avg:621.02ms +grad accum step:2007/14336 +step:8028/57344 train_time:4986149ms step_avg:621.09ms +step:8029/57344 train_time:4986166ms step_avg:621.02ms +step:8030/57344 train_time:4986405ms step_avg:620.97ms +step:8031/57344 train_time:4986925ms step_avg:620.96ms +grad accum step:2008/14336 +step:8032/57344 train_time:4988171ms step_avg:621.04ms +step:8033/57344 train_time:4988188ms step_avg:620.96ms +step:8034/57344 train_time:4988431ms step_avg:620.91ms +step:8035/57344 train_time:4988953ms step_avg:620.90ms +grad accum step:2009/14336 +step:8036/57344 train_time:4990189ms step_avg:620.98ms +step:8037/57344 train_time:4990206ms step_avg:620.90ms +step:8038/57344 train_time:4990446ms step_avg:620.86ms +step:8039/57344 train_time:4990971ms step_avg:620.84ms +grad accum step:2010/14336 +step:8040/57344 train_time:4992210ms step_avg:620.92ms +step:8041/57344 train_time:4992227ms step_avg:620.85ms +step:8042/57344 train_time:4992467ms step_avg:620.80ms +step:8043/57344 train_time:4992991ms step_avg:620.79ms +grad accum step:2011/14336 +step:8044/57344 train_time:4994234ms step_avg:620.86ms +step:8045/57344 train_time:4994251ms step_avg:620.79ms +step:8046/57344 train_time:4994490ms step_avg:620.74ms +step:8047/57344 train_time:4995013ms step_avg:620.73ms +grad accum step:2012/14336 +step:8048/57344 train_time:4996252ms step_avg:620.81ms +step:8049/57344 train_time:4996269ms step_avg:620.73ms +step:8050/57344 train_time:4996507ms step_avg:620.68ms +step:8051/57344 train_time:4997029ms step_avg:620.67ms +grad accum step:2013/14336 +step:8052/57344 train_time:4998273ms step_avg:620.75ms +step:8053/57344 train_time:4998290ms step_avg:620.67ms +step:8054/57344 train_time:4998529ms step_avg:620.63ms +step:8055/57344 train_time:4999046ms step_avg:620.61ms +grad accum step:2014/14336 +step:8056/57344 train_time:5000278ms step_avg:620.69ms +step:8057/57344 train_time:5000296ms step_avg:620.62ms +step:8058/57344 train_time:5000535ms step_avg:620.57ms +step:8059/57344 train_time:5001056ms step_avg:620.56ms +grad accum step:2015/14336 +step:8060/57344 train_time:5002284ms step_avg:620.63ms +step:8061/57344 train_time:5002301ms step_avg:620.56ms +step:8062/57344 train_time:5002541ms step_avg:620.51ms +step:8063/57344 train_time:5003064ms step_avg:620.50ms +grad accum step:2016/14336 +step:8064/57344 train_time:5004296ms step_avg:620.57ms +step:8064/57344 val_loss:7.138590 train_time:5004296ms step_avg:620.57ms +step:8065/57344 train_time:5004308ms step_avg:620.50ms +step:8066/57344 train_time:5004528ms step_avg:620.45ms +step:8067/57344 train_time:5005056ms step_avg:620.44ms +grad accum step:2017/14336 +step:8068/57344 train_time:5006313ms step_avg:620.51ms +step:8069/57344 train_time:5006330ms step_avg:620.44ms +step:8070/57344 train_time:5006570ms step_avg:620.39ms +step:8071/57344 train_time:5007095ms step_avg:620.38ms +grad accum step:2018/14336 +step:8072/57344 train_time:5008335ms step_avg:620.46ms +step:8073/57344 train_time:5008352ms step_avg:620.38ms +step:8074/57344 train_time:5008594ms step_avg:620.34ms +step:8075/57344 train_time:5009119ms step_avg:620.32ms +grad accum step:2019/14336 +step:8076/57344 train_time:5010353ms step_avg:620.40ms +step:8077/57344 train_time:5010370ms step_avg:620.33ms +step:8078/57344 train_time:5010612ms step_avg:620.28ms +step:8079/57344 train_time:5011137ms step_avg:620.27ms +grad accum step:2020/14336 +step:8080/57344 train_time:5012379ms step_avg:620.34ms +step:8081/57344 train_time:5012396ms step_avg:620.27ms +step:8082/57344 train_time:5012635ms step_avg:620.22ms +step:8083/57344 train_time:5013158ms step_avg:620.21ms +grad accum step:2021/14336 +step:8084/57344 train_time:5014405ms step_avg:620.29ms +step:8085/57344 train_time:5014422ms step_avg:620.21ms +step:8086/57344 train_time:5014664ms step_avg:620.17ms +step:8087/57344 train_time:5015184ms step_avg:620.15ms +grad accum step:2022/14336 +step:8088/57344 train_time:5016411ms step_avg:620.23ms +step:8089/57344 train_time:5016428ms step_avg:620.15ms +step:8090/57344 train_time:5016667ms step_avg:620.11ms +step:8091/57344 train_time:5017185ms step_avg:620.09ms +grad accum step:2023/14336 +step:8092/57344 train_time:5018412ms step_avg:620.17ms +step:8093/57344 train_time:5018429ms step_avg:620.10ms +step:8094/57344 train_time:5018667ms step_avg:620.05ms +step:8095/57344 train_time:5019189ms step_avg:620.04ms +grad accum step:2024/14336 +step:8096/57344 train_time:5020427ms step_avg:620.11ms +step:8097/57344 train_time:5020444ms step_avg:620.04ms +step:8098/57344 train_time:5020683ms step_avg:619.99ms +step:8099/57344 train_time:5021201ms step_avg:619.98ms +grad accum step:2025/14336 +step:8100/57344 train_time:5022440ms step_avg:620.05ms +step:8101/57344 train_time:5022457ms step_avg:619.98ms +step:8102/57344 train_time:5022698ms step_avg:619.93ms +step:8103/57344 train_time:5023222ms step_avg:619.92ms +grad accum step:2026/14336 +step:8104/57344 train_time:5024479ms step_avg:620.00ms +step:8105/57344 train_time:5024496ms step_avg:619.93ms +step:8106/57344 train_time:5024736ms step_avg:619.88ms +step:8107/57344 train_time:5025260ms step_avg:619.87ms +grad accum step:2027/14336 +step:8108/57344 train_time:5026504ms step_avg:619.94ms +step:8109/57344 train_time:5026521ms step_avg:619.87ms +step:8110/57344 train_time:5026760ms step_avg:619.82ms +step:8111/57344 train_time:5027283ms step_avg:619.81ms +grad accum step:2028/14336 +step:8112/57344 train_time:5028527ms step_avg:619.89ms +step:8113/57344 train_time:5028544ms step_avg:619.81ms +step:8114/57344 train_time:5028786ms step_avg:619.77ms +step:8115/57344 train_time:5029308ms step_avg:619.75ms +grad accum step:2029/14336 +step:8116/57344 train_time:5030550ms step_avg:619.83ms +step:8117/57344 train_time:5030567ms step_avg:619.76ms +step:8118/57344 train_time:5030807ms step_avg:619.71ms +step:8119/57344 train_time:5031331ms step_avg:619.70ms +grad accum step:2030/14336 +step:8120/57344 train_time:5032558ms step_avg:619.77ms +step:8121/57344 train_time:5032575ms step_avg:619.70ms +step:8122/57344 train_time:5032818ms step_avg:619.65ms +step:8123/57344 train_time:5033348ms step_avg:619.64ms +grad accum step:2031/14336 +step:8124/57344 train_time:5034591ms step_avg:619.72ms +step:8125/57344 train_time:5034608ms step_avg:619.64ms +step:8126/57344 train_time:5034847ms step_avg:619.60ms +step:8127/57344 train_time:5035367ms step_avg:619.58ms +grad accum step:2032/14336 +step:8128/57344 train_time:5036609ms step_avg:619.66ms +step:8128/57344 val_loss:7.127815 train_time:5036610ms step_avg:619.66ms +step:8129/57344 train_time:5036622ms step_avg:619.59ms +step:8130/57344 train_time:5036838ms step_avg:619.54ms +step:8131/57344 train_time:5037358ms step_avg:619.52ms +grad accum step:2033/14336 +step:8132/57344 train_time:5038602ms step_avg:619.60ms +step:8133/57344 train_time:5038619ms step_avg:619.53ms +step:8134/57344 train_time:5038858ms step_avg:619.48ms +step:8135/57344 train_time:5039378ms step_avg:619.47ms +grad accum step:2034/14336 +step:8136/57344 train_time:5040615ms step_avg:619.54ms +step:8137/57344 train_time:5040633ms step_avg:619.47ms +step:8138/57344 train_time:5040873ms step_avg:619.42ms +step:8139/57344 train_time:5041397ms step_avg:619.41ms +grad accum step:2035/14336 +step:8140/57344 train_time:5042629ms step_avg:619.49ms +step:8141/57344 train_time:5042646ms step_avg:619.41ms +step:8142/57344 train_time:5042887ms step_avg:619.37ms +step:8143/57344 train_time:5043418ms step_avg:619.36ms +grad accum step:2036/14336 +step:8144/57344 train_time:5044666ms step_avg:619.43ms +step:8145/57344 train_time:5044683ms step_avg:619.36ms +step:8146/57344 train_time:5044922ms step_avg:619.31ms +step:8147/57344 train_time:5045442ms step_avg:619.30ms +grad accum step:2037/14336 +step:8148/57344 train_time:5046685ms step_avg:619.38ms +step:8149/57344 train_time:5046702ms step_avg:619.30ms +step:8150/57344 train_time:5046943ms step_avg:619.26ms +step:8151/57344 train_time:5047470ms step_avg:619.25ms +grad accum step:2038/14336 +step:8152/57344 train_time:5048718ms step_avg:619.32ms +step:8153/57344 train_time:5048735ms step_avg:619.25ms +step:8154/57344 train_time:5048975ms step_avg:619.20ms +step:8155/57344 train_time:5049500ms step_avg:619.19ms +grad accum step:2039/14336 +step:8156/57344 train_time:5050736ms step_avg:619.27ms +step:8157/57344 train_time:5050753ms step_avg:619.19ms +step:8158/57344 train_time:5050990ms step_avg:619.15ms +step:8159/57344 train_time:5051511ms step_avg:619.13ms +grad accum step:2040/14336 +step:8160/57344 train_time:5052752ms step_avg:619.21ms +step:8161/57344 train_time:5052770ms step_avg:619.14ms +step:8162/57344 train_time:5053007ms step_avg:619.09ms +step:8163/57344 train_time:5053529ms step_avg:619.08ms +grad accum step:2041/14336 +step:8164/57344 train_time:5054766ms step_avg:619.15ms +step:8165/57344 train_time:5054783ms step_avg:619.08ms +step:8166/57344 train_time:5055025ms step_avg:619.03ms +step:8167/57344 train_time:5055554ms step_avg:619.02ms +grad accum step:2042/14336 +step:8168/57344 train_time:5056803ms step_avg:619.10ms +step:8169/57344 train_time:5056821ms step_avg:619.03ms +step:8170/57344 train_time:5057059ms step_avg:618.98ms +step:8171/57344 train_time:5057582ms step_avg:618.97ms +grad accum step:2043/14336 +step:8172/57344 train_time:5058826ms step_avg:619.04ms +step:8173/57344 train_time:5058844ms step_avg:618.97ms +step:8174/57344 train_time:5059083ms step_avg:618.92ms +step:8175/57344 train_time:5059608ms step_avg:618.91ms +grad accum step:2044/14336 +step:8176/57344 train_time:5060838ms step_avg:618.99ms +step:8177/57344 train_time:5060855ms step_avg:618.91ms +step:8178/57344 train_time:5061095ms step_avg:618.87ms +step:8179/57344 train_time:5061616ms step_avg:618.86ms +grad accum step:2045/14336 +step:8180/57344 train_time:5062861ms step_avg:618.93ms +step:8181/57344 train_time:5062878ms step_avg:618.86ms +step:8182/57344 train_time:5063120ms step_avg:618.81ms +step:8183/57344 train_time:5063643ms step_avg:618.80ms +grad accum step:2046/14336 +step:8184/57344 train_time:5064878ms step_avg:618.88ms +step:8185/57344 train_time:5064895ms step_avg:618.80ms +step:8186/57344 train_time:5065134ms step_avg:618.76ms +step:8187/57344 train_time:5065657ms step_avg:618.74ms +grad accum step:2047/14336 +step:8188/57344 train_time:5066908ms step_avg:618.82ms +step:8189/57344 train_time:5066925ms step_avg:618.75ms +step:8190/57344 train_time:5067165ms step_avg:618.70ms +step:8191/57344 train_time:5067688ms step_avg:618.69ms +grad accum step:2048/14336 +step:8192/57344 train_time:5068936ms step_avg:618.77ms +step:8192/57344 val_loss:7.110021 train_time:5068937ms step_avg:618.77ms +step:8193/57344 train_time:5068949ms step_avg:618.69ms +step:8194/57344 train_time:5069167ms step_avg:618.64ms +step:8195/57344 train_time:5069690ms step_avg:618.63ms +grad accum step:2049/14336 +step:8196/57344 train_time:5070924ms step_avg:618.71ms +step:8197/57344 train_time:5070941ms step_avg:618.63ms +step:8198/57344 train_time:5071181ms step_avg:618.59ms +step:8199/57344 train_time:5071704ms step_avg:618.58ms +grad accum step:2050/14336 +step:8200/57344 train_time:5072946ms step_avg:618.65ms +step:8201/57344 train_time:5072963ms step_avg:618.58ms +step:8202/57344 train_time:5073201ms step_avg:618.53ms +step:8203/57344 train_time:5073722ms step_avg:618.52ms +grad accum step:2051/14336 +step:8204/57344 train_time:5074954ms step_avg:618.60ms +step:8205/57344 train_time:5074971ms step_avg:618.52ms +step:8206/57344 train_time:5075216ms step_avg:618.48ms +step:8207/57344 train_time:5075756ms step_avg:618.47ms +grad accum step:2052/14336 +step:8208/57344 train_time:5077009ms step_avg:618.54ms +step:8209/57344 train_time:5077026ms step_avg:618.47ms +step:8210/57344 train_time:5077267ms step_avg:618.42ms +step:8211/57344 train_time:5077790ms step_avg:618.41ms +grad accum step:2053/14336 +step:8212/57344 train_time:5079014ms step_avg:618.49ms +step:8213/57344 train_time:5079031ms step_avg:618.41ms +step:8214/57344 train_time:5079273ms step_avg:618.37ms +step:8215/57344 train_time:5079796ms step_avg:618.36ms +grad accum step:2054/14336 +step:8216/57344 train_time:5081029ms step_avg:618.43ms +step:8217/57344 train_time:5081046ms step_avg:618.36ms +step:8218/57344 train_time:5081286ms step_avg:618.31ms +step:8219/57344 train_time:5081813ms step_avg:618.30ms +grad accum step:2055/14336 +step:8220/57344 train_time:5083060ms step_avg:618.38ms +step:8221/57344 train_time:5083078ms step_avg:618.30ms +step:8222/57344 train_time:5083321ms step_avg:618.26ms +step:8223/57344 train_time:5083845ms step_avg:618.25ms +grad accum step:2056/14336 +step:8224/57344 train_time:5085089ms step_avg:618.32ms +step:8225/57344 train_time:5085106ms step_avg:618.25ms +step:8226/57344 train_time:5085345ms step_avg:618.20ms +step:8227/57344 train_time:5085866ms step_avg:618.19ms +grad accum step:2057/14336 +step:8228/57344 train_time:5087112ms step_avg:618.27ms +step:8229/57344 train_time:5087129ms step_avg:618.20ms +step:8230/57344 train_time:5087370ms step_avg:618.15ms +step:8231/57344 train_time:5087890ms step_avg:618.14ms +grad accum step:2058/14336 +step:8232/57344 train_time:5089130ms step_avg:618.21ms +step:8233/57344 train_time:5089147ms step_avg:618.14ms +step:8234/57344 train_time:5089386ms step_avg:618.09ms +step:8235/57344 train_time:5089908ms step_avg:618.08ms +grad accum step:2059/14336 +step:8236/57344 train_time:5091151ms step_avg:618.16ms +step:8237/57344 train_time:5091168ms step_avg:618.09ms +step:8238/57344 train_time:5091407ms step_avg:618.04ms +step:8239/57344 train_time:5091930ms step_avg:618.03ms +grad accum step:2060/14336 +step:8240/57344 train_time:5093161ms step_avg:618.10ms +step:8241/57344 train_time:5093178ms step_avg:618.03ms +step:8242/57344 train_time:5093424ms step_avg:617.98ms +step:8243/57344 train_time:5093954ms step_avg:617.97ms +grad accum step:2061/14336 +step:8244/57344 train_time:5095222ms step_avg:618.05ms +step:8245/57344 train_time:5095239ms step_avg:617.98ms +step:8246/57344 train_time:5095480ms step_avg:617.93ms +step:8247/57344 train_time:5096009ms step_avg:617.92ms +grad accum step:2062/14336 +step:8248/57344 train_time:5097251ms step_avg:618.00ms +step:8249/57344 train_time:5097268ms step_avg:617.93ms +step:8250/57344 train_time:5097509ms step_avg:617.88ms +step:8251/57344 train_time:5098036ms step_avg:617.87ms +grad accum step:2063/14336 +step:8252/57344 train_time:5099267ms step_avg:617.94ms +step:8253/57344 train_time:5099284ms step_avg:617.87ms +step:8254/57344 train_time:5099524ms step_avg:617.82ms +step:8255/57344 train_time:5100042ms step_avg:617.81ms +grad accum step:2064/14336 +step:8256/57344 train_time:5101289ms step_avg:617.89ms +step:8256/57344 val_loss:7.109468 train_time:5101290ms step_avg:617.89ms +step:8257/57344 train_time:5101302ms step_avg:617.82ms +step:8258/57344 train_time:5101517ms step_avg:617.77ms +step:8259/57344 train_time:5102041ms step_avg:617.76ms +grad accum step:2065/14336 +step:8260/57344 train_time:5103293ms step_avg:617.83ms +step:8261/57344 train_time:5103310ms step_avg:617.76ms +step:8262/57344 train_time:5103550ms step_avg:617.71ms +step:8263/57344 train_time:5104076ms step_avg:617.70ms +grad accum step:2066/14336 +step:8264/57344 train_time:5105317ms step_avg:617.78ms +step:8265/57344 train_time:5105335ms step_avg:617.71ms +step:8266/57344 train_time:5105574ms step_avg:617.66ms +step:8267/57344 train_time:5106095ms step_avg:617.65ms +grad accum step:2067/14336 +step:8268/57344 train_time:5107324ms step_avg:617.72ms +step:8269/57344 train_time:5107341ms step_avg:617.65ms +step:8270/57344 train_time:5107581ms step_avg:617.60ms +step:8271/57344 train_time:5108100ms step_avg:617.59ms +grad accum step:2068/14336 +step:8272/57344 train_time:5109330ms step_avg:617.67ms +step:8273/57344 train_time:5109347ms step_avg:617.59ms +step:8274/57344 train_time:5109586ms step_avg:617.55ms +step:8275/57344 train_time:5110107ms step_avg:617.54ms +grad accum step:2069/14336 +step:8276/57344 train_time:5111349ms step_avg:617.61ms +step:8277/57344 train_time:5111366ms step_avg:617.54ms +step:8278/57344 train_time:5111606ms step_avg:617.49ms +step:8279/57344 train_time:5112124ms step_avg:617.48ms +grad accum step:2070/14336 +step:8280/57344 train_time:5113364ms step_avg:617.56ms +step:8281/57344 train_time:5113382ms step_avg:617.48ms +step:8282/57344 train_time:5113620ms step_avg:617.44ms +step:8283/57344 train_time:5114138ms step_avg:617.43ms +grad accum step:2071/14336 +step:8284/57344 train_time:5115390ms step_avg:617.50ms +step:8285/57344 train_time:5115407ms step_avg:617.43ms +step:8286/57344 train_time:5115647ms step_avg:617.38ms +step:8287/57344 train_time:5116169ms step_avg:617.37ms +grad accum step:2072/14336 +step:8288/57344 train_time:5117410ms step_avg:617.45ms +step:8289/57344 train_time:5117428ms step_avg:617.38ms +step:8290/57344 train_time:5117665ms step_avg:617.33ms +step:8291/57344 train_time:5118186ms step_avg:617.32ms +grad accum step:2073/14336 +step:8292/57344 train_time:5119418ms step_avg:617.39ms +step:8293/57344 train_time:5119435ms step_avg:617.32ms +step:8294/57344 train_time:5119677ms step_avg:617.27ms +step:8295/57344 train_time:5120200ms step_avg:617.26ms +grad accum step:2074/14336 +step:8296/57344 train_time:5121434ms step_avg:617.34ms +step:8297/57344 train_time:5121451ms step_avg:617.27ms +step:8298/57344 train_time:5121691ms step_avg:617.22ms +step:8299/57344 train_time:5122216ms step_avg:617.21ms +grad accum step:2075/14336 +step:8300/57344 train_time:5123475ms step_avg:617.29ms +step:8301/57344 train_time:5123492ms step_avg:617.21ms +step:8302/57344 train_time:5123735ms step_avg:617.17ms +step:8303/57344 train_time:5124269ms step_avg:617.16ms +grad accum step:2076/14336 +step:8304/57344 train_time:5125518ms step_avg:617.23ms +step:8305/57344 train_time:5125536ms step_avg:617.16ms +step:8306/57344 train_time:5125775ms step_avg:617.12ms +step:8307/57344 train_time:5126298ms step_avg:617.11ms +grad accum step:2077/14336 +step:8308/57344 train_time:5127541ms step_avg:617.18ms +step:8309/57344 train_time:5127558ms step_avg:617.11ms +step:8310/57344 train_time:5127797ms step_avg:617.06ms +step:8311/57344 train_time:5128319ms step_avg:617.05ms +grad accum step:2078/14336 +step:8312/57344 train_time:5129567ms step_avg:617.13ms +step:8313/57344 train_time:5129584ms step_avg:617.06ms +step:8314/57344 train_time:5129822ms step_avg:617.01ms +step:8315/57344 train_time:5130346ms step_avg:617.00ms +grad accum step:2079/14336 +step:8316/57344 train_time:5131595ms step_avg:617.07ms +step:8317/57344 train_time:5131612ms step_avg:617.00ms +step:8318/57344 train_time:5131849ms step_avg:616.96ms +step:8319/57344 train_time:5132368ms step_avg:616.95ms +grad accum step:2080/14336 +step:8320/57344 train_time:5133602ms step_avg:617.02ms +step:8320/57344 val_loss:7.096864 train_time:5133602ms step_avg:617.02ms +step:8321/57344 train_time:5133614ms step_avg:616.95ms +step:8322/57344 train_time:5133832ms step_avg:616.90ms +step:8323/57344 train_time:5134353ms step_avg:616.89ms +grad accum step:2081/14336 +step:8324/57344 train_time:5135594ms step_avg:616.96ms +step:8325/57344 train_time:5135611ms step_avg:616.89ms +step:8326/57344 train_time:5135851ms step_avg:616.84ms +step:8327/57344 train_time:5136373ms step_avg:616.83ms +grad accum step:2082/14336 +step:8328/57344 train_time:5137592ms step_avg:616.91ms +step:8329/57344 train_time:5137609ms step_avg:616.83ms +step:8330/57344 train_time:5137853ms step_avg:616.79ms +step:8331/57344 train_time:5138380ms step_avg:616.78ms +grad accum step:2083/14336 +step:8332/57344 train_time:5139630ms step_avg:616.85ms +step:8333/57344 train_time:5139647ms step_avg:616.78ms +step:8334/57344 train_time:5139886ms step_avg:616.74ms +step:8335/57344 train_time:5140412ms step_avg:616.73ms +grad accum step:2084/14336 +step:8336/57344 train_time:5141643ms step_avg:616.80ms +step:8337/57344 train_time:5141660ms step_avg:616.73ms +step:8338/57344 train_time:5141901ms step_avg:616.68ms +step:8339/57344 train_time:5142426ms step_avg:616.67ms +grad accum step:2085/14336 +step:8340/57344 train_time:5143666ms step_avg:616.75ms +step:8341/57344 train_time:5143683ms step_avg:616.67ms +step:8342/57344 train_time:5143924ms step_avg:616.63ms +step:8343/57344 train_time:5144446ms step_avg:616.62ms +grad accum step:2086/14336 +step:8344/57344 train_time:5145673ms step_avg:616.69ms +step:8345/57344 train_time:5145690ms step_avg:616.62ms +step:8346/57344 train_time:5145933ms step_avg:616.57ms +step:8347/57344 train_time:5146461ms step_avg:616.56ms +grad accum step:2087/14336 +step:8348/57344 train_time:5147705ms step_avg:616.64ms +step:8349/57344 train_time:5147723ms step_avg:616.57ms +step:8350/57344 train_time:5147965ms step_avg:616.52ms +step:8351/57344 train_time:5148501ms step_avg:616.51ms +grad accum step:2088/14336 +step:8352/57344 train_time:5149755ms step_avg:616.59ms +step:8353/57344 train_time:5149772ms step_avg:616.52ms +step:8354/57344 train_time:5150014ms step_avg:616.47ms +step:8355/57344 train_time:5150538ms step_avg:616.46ms +grad accum step:2089/14336 +step:8356/57344 train_time:5151792ms step_avg:616.54ms +step:8357/57344 train_time:5151809ms step_avg:616.47ms +step:8358/57344 train_time:5152049ms step_avg:616.42ms +step:8359/57344 train_time:5152579ms step_avg:616.41ms +grad accum step:2090/14336 +step:8360/57344 train_time:5153822ms step_avg:616.49ms +step:8361/57344 train_time:5153839ms step_avg:616.41ms +step:8362/57344 train_time:5154078ms step_avg:616.37ms +step:8363/57344 train_time:5154604ms step_avg:616.36ms +grad accum step:2091/14336 +step:8364/57344 train_time:5155855ms step_avg:616.43ms +step:8365/57344 train_time:5155873ms step_avg:616.36ms +step:8366/57344 train_time:5156115ms step_avg:616.32ms +step:8367/57344 train_time:5156640ms step_avg:616.31ms +grad accum step:2092/14336 +step:8368/57344 train_time:5174973ms step_avg:618.42ms +step:8369/57344 train_time:5174986ms step_avg:618.35ms +step:8370/57344 train_time:5175282ms step_avg:618.31ms +step:8371/57344 train_time:5175796ms step_avg:618.30ms +grad accum step:2093/14336 +step:8372/57344 train_time:5177009ms step_avg:618.37ms +step:8373/57344 train_time:5177026ms step_avg:618.30ms +step:8374/57344 train_time:5177267ms step_avg:618.25ms +step:8375/57344 train_time:5177789ms step_avg:618.24ms +grad accum step:2094/14336 +step:8376/57344 train_time:5179011ms step_avg:618.32ms +step:8377/57344 train_time:5179028ms step_avg:618.24ms +step:8378/57344 train_time:5179265ms step_avg:618.20ms +step:8379/57344 train_time:5179788ms step_avg:618.19ms +grad accum step:2095/14336 +step:8380/57344 train_time:5181022ms step_avg:618.26ms +step:8381/57344 train_time:5181039ms step_avg:618.19ms +step:8382/57344 train_time:5181276ms step_avg:618.14ms +step:8383/57344 train_time:5181799ms step_avg:618.13ms +grad accum step:2096/14336 +step:8384/57344 train_time:5183022ms step_avg:618.20ms +step:8384/57344 val_loss:7.088062 train_time:5183023ms step_avg:618.20ms +step:8385/57344 train_time:5183035ms step_avg:618.13ms +step:8386/57344 train_time:5183252ms step_avg:618.08ms +step:8387/57344 train_time:5183777ms step_avg:618.07ms +grad accum step:2097/14336 +step:8388/57344 train_time:5185009ms step_avg:618.15ms +step:8389/57344 train_time:5185026ms step_avg:618.07ms +step:8390/57344 train_time:5185266ms step_avg:618.03ms +step:8391/57344 train_time:5185787ms step_avg:618.02ms +grad accum step:2098/14336 +step:8392/57344 train_time:5187015ms step_avg:618.09ms +step:8393/57344 train_time:5187032ms step_avg:618.02ms +step:8394/57344 train_time:5187270ms step_avg:617.97ms +step:8395/57344 train_time:5187792ms step_avg:617.96ms +grad accum step:2099/14336 +step:8396/57344 train_time:5189042ms step_avg:618.04ms +step:8397/57344 train_time:5189059ms step_avg:617.97ms +step:8398/57344 train_time:5189299ms step_avg:617.92ms +step:8399/57344 train_time:5189820ms step_avg:617.91ms +grad accum step:2100/14336 +step:8400/57344 train_time:5191056ms step_avg:617.98ms +step:8401/57344 train_time:5191073ms step_avg:617.91ms +step:8402/57344 train_time:5191310ms step_avg:617.87ms +step:8403/57344 train_time:5191833ms step_avg:617.85ms +grad accum step:2101/14336 +step:8404/57344 train_time:5193072ms step_avg:617.93ms +step:8405/57344 train_time:5193089ms step_avg:617.86ms +step:8406/57344 train_time:5193334ms step_avg:617.81ms +step:8407/57344 train_time:5193862ms step_avg:617.80ms +grad accum step:2102/14336 +step:8408/57344 train_time:5195104ms step_avg:617.88ms +step:8409/57344 train_time:5195121ms step_avg:617.80ms +step:8410/57344 train_time:5195359ms step_avg:617.76ms +step:8411/57344 train_time:5195873ms step_avg:617.75ms +grad accum step:2103/14336 +step:8412/57344 train_time:5197120ms step_avg:617.82ms +step:8413/57344 train_time:5197137ms step_avg:617.75ms +step:8414/57344 train_time:5197380ms step_avg:617.71ms +step:8415/57344 train_time:5197909ms step_avg:617.70ms +grad accum step:2104/14336 +step:8416/57344 train_time:5199153ms step_avg:617.77ms +step:8417/57344 train_time:5199170ms step_avg:617.70ms +step:8418/57344 train_time:5199409ms step_avg:617.65ms +step:8419/57344 train_time:5199930ms step_avg:617.64ms +grad accum step:2105/14336 +step:8420/57344 train_time:5201187ms step_avg:617.72ms +step:8421/57344 train_time:5201204ms step_avg:617.65ms +step:8422/57344 train_time:5201445ms step_avg:617.60ms +step:8423/57344 train_time:5201967ms step_avg:617.59ms +grad accum step:2106/14336 +step:8424/57344 train_time:5203210ms step_avg:617.66ms +step:8425/57344 train_time:5203227ms step_avg:617.59ms +step:8426/57344 train_time:5203467ms step_avg:617.55ms +step:8427/57344 train_time:5203989ms step_avg:617.54ms +grad accum step:2107/14336 +step:8428/57344 train_time:5205233ms step_avg:617.61ms +step:8429/57344 train_time:5205250ms step_avg:617.54ms +step:8430/57344 train_time:5205489ms step_avg:617.50ms +step:8431/57344 train_time:5206007ms step_avg:617.48ms +grad accum step:2108/14336 +step:8432/57344 train_time:5207231ms step_avg:617.56ms +step:8433/57344 train_time:5207248ms step_avg:617.48ms +step:8434/57344 train_time:5207489ms step_avg:617.44ms +step:8435/57344 train_time:5208009ms step_avg:617.43ms +grad accum step:2109/14336 +step:8436/57344 train_time:5209245ms step_avg:617.50ms +step:8437/57344 train_time:5209263ms step_avg:617.43ms +step:8438/57344 train_time:5209500ms step_avg:617.39ms +step:8439/57344 train_time:5210019ms step_avg:617.37ms +grad accum step:2110/14336 +step:8440/57344 train_time:5211263ms step_avg:617.45ms +step:8441/57344 train_time:5211280ms step_avg:617.38ms +step:8442/57344 train_time:5211519ms step_avg:617.33ms +step:8443/57344 train_time:5212045ms step_avg:617.32ms +grad accum step:2111/14336 +step:8444/57344 train_time:5213278ms step_avg:617.39ms +step:8445/57344 train_time:5213295ms step_avg:617.32ms +step:8446/57344 train_time:5213538ms step_avg:617.28ms +step:8447/57344 train_time:5214060ms step_avg:617.27ms +grad accum step:2112/14336 +step:8448/57344 train_time:5215293ms step_avg:617.34ms +step:8448/57344 val_loss:7.069827 train_time:5215293ms step_avg:617.34ms +step:8449/57344 train_time:5215305ms step_avg:617.27ms +step:8450/57344 train_time:5215521ms step_avg:617.22ms +step:8451/57344 train_time:5216039ms step_avg:617.21ms +grad accum step:2113/14336 +step:8452/57344 train_time:5217278ms step_avg:617.28ms +step:8453/57344 train_time:5217295ms step_avg:617.21ms +step:8454/57344 train_time:5217533ms step_avg:617.17ms +step:8455/57344 train_time:5218048ms step_avg:617.16ms +grad accum step:2114/14336 +step:8456/57344 train_time:5219276ms step_avg:617.23ms +step:8457/57344 train_time:5219293ms step_avg:617.16ms +step:8458/57344 train_time:5219533ms step_avg:617.11ms +step:8459/57344 train_time:5220054ms step_avg:617.10ms +grad accum step:2115/14336 +step:8460/57344 train_time:5221282ms step_avg:617.17ms +step:8461/57344 train_time:5221299ms step_avg:617.10ms +step:8462/57344 train_time:5221538ms step_avg:617.06ms +step:8463/57344 train_time:5222059ms step_avg:617.05ms +grad accum step:2116/14336 +step:8464/57344 train_time:5223311ms step_avg:617.12ms +step:8465/57344 train_time:5223329ms step_avg:617.05ms +step:8466/57344 train_time:5223569ms step_avg:617.01ms +step:8467/57344 train_time:5224088ms step_avg:616.99ms +grad accum step:2117/14336 +step:8468/57344 train_time:5225333ms step_avg:617.07ms +step:8469/57344 train_time:5225350ms step_avg:617.00ms +step:8470/57344 train_time:5225590ms step_avg:616.95ms +step:8471/57344 train_time:5226113ms step_avg:616.94ms +grad accum step:2118/14336 +step:8472/57344 train_time:5227352ms step_avg:617.02ms +step:8473/57344 train_time:5227369ms step_avg:616.94ms +step:8474/57344 train_time:5227610ms step_avg:616.90ms +step:8475/57344 train_time:5228137ms step_avg:616.89ms +grad accum step:2119/14336 +step:8476/57344 train_time:5229375ms step_avg:616.96ms +step:8477/57344 train_time:5229392ms step_avg:616.89ms +step:8478/57344 train_time:5229632ms step_avg:616.85ms +step:8479/57344 train_time:5230163ms step_avg:616.84ms +grad accum step:2120/14336 +step:8480/57344 train_time:5231394ms step_avg:616.91ms +step:8481/57344 train_time:5231411ms step_avg:616.84ms +step:8482/57344 train_time:5231649ms step_avg:616.79ms +step:8483/57344 train_time:5232170ms step_avg:616.78ms +grad accum step:2121/14336 +step:8484/57344 train_time:5233403ms step_avg:616.86ms +step:8485/57344 train_time:5233420ms step_avg:616.78ms +step:8486/57344 train_time:5233661ms step_avg:616.74ms +step:8487/57344 train_time:5234182ms step_avg:616.73ms +grad accum step:2122/14336 +step:8488/57344 train_time:5235413ms step_avg:616.80ms +step:8489/57344 train_time:5235430ms step_avg:616.73ms +step:8490/57344 train_time:5235669ms step_avg:616.69ms +step:8491/57344 train_time:5236190ms step_avg:616.68ms +grad accum step:2123/14336 +step:8492/57344 train_time:5237423ms step_avg:616.75ms +step:8493/57344 train_time:5237440ms step_avg:616.68ms +step:8494/57344 train_time:5237679ms step_avg:616.63ms +step:8495/57344 train_time:5238202ms step_avg:616.62ms +grad accum step:2124/14336 +step:8496/57344 train_time:5239444ms step_avg:616.70ms +step:8497/57344 train_time:5239461ms step_avg:616.62ms +step:8498/57344 train_time:5239699ms step_avg:616.58ms +step:8499/57344 train_time:5240218ms step_avg:616.57ms +grad accum step:2125/14336 +step:8500/57344 train_time:5241460ms step_avg:616.64ms +step:8501/57344 train_time:5241477ms step_avg:616.57ms +step:8502/57344 train_time:5241720ms step_avg:616.53ms +step:8503/57344 train_time:5242247ms step_avg:616.52ms +grad accum step:2126/14336 +step:8504/57344 train_time:5243488ms step_avg:616.59ms +step:8505/57344 train_time:5243505ms step_avg:616.52ms +step:8506/57344 train_time:5243744ms step_avg:616.48ms +step:8507/57344 train_time:5244267ms step_avg:616.46ms +grad accum step:2127/14336 +step:8508/57344 train_time:5245492ms step_avg:616.54ms +step:8509/57344 train_time:5245509ms step_avg:616.47ms +step:8510/57344 train_time:5245750ms step_avg:616.42ms +step:8511/57344 train_time:5246271ms step_avg:616.41ms +grad accum step:2128/14336 +step:8512/57344 train_time:5247522ms step_avg:616.49ms +step:8512/57344 val_loss:7.057210 train_time:5247523ms step_avg:616.49ms +step:8513/57344 train_time:5247535ms step_avg:616.41ms +step:8514/57344 train_time:5247751ms step_avg:616.37ms +step:8515/57344 train_time:5248276ms step_avg:616.36ms +grad accum step:2129/14336 +step:8516/57344 train_time:5249518ms step_avg:616.43ms +step:8517/57344 train_time:5249535ms step_avg:616.36ms +step:8518/57344 train_time:5249778ms step_avg:616.32ms +step:8519/57344 train_time:5250303ms step_avg:616.31ms +grad accum step:2130/14336 +step:8520/57344 train_time:5251542ms step_avg:616.38ms +step:8521/57344 train_time:5251559ms step_avg:616.31ms +step:8522/57344 train_time:5251799ms step_avg:616.26ms +step:8523/57344 train_time:5252322ms step_avg:616.25ms +grad accum step:2131/14336 +step:8524/57344 train_time:5253549ms step_avg:616.32ms +step:8525/57344 train_time:5253566ms step_avg:616.25ms +step:8526/57344 train_time:5253806ms step_avg:616.21ms +step:8527/57344 train_time:5254329ms step_avg:616.20ms +grad accum step:2132/14336 +step:8528/57344 train_time:5255574ms step_avg:616.27ms +step:8529/57344 train_time:5255591ms step_avg:616.20ms +step:8530/57344 train_time:5255832ms step_avg:616.16ms +step:8531/57344 train_time:5256354ms step_avg:616.15ms +grad accum step:2133/14336 +step:8532/57344 train_time:5257583ms step_avg:616.22ms +step:8533/57344 train_time:5257600ms step_avg:616.15ms +step:8534/57344 train_time:5257839ms step_avg:616.10ms +step:8535/57344 train_time:5258364ms step_avg:616.09ms +grad accum step:2134/14336 +step:8536/57344 train_time:5259606ms step_avg:616.17ms +step:8537/57344 train_time:5259624ms step_avg:616.10ms +step:8538/57344 train_time:5259866ms step_avg:616.05ms +step:8539/57344 train_time:5260396ms step_avg:616.04ms +grad accum step:2135/14336 +step:8540/57344 train_time:5261655ms step_avg:616.12ms +step:8541/57344 train_time:5261672ms step_avg:616.05ms +step:8542/57344 train_time:5261914ms step_avg:616.00ms +step:8543/57344 train_time:5262439ms step_avg:615.99ms +grad accum step:2136/14336 +step:8544/57344 train_time:5263674ms step_avg:616.07ms +step:8545/57344 train_time:5263691ms step_avg:616.00ms +step:8546/57344 train_time:5263931ms step_avg:615.95ms +step:8547/57344 train_time:5264460ms step_avg:615.94ms +grad accum step:2137/14336 +step:8548/57344 train_time:5265696ms step_avg:616.02ms +step:8549/57344 train_time:5265713ms step_avg:615.94ms +step:8550/57344 train_time:5265954ms step_avg:615.90ms +step:8551/57344 train_time:5266478ms step_avg:615.89ms +grad accum step:2138/14336 +step:8552/57344 train_time:5267719ms step_avg:615.96ms +step:8553/57344 train_time:5267737ms step_avg:615.89ms +step:8554/57344 train_time:5267976ms step_avg:615.85ms +step:8555/57344 train_time:5268494ms step_avg:615.84ms +grad accum step:2139/14336 +step:8556/57344 train_time:5269739ms step_avg:615.91ms +step:8557/57344 train_time:5269756ms step_avg:615.84ms +step:8558/57344 train_time:5269996ms step_avg:615.80ms +step:8559/57344 train_time:5270524ms step_avg:615.79ms +grad accum step:2140/14336 +step:8560/57344 train_time:5271760ms step_avg:615.86ms +step:8561/57344 train_time:5271777ms step_avg:615.79ms +step:8562/57344 train_time:5272015ms step_avg:615.75ms +step:8563/57344 train_time:5272531ms step_avg:615.73ms +grad accum step:2141/14336 +step:8564/57344 train_time:5273762ms step_avg:615.81ms +step:8565/57344 train_time:5273779ms step_avg:615.74ms +step:8566/57344 train_time:5274019ms step_avg:615.69ms +step:8567/57344 train_time:5274539ms step_avg:615.68ms +grad accum step:2142/14336 +step:8568/57344 train_time:5275796ms step_avg:615.76ms +step:8569/57344 train_time:5275813ms step_avg:615.69ms +step:8570/57344 train_time:5276052ms step_avg:615.64ms +step:8571/57344 train_time:5276570ms step_avg:615.63ms +grad accum step:2143/14336 +step:8572/57344 train_time:5277811ms step_avg:615.70ms +step:8573/57344 train_time:5277828ms step_avg:615.63ms +step:8574/57344 train_time:5278068ms step_avg:615.59ms +step:8575/57344 train_time:5278589ms step_avg:615.58ms +grad accum step:2144/14336 +step:8576/57344 train_time:5279815ms step_avg:615.65ms +step:8576/57344 val_loss:7.057277 train_time:5279815ms step_avg:615.65ms +step:8577/57344 train_time:5279827ms step_avg:615.58ms +step:8578/57344 train_time:5280047ms step_avg:615.53ms +step:8579/57344 train_time:5280572ms step_avg:615.52ms +grad accum step:2145/14336 +step:8580/57344 train_time:5281813ms step_avg:615.60ms +step:8581/57344 train_time:5281830ms step_avg:615.53ms +step:8582/57344 train_time:5282070ms step_avg:615.48ms +step:8583/57344 train_time:5282595ms step_avg:615.47ms +grad accum step:2146/14336 +step:8584/57344 train_time:5283821ms step_avg:615.54ms +step:8585/57344 train_time:5283838ms step_avg:615.47ms +step:8586/57344 train_time:5284080ms step_avg:615.43ms +step:8587/57344 train_time:5284600ms step_avg:615.42ms +grad accum step:2147/14336 +step:8588/57344 train_time:5285845ms step_avg:615.49ms +step:8589/57344 train_time:5285862ms step_avg:615.42ms +step:8590/57344 train_time:5286105ms step_avg:615.38ms +step:8591/57344 train_time:5286627ms step_avg:615.37ms +grad accum step:2148/14336 +step:8592/57344 train_time:5287872ms step_avg:615.44ms +step:8593/57344 train_time:5287889ms step_avg:615.37ms +step:8594/57344 train_time:5288129ms step_avg:615.33ms +step:8595/57344 train_time:5288651ms step_avg:615.32ms +grad accum step:2149/14336 +step:8596/57344 train_time:5289897ms step_avg:615.39ms +step:8597/57344 train_time:5289914ms step_avg:615.32ms +step:8598/57344 train_time:5290153ms step_avg:615.28ms +step:8599/57344 train_time:5290675ms step_avg:615.27ms +grad accum step:2150/14336 +step:8600/57344 train_time:5291922ms step_avg:615.34ms +step:8601/57344 train_time:5291939ms step_avg:615.27ms +step:8602/57344 train_time:5292179ms step_avg:615.23ms +step:8603/57344 train_time:5292700ms step_avg:615.22ms +grad accum step:2151/14336 +step:8604/57344 train_time:5293949ms step_avg:615.29ms +step:8605/57344 train_time:5293966ms step_avg:615.22ms +step:8606/57344 train_time:5294209ms step_avg:615.18ms +step:8607/57344 train_time:5294735ms step_avg:615.17ms +grad accum step:2152/14336 +step:8608/57344 train_time:5295983ms step_avg:615.24ms +step:8609/57344 train_time:5296000ms step_avg:615.17ms +step:8610/57344 train_time:5296240ms step_avg:615.13ms +step:8611/57344 train_time:5296759ms step_avg:615.12ms +grad accum step:2153/14336 +step:8612/57344 train_time:5298006ms step_avg:615.19ms +step:8613/57344 train_time:5298023ms step_avg:615.12ms +step:8614/57344 train_time:5298265ms step_avg:615.08ms +step:8615/57344 train_time:5298797ms step_avg:615.07ms +grad accum step:2154/14336 +step:8616/57344 train_time:5300083ms step_avg:615.14ms +step:8617/57344 train_time:5300097ms step_avg:615.07ms +step:8618/57344 train_time:5300337ms step_avg:615.03ms +step:8619/57344 train_time:5300858ms step_avg:615.02ms +grad accum step:2155/14336 +step:8620/57344 train_time:5302087ms step_avg:615.09ms +step:8621/57344 train_time:5302104ms step_avg:615.02ms +step:8622/57344 train_time:5302344ms step_avg:614.98ms +step:8623/57344 train_time:5302866ms step_avg:614.97ms +grad accum step:2156/14336 +step:8624/57344 train_time:5304094ms step_avg:615.04ms +step:8625/57344 train_time:5304111ms step_avg:614.97ms +step:8626/57344 train_time:5304349ms step_avg:614.93ms +step:8627/57344 train_time:5304870ms step_avg:614.91ms +grad accum step:2157/14336 +step:8628/57344 train_time:5306108ms step_avg:614.99ms +step:8629/57344 train_time:5306125ms step_avg:614.92ms +step:8630/57344 train_time:5306363ms step_avg:614.87ms +step:8631/57344 train_time:5306880ms step_avg:614.86ms +grad accum step:2158/14336 +step:8632/57344 train_time:5308116ms step_avg:614.93ms +step:8633/57344 train_time:5308133ms step_avg:614.87ms +step:8634/57344 train_time:5308374ms step_avg:614.82ms +step:8635/57344 train_time:5308900ms step_avg:614.81ms +grad accum step:2159/14336 +step:8636/57344 train_time:5310130ms step_avg:614.88ms +step:8637/57344 train_time:5310147ms step_avg:614.81ms +step:8638/57344 train_time:5310389ms step_avg:614.77ms +step:8639/57344 train_time:5310911ms step_avg:614.76ms +grad accum step:2160/14336 +step:8640/57344 train_time:5312136ms step_avg:614.83ms +step:8640/57344 val_loss:7.051640 train_time:5312137ms step_avg:614.83ms +step:8641/57344 train_time:5312148ms step_avg:614.76ms +step:8642/57344 train_time:5312369ms step_avg:614.72ms +step:8643/57344 train_time:5312899ms step_avg:614.71ms +grad accum step:2161/14336 +step:8644/57344 train_time:5314167ms step_avg:614.78ms +step:8645/57344 train_time:5314184ms step_avg:614.71ms +step:8646/57344 train_time:5314425ms step_avg:614.67ms +step:8647/57344 train_time:5314948ms step_avg:614.66ms +grad accum step:2162/14336 +step:8648/57344 train_time:5316189ms step_avg:614.73ms +step:8649/57344 train_time:5316206ms step_avg:614.66ms +step:8650/57344 train_time:5316447ms step_avg:614.62ms +step:8651/57344 train_time:5316980ms step_avg:614.61ms +grad accum step:2163/14336 +step:8652/57344 train_time:5318217ms step_avg:614.68ms +step:8653/57344 train_time:5318235ms step_avg:614.61ms +step:8654/57344 train_time:5318475ms step_avg:614.57ms +step:8655/57344 train_time:5319004ms step_avg:614.56ms +grad accum step:2164/14336 +step:8656/57344 train_time:5320249ms step_avg:614.63ms +step:8657/57344 train_time:5320266ms step_avg:614.56ms +step:8658/57344 train_time:5320504ms step_avg:614.52ms +step:8659/57344 train_time:5321022ms step_avg:614.51ms +grad accum step:2165/14336 +step:8660/57344 train_time:5322262ms step_avg:614.58ms +step:8661/57344 train_time:5322280ms step_avg:614.51ms +step:8662/57344 train_time:5322519ms step_avg:614.47ms +step:8663/57344 train_time:5323039ms step_avg:614.46ms +grad accum step:2166/14336 +step:8664/57344 train_time:5324277ms step_avg:614.53ms +step:8665/57344 train_time:5324294ms step_avg:614.46ms +step:8666/57344 train_time:5324535ms step_avg:614.42ms +step:8667/57344 train_time:5325061ms step_avg:614.41ms +grad accum step:2167/14336 +step:8668/57344 train_time:5326317ms step_avg:614.48ms +step:8669/57344 train_time:5326334ms step_avg:614.41ms +step:8670/57344 train_time:5326574ms step_avg:614.37ms +step:8671/57344 train_time:5327096ms step_avg:614.36ms +grad accum step:2168/14336 +step:8672/57344 train_time:5328330ms step_avg:614.43ms +step:8673/57344 train_time:5328347ms step_avg:614.36ms +step:8674/57344 train_time:5328590ms step_avg:614.32ms +step:8675/57344 train_time:5329111ms step_avg:614.31ms +grad accum step:2169/14336 +step:8676/57344 train_time:5330376ms step_avg:614.38ms +step:8677/57344 train_time:5330394ms step_avg:614.31ms +step:8678/57344 train_time:5330634ms step_avg:614.27ms +step:8679/57344 train_time:5331157ms step_avg:614.26ms +grad accum step:2170/14336 +step:8680/57344 train_time:5332398ms step_avg:614.33ms +step:8681/57344 train_time:5332415ms step_avg:614.26ms +step:8682/57344 train_time:5332654ms step_avg:614.22ms +step:8683/57344 train_time:5333178ms step_avg:614.21ms +grad accum step:2171/14336 +step:8684/57344 train_time:5334417ms step_avg:614.28ms +step:8685/57344 train_time:5334434ms step_avg:614.21ms +step:8686/57344 train_time:5334683ms step_avg:614.17ms +step:8687/57344 train_time:5335225ms step_avg:614.16ms +grad accum step:2172/14336 +step:8688/57344 train_time:5336476ms step_avg:614.24ms +step:8689/57344 train_time:5336493ms step_avg:614.17ms +step:8690/57344 train_time:5336734ms step_avg:614.12ms +step:8691/57344 train_time:5337263ms step_avg:614.11ms +grad accum step:2173/14336 +step:8692/57344 train_time:5338499ms step_avg:614.19ms +step:8693/57344 train_time:5338516ms step_avg:614.12ms +step:8694/57344 train_time:5338756ms step_avg:614.07ms +step:8695/57344 train_time:5339277ms step_avg:614.06ms +grad accum step:2174/14336 +step:8696/57344 train_time:5340510ms step_avg:614.13ms +step:8697/57344 train_time:5340527ms step_avg:614.07ms +step:8698/57344 train_time:5340771ms step_avg:614.02ms +step:8699/57344 train_time:5341295ms step_avg:614.01ms +grad accum step:2175/14336 +step:8700/57344 train_time:5342533ms step_avg:614.08ms +step:8701/57344 train_time:5342550ms step_avg:614.02ms +step:8702/57344 train_time:5342793ms step_avg:613.97ms +step:8703/57344 train_time:5343317ms step_avg:613.96ms +grad accum step:2176/14336 +step:8704/57344 train_time:5344556ms step_avg:614.03ms +step:8704/57344 val_loss:7.042135 train_time:5344556ms step_avg:614.03ms +step:8705/57344 train_time:5344568ms step_avg:613.97ms +step:8706/57344 train_time:5344784ms step_avg:613.92ms +step:8707/57344 train_time:5345302ms step_avg:613.91ms +grad accum step:2177/14336 +step:8708/57344 train_time:5346551ms step_avg:613.98ms +step:8709/57344 train_time:5346568ms step_avg:613.91ms +step:8710/57344 train_time:5346807ms step_avg:613.87ms +step:8711/57344 train_time:5347330ms step_avg:613.86ms +grad accum step:2178/14336 +step:8712/57344 train_time:5348576ms step_avg:613.93ms +step:8713/57344 train_time:5348593ms step_avg:613.86ms +step:8714/57344 train_time:5348832ms step_avg:613.82ms +step:8715/57344 train_time:5349354ms step_avg:613.81ms +grad accum step:2179/14336 +step:8716/57344 train_time:5350590ms step_avg:613.88ms +step:8717/57344 train_time:5350607ms step_avg:613.81ms +step:8718/57344 train_time:5350846ms step_avg:613.77ms +step:8719/57344 train_time:5351365ms step_avg:613.76ms +grad accum step:2180/14336 +step:8720/57344 train_time:5352593ms step_avg:613.83ms +step:8721/57344 train_time:5352611ms step_avg:613.76ms +step:8722/57344 train_time:5352851ms step_avg:613.72ms +step:8723/57344 train_time:5353373ms step_avg:613.71ms +grad accum step:2181/14336 +step:8724/57344 train_time:5354600ms step_avg:613.78ms +step:8725/57344 train_time:5354617ms step_avg:613.71ms +step:8726/57344 train_time:5354857ms step_avg:613.67ms +step:8727/57344 train_time:5355379ms step_avg:613.66ms +grad accum step:2182/14336 +step:8728/57344 train_time:5356619ms step_avg:613.73ms +step:8729/57344 train_time:5356636ms step_avg:613.66ms +step:8730/57344 train_time:5356881ms step_avg:613.62ms +step:8731/57344 train_time:5357421ms step_avg:613.61ms +grad accum step:2183/14336 +step:8732/57344 train_time:5358671ms step_avg:613.68ms +step:8733/57344 train_time:5358688ms step_avg:613.61ms +step:8734/57344 train_time:5358928ms step_avg:613.57ms +step:8735/57344 train_time:5359453ms step_avg:613.56ms +grad accum step:2184/14336 +step:8736/57344 train_time:5360690ms step_avg:613.63ms +step:8737/57344 train_time:5360708ms step_avg:613.56ms +step:8738/57344 train_time:5360947ms step_avg:613.52ms +step:8739/57344 train_time:5361472ms step_avg:613.51ms +grad accum step:2185/14336 +step:8740/57344 train_time:5362711ms step_avg:613.58ms +step:8741/57344 train_time:5362728ms step_avg:613.51ms +step:8742/57344 train_time:5362974ms step_avg:613.47ms +step:8743/57344 train_time:5363505ms step_avg:613.46ms +grad accum step:2186/14336 +step:8744/57344 train_time:5364743ms step_avg:613.53ms +step:8745/57344 train_time:5364760ms step_avg:613.47ms +step:8746/57344 train_time:5364999ms step_avg:613.42ms +step:8747/57344 train_time:5365524ms step_avg:613.41ms +grad accum step:2187/14336 +step:8748/57344 train_time:5366770ms step_avg:613.49ms +step:8749/57344 train_time:5366787ms step_avg:613.42ms +step:8750/57344 train_time:5367028ms step_avg:613.37ms +step:8751/57344 train_time:5367548ms step_avg:613.36ms +grad accum step:2188/14336 +step:8752/57344 train_time:5368779ms step_avg:613.43ms +step:8753/57344 train_time:5368796ms step_avg:613.37ms +step:8754/57344 train_time:5369034ms step_avg:613.32ms +step:8755/57344 train_time:5369556ms step_avg:613.31ms +grad accum step:2189/14336 +step:8756/57344 train_time:5370799ms step_avg:613.38ms +step:8757/57344 train_time:5370816ms step_avg:613.32ms +step:8758/57344 train_time:5371057ms step_avg:613.27ms +step:8759/57344 train_time:5371578ms step_avg:613.26ms +grad accum step:2190/14336 +step:8760/57344 train_time:5372802ms step_avg:613.33ms +step:8761/57344 train_time:5372819ms step_avg:613.27ms +step:8762/57344 train_time:5373060ms step_avg:613.22ms +step:8763/57344 train_time:5373584ms step_avg:613.21ms +grad accum step:2191/14336 +step:8764/57344 train_time:5375360ms step_avg:613.35ms +step:8765/57344 train_time:5375372ms step_avg:613.28ms +step:8766/57344 train_time:5375585ms step_avg:613.23ms +step:8767/57344 train_time:5376115ms step_avg:613.22ms +grad accum step:2192/14336 +step:8768/57344 train_time:5377346ms step_avg:613.29ms +step:8768/57344 val_loss:7.022151 train_time:5377346ms step_avg:613.29ms +step:8769/57344 train_time:5377358ms step_avg:613.22ms +step:8770/57344 train_time:5377573ms step_avg:613.18ms +step:8771/57344 train_time:5378091ms step_avg:613.17ms +grad accum step:2193/14336 +step:8772/57344 train_time:5379320ms step_avg:613.24ms +step:8773/57344 train_time:5379337ms step_avg:613.17ms +step:8774/57344 train_time:5379576ms step_avg:613.13ms +step:8775/57344 train_time:5380095ms step_avg:613.12ms +grad accum step:2194/14336 +step:8776/57344 train_time:5381327ms step_avg:613.19ms +step:8777/57344 train_time:5381344ms step_avg:613.12ms +step:8778/57344 train_time:5381584ms step_avg:613.08ms +step:8779/57344 train_time:5382105ms step_avg:613.07ms +grad accum step:2195/14336 +step:8780/57344 train_time:5383336ms step_avg:613.14ms +step:8781/57344 train_time:5383353ms step_avg:613.07ms +step:8782/57344 train_time:5383593ms step_avg:613.03ms +step:8783/57344 train_time:5384114ms step_avg:613.02ms +grad accum step:2196/14336 +step:8784/57344 train_time:5385350ms step_avg:613.09ms +step:8785/57344 train_time:5385367ms step_avg:613.02ms +step:8786/57344 train_time:5385606ms step_avg:612.98ms +step:8787/57344 train_time:5386129ms step_avg:612.97ms +grad accum step:2197/14336 +step:8788/57344 train_time:5387374ms step_avg:613.04ms +step:8789/57344 train_time:5387391ms step_avg:612.97ms +step:8790/57344 train_time:5387635ms step_avg:612.93ms +step:8791/57344 train_time:5388170ms step_avg:612.92ms +grad accum step:2198/14336 +step:8792/57344 train_time:5389427ms step_avg:612.99ms +step:8793/57344 train_time:5389444ms step_avg:612.92ms +step:8794/57344 train_time:5389687ms step_avg:612.88ms +step:8795/57344 train_time:5390222ms step_avg:612.87ms +grad accum step:2199/14336 +step:8796/57344 train_time:5391471ms step_avg:612.95ms +step:8797/57344 train_time:5391488ms step_avg:612.88ms +step:8798/57344 train_time:5391729ms step_avg:612.84ms +step:8799/57344 train_time:5392258ms step_avg:612.83ms +grad accum step:2200/14336 +step:8800/57344 train_time:5393520ms step_avg:612.90ms +step:8801/57344 train_time:5393537ms step_avg:612.83ms +step:8802/57344 train_time:5393781ms step_avg:612.79ms +step:8803/57344 train_time:5394313ms step_avg:612.78ms +grad accum step:2201/14336 +step:8804/57344 train_time:5395593ms step_avg:612.86ms +step:8805/57344 train_time:5395610ms step_avg:612.79ms +step:8806/57344 train_time:5395851ms step_avg:612.75ms +step:8807/57344 train_time:5396378ms step_avg:612.74ms +grad accum step:2202/14336 +step:8808/57344 train_time:5397658ms step_avg:612.81ms +step:8809/57344 train_time:5397675ms step_avg:612.75ms +step:8810/57344 train_time:5397920ms step_avg:612.70ms +step:8811/57344 train_time:5398464ms step_avg:612.70ms +grad accum step:2203/14336 +step:8812/57344 train_time:5399744ms step_avg:612.77ms +step:8813/57344 train_time:5399761ms step_avg:612.70ms +step:8814/57344 train_time:5400005ms step_avg:612.66ms +step:8815/57344 train_time:5400547ms step_avg:612.65ms +grad accum step:2204/14336 +step:8816/57344 train_time:5401824ms step_avg:612.73ms +step:8817/57344 train_time:5401841ms step_avg:612.66ms +step:8818/57344 train_time:5402084ms step_avg:612.62ms +step:8819/57344 train_time:5402613ms step_avg:612.61ms +grad accum step:2205/14336 +step:8820/57344 train_time:5403884ms step_avg:612.69ms +step:8821/57344 train_time:5403901ms step_avg:612.62ms +step:8822/57344 train_time:5404144ms step_avg:612.58ms +step:8823/57344 train_time:5404673ms step_avg:612.57ms +grad accum step:2206/14336 +step:8824/57344 train_time:5405953ms step_avg:612.64ms +step:8825/57344 train_time:5405970ms step_avg:612.57ms +step:8826/57344 train_time:5406212ms step_avg:612.53ms +step:8827/57344 train_time:5406738ms step_avg:612.52ms +grad accum step:2207/14336 +step:8828/57344 train_time:5408017ms step_avg:612.60ms +step:8829/57344 train_time:5408034ms step_avg:612.53ms +step:8830/57344 train_time:5408277ms step_avg:612.49ms +step:8831/57344 train_time:5408811ms step_avg:612.48ms +grad accum step:2208/14336 +step:8832/57344 train_time:5410080ms step_avg:612.55ms +step:8832/57344 val_loss:7.020913 train_time:5410081ms step_avg:612.55ms +step:8833/57344 train_time:5410093ms step_avg:612.49ms +step:8834/57344 train_time:5410317ms step_avg:612.44ms +step:8835/57344 train_time:5410863ms step_avg:612.43ms +grad accum step:2209/14336 +step:8836/57344 train_time:5412127ms step_avg:612.51ms +step:8837/57344 train_time:5412144ms step_avg:612.44ms +step:8838/57344 train_time:5412387ms step_avg:612.40ms +step:8839/57344 train_time:5412924ms step_avg:612.39ms +grad accum step:2210/14336 +step:8840/57344 train_time:5414186ms step_avg:612.46ms +step:8841/57344 train_time:5414203ms step_avg:612.40ms +step:8842/57344 train_time:5414446ms step_avg:612.36ms +step:8843/57344 train_time:5414980ms step_avg:612.35ms +grad accum step:2211/14336 +step:8844/57344 train_time:5416259ms step_avg:612.42ms +step:8845/57344 train_time:5416276ms step_avg:612.35ms +step:8846/57344 train_time:5416520ms step_avg:612.31ms +step:8847/57344 train_time:5417045ms step_avg:612.30ms +grad accum step:2212/14336 +step:8848/57344 train_time:5418307ms step_avg:612.38ms +step:8849/57344 train_time:5418324ms step_avg:612.31ms +step:8850/57344 train_time:5418575ms step_avg:612.27ms +step:8851/57344 train_time:5419124ms step_avg:612.26ms +grad accum step:2213/14336 +step:8852/57344 train_time:5420382ms step_avg:612.33ms +step:8853/57344 train_time:5420399ms step_avg:612.27ms +step:8854/57344 train_time:5420640ms step_avg:612.23ms +step:8855/57344 train_time:5421164ms step_avg:612.22ms +grad accum step:2214/14336 +step:8856/57344 train_time:5422451ms step_avg:612.29ms +step:8857/57344 train_time:5422468ms step_avg:612.22ms +step:8858/57344 train_time:5422713ms step_avg:612.18ms +step:8859/57344 train_time:5423254ms step_avg:612.17ms +grad accum step:2215/14336 +step:8860/57344 train_time:5424518ms step_avg:612.25ms +step:8861/57344 train_time:5424535ms step_avg:612.18ms +step:8862/57344 train_time:5424783ms step_avg:612.14ms +step:8863/57344 train_time:5425326ms step_avg:612.13ms +grad accum step:2216/14336 +step:8864/57344 train_time:5426596ms step_avg:612.21ms +step:8865/57344 train_time:5426612ms step_avg:612.14ms +step:8866/57344 train_time:5426854ms step_avg:612.10ms +step:8867/57344 train_time:5427384ms step_avg:612.09ms +grad accum step:2217/14336 +step:8868/57344 train_time:5428649ms step_avg:612.16ms +step:8869/57344 train_time:5428666ms step_avg:612.09ms +step:8870/57344 train_time:5428912ms step_avg:612.05ms +step:8871/57344 train_time:5429461ms step_avg:612.05ms +grad accum step:2218/14336 +step:8872/57344 train_time:5430728ms step_avg:612.12ms +step:8873/57344 train_time:5430745ms step_avg:612.05ms +step:8874/57344 train_time:5430985ms step_avg:612.01ms +step:8875/57344 train_time:5431518ms step_avg:612.00ms +grad accum step:2219/14336 +step:8876/57344 train_time:5432800ms step_avg:612.08ms +step:8877/57344 train_time:5432817ms step_avg:612.01ms +step:8878/57344 train_time:5433057ms step_avg:611.97ms +step:8879/57344 train_time:5433588ms step_avg:611.96ms +grad accum step:2220/14336 +step:8880/57344 train_time:5434856ms step_avg:612.03ms +step:8881/57344 train_time:5434873ms step_avg:611.97ms +step:8882/57344 train_time:5435114ms step_avg:611.92ms +step:8883/57344 train_time:5435643ms step_avg:611.92ms +grad accum step:2221/14336 +step:8884/57344 train_time:5436922ms step_avg:611.99ms +step:8885/57344 train_time:5436939ms step_avg:611.92ms +step:8886/57344 train_time:5437184ms step_avg:611.88ms +step:8887/57344 train_time:5437726ms step_avg:611.87ms +grad accum step:2222/14336 +step:8888/57344 train_time:5438987ms step_avg:611.95ms +step:8889/57344 train_time:5439004ms step_avg:611.88ms +step:8890/57344 train_time:5439246ms step_avg:611.84ms +step:8891/57344 train_time:5439781ms step_avg:611.83ms +grad accum step:2223/14336 +step:8892/57344 train_time:5441045ms step_avg:611.90ms +step:8893/57344 train_time:5441062ms step_avg:611.84ms +step:8894/57344 train_time:5441305ms step_avg:611.80ms +step:8895/57344 train_time:5441834ms step_avg:611.79ms +grad accum step:2224/14336 +step:8896/57344 train_time:5443095ms step_avg:611.86ms +step:8896/57344 val_loss:7.011633 train_time:5443095ms step_avg:611.86ms +step:8897/57344 train_time:5443107ms step_avg:611.79ms +step:8898/57344 train_time:5443326ms step_avg:611.75ms +step:8899/57344 train_time:5443865ms step_avg:611.74ms +grad accum step:2225/14336 +step:8900/57344 train_time:5445148ms step_avg:611.81ms +step:8901/57344 train_time:5445165ms step_avg:611.75ms +step:8902/57344 train_time:5445409ms step_avg:611.71ms +step:8903/57344 train_time:5445945ms step_avg:611.70ms +grad accum step:2226/14336 +step:8904/57344 train_time:5447221ms step_avg:611.77ms +step:8905/57344 train_time:5447238ms step_avg:611.71ms +step:8906/57344 train_time:5447480ms step_avg:611.66ms +step:8907/57344 train_time:5448009ms step_avg:611.65ms +grad accum step:2227/14336 +step:8908/57344 train_time:5449289ms step_avg:611.73ms +step:8909/57344 train_time:5449306ms step_avg:611.66ms +step:8910/57344 train_time:5449556ms step_avg:611.62ms +step:8911/57344 train_time:5450108ms step_avg:611.62ms +grad accum step:2228/14336 +step:8912/57344 train_time:5451378ms step_avg:611.69ms +step:8913/57344 train_time:5451396ms step_avg:611.62ms +step:8914/57344 train_time:5451638ms step_avg:611.58ms +step:8915/57344 train_time:5452169ms step_avg:611.57ms +grad accum step:2229/14336 +step:8916/57344 train_time:5453452ms step_avg:611.65ms +step:8917/57344 train_time:5453470ms step_avg:611.58ms +step:8918/57344 train_time:5453710ms step_avg:611.54ms +step:8919/57344 train_time:5454232ms step_avg:611.53ms +grad accum step:2230/14336 +step:8920/57344 train_time:5455495ms step_avg:611.60ms +step:8921/57344 train_time:5455512ms step_avg:611.54ms +step:8922/57344 train_time:5455756ms step_avg:611.49ms +step:8923/57344 train_time:5456285ms step_avg:611.49ms +grad accum step:2231/14336 +step:8924/57344 train_time:5457566ms step_avg:611.56ms +step:8925/57344 train_time:5457583ms step_avg:611.49ms +step:8926/57344 train_time:5457830ms step_avg:611.45ms +step:8927/57344 train_time:5458375ms step_avg:611.45ms +grad accum step:2232/14336 +step:8928/57344 train_time:5459636ms step_avg:611.52ms +step:8929/57344 train_time:5459653ms step_avg:611.45ms +step:8930/57344 train_time:5459895ms step_avg:611.41ms +step:8931/57344 train_time:5460431ms step_avg:611.40ms +grad accum step:2233/14336 +step:8932/57344 train_time:5461694ms step_avg:611.47ms +step:8933/57344 train_time:5461710ms step_avg:611.41ms +step:8934/57344 train_time:5461950ms step_avg:611.37ms +step:8935/57344 train_time:5462476ms step_avg:611.36ms +grad accum step:2234/14336 +step:8936/57344 train_time:5463733ms step_avg:611.43ms +step:8937/57344 train_time:5463750ms step_avg:611.36ms +step:8938/57344 train_time:5463994ms step_avg:611.32ms +step:8939/57344 train_time:5464536ms step_avg:611.31ms +grad accum step:2235/14336 +step:8940/57344 train_time:5465806ms step_avg:611.39ms +step:8941/57344 train_time:5465823ms step_avg:611.32ms +step:8942/57344 train_time:5466064ms step_avg:611.28ms +step:8943/57344 train_time:5466603ms step_avg:611.27ms +grad accum step:2236/14336 +step:8944/57344 train_time:5467885ms step_avg:611.35ms +step:8945/57344 train_time:5467902ms step_avg:611.28ms +step:8946/57344 train_time:5468143ms step_avg:611.24ms +step:8947/57344 train_time:5468675ms step_avg:611.23ms +grad accum step:2237/14336 +step:8948/57344 train_time:5469926ms step_avg:611.30ms +step:8949/57344 train_time:5469943ms step_avg:611.24ms +step:8950/57344 train_time:5470185ms step_avg:611.19ms +step:8951/57344 train_time:5470719ms step_avg:611.19ms +grad accum step:2238/14336 +step:8952/57344 train_time:5471982ms step_avg:611.26ms +step:8953/57344 train_time:5471999ms step_avg:611.19ms +step:8954/57344 train_time:5472243ms step_avg:611.15ms +step:8955/57344 train_time:5472772ms step_avg:611.14ms +grad accum step:2239/14336 +step:8956/57344 train_time:5474051ms step_avg:611.22ms +step:8957/57344 train_time:5474068ms step_avg:611.15ms +step:8958/57344 train_time:5474312ms step_avg:611.11ms +step:8959/57344 train_time:5474844ms step_avg:611.10ms +grad accum step:2240/14336 +step:8960/57344 train_time:5476093ms step_avg:611.17ms +step:8960/57344 val_loss:7.005154 train_time:5476093ms step_avg:611.17ms +step:8961/57344 train_time:5476105ms step_avg:611.10ms +step:8962/57344 train_time:5476323ms step_avg:611.06ms +step:8963/57344 train_time:5476850ms step_avg:611.05ms +grad accum step:2241/14336 +step:8964/57344 train_time:5478130ms step_avg:611.13ms +step:8965/57344 train_time:5478147ms step_avg:611.06ms +step:8966/57344 train_time:5478392ms step_avg:611.02ms +step:8967/57344 train_time:5478921ms step_avg:611.01ms +grad accum step:2242/14336 +step:8968/57344 train_time:5480189ms step_avg:611.08ms +step:8969/57344 train_time:5480205ms step_avg:611.02ms +step:8970/57344 train_time:5480455ms step_avg:610.98ms +step:8971/57344 train_time:5480996ms step_avg:610.97ms +grad accum step:2243/14336 +step:8972/57344 train_time:5482272ms step_avg:611.04ms +step:8973/57344 train_time:5482289ms step_avg:610.98ms +step:8974/57344 train_time:5482530ms step_avg:610.93ms +step:8975/57344 train_time:5483053ms step_avg:610.93ms +grad accum step:2244/14336 +step:8976/57344 train_time:5484317ms step_avg:611.00ms +step:8977/57344 train_time:5484334ms step_avg:610.93ms +step:8978/57344 train_time:5484582ms step_avg:610.89ms +step:8979/57344 train_time:5485127ms step_avg:610.88ms +grad accum step:2245/14336 +step:8980/57344 train_time:5486386ms step_avg:610.96ms +step:8981/57344 train_time:5486403ms step_avg:610.89ms +step:8982/57344 train_time:5486649ms step_avg:610.85ms +step:8983/57344 train_time:5487190ms step_avg:610.84ms +grad accum step:2246/14336 +step:8984/57344 train_time:5488449ms step_avg:610.91ms +step:8985/57344 train_time:5488466ms step_avg:610.85ms +step:8986/57344 train_time:5488706ms step_avg:610.81ms +step:8987/57344 train_time:5489237ms step_avg:610.80ms +grad accum step:2247/14336 +step:8988/57344 train_time:5490516ms step_avg:610.87ms +step:8989/57344 train_time:5490533ms step_avg:610.81ms +step:8990/57344 train_time:5490778ms step_avg:610.77ms +step:8991/57344 train_time:5491321ms step_avg:610.76ms +grad accum step:2248/14336 +step:8992/57344 train_time:5492617ms step_avg:610.83ms +step:8993/57344 train_time:5492634ms step_avg:610.77ms +step:8994/57344 train_time:5492878ms step_avg:610.73ms +step:8995/57344 train_time:5493416ms step_avg:610.72ms +grad accum step:2249/14336 +step:8996/57344 train_time:5494675ms step_avg:610.79ms +step:8997/57344 train_time:5494692ms step_avg:610.72ms +step:8998/57344 train_time:5494939ms step_avg:610.68ms +step:8999/57344 train_time:5495484ms step_avg:610.68ms +grad accum step:2250/14336 +step:9000/57344 train_time:5496758ms step_avg:610.75ms +step:9001/57344 train_time:5496775ms step_avg:610.68ms +step:9002/57344 train_time:5497019ms step_avg:610.64ms +step:9003/57344 train_time:5497551ms step_avg:610.64ms +grad accum step:2251/14336 +step:9004/57344 train_time:5498828ms step_avg:610.71ms +step:9005/57344 train_time:5498846ms step_avg:610.64ms +step:9006/57344 train_time:5499086ms step_avg:610.60ms +step:9007/57344 train_time:5499613ms step_avg:610.59ms +grad accum step:2252/14336 +step:9008/57344 train_time:5500890ms step_avg:610.67ms +step:9009/57344 train_time:5500907ms step_avg:610.60ms +step:9010/57344 train_time:5501152ms step_avg:610.56ms +step:9011/57344 train_time:5501687ms step_avg:610.55ms +grad accum step:2253/14336 +step:9012/57344 train_time:5502949ms step_avg:610.62ms +step:9013/57344 train_time:5502966ms step_avg:610.56ms +step:9014/57344 train_time:5503209ms step_avg:610.52ms +step:9015/57344 train_time:5503741ms step_avg:610.51ms +grad accum step:2254/14336 +step:9016/57344 train_time:5505013ms step_avg:610.58ms +step:9017/57344 train_time:5505030ms step_avg:610.52ms +step:9018/57344 train_time:5505274ms step_avg:610.48ms +step:9019/57344 train_time:5505807ms step_avg:610.47ms +grad accum step:2255/14336 +step:9020/57344 train_time:5507083ms step_avg:610.54ms +step:9021/57344 train_time:5507100ms step_avg:610.48ms +step:9022/57344 train_time:5507341ms step_avg:610.43ms +step:9023/57344 train_time:5507866ms step_avg:610.43ms +grad accum step:2256/14336 +step:9024/57344 train_time:5509149ms step_avg:610.50ms +step:9024/57344 val_loss:6.996831 train_time:5509149ms step_avg:610.50ms +step:9025/57344 train_time:5510627ms step_avg:610.60ms +step:9026/57344 train_time:5510769ms step_avg:610.54ms +step:9027/57344 train_time:5511026ms step_avg:610.50ms +grad accum step:2257/14336 +step:9028/57344 train_time:5512486ms step_avg:610.60ms +step:9029/57344 train_time:5512497ms step_avg:610.53ms +step:9030/57344 train_time:5512716ms step_avg:610.49ms +step:9031/57344 train_time:5513253ms step_avg:610.48ms +grad accum step:2258/14336 +step:9032/57344 train_time:5514517ms step_avg:610.55ms +step:9033/57344 train_time:5514534ms step_avg:610.49ms +step:9034/57344 train_time:5514776ms step_avg:610.45ms +step:9035/57344 train_time:5515305ms step_avg:610.44ms +grad accum step:2259/14336 +step:9036/57344 train_time:5516574ms step_avg:610.51ms +step:9037/57344 train_time:5516591ms step_avg:610.44ms +step:9038/57344 train_time:5516845ms step_avg:610.41ms +step:9039/57344 train_time:5517406ms step_avg:610.40ms +grad accum step:2260/14336 +step:9040/57344 train_time:5518689ms step_avg:610.47ms +step:9041/57344 train_time:5518706ms step_avg:610.41ms +step:9042/57344 train_time:5518949ms step_avg:610.37ms +step:9043/57344 train_time:5519477ms step_avg:610.36ms +grad accum step:2261/14336 +step:9044/57344 train_time:5520732ms step_avg:610.43ms +step:9045/57344 train_time:5520749ms step_avg:610.36ms +step:9046/57344 train_time:5520992ms step_avg:610.32ms +step:9047/57344 train_time:5521521ms step_avg:610.32ms +grad accum step:2262/14336 +step:9048/57344 train_time:5522778ms step_avg:610.39ms +step:9049/57344 train_time:5522795ms step_avg:610.32ms +step:9050/57344 train_time:5523035ms step_avg:610.28ms +step:9051/57344 train_time:5523562ms step_avg:610.27ms +grad accum step:2263/14336 +step:9052/57344 train_time:5524843ms step_avg:610.35ms +step:9053/57344 train_time:5524859ms step_avg:610.28ms +step:9054/57344 train_time:5525102ms step_avg:610.24ms +step:9055/57344 train_time:5525642ms step_avg:610.23ms +grad accum step:2264/14336 +step:9056/57344 train_time:5526903ms step_avg:610.30ms +step:9057/57344 train_time:5526920ms step_avg:610.24ms +step:9058/57344 train_time:5527169ms step_avg:610.20ms +step:9059/57344 train_time:5527711ms step_avg:610.19ms +grad accum step:2265/14336 +step:9060/57344 train_time:5528972ms step_avg:610.26ms +step:9061/57344 train_time:5528989ms step_avg:610.20ms +step:9062/57344 train_time:5529232ms step_avg:610.16ms +step:9063/57344 train_time:5529772ms step_avg:610.15ms +grad accum step:2266/14336 +step:9064/57344 train_time:5531052ms step_avg:610.22ms +step:9065/57344 train_time:5531069ms step_avg:610.16ms +step:9066/57344 train_time:5531314ms step_avg:610.12ms +step:9067/57344 train_time:5532068ms step_avg:610.13ms +grad accum step:2267/14336 +step:9068/57344 train_time:5533112ms step_avg:610.18ms +step:9069/57344 train_time:5533129ms step_avg:610.11ms +step:9070/57344 train_time:5533372ms step_avg:610.07ms +step:9071/57344 train_time:5534663ms step_avg:610.15ms +grad accum step:2268/14336 +step:9072/57344 train_time:5535812ms step_avg:610.21ms +step:9073/57344 train_time:5535824ms step_avg:610.14ms +step:9074/57344 train_time:5536059ms step_avg:610.10ms +step:9075/57344 train_time:5536593ms step_avg:610.09ms +grad accum step:2269/14336 +step:9076/57344 train_time:5537858ms step_avg:610.17ms +step:9077/57344 train_time:5537875ms step_avg:610.10ms +step:9078/57344 train_time:5538117ms step_avg:610.06ms +step:9079/57344 train_time:5538650ms step_avg:610.05ms +grad accum step:2270/14336 +step:9080/57344 train_time:5539915ms step_avg:610.12ms +step:9081/57344 train_time:5539932ms step_avg:610.06ms +step:9082/57344 train_time:5540173ms step_avg:610.02ms +step:9083/57344 train_time:5540703ms step_avg:610.01ms +grad accum step:2271/14336 +step:9084/57344 train_time:5541975ms step_avg:610.08ms +step:9085/57344 train_time:5541992ms step_avg:610.02ms +step:9086/57344 train_time:5542235ms step_avg:609.98ms +step:9087/57344 train_time:5542764ms step_avg:609.97ms +grad accum step:2272/14336 +step:9088/57344 train_time:5544060ms step_avg:610.04ms +step:9088/57344 val_loss:6.996270 train_time:5544060ms step_avg:610.04ms +step:9089/57344 train_time:5544072ms step_avg:609.98ms +step:9090/57344 train_time:5544298ms step_avg:609.93ms +step:9091/57344 train_time:5544847ms step_avg:609.93ms +grad accum step:2273/14336 +step:9092/57344 train_time:5546125ms step_avg:610.00ms +step:9093/57344 train_time:5546142ms step_avg:609.94ms +step:9094/57344 train_time:5546383ms step_avg:609.89ms +step:9095/57344 train_time:5546911ms step_avg:609.89ms +grad accum step:2274/14336 +step:9096/57344 train_time:5548170ms step_avg:609.96ms +step:9097/57344 train_time:5548187ms step_avg:609.89ms +step:9098/57344 train_time:5548429ms step_avg:609.85ms +step:9099/57344 train_time:5548960ms step_avg:609.84ms +grad accum step:2275/14336 +step:9100/57344 train_time:5550217ms step_avg:609.91ms +step:9101/57344 train_time:5550234ms step_avg:609.85ms +step:9102/57344 train_time:5550477ms step_avg:609.81ms +step:9103/57344 train_time:5551010ms step_avg:609.80ms +grad accum step:2276/14336 +step:9104/57344 train_time:5552272ms step_avg:609.87ms +step:9105/57344 train_time:5552289ms step_avg:609.81ms +step:9106/57344 train_time:5552532ms step_avg:609.77ms +step:9107/57344 train_time:5553066ms step_avg:609.76ms +grad accum step:2277/14336 +step:9108/57344 train_time:5554344ms step_avg:609.83ms +step:9109/57344 train_time:5554361ms step_avg:609.77ms +step:9110/57344 train_time:5554603ms step_avg:609.73ms +step:9111/57344 train_time:5555132ms step_avg:609.72ms +grad accum step:2278/14336 +step:9112/57344 train_time:5556402ms step_avg:609.79ms +step:9113/57344 train_time:5556420ms step_avg:609.72ms +step:9114/57344 train_time:5556662ms step_avg:609.68ms +step:9115/57344 train_time:5557193ms step_avg:609.68ms +grad accum step:2279/14336 +step:9116/57344 train_time:5558469ms step_avg:609.75ms +step:9117/57344 train_time:5558486ms step_avg:609.68ms +step:9118/57344 train_time:5558731ms step_avg:609.64ms +step:9119/57344 train_time:5559277ms step_avg:609.64ms +grad accum step:2280/14336 +step:9120/57344 train_time:5560532ms step_avg:609.71ms +step:9121/57344 train_time:5560549ms step_avg:609.64ms +step:9122/57344 train_time:5560790ms step_avg:609.60ms +step:9123/57344 train_time:5561313ms step_avg:609.59ms +grad accum step:2281/14336 +step:9124/57344 train_time:5562567ms step_avg:609.66ms +step:9125/57344 train_time:5562584ms step_avg:609.60ms +step:9126/57344 train_time:5562828ms step_avg:609.56ms +step:9127/57344 train_time:5563362ms step_avg:609.55ms +grad accum step:2282/14336 +step:9128/57344 train_time:5564635ms step_avg:609.62ms +step:9129/57344 train_time:5564653ms step_avg:609.56ms +step:9130/57344 train_time:5564894ms step_avg:609.52ms +step:9131/57344 train_time:5565423ms step_avg:609.51ms +grad accum step:2283/14336 +step:9132/57344 train_time:5566702ms step_avg:609.58ms +step:9133/57344 train_time:5566719ms step_avg:609.52ms +step:9134/57344 train_time:5566959ms step_avg:609.48ms +step:9135/57344 train_time:5567487ms step_avg:609.47ms +grad accum step:2284/14336 +step:9136/57344 train_time:5568779ms step_avg:609.54ms +step:9137/57344 train_time:5568796ms step_avg:609.48ms +step:9138/57344 train_time:5569038ms step_avg:609.44ms +step:9139/57344 train_time:5569566ms step_avg:609.43ms +grad accum step:2285/14336 +step:9140/57344 train_time:5570848ms step_avg:609.50ms +step:9141/57344 train_time:5570866ms step_avg:609.44ms +step:9142/57344 train_time:5571108ms step_avg:609.40ms +step:9143/57344 train_time:5571645ms step_avg:609.39ms +grad accum step:2286/14336 +step:9144/57344 train_time:5572910ms step_avg:609.46ms +step:9145/57344 train_time:5572928ms step_avg:609.40ms +step:9146/57344 train_time:5573171ms step_avg:609.36ms +step:9147/57344 train_time:5573701ms step_avg:609.35ms +grad accum step:2287/14336 +step:9148/57344 train_time:5574954ms step_avg:609.42ms +step:9149/57344 train_time:5574971ms step_avg:609.35ms +step:9150/57344 train_time:5575211ms step_avg:609.31ms +step:9151/57344 train_time:5575739ms step_avg:609.30ms +grad accum step:2288/14336 +step:9152/57344 train_time:5577018ms step_avg:609.38ms +step:9152/57344 val_loss:6.975245 train_time:5577019ms step_avg:609.38ms +step:9153/57344 train_time:5577030ms step_avg:609.31ms +step:9154/57344 train_time:5577250ms step_avg:609.27ms +step:9155/57344 train_time:5577781ms step_avg:609.26ms +grad accum step:2289/14336 +step:9156/57344 train_time:5579062ms step_avg:609.33ms +step:9157/57344 train_time:5579080ms step_avg:609.27ms +step:9158/57344 train_time:5579323ms step_avg:609.23ms +step:9159/57344 train_time:5579862ms step_avg:609.22ms +grad accum step:2290/14336 +step:9160/57344 train_time:5581125ms step_avg:609.29ms +step:9161/57344 train_time:5581142ms step_avg:609.23ms +step:9162/57344 train_time:5581384ms step_avg:609.19ms +step:9163/57344 train_time:5581911ms step_avg:609.18ms +grad accum step:2291/14336 +step:9164/57344 train_time:5583185ms step_avg:609.25ms +step:9165/57344 train_time:5583202ms step_avg:609.19ms +step:9166/57344 train_time:5583446ms step_avg:609.15ms +step:9167/57344 train_time:5583980ms step_avg:609.14ms +grad accum step:2292/14336 +step:9168/57344 train_time:5585270ms step_avg:609.21ms +step:9169/57344 train_time:5585287ms step_avg:609.15ms +step:9170/57344 train_time:5585541ms step_avg:609.11ms +step:9171/57344 train_time:5586092ms step_avg:609.10ms +grad accum step:2293/14336 +step:9172/57344 train_time:5587358ms step_avg:609.18ms +step:9173/57344 train_time:5587375ms step_avg:609.11ms +step:9174/57344 train_time:5587619ms step_avg:609.07ms +step:9175/57344 train_time:5588160ms step_avg:609.06ms +grad accum step:2294/14336 +step:9176/57344 train_time:5589438ms step_avg:609.14ms +step:9177/57344 train_time:5589455ms step_avg:609.07ms +step:9178/57344 train_time:5589702ms step_avg:609.03ms +step:9179/57344 train_time:5590249ms step_avg:609.03ms +grad accum step:2295/14336 +step:9180/57344 train_time:5591521ms step_avg:609.10ms +step:9181/57344 train_time:5591538ms step_avg:609.03ms +step:9182/57344 train_time:5591782ms step_avg:608.99ms +step:9183/57344 train_time:5592311ms step_avg:608.99ms +grad accum step:2296/14336 +step:9184/57344 train_time:5593582ms step_avg:609.06ms +step:9185/57344 train_time:5593599ms step_avg:608.99ms +step:9186/57344 train_time:5593841ms step_avg:608.95ms +step:9187/57344 train_time:5594370ms step_avg:608.94ms +grad accum step:2297/14336 +step:9188/57344 train_time:5595646ms step_avg:609.02ms +step:9189/57344 train_time:5595663ms step_avg:608.95ms +step:9190/57344 train_time:5595905ms step_avg:608.91ms +step:9191/57344 train_time:5596429ms step_avg:608.90ms +grad accum step:2298/14336 +step:9192/57344 train_time:5597684ms step_avg:608.97ms +step:9193/57344 train_time:5597701ms step_avg:608.91ms +step:9194/57344 train_time:5597941ms step_avg:608.87ms +step:9195/57344 train_time:5598475ms step_avg:608.86ms +grad accum step:2299/14336 +step:9196/57344 train_time:5599754ms step_avg:608.93ms +step:9197/57344 train_time:5599771ms step_avg:608.87ms +step:9198/57344 train_time:5600013ms step_avg:608.83ms +step:9199/57344 train_time:5600545ms step_avg:608.82ms +grad accum step:2300/14336 +step:9200/57344 train_time:5601818ms step_avg:608.89ms +step:9201/57344 train_time:5601836ms step_avg:608.83ms +step:9202/57344 train_time:5602078ms step_avg:608.79ms +step:9203/57344 train_time:5602605ms step_avg:608.78ms +grad accum step:2301/14336 +step:9204/57344 train_time:5603878ms step_avg:608.85ms +step:9205/57344 train_time:5603895ms step_avg:608.79ms +step:9206/57344 train_time:5604140ms step_avg:608.75ms +step:9207/57344 train_time:5604670ms step_avg:608.74ms +grad accum step:2302/14336 +step:9208/57344 train_time:5605938ms step_avg:608.81ms +step:9209/57344 train_time:5605955ms step_avg:608.75ms +step:9210/57344 train_time:5606198ms step_avg:608.71ms +step:9211/57344 train_time:5606733ms step_avg:608.70ms +grad accum step:2303/14336 +step:9212/57344 train_time:5608004ms step_avg:608.77ms +step:9213/57344 train_time:5608021ms step_avg:608.71ms +step:9214/57344 train_time:5608265ms step_avg:608.67ms +step:9215/57344 train_time:5608797ms step_avg:608.66ms +grad accum step:2304/14336 +step:9216/57344 train_time:5610069ms step_avg:608.73ms +step:9216/57344 val_loss:6.971144 train_time:5610070ms step_avg:608.73ms +step:9217/57344 train_time:5610082ms step_avg:608.67ms +step:9218/57344 train_time:5610301ms step_avg:608.62ms +step:9219/57344 train_time:5610839ms step_avg:608.62ms +grad accum step:2305/14336 +step:9220/57344 train_time:5612118ms step_avg:608.69ms +step:9221/57344 train_time:5612134ms step_avg:608.63ms +step:9222/57344 train_time:5612375ms step_avg:608.59ms +step:9223/57344 train_time:5612902ms step_avg:608.58ms +grad accum step:2306/14336 +step:9224/57344 train_time:5614173ms step_avg:608.65ms +step:9225/57344 train_time:5614191ms step_avg:608.58ms +step:9226/57344 train_time:5614433ms step_avg:608.54ms +step:9227/57344 train_time:5614967ms step_avg:608.54ms +grad accum step:2307/14336 +step:9228/57344 train_time:5616232ms step_avg:608.61ms +step:9229/57344 train_time:5616249ms step_avg:608.54ms +step:9230/57344 train_time:5616490ms step_avg:608.50ms +step:9231/57344 train_time:5617021ms step_avg:608.50ms +grad accum step:2308/14336 +step:9232/57344 train_time:5618290ms step_avg:608.57ms +step:9233/57344 train_time:5618307ms step_avg:608.50ms +step:9234/57344 train_time:5618550ms step_avg:608.46ms +step:9235/57344 train_time:5619089ms step_avg:608.46ms +grad accum step:2309/14336 +step:9236/57344 train_time:5620349ms step_avg:608.53ms +step:9237/57344 train_time:5620367ms step_avg:608.46ms +step:9238/57344 train_time:5620610ms step_avg:608.42ms +step:9239/57344 train_time:5621146ms step_avg:608.42ms +grad accum step:2310/14336 +step:9240/57344 train_time:5622458ms step_avg:608.49ms +step:9241/57344 train_time:5622475ms step_avg:608.43ms +step:9242/57344 train_time:5622715ms step_avg:608.39ms +step:9243/57344 train_time:5623247ms step_avg:608.38ms +grad accum step:2311/14336 +step:9244/57344 train_time:5624522ms step_avg:608.45ms +step:9245/57344 train_time:5624540ms step_avg:608.39ms +step:9246/57344 train_time:5624782ms step_avg:608.35ms +step:9247/57344 train_time:5625322ms step_avg:608.34ms +grad accum step:2312/14336 +step:9248/57344 train_time:5626607ms step_avg:608.41ms +step:9249/57344 train_time:5626624ms step_avg:608.35ms +step:9250/57344 train_time:5626867ms step_avg:608.31ms +step:9251/57344 train_time:5627405ms step_avg:608.30ms +grad accum step:2313/14336 +step:9252/57344 train_time:5628691ms step_avg:608.38ms +step:9253/57344 train_time:5628708ms step_avg:608.31ms +step:9254/57344 train_time:5628953ms step_avg:608.27ms +step:9255/57344 train_time:5629495ms step_avg:608.27ms +grad accum step:2314/14336 +step:9256/57344 train_time:5630757ms step_avg:608.34ms +step:9257/57344 train_time:5630774ms step_avg:608.27ms +step:9258/57344 train_time:5631017ms step_avg:608.23ms +step:9259/57344 train_time:5631550ms step_avg:608.22ms +grad accum step:2315/14336 +step:9260/57344 train_time:5632813ms step_avg:608.30ms +step:9261/57344 train_time:5632831ms step_avg:608.23ms +step:9262/57344 train_time:5633070ms step_avg:608.19ms +step:9263/57344 train_time:5633591ms step_avg:608.18ms +grad accum step:2316/14336 +step:9264/57344 train_time:5634851ms step_avg:608.25ms +step:9265/57344 train_time:5634869ms step_avg:608.19ms +step:9266/57344 train_time:5635114ms step_avg:608.15ms +step:9267/57344 train_time:5635647ms step_avg:608.14ms +grad accum step:2317/14336 +step:9268/57344 train_time:5636922ms step_avg:608.21ms +step:9269/57344 train_time:5636939ms step_avg:608.15ms +step:9270/57344 train_time:5637183ms step_avg:608.11ms +step:9271/57344 train_time:5637724ms step_avg:608.10ms +grad accum step:2318/14336 +step:9272/57344 train_time:5639005ms step_avg:608.18ms +step:9273/57344 train_time:5639022ms step_avg:608.11ms +step:9274/57344 train_time:5639265ms step_avg:608.07ms +step:9275/57344 train_time:5639800ms step_avg:608.06ms +grad accum step:2319/14336 +step:9276/57344 train_time:5641065ms step_avg:608.14ms +step:9277/57344 train_time:5641082ms step_avg:608.07ms +step:9278/57344 train_time:5641324ms step_avg:608.03ms +step:9279/57344 train_time:5641851ms step_avg:608.02ms +grad accum step:2320/14336 +step:9280/57344 train_time:5643110ms step_avg:608.09ms +step:9280/57344 val_loss:6.959331 train_time:5643110ms step_avg:608.09ms +step:9281/57344 train_time:5643122ms step_avg:608.03ms +step:9282/57344 train_time:5643343ms step_avg:607.99ms +step:9283/57344 train_time:5643879ms step_avg:607.98ms +grad accum step:2321/14336 +step:9284/57344 train_time:5645157ms step_avg:608.05ms +step:9285/57344 train_time:5645174ms step_avg:607.99ms +step:9286/57344 train_time:5645414ms step_avg:607.95ms +step:9287/57344 train_time:5645944ms step_avg:607.94ms +grad accum step:2322/14336 +step:9288/57344 train_time:5647216ms step_avg:608.01ms +step:9289/57344 train_time:5647233ms step_avg:607.95ms +step:9290/57344 train_time:5647474ms step_avg:607.91ms +step:9291/57344 train_time:5648001ms step_avg:607.90ms +grad accum step:2323/14336 +step:9292/57344 train_time:5649273ms step_avg:607.97ms +step:9293/57344 train_time:5649290ms step_avg:607.91ms +step:9294/57344 train_time:5649533ms step_avg:607.87ms +step:9295/57344 train_time:5650064ms step_avg:607.86ms +grad accum step:2324/14336 +step:9296/57344 train_time:5651337ms step_avg:607.93ms +step:9297/57344 train_time:5651354ms step_avg:607.87ms +step:9298/57344 train_time:5651596ms step_avg:607.83ms +step:9299/57344 train_time:5652130ms step_avg:607.82ms +grad accum step:2325/14336 +step:9300/57344 train_time:5653404ms step_avg:607.89ms +step:9301/57344 train_time:5653946ms step_avg:607.89ms +step:9302/57344 train_time:5654159ms step_avg:607.84ms +step:9303/57344 train_time:5654697ms step_avg:607.84ms +grad accum step:2326/14336 +step:9304/57344 train_time:5655974ms step_avg:607.91ms +step:9305/57344 train_time:5655991ms step_avg:607.84ms +step:9306/57344 train_time:5656234ms step_avg:607.81ms +step:9307/57344 train_time:5656757ms step_avg:607.80ms +grad accum step:2327/14336 +step:9308/57344 train_time:5658019ms step_avg:607.87ms +step:9309/57344 train_time:5658036ms step_avg:607.80ms +step:9310/57344 train_time:5658276ms step_avg:607.76ms +step:9311/57344 train_time:5658800ms step_avg:607.75ms +grad accum step:2328/14336 +step:9312/57344 train_time:5660066ms step_avg:607.82ms +step:9313/57344 train_time:5660083ms step_avg:607.76ms +step:9314/57344 train_time:5660328ms step_avg:607.72ms +step:9315/57344 train_time:5660869ms step_avg:607.72ms +grad accum step:2329/14336 +step:9316/57344 train_time:5662139ms step_avg:607.79ms +step:9317/57344 train_time:5662156ms step_avg:607.72ms +step:9318/57344 train_time:5662397ms step_avg:607.68ms +step:9319/57344 train_time:5662926ms step_avg:607.68ms +grad accum step:2330/14336 +step:9320/57344 train_time:5664202ms step_avg:607.75ms +step:9321/57344 train_time:5664219ms step_avg:607.68ms +step:9322/57344 train_time:5664463ms step_avg:607.64ms +step:9323/57344 train_time:5664991ms step_avg:607.64ms +grad accum step:2331/14336 +step:9324/57344 train_time:5666275ms step_avg:607.71ms +step:9325/57344 train_time:5666291ms step_avg:607.65ms +step:9326/57344 train_time:5666535ms step_avg:607.61ms +step:9327/57344 train_time:5667080ms step_avg:607.60ms +grad accum step:2332/14336 +step:9328/57344 train_time:5668359ms step_avg:607.67ms +step:9329/57344 train_time:5668373ms step_avg:607.61ms +step:9330/57344 train_time:5668613ms step_avg:607.57ms +step:9331/57344 train_time:5669137ms step_avg:607.56ms +grad accum step:2333/14336 +step:9332/57344 train_time:5670399ms step_avg:607.63ms +step:9333/57344 train_time:5670416ms step_avg:607.57ms +step:9334/57344 train_time:5670658ms step_avg:607.53ms +step:9335/57344 train_time:5671184ms step_avg:607.52ms +grad accum step:2334/14336 +step:9336/57344 train_time:5672463ms step_avg:607.59ms +step:9337/57344 train_time:5672480ms step_avg:607.53ms +step:9338/57344 train_time:5672729ms step_avg:607.49ms +step:9339/57344 train_time:5673273ms step_avg:607.48ms +grad accum step:2335/14336 +step:9340/57344 train_time:5674544ms step_avg:607.55ms +step:9341/57344 train_time:5674561ms step_avg:607.49ms +step:9342/57344 train_time:5674805ms step_avg:607.45ms +step:9343/57344 train_time:5675344ms step_avg:607.44ms +grad accum step:2336/14336 +step:9344/57344 train_time:5676611ms step_avg:607.51ms +step:9344/57344 val_loss:6.947041 train_time:5676612ms step_avg:607.51ms +step:9345/57344 train_time:5676623ms step_avg:607.45ms +step:9346/57344 train_time:5676840ms step_avg:607.41ms +step:9347/57344 train_time:5677371ms step_avg:607.40ms +grad accum step:2337/14336 +step:9348/57344 train_time:5678633ms step_avg:607.47ms +step:9349/57344 train_time:5678651ms step_avg:607.41ms +step:9350/57344 train_time:5678893ms step_avg:607.37ms +step:9351/57344 train_time:5679420ms step_avg:607.36ms +grad accum step:2338/14336 +step:9352/57344 train_time:5680696ms step_avg:607.43ms +step:9353/57344 train_time:5680713ms step_avg:607.37ms +step:9354/57344 train_time:5680958ms step_avg:607.33ms +step:9355/57344 train_time:5681490ms step_avg:607.32ms +grad accum step:2339/14336 +step:9356/57344 train_time:5682767ms step_avg:607.39ms +step:9357/57344 train_time:5682784ms step_avg:607.33ms +step:9358/57344 train_time:5683031ms step_avg:607.29ms +step:9359/57344 train_time:5683570ms step_avg:607.28ms +grad accum step:2340/14336 +step:9360/57344 train_time:5684848ms step_avg:607.36ms +step:9361/57344 train_time:5684864ms step_avg:607.29ms +step:9362/57344 train_time:5685107ms step_avg:607.25ms +step:9363/57344 train_time:5685647ms step_avg:607.25ms +grad accum step:2341/14336 +step:9364/57344 train_time:5686932ms step_avg:607.32ms +step:9365/57344 train_time:5686948ms step_avg:607.26ms +step:9366/57344 train_time:5687194ms step_avg:607.22ms +step:9367/57344 train_time:5687733ms step_avg:607.21ms +grad accum step:2342/14336 +step:9368/57344 train_time:5689011ms step_avg:607.28ms +step:9369/57344 train_time:5689029ms step_avg:607.22ms +step:9370/57344 train_time:5689270ms step_avg:607.18ms +step:9371/57344 train_time:5689796ms step_avg:607.17ms +grad accum step:2343/14336 +step:9372/57344 train_time:5691069ms step_avg:607.24ms +step:9373/57344 train_time:5691086ms step_avg:607.18ms +step:9374/57344 train_time:5691331ms step_avg:607.14ms +step:9375/57344 train_time:5691862ms step_avg:607.13ms +grad accum step:2344/14336 +step:9376/57344 train_time:5693137ms step_avg:607.20ms +step:9377/57344 train_time:5693154ms step_avg:607.14ms +step:9378/57344 train_time:5693400ms step_avg:607.10ms +step:9379/57344 train_time:5693948ms step_avg:607.10ms +grad accum step:2345/14336 +step:9380/57344 train_time:5695214ms step_avg:607.17ms +step:9381/57344 train_time:5695231ms step_avg:607.10ms +step:9382/57344 train_time:5695474ms step_avg:607.06ms +step:9383/57344 train_time:5696002ms step_avg:607.06ms +grad accum step:2346/14336 +step:9384/57344 train_time:5697284ms step_avg:607.13ms +step:9385/57344 train_time:5697301ms step_avg:607.06ms +step:9386/57344 train_time:5697542ms step_avg:607.03ms +step:9387/57344 train_time:5698072ms step_avg:607.02ms +grad accum step:2347/14336 +step:9388/57344 train_time:5699348ms step_avg:607.09ms +step:9389/57344 train_time:5699365ms step_avg:607.03ms +step:9390/57344 train_time:5699608ms step_avg:606.99ms +step:9391/57344 train_time:5700151ms step_avg:606.98ms +grad accum step:2348/14336 +step:9392/57344 train_time:5701432ms step_avg:607.05ms +step:9393/57344 train_time:5701449ms step_avg:606.99ms +step:9394/57344 train_time:5701691ms step_avg:606.95ms +step:9395/57344 train_time:5702219ms step_avg:606.94ms +grad accum step:2349/14336 +step:9396/57344 train_time:5703478ms step_avg:607.01ms +step:9397/57344 train_time:5703495ms step_avg:606.95ms +step:9398/57344 train_time:5703738ms step_avg:606.91ms +step:9399/57344 train_time:5704282ms step_avg:606.90ms +grad accum step:2350/14336 +step:9400/57344 train_time:5705562ms step_avg:606.97ms +step:9401/57344 train_time:5705579ms step_avg:606.91ms +step:9402/57344 train_time:5705823ms step_avg:606.87ms +step:9403/57344 train_time:5706364ms step_avg:606.87ms +grad accum step:2351/14336 +step:9404/57344 train_time:5707650ms step_avg:606.94ms +step:9405/57344 train_time:5707667ms step_avg:606.88ms +step:9406/57344 train_time:5707909ms step_avg:606.84ms +step:9407/57344 train_time:5708441ms step_avg:606.83ms +grad accum step:2352/14336 +step:9408/57344 train_time:5709710ms step_avg:606.90ms +step:9408/57344 val_loss:6.943706 train_time:5709710ms step_avg:606.90ms +step:9409/57344 train_time:5709722ms step_avg:606.84ms +step:9410/57344 train_time:5709942ms step_avg:606.80ms +step:9411/57344 train_time:5710471ms step_avg:606.79ms +grad accum step:2353/14336 +step:9412/57344 train_time:5711729ms step_avg:606.86ms +step:9413/57344 train_time:5711747ms step_avg:606.79ms +step:9414/57344 train_time:5711988ms step_avg:606.75ms +step:9415/57344 train_time:5712526ms step_avg:606.75ms +grad accum step:2354/14336 +step:9416/57344 train_time:5713803ms step_avg:606.82ms +step:9417/57344 train_time:5713820ms step_avg:606.76ms +step:9418/57344 train_time:5714058ms step_avg:606.72ms +step:9419/57344 train_time:5714583ms step_avg:606.71ms +grad accum step:2355/14336 +step:9420/57344 train_time:5715859ms step_avg:606.78ms +step:9421/57344 train_time:5715876ms step_avg:606.72ms +step:9422/57344 train_time:5716117ms step_avg:606.68ms +step:9423/57344 train_time:5716646ms step_avg:606.67ms +grad accum step:2356/14336 +step:9424/57344 train_time:5717927ms step_avg:606.74ms +step:9425/57344 train_time:5717945ms step_avg:606.68ms +step:9426/57344 train_time:5718185ms step_avg:606.64ms +step:9427/57344 train_time:5718719ms step_avg:606.63ms +grad accum step:2357/14336 +step:9428/57344 train_time:5719978ms step_avg:606.70ms +step:9429/57344 train_time:5719995ms step_avg:606.64ms +step:9430/57344 train_time:5720235ms step_avg:606.60ms +step:9431/57344 train_time:5720767ms step_avg:606.59ms +grad accum step:2358/14336 +step:9432/57344 train_time:5722032ms step_avg:606.66ms +step:9433/57344 train_time:5722049ms step_avg:606.60ms +step:9434/57344 train_time:5722294ms step_avg:606.56ms +step:9435/57344 train_time:5722833ms step_avg:606.55ms +grad accum step:2359/14336 +step:9436/57344 train_time:5724113ms step_avg:606.62ms +step:9437/57344 train_time:5724130ms step_avg:606.56ms +step:9438/57344 train_time:5724373ms step_avg:606.52ms +step:9439/57344 train_time:5724901ms step_avg:606.52ms +grad accum step:2360/14336 +step:9440/57344 train_time:5726184ms step_avg:606.59ms +step:9441/57344 train_time:5726201ms step_avg:606.52ms +step:9442/57344 train_time:5726449ms step_avg:606.49ms +step:9443/57344 train_time:5726991ms step_avg:606.48ms +grad accum step:2361/14336 +step:9444/57344 train_time:5728259ms step_avg:606.55ms +step:9445/57344 train_time:5728276ms step_avg:606.49ms +step:9446/57344 train_time:5728518ms step_avg:606.45ms +step:9447/57344 train_time:5729053ms step_avg:606.44ms +grad accum step:2362/14336 +step:9448/57344 train_time:5730324ms step_avg:606.51ms +step:9449/57344 train_time:5730341ms step_avg:606.45ms +step:9450/57344 train_time:5730582ms step_avg:606.41ms +step:9451/57344 train_time:5731110ms step_avg:606.40ms +grad accum step:2363/14336 +step:9452/57344 train_time:5732386ms step_avg:606.47ms +step:9453/57344 train_time:5732404ms step_avg:606.41ms +step:9454/57344 train_time:5732647ms step_avg:606.37ms +step:9455/57344 train_time:5733178ms step_avg:606.36ms +grad accum step:2364/14336 +step:9456/57344 train_time:5734452ms step_avg:606.44ms +step:9457/57344 train_time:5734469ms step_avg:606.37ms +step:9458/57344 train_time:5734712ms step_avg:606.33ms +step:9459/57344 train_time:5735243ms step_avg:606.33ms +grad accum step:2365/14336 +step:9460/57344 train_time:5736526ms step_avg:606.40ms +step:9461/57344 train_time:5736543ms step_avg:606.34ms +step:9462/57344 train_time:5736785ms step_avg:606.30ms +step:9463/57344 train_time:5737314ms step_avg:606.29ms +grad accum step:2366/14336 +step:9464/57344 train_time:5738581ms step_avg:606.36ms +step:9465/57344 train_time:5738598ms step_avg:606.30ms +step:9466/57344 train_time:5738840ms step_avg:606.26ms +step:9467/57344 train_time:5739374ms step_avg:606.25ms +grad accum step:2367/14336 +step:9468/57344 train_time:5740637ms step_avg:606.32ms +step:9469/57344 train_time:5740654ms step_avg:606.26ms +step:9470/57344 train_time:5740896ms step_avg:606.22ms +step:9471/57344 train_time:5741422ms step_avg:606.21ms +grad accum step:2368/14336 +step:9472/57344 train_time:5742681ms step_avg:606.28ms +step:9472/57344 val_loss:6.932907 train_time:5742682ms step_avg:606.28ms +step:9473/57344 train_time:5742694ms step_avg:606.22ms +step:9474/57344 train_time:5742915ms step_avg:606.18ms +step:9475/57344 train_time:5743450ms step_avg:606.17ms +grad accum step:2369/14336 +step:9476/57344 train_time:5744723ms step_avg:606.24ms +step:9477/57344 train_time:5744740ms step_avg:606.18ms +step:9478/57344 train_time:5744983ms step_avg:606.14ms +step:9479/57344 train_time:5745516ms step_avg:606.13ms +grad accum step:2370/14336 +step:9480/57344 train_time:5746790ms step_avg:606.20ms +step:9481/57344 train_time:5746807ms step_avg:606.14ms +step:9482/57344 train_time:5747051ms step_avg:606.10ms +step:9483/57344 train_time:5747587ms step_avg:606.09ms +grad accum step:2371/14336 +step:9484/57344 train_time:5748838ms step_avg:606.16ms +step:9485/57344 train_time:5748855ms step_avg:606.10ms +step:9486/57344 train_time:5749099ms step_avg:606.06ms +step:9487/57344 train_time:5749639ms step_avg:606.05ms +grad accum step:2372/14336 +step:9488/57344 train_time:5750913ms step_avg:606.12ms +step:9489/57344 train_time:5750930ms step_avg:606.06ms +step:9490/57344 train_time:5751171ms step_avg:606.02ms +step:9491/57344 train_time:5751705ms step_avg:606.02ms +grad accum step:2373/14336 +step:9492/57344 train_time:5752981ms step_avg:606.09ms +step:9493/57344 train_time:5752998ms step_avg:606.03ms +step:9494/57344 train_time:5753242ms step_avg:605.99ms +step:9495/57344 train_time:5753782ms step_avg:605.98ms +grad accum step:2374/14336 +step:9496/57344 train_time:5755055ms step_avg:606.05ms +step:9497/57344 train_time:5755072ms step_avg:605.99ms +step:9498/57344 train_time:5755316ms step_avg:605.95ms +step:9499/57344 train_time:5755853ms step_avg:605.94ms +grad accum step:2375/14336 +step:9500/57344 train_time:5757125ms step_avg:606.01ms +step:9501/57344 train_time:5757142ms step_avg:605.95ms +step:9502/57344 train_time:5757383ms step_avg:605.91ms +step:9503/57344 train_time:5757908ms step_avg:605.90ms +grad accum step:2376/14336 +step:9504/57344 train_time:5759170ms step_avg:605.97ms +step:9505/57344 train_time:5759187ms step_avg:605.91ms +step:9506/57344 train_time:5759427ms step_avg:605.87ms +step:9507/57344 train_time:5759953ms step_avg:605.86ms +grad accum step:2377/14336 +step:9508/57344 train_time:5761231ms step_avg:605.94ms +step:9509/57344 train_time:5761248ms step_avg:605.87ms +step:9510/57344 train_time:5761489ms step_avg:605.83ms +step:9511/57344 train_time:5762018ms step_avg:605.83ms +grad accum step:2378/14336 +step:9512/57344 train_time:5763295ms step_avg:605.90ms +step:9513/57344 train_time:5763312ms step_avg:605.84ms +step:9514/57344 train_time:5763552ms step_avg:605.80ms +step:9515/57344 train_time:5764077ms step_avg:605.79ms +grad accum step:2379/14336 +step:9516/57344 train_time:5765345ms step_avg:605.86ms +step:9517/57344 train_time:5765362ms step_avg:605.80ms +step:9518/57344 train_time:5765603ms step_avg:605.76ms +step:9519/57344 train_time:5766129ms step_avg:605.75ms +grad accum step:2380/14336 +step:9520/57344 train_time:5767407ms step_avg:605.82ms +step:9521/57344 train_time:5767424ms step_avg:605.76ms +step:9522/57344 train_time:5767670ms step_avg:605.72ms +step:9523/57344 train_time:5768205ms step_avg:605.71ms +grad accum step:2381/14336 +step:9524/57344 train_time:5769474ms step_avg:605.78ms +step:9525/57344 train_time:5769491ms step_avg:605.72ms +step:9526/57344 train_time:5769733ms step_avg:605.68ms +step:9527/57344 train_time:5770263ms step_avg:605.67ms +grad accum step:2382/14336 +step:9528/57344 train_time:5771544ms step_avg:605.75ms +step:9529/57344 train_time:5771561ms step_avg:605.68ms +step:9530/57344 train_time:5771809ms step_avg:605.65ms +step:9531/57344 train_time:5772350ms step_avg:605.64ms +grad accum step:2383/14336 +step:9532/57344 train_time:5773839ms step_avg:605.73ms +step:9533/57344 train_time:5773873ms step_avg:605.67ms +step:9534/57344 train_time:5774082ms step_avg:605.63ms +step:9535/57344 train_time:5774617ms step_avg:605.62ms +grad accum step:2384/14336 +step:9536/57344 train_time:5775902ms step_avg:605.69ms +step:9536/57344 val_loss:6.923458 train_time:5775902ms step_avg:605.69ms +step:9537/57344 train_time:5775914ms step_avg:605.63ms +step:9538/57344 train_time:5776135ms step_avg:605.59ms +step:9539/57344 train_time:5776665ms step_avg:605.58ms +grad accum step:2385/14336 +step:9540/57344 train_time:5777928ms step_avg:605.65ms +step:9541/57344 train_time:5777945ms step_avg:605.59ms +step:9542/57344 train_time:5778186ms step_avg:605.55ms +step:9543/57344 train_time:5778713ms step_avg:605.54ms +grad accum step:2386/14336 +step:9544/57344 train_time:5779994ms step_avg:605.62ms +step:9545/57344 train_time:5780011ms step_avg:605.55ms +step:9546/57344 train_time:5780254ms step_avg:605.52ms +step:9547/57344 train_time:5780796ms step_avg:605.51ms +grad accum step:2387/14336 +step:9548/57344 train_time:5782074ms step_avg:605.58ms +step:9549/57344 train_time:5782091ms step_avg:605.52ms +step:9550/57344 train_time:5782337ms step_avg:605.48ms +step:9551/57344 train_time:5782882ms step_avg:605.47ms +grad accum step:2388/14336 +step:9552/57344 train_time:5784142ms step_avg:605.54ms +step:9553/57344 train_time:5784159ms step_avg:605.48ms +step:9554/57344 train_time:5784404ms step_avg:605.44ms +step:9555/57344 train_time:5784940ms step_avg:605.44ms +grad accum step:2389/14336 +step:9556/57344 train_time:5786218ms step_avg:605.51ms +step:9557/57344 train_time:5786235ms step_avg:605.44ms +step:9558/57344 train_time:5786477ms step_avg:605.41ms +step:9559/57344 train_time:5787009ms step_avg:605.40ms +grad accum step:2390/14336 +step:9560/57344 train_time:5788279ms step_avg:605.47ms +step:9561/57344 train_time:5788296ms step_avg:605.41ms +step:9562/57344 train_time:5788538ms step_avg:605.37ms +step:9563/57344 train_time:5789071ms step_avg:605.36ms +grad accum step:2391/14336 +step:9564/57344 train_time:5790343ms step_avg:605.43ms +step:9565/57344 train_time:5790360ms step_avg:605.37ms +step:9566/57344 train_time:5790604ms step_avg:605.33ms +step:9567/57344 train_time:5791132ms step_avg:605.32ms +grad accum step:2392/14336 +step:9568/57344 train_time:5792409ms step_avg:605.39ms +step:9569/57344 train_time:5792426ms step_avg:605.33ms +step:9570/57344 train_time:5792670ms step_avg:605.29ms +step:9571/57344 train_time:5793200ms step_avg:605.29ms +grad accum step:2393/14336 +step:9572/57344 train_time:5794458ms step_avg:605.36ms +step:9573/57344 train_time:5794475ms step_avg:605.29ms +step:9574/57344 train_time:5794716ms step_avg:605.26ms +step:9575/57344 train_time:5795244ms step_avg:605.25ms +grad accum step:2394/14336 +step:9576/57344 train_time:5796523ms step_avg:605.32ms +step:9577/57344 train_time:5796540ms step_avg:605.26ms +step:9578/57344 train_time:5796782ms step_avg:605.22ms +step:9579/57344 train_time:5797311ms step_avg:605.21ms +grad accum step:2395/14336 +step:9580/57344 train_time:5798570ms step_avg:605.28ms +step:9581/57344 train_time:5798587ms step_avg:605.22ms +step:9582/57344 train_time:5798834ms step_avg:605.18ms +step:9583/57344 train_time:5799380ms step_avg:605.17ms +grad accum step:2396/14336 +step:9584/57344 train_time:5800651ms step_avg:605.24ms +step:9585/57344 train_time:5800668ms step_avg:605.18ms +step:9586/57344 train_time:5800911ms step_avg:605.14ms +step:9587/57344 train_time:5801446ms step_avg:605.14ms +grad accum step:2397/14336 +step:9588/57344 train_time:5802719ms step_avg:605.21ms +step:9589/57344 train_time:5802736ms step_avg:605.15ms +step:9590/57344 train_time:5802976ms step_avg:605.11ms +step:9591/57344 train_time:5803504ms step_avg:605.10ms +grad accum step:2398/14336 +step:9592/57344 train_time:5804785ms step_avg:605.17ms +step:9593/57344 train_time:5804802ms step_avg:605.11ms +step:9594/57344 train_time:5805044ms step_avg:605.07ms +step:9595/57344 train_time:5805570ms step_avg:605.06ms +grad accum step:2399/14336 +step:9596/57344 train_time:5806855ms step_avg:605.13ms +step:9597/57344 train_time:5806872ms step_avg:605.07ms +step:9598/57344 train_time:5807117ms step_avg:605.03ms +step:9599/57344 train_time:5807660ms step_avg:605.03ms +grad accum step:2400/14336 +step:9600/57344 train_time:5808928ms step_avg:605.10ms +step:9600/57344 val_loss:6.904294 train_time:5808929ms step_avg:605.10ms +step:9601/57344 train_time:5808940ms step_avg:605.03ms +step:9602/57344 train_time:5809161ms step_avg:604.99ms +step:9603/57344 train_time:5809698ms step_avg:604.99ms +grad accum step:2401/14336 +step:9604/57344 train_time:5810965ms step_avg:605.06ms +step:9605/57344 train_time:5810983ms step_avg:605.00ms +step:9606/57344 train_time:5811227ms step_avg:604.96ms +step:9607/57344 train_time:5811770ms step_avg:604.95ms +grad accum step:2402/14336 +step:9608/57344 train_time:5813044ms step_avg:605.02ms +step:9609/57344 train_time:5813061ms step_avg:604.96ms +step:9610/57344 train_time:5813301ms step_avg:604.92ms +step:9611/57344 train_time:5813832ms step_avg:604.91ms +grad accum step:2403/14336 +step:9612/57344 train_time:5815109ms step_avg:604.98ms +step:9613/57344 train_time:5815126ms step_avg:604.92ms +step:9614/57344 train_time:5815367ms step_avg:604.89ms +step:9615/57344 train_time:5815890ms step_avg:604.88ms +grad accum step:2404/14336 +step:9616/57344 train_time:5817154ms step_avg:604.95ms +step:9617/57344 train_time:5817171ms step_avg:604.88ms +step:9618/57344 train_time:5817413ms step_avg:604.85ms +step:9619/57344 train_time:5817947ms step_avg:604.84ms +grad accum step:2405/14336 +step:9620/57344 train_time:5819211ms step_avg:604.91ms +step:9621/57344 train_time:5819228ms step_avg:604.85ms +step:9622/57344 train_time:5819471ms step_avg:604.81ms +step:9623/57344 train_time:5819999ms step_avg:604.80ms +grad accum step:2406/14336 +step:9624/57344 train_time:5821283ms step_avg:604.87ms +step:9625/57344 train_time:5821300ms step_avg:604.81ms +step:9626/57344 train_time:5821545ms step_avg:604.77ms +step:9627/57344 train_time:5822090ms step_avg:604.77ms +grad accum step:2407/14336 +step:9628/57344 train_time:5823356ms step_avg:604.84ms +step:9629/57344 train_time:5823373ms step_avg:604.77ms +step:9630/57344 train_time:5823617ms step_avg:604.74ms +step:9631/57344 train_time:5824151ms step_avg:604.73ms +grad accum step:2408/14336 +step:9632/57344 train_time:5825429ms step_avg:604.80ms +step:9633/57344 train_time:5825447ms step_avg:604.74ms +step:9634/57344 train_time:5825691ms step_avg:604.70ms +step:9635/57344 train_time:5826220ms step_avg:604.69ms +grad accum step:2409/14336 +step:9636/57344 train_time:5827483ms step_avg:604.76ms +step:9637/57344 train_time:5827499ms step_avg:604.70ms +step:9638/57344 train_time:5827742ms step_avg:604.66ms +step:9639/57344 train_time:5828279ms step_avg:604.66ms +grad accum step:2410/14336 +step:9640/57344 train_time:5829544ms step_avg:604.72ms +step:9641/57344 train_time:5829561ms step_avg:604.66ms +step:9642/57344 train_time:5829805ms step_avg:604.63ms +step:9643/57344 train_time:5830336ms step_avg:604.62ms +grad accum step:2411/14336 +step:9644/57344 train_time:5831613ms step_avg:604.69ms +step:9645/57344 train_time:5831630ms step_avg:604.63ms +step:9646/57344 train_time:5831871ms step_avg:604.59ms +step:9647/57344 train_time:5832407ms step_avg:604.58ms +grad accum step:2412/14336 +step:9648/57344 train_time:5833675ms step_avg:604.65ms +step:9649/57344 train_time:5833692ms step_avg:604.59ms +step:9650/57344 train_time:5833931ms step_avg:604.55ms +step:9651/57344 train_time:5834457ms step_avg:604.54ms +grad accum step:2413/14336 +step:9652/57344 train_time:5835735ms step_avg:604.61ms +step:9653/57344 train_time:5835753ms step_avg:604.55ms +step:9654/57344 train_time:5835994ms step_avg:604.52ms +step:9655/57344 train_time:5836521ms step_avg:604.51ms +grad accum step:2414/14336 +step:9656/57344 train_time:5837783ms step_avg:604.58ms +step:9657/57344 train_time:5837800ms step_avg:604.51ms +step:9658/57344 train_time:5838042ms step_avg:604.48ms +step:9659/57344 train_time:5838571ms step_avg:604.47ms +grad accum step:2415/14336 +step:9660/57344 train_time:5839852ms step_avg:604.54ms +step:9661/57344 train_time:5839869ms step_avg:604.48ms +step:9662/57344 train_time:5840112ms step_avg:604.44ms +step:9663/57344 train_time:5840643ms step_avg:604.43ms +grad accum step:2416/14336 +step:9664/57344 train_time:5841921ms step_avg:604.50ms +step:9664/57344 val_loss:6.895104 train_time:5841922ms step_avg:604.50ms +step:9665/57344 train_time:5841934ms step_avg:604.44ms +step:9666/57344 train_time:5842152ms step_avg:604.40ms +step:9667/57344 train_time:5842676ms step_avg:604.39ms +grad accum step:2417/14336 +step:9668/57344 train_time:5843940ms step_avg:604.46ms +step:9669/57344 train_time:5843956ms step_avg:604.40ms +step:9670/57344 train_time:5844200ms step_avg:604.36ms +step:9671/57344 train_time:5844741ms step_avg:604.36ms +grad accum step:2418/14336 +step:9672/57344 train_time:5846014ms step_avg:604.43ms +step:9673/57344 train_time:5846031ms step_avg:604.37ms +step:9674/57344 train_time:5846273ms step_avg:604.33ms +step:9675/57344 train_time:5846802ms step_avg:604.32ms +grad accum step:2419/14336 +step:9676/57344 train_time:5848083ms step_avg:604.39ms +step:9677/57344 train_time:5848100ms step_avg:604.33ms +step:9678/57344 train_time:5848348ms step_avg:604.29ms +step:9679/57344 train_time:5848893ms step_avg:604.29ms +grad accum step:2420/14336 +step:9680/57344 train_time:5850169ms step_avg:604.36ms +step:9681/57344 train_time:5850186ms step_avg:604.30ms +step:9682/57344 train_time:5850428ms step_avg:604.26ms +step:9683/57344 train_time:5850966ms step_avg:604.25ms +grad accum step:2421/14336 +step:9684/57344 train_time:5852229ms step_avg:604.32ms +step:9685/57344 train_time:5852246ms step_avg:604.26ms +step:9686/57344 train_time:5852487ms step_avg:604.22ms +step:9687/57344 train_time:5853013ms step_avg:604.21ms +grad accum step:2422/14336 +step:9688/57344 train_time:5854280ms step_avg:604.28ms +step:9689/57344 train_time:5854297ms step_avg:604.22ms +step:9690/57344 train_time:5854541ms step_avg:604.18ms +step:9691/57344 train_time:5855081ms step_avg:604.18ms +grad accum step:2423/14336 +step:9692/57344 train_time:5856362ms step_avg:604.25ms +step:9693/57344 train_time:5856379ms step_avg:604.19ms +step:9694/57344 train_time:5856623ms step_avg:604.15ms +step:9695/57344 train_time:5857155ms step_avg:604.14ms +grad accum step:2424/14336 +step:9696/57344 train_time:5858433ms step_avg:604.21ms +step:9697/57344 train_time:5858450ms step_avg:604.15ms +step:9698/57344 train_time:5858694ms step_avg:604.11ms +step:9699/57344 train_time:5859237ms step_avg:604.11ms +grad accum step:2425/14336 +step:9700/57344 train_time:5860513ms step_avg:604.18ms +step:9701/57344 train_time:5860530ms step_avg:604.12ms +step:9702/57344 train_time:5860776ms step_avg:604.08ms +step:9703/57344 train_time:5861306ms step_avg:604.07ms +grad accum step:2426/14336 +step:9704/57344 train_time:5862582ms step_avg:604.14ms +step:9705/57344 train_time:5862599ms step_avg:604.08ms +step:9706/57344 train_time:5862842ms step_avg:604.04ms +step:9707/57344 train_time:5863383ms step_avg:604.04ms +grad accum step:2427/14336 +step:9708/57344 train_time:5864646ms step_avg:604.10ms +step:9709/57344 train_time:5864663ms step_avg:604.04ms +step:9710/57344 train_time:5864908ms step_avg:604.01ms +step:9711/57344 train_time:5865456ms step_avg:604.00ms +grad accum step:2428/14336 +step:9712/57344 train_time:5866750ms step_avg:604.07ms +step:9713/57344 train_time:5866767ms step_avg:604.01ms +step:9714/57344 train_time:5867020ms step_avg:603.98ms +step:9715/57344 train_time:5867581ms step_avg:603.97ms +grad accum step:2429/14336 +step:9716/57344 train_time:5868879ms step_avg:604.04ms +step:9717/57344 train_time:5868896ms step_avg:603.98ms +step:9718/57344 train_time:5869139ms step_avg:603.95ms +step:9719/57344 train_time:5869680ms step_avg:603.94ms +grad accum step:2430/14336 +step:9720/57344 train_time:5870967ms step_avg:604.01ms +step:9721/57344 train_time:5870984ms step_avg:603.95ms +step:9722/57344 train_time:5871226ms step_avg:603.91ms +step:9723/57344 train_time:5871760ms step_avg:603.90ms +grad accum step:2431/14336 +step:9724/57344 train_time:5873031ms step_avg:603.97ms +step:9725/57344 train_time:5873048ms step_avg:603.91ms +step:9726/57344 train_time:5873296ms step_avg:603.88ms +step:9727/57344 train_time:5873842ms step_avg:603.87ms +grad accum step:2432/14336 +step:9728/57344 train_time:5875112ms step_avg:603.94ms +step:9728/57344 val_loss:6.919672 train_time:5875112ms step_avg:603.94ms +step:9729/57344 train_time:5875124ms step_avg:603.88ms +step:9730/57344 train_time:5875344ms step_avg:603.84ms +step:9731/57344 train_time:5875885ms step_avg:603.83ms +grad accum step:2433/14336 +step:9732/57344 train_time:5877153ms step_avg:603.90ms +step:9733/57344 train_time:5877170ms step_avg:603.84ms +step:9734/57344 train_time:5877413ms step_avg:603.80ms +step:9735/57344 train_time:5877954ms step_avg:603.80ms +grad accum step:2434/14336 +step:9736/57344 train_time:5879232ms step_avg:603.87ms +step:9737/57344 train_time:5879249ms step_avg:603.80ms +step:9738/57344 train_time:5879491ms step_avg:603.77ms +step:9739/57344 train_time:5880024ms step_avg:603.76ms +grad accum step:2435/14336 +step:9740/57344 train_time:5882523ms step_avg:603.96ms +step:9741/57344 train_time:5882536ms step_avg:603.89ms +step:9742/57344 train_time:5882751ms step_avg:603.85ms +step:9743/57344 train_time:5883288ms step_avg:603.85ms +grad accum step:2436/14336 +step:9744/57344 train_time:5884551ms step_avg:603.92ms +step:9745/57344 train_time:5884568ms step_avg:603.86ms +step:9746/57344 train_time:5884817ms step_avg:603.82ms +step:9747/57344 train_time:5885364ms step_avg:603.81ms +grad accum step:2437/14336 +step:9748/57344 train_time:5886642ms step_avg:603.88ms +step:9749/57344 train_time:5886659ms step_avg:603.82ms +step:9750/57344 train_time:5886902ms step_avg:603.78ms +step:9751/57344 train_time:5887445ms step_avg:603.78ms +grad accum step:2438/14336 +step:9752/57344 train_time:5888723ms step_avg:603.85ms +step:9753/57344 train_time:5888740ms step_avg:603.79ms +step:9754/57344 train_time:5888986ms step_avg:603.75ms +step:9755/57344 train_time:5889528ms step_avg:603.74ms +grad accum step:2439/14336 +step:9756/57344 train_time:5890804ms step_avg:603.81ms +step:9757/57344 train_time:5890821ms step_avg:603.75ms +step:9758/57344 train_time:5891065ms step_avg:603.72ms +step:9759/57344 train_time:5891600ms step_avg:603.71ms +grad accum step:2440/14336 +step:9760/57344 train_time:5892897ms step_avg:603.78ms +step:9761/57344 train_time:5892914ms step_avg:603.72ms +step:9762/57344 train_time:5893156ms step_avg:603.68ms +step:9763/57344 train_time:5893690ms step_avg:603.68ms +grad accum step:2441/14336 +step:9764/57344 train_time:5894963ms step_avg:603.74ms +step:9765/57344 train_time:5894980ms step_avg:603.68ms +step:9766/57344 train_time:5895225ms step_avg:603.65ms +step:9767/57344 train_time:5895763ms step_avg:603.64ms +grad accum step:2442/14336 +step:9768/57344 train_time:5897022ms step_avg:603.71ms +step:9769/57344 train_time:5897040ms step_avg:603.65ms +step:9770/57344 train_time:5897283ms step_avg:603.61ms +step:9771/57344 train_time:5897813ms step_avg:603.60ms +grad accum step:2443/14336 +step:9772/57344 train_time:5899091ms step_avg:603.67ms +step:9773/57344 train_time:5899108ms step_avg:603.61ms +step:9774/57344 train_time:5899353ms step_avg:603.58ms +step:9775/57344 train_time:5899885ms step_avg:603.57ms +grad accum step:2444/14336 +step:9776/57344 train_time:5901165ms step_avg:603.64ms +step:9777/57344 train_time:5901182ms step_avg:603.58ms +step:9778/57344 train_time:5901426ms step_avg:603.54ms +step:9779/57344 train_time:5901969ms step_avg:603.54ms +grad accum step:2445/14336 +step:9780/57344 train_time:5903252ms step_avg:603.60ms +step:9781/57344 train_time:5903269ms step_avg:603.54ms +step:9782/57344 train_time:5903514ms step_avg:603.51ms +step:9783/57344 train_time:5904058ms step_avg:603.50ms +grad accum step:2446/14336 +step:9784/57344 train_time:5905321ms step_avg:603.57ms +step:9785/57344 train_time:5905338ms step_avg:603.51ms +step:9786/57344 train_time:5905581ms step_avg:603.47ms +step:9787/57344 train_time:5906117ms step_avg:603.47ms +grad accum step:2447/14336 +step:9788/57344 train_time:5907369ms step_avg:603.53ms +step:9789/57344 train_time:5907386ms step_avg:603.47ms +step:9790/57344 train_time:5907628ms step_avg:603.43ms +step:9791/57344 train_time:5908162ms step_avg:603.43ms +grad accum step:2448/14336 +step:9792/57344 train_time:5909452ms step_avg:603.50ms +step:9792/57344 val_loss:6.897346 train_time:5909453ms step_avg:603.50ms +step:9793/57344 train_time:5909465ms step_avg:603.44ms +step:9794/57344 train_time:5909683ms step_avg:603.40ms +step:9795/57344 train_time:5910212ms step_avg:603.39ms +grad accum step:2449/14336 +step:9796/57344 train_time:5911488ms step_avg:603.46ms +step:9797/57344 train_time:5911504ms step_avg:603.40ms +step:9798/57344 train_time:5911747ms step_avg:603.36ms +step:9799/57344 train_time:5912277ms step_avg:603.36ms +grad accum step:2450/14336 +step:9800/57344 train_time:5913552ms step_avg:603.42ms +step:9801/57344 train_time:5913569ms step_avg:603.36ms +step:9802/57344 train_time:5913811ms step_avg:603.33ms +step:9803/57344 train_time:5914345ms step_avg:603.32ms +grad accum step:2451/14336 +step:9804/57344 train_time:5915618ms step_avg:603.39ms +step:9805/57344 train_time:5915635ms step_avg:603.33ms +step:9806/57344 train_time:5915879ms step_avg:603.29ms +step:9807/57344 train_time:5916403ms step_avg:603.28ms +grad accum step:2452/14336 +step:9808/57344 train_time:5917667ms step_avg:603.35ms +step:9809/57344 train_time:5917684ms step_avg:603.29ms +step:9810/57344 train_time:5917926ms step_avg:603.25ms +step:9811/57344 train_time:5918452ms step_avg:603.25ms +grad accum step:2453/14336 +step:9812/57344 train_time:5919719ms step_avg:603.31ms +step:9813/57344 train_time:5919736ms step_avg:603.25ms +step:9814/57344 train_time:5919980ms step_avg:603.22ms +step:9815/57344 train_time:5920518ms step_avg:603.21ms +grad accum step:2454/14336 +step:9816/57344 train_time:5921782ms step_avg:603.28ms +step:9817/57344 train_time:5921800ms step_avg:603.22ms +step:9818/57344 train_time:5922042ms step_avg:603.18ms +step:9819/57344 train_time:5922584ms step_avg:603.18ms +grad accum step:2455/14336 +step:9820/57344 train_time:5923847ms step_avg:603.24ms +step:9821/57344 train_time:5923865ms step_avg:603.18ms +step:9822/57344 train_time:5924105ms step_avg:603.15ms +step:9823/57344 train_time:5924629ms step_avg:603.14ms +grad accum step:2456/14336 +step:9824/57344 train_time:5925892ms step_avg:603.21ms +step:9825/57344 train_time:5925909ms step_avg:603.15ms +step:9826/57344 train_time:5926150ms step_avg:603.11ms +step:9827/57344 train_time:5926679ms step_avg:603.10ms +grad accum step:2457/14336 +step:9828/57344 train_time:5927955ms step_avg:603.17ms +step:9829/57344 train_time:5927972ms step_avg:603.11ms +step:9830/57344 train_time:5928214ms step_avg:603.07ms +step:9831/57344 train_time:5928745ms step_avg:603.07ms +grad accum step:2458/14336 +step:9832/57344 train_time:5930055ms step_avg:603.14ms +step:9833/57344 train_time:5930072ms step_avg:603.08ms +step:9834/57344 train_time:5930315ms step_avg:603.04ms +step:9835/57344 train_time:5930851ms step_avg:603.04ms +grad accum step:2459/14336 +step:9836/57344 train_time:5932135ms step_avg:603.10ms +step:9837/57344 train_time:5932152ms step_avg:603.04ms +step:9838/57344 train_time:5932397ms step_avg:603.01ms +step:9839/57344 train_time:5932938ms step_avg:603.00ms +grad accum step:2460/14336 +step:9840/57344 train_time:5934221ms step_avg:603.07ms +step:9841/57344 train_time:5934238ms step_avg:603.01ms +step:9842/57344 train_time:5934484ms step_avg:602.98ms +step:9843/57344 train_time:5935023ms step_avg:602.97ms +grad accum step:2461/14336 +step:9844/57344 train_time:5936288ms step_avg:603.04ms +step:9845/57344 train_time:5936305ms step_avg:602.98ms +step:9846/57344 train_time:5936549ms step_avg:602.94ms +step:9847/57344 train_time:5937079ms step_avg:602.93ms +grad accum step:2462/14336 +step:9848/57344 train_time:5938344ms step_avg:603.00ms +step:9849/57344 train_time:5938361ms step_avg:602.94ms +step:9850/57344 train_time:5938600ms step_avg:602.90ms +step:9851/57344 train_time:5939127ms step_avg:602.90ms +grad accum step:2463/14336 +step:9852/57344 train_time:5940395ms step_avg:602.96ms +step:9853/57344 train_time:5940412ms step_avg:602.90ms +step:9854/57344 train_time:5940654ms step_avg:602.87ms +step:9855/57344 train_time:5941197ms step_avg:602.86ms +grad accum step:2464/14336 +step:9856/57344 train_time:5942452ms step_avg:602.93ms +step:9856/57344 val_loss:6.873909 train_time:5942453ms step_avg:602.93ms +step:9857/57344 train_time:5942465ms step_avg:602.87ms +step:9858/57344 train_time:5942686ms step_avg:602.83ms +step:9859/57344 train_time:5943219ms step_avg:602.82ms +grad accum step:2465/14336 +step:9860/57344 train_time:5944491ms step_avg:602.89ms +step:9861/57344 train_time:5944508ms step_avg:602.83ms +step:9862/57344 train_time:5944748ms step_avg:602.79ms +step:9863/57344 train_time:5945278ms step_avg:602.79ms +grad accum step:2466/14336 +step:9864/57344 train_time:5946541ms step_avg:602.85ms +step:9865/57344 train_time:5946558ms step_avg:602.79ms +step:9866/57344 train_time:5946807ms step_avg:602.76ms +step:9867/57344 train_time:5947351ms step_avg:602.75ms +grad accum step:2467/14336 +step:9868/57344 train_time:5948614ms step_avg:602.82ms +step:9869/57344 train_time:5948631ms step_avg:602.76ms +step:9870/57344 train_time:5948874ms step_avg:602.72ms +step:9871/57344 train_time:5949417ms step_avg:602.72ms +grad accum step:2468/14336 +step:9872/57344 train_time:5950698ms step_avg:602.79ms +step:9873/57344 train_time:5950715ms step_avg:602.73ms +step:9874/57344 train_time:5950956ms step_avg:602.69ms +step:9875/57344 train_time:5951483ms step_avg:602.68ms +grad accum step:2469/14336 +step:9876/57344 train_time:5952755ms step_avg:602.75ms +step:9877/57344 train_time:5952772ms step_avg:602.69ms +step:9878/57344 train_time:5953013ms step_avg:602.65ms +step:9879/57344 train_time:5953545ms step_avg:602.65ms +grad accum step:2470/14336 +step:9880/57344 train_time:5954805ms step_avg:602.71ms +step:9881/57344 train_time:5954822ms step_avg:602.65ms +step:9882/57344 train_time:5955063ms step_avg:602.62ms +step:9883/57344 train_time:5955590ms step_avg:602.61ms +grad accum step:2471/14336 +step:9884/57344 train_time:5956856ms step_avg:602.68ms +step:9885/57344 train_time:5956873ms step_avg:602.62ms +step:9886/57344 train_time:5957117ms step_avg:602.58ms +step:9887/57344 train_time:5957662ms step_avg:602.58ms +grad accum step:2472/14336 +step:9888/57344 train_time:5958936ms step_avg:602.64ms +step:9889/57344 train_time:5958953ms step_avg:602.58ms +step:9890/57344 train_time:5959199ms step_avg:602.55ms +step:9891/57344 train_time:5959746ms step_avg:602.54ms +grad accum step:2473/14336 +step:9892/57344 train_time:5961023ms step_avg:602.61ms +step:9893/57344 train_time:5961040ms step_avg:602.55ms +step:9894/57344 train_time:5961288ms step_avg:602.52ms +step:9895/57344 train_time:5961833ms step_avg:602.51ms +grad accum step:2474/14336 +step:9896/57344 train_time:5963114ms step_avg:602.58ms +step:9897/57344 train_time:5963131ms step_avg:602.52ms +step:9898/57344 train_time:5963375ms step_avg:602.48ms +step:9899/57344 train_time:5963909ms step_avg:602.48ms +grad accum step:2475/14336 +step:9900/57344 train_time:5965162ms step_avg:602.54ms +step:9901/57344 train_time:5965179ms step_avg:602.48ms +step:9902/57344 train_time:5965420ms step_avg:602.45ms +step:9903/57344 train_time:5965945ms step_avg:602.44ms +grad accum step:2476/14336 +step:9904/57344 train_time:5967212ms step_avg:602.51ms +step:9905/57344 train_time:5967229ms step_avg:602.45ms +step:9906/57344 train_time:5967475ms step_avg:602.41ms +step:9907/57344 train_time:5968021ms step_avg:602.40ms +grad accum step:2477/14336 +step:9908/57344 train_time:5969302ms step_avg:602.47ms +step:9909/57344 train_time:5969319ms step_avg:602.41ms +step:9910/57344 train_time:5969560ms step_avg:602.38ms +step:9911/57344 train_time:5970088ms step_avg:602.37ms +grad accum step:2478/14336 +step:9912/57344 train_time:5971370ms step_avg:602.44ms +step:9913/57344 train_time:5971387ms step_avg:602.38ms +step:9914/57344 train_time:5971636ms step_avg:602.34ms +step:9915/57344 train_time:5972183ms step_avg:602.34ms +grad accum step:2479/14336 +step:9916/57344 train_time:5973459ms step_avg:602.41ms +step:9917/57344 train_time:5973476ms step_avg:602.35ms +step:9918/57344 train_time:5973718ms step_avg:602.31ms +step:9919/57344 train_time:5974247ms step_avg:602.30ms +grad accum step:2480/14336 +step:9920/57344 train_time:5975530ms step_avg:602.37ms +step:9920/57344 val_loss:6.848538 train_time:5975530ms step_avg:602.37ms +step:9921/57344 train_time:5975542ms step_avg:602.31ms +step:9922/57344 train_time:5975765ms step_avg:602.27ms +step:9923/57344 train_time:5976297ms step_avg:602.27ms +grad accum step:2481/14336 +step:9924/57344 train_time:5977563ms step_avg:602.33ms +step:9925/57344 train_time:5977580ms step_avg:602.28ms +step:9926/57344 train_time:5977823ms step_avg:602.24ms +step:9927/57344 train_time:5978359ms step_avg:602.23ms +grad accum step:2482/14336 +step:9928/57344 train_time:5979636ms step_avg:602.30ms +step:9929/57344 train_time:5979653ms step_avg:602.24ms +step:9930/57344 train_time:5979894ms step_avg:602.20ms +step:9931/57344 train_time:5980425ms step_avg:602.20ms +grad accum step:2483/14336 +step:9932/57344 train_time:5981704ms step_avg:602.27ms +step:9933/57344 train_time:5981721ms step_avg:602.21ms +step:9934/57344 train_time:5981963ms step_avg:602.17ms +step:9935/57344 train_time:5982492ms step_avg:602.16ms +grad accum step:2484/14336 +step:9936/57344 train_time:5983756ms step_avg:602.23ms +step:9937/57344 train_time:5983773ms step_avg:602.17ms +step:9938/57344 train_time:5984015ms step_avg:602.13ms +step:9939/57344 train_time:5984541ms step_avg:602.13ms +grad accum step:2485/14336 +step:9940/57344 train_time:5985805ms step_avg:602.19ms +step:9941/57344 train_time:5985821ms step_avg:602.13ms +step:9942/57344 train_time:5986065ms step_avg:602.10ms +step:9943/57344 train_time:5986602ms step_avg:602.09ms +grad accum step:2486/14336 +step:9944/57344 train_time:5987864ms step_avg:602.16ms +step:9945/57344 train_time:5987881ms step_avg:602.10ms +step:9946/57344 train_time:5988123ms step_avg:602.06ms +step:9947/57344 train_time:5988653ms step_avg:602.06ms +grad accum step:2487/14336 +step:9948/57344 train_time:5989934ms step_avg:602.12ms +step:9949/57344 train_time:5989951ms step_avg:602.07ms +step:9950/57344 train_time:5990194ms step_avg:602.03ms +step:9951/57344 train_time:5990724ms step_avg:602.02ms +grad accum step:2488/14336 +step:9952/57344 train_time:5992006ms step_avg:602.09ms +step:9953/57344 train_time:5992023ms step_avg:602.03ms +step:9954/57344 train_time:5992263ms step_avg:602.00ms +step:9955/57344 train_time:5992791ms step_avg:601.99ms +grad accum step:2489/14336 +step:9956/57344 train_time:5994075ms step_avg:602.06ms +step:9957/57344 train_time:5994092ms step_avg:602.00ms +step:9958/57344 train_time:5994335ms step_avg:601.96ms +step:9959/57344 train_time:5994877ms step_avg:601.96ms +grad accum step:2490/14336 +step:9960/57344 train_time:5996141ms step_avg:602.02ms +step:9961/57344 train_time:5996158ms step_avg:601.96ms +step:9962/57344 train_time:5996399ms step_avg:601.93ms +step:9963/57344 train_time:5996930ms step_avg:601.92ms +grad accum step:2491/14336 +step:9964/57344 train_time:5998227ms step_avg:601.99ms +step:9965/57344 train_time:5998244ms step_avg:601.93ms +step:9966/57344 train_time:5998486ms step_avg:601.90ms +step:9967/57344 train_time:5999024ms step_avg:601.89ms +grad accum step:2492/14336 +step:9968/57344 train_time:6000303ms step_avg:601.96ms +step:9969/57344 train_time:6000320ms step_avg:601.90ms +step:9970/57344 train_time:6000562ms step_avg:601.86ms +step:9971/57344 train_time:6001087ms step_avg:601.85ms +grad accum step:2493/14336 +step:9972/57344 train_time:6002350ms step_avg:601.92ms +step:9973/57344 train_time:6002367ms step_avg:601.86ms +step:9974/57344 train_time:6002609ms step_avg:601.83ms +step:9975/57344 train_time:6003135ms step_avg:601.82ms +grad accum step:2494/14336 +step:9976/57344 train_time:6004415ms step_avg:601.89ms +step:9977/57344 train_time:6004432ms step_avg:601.83ms +step:9978/57344 train_time:6004675ms step_avg:601.79ms +step:9979/57344 train_time:6005205ms step_avg:601.78ms +grad accum step:2495/14336 +step:9980/57344 train_time:6006482ms step_avg:601.85ms +step:9981/57344 train_time:6006499ms step_avg:601.79ms +step:9982/57344 train_time:6006742ms step_avg:601.76ms +step:9983/57344 train_time:6007268ms step_avg:601.75ms +grad accum step:2496/14336 +step:9984/57344 train_time:6008529ms step_avg:601.82ms +step:9984/57344 val_loss:6.834342 train_time:6008529ms step_avg:601.82ms +step:9985/57344 train_time:6008541ms step_avg:601.76ms +step:9986/57344 train_time:6008762ms step_avg:601.72ms +step:9987/57344 train_time:6009295ms step_avg:601.71ms +grad accum step:2497/14336 +step:9988/57344 train_time:6010557ms step_avg:601.78ms +step:9989/57344 train_time:6010574ms step_avg:601.72ms +step:9990/57344 train_time:6010817ms step_avg:601.68ms +step:9991/57344 train_time:6011346ms step_avg:601.68ms +grad accum step:2498/14336 +step:9992/57344 train_time:6012623ms step_avg:601.74ms +step:9993/57344 train_time:6012640ms step_avg:601.69ms +step:9994/57344 train_time:6012885ms step_avg:601.65ms +step:9995/57344 train_time:6013422ms step_avg:601.64ms +grad accum step:2499/14336 +step:9996/57344 train_time:6014688ms step_avg:601.71ms +step:9997/57344 train_time:6014705ms step_avg:601.65ms +step:9998/57344 train_time:6014945ms step_avg:601.61ms +step:9999/57344 train_time:6015471ms step_avg:601.61ms +grad accum step:2500/14336 +step:10000/57344 train_time:6016752ms step_avg:601.68ms +step:10001/57344 train_time:6016769ms step_avg:601.62ms +step:10002/57344 train_time:6017013ms step_avg:601.58ms +step:10003/57344 train_time:6017541ms step_avg:601.57ms +grad accum step:2501/14336 +step:10004/57344 train_time:6018799ms step_avg:601.64ms +step:10005/57344 train_time:6018816ms step_avg:601.58ms +step:10006/57344 train_time:6019056ms step_avg:601.54ms +step:10007/57344 train_time:6019592ms step_avg:601.54ms +grad accum step:2502/14336 +step:10008/57344 train_time:6020869ms step_avg:601.61ms +step:10009/57344 train_time:6020886ms step_avg:601.55ms +step:10010/57344 train_time:6021128ms step_avg:601.51ms +step:10011/57344 train_time:6021656ms step_avg:601.50ms +grad accum step:2503/14336 +step:10012/57344 train_time:6022943ms step_avg:601.57ms +step:10013/57344 train_time:6022960ms step_avg:601.51ms +step:10014/57344 train_time:6023200ms step_avg:601.48ms +step:10015/57344 train_time:6023730ms step_avg:601.47ms +grad accum step:2504/14336 +step:10016/57344 train_time:6025011ms step_avg:601.54ms +step:10017/57344 train_time:6025028ms step_avg:601.48ms +step:10018/57344 train_time:6025272ms step_avg:601.44ms +step:10019/57344 train_time:6025808ms step_avg:601.44ms +grad accum step:2505/14336 +step:10020/57344 train_time:6027081ms step_avg:601.51ms +step:10021/57344 train_time:6027098ms step_avg:601.45ms +step:10022/57344 train_time:6027342ms step_avg:601.41ms +step:10023/57344 train_time:6027883ms step_avg:601.41ms +grad accum step:2506/14336 +step:10024/57344 train_time:6029147ms step_avg:601.47ms +step:10025/57344 train_time:6029164ms step_avg:601.41ms +step:10026/57344 train_time:6029409ms step_avg:601.38ms +step:10027/57344 train_time:6029946ms step_avg:601.37ms +grad accum step:2507/14336 +step:10028/57344 train_time:6031217ms step_avg:601.44ms +step:10029/57344 train_time:6031234ms step_avg:601.38ms +step:10030/57344 train_time:6031477ms step_avg:601.34ms +step:10031/57344 train_time:6032007ms step_avg:601.34ms +grad accum step:2508/14336 +step:10032/57344 train_time:6033273ms step_avg:601.40ms +step:10033/57344 train_time:6033290ms step_avg:601.34ms +step:10034/57344 train_time:6033531ms step_avg:601.31ms +step:10035/57344 train_time:6034066ms step_avg:601.30ms +grad accum step:2509/14336 +step:10036/57344 train_time:6035333ms step_avg:601.37ms +step:10037/57344 train_time:6035350ms step_avg:601.31ms +step:10038/57344 train_time:6035592ms step_avg:601.27ms +step:10039/57344 train_time:6036122ms step_avg:601.27ms +grad accum step:2510/14336 +step:10040/57344 train_time:6037399ms step_avg:601.33ms +step:10041/57344 train_time:6037416ms step_avg:601.28ms +step:10042/57344 train_time:6037657ms step_avg:601.24ms +step:10043/57344 train_time:6038183ms step_avg:601.23ms +grad accum step:2511/14336 +step:10044/57344 train_time:6039454ms step_avg:601.30ms +step:10045/57344 train_time:6039471ms step_avg:601.24ms +step:10046/57344 train_time:6039716ms step_avg:601.21ms +step:10047/57344 train_time:6040254ms step_avg:601.20ms +grad accum step:2512/14336 +step:10048/57344 train_time:6041518ms step_avg:601.27ms +step:10048/57344 val_loss:6.803728 train_time:6041518ms step_avg:601.27ms +step:10049/57344 train_time:6041530ms step_avg:601.21ms +step:10050/57344 train_time:6041750ms step_avg:601.17ms +step:10051/57344 train_time:6042282ms step_avg:601.16ms +grad accum step:2513/14336 +step:10052/57344 train_time:6043547ms step_avg:601.23ms +step:10053/57344 train_time:6043564ms step_avg:601.17ms +step:10054/57344 train_time:6043807ms step_avg:601.13ms +step:10055/57344 train_time:6044350ms step_avg:601.13ms +grad accum step:2514/14336 +step:10056/57344 train_time:6045628ms step_avg:601.20ms +step:10057/57344 train_time:6045645ms step_avg:601.14ms +step:10058/57344 train_time:6045889ms step_avg:601.10ms +step:10059/57344 train_time:6046420ms step_avg:601.10ms +grad accum step:2515/14336 +step:10060/57344 train_time:6047681ms step_avg:601.16ms +step:10061/57344 train_time:6047698ms step_avg:601.10ms +step:10062/57344 train_time:6047938ms step_avg:601.07ms +step:10063/57344 train_time:6048473ms step_avg:601.06ms +grad accum step:2516/14336 +step:10064/57344 train_time:6049742ms step_avg:601.13ms +step:10065/57344 train_time:6049759ms step_avg:601.07ms +step:10066/57344 train_time:6050002ms step_avg:601.03ms +step:10067/57344 train_time:6050538ms step_avg:601.03ms +grad accum step:2517/14336 +step:10068/57344 train_time:6051790ms step_avg:601.09ms +step:10069/57344 train_time:6051807ms step_avg:601.03ms +step:10070/57344 train_time:6052051ms step_avg:601.00ms +step:10071/57344 train_time:6052583ms step_avg:600.99ms +grad accum step:2518/14336 +step:10072/57344 train_time:6053855ms step_avg:601.06ms +step:10073/57344 train_time:6053872ms step_avg:601.00ms +step:10074/57344 train_time:6054113ms step_avg:600.96ms +step:10075/57344 train_time:6054648ms step_avg:600.96ms +grad accum step:2519/14336 +step:10076/57344 train_time:6055931ms step_avg:601.03ms +step:10077/57344 train_time:6055948ms step_avg:600.97ms +step:10078/57344 train_time:6056192ms step_avg:600.93ms +step:10079/57344 train_time:6056731ms step_avg:600.93ms +grad accum step:2520/14336 +step:10080/57344 train_time:6057991ms step_avg:600.99ms +step:10081/57344 train_time:6058008ms step_avg:600.93ms +step:10082/57344 train_time:6058255ms step_avg:600.90ms +step:10083/57344 train_time:6058789ms step_avg:600.89ms +grad accum step:2521/14336 +step:10084/57344 train_time:6060060ms step_avg:600.96ms +step:10085/57344 train_time:6060077ms step_avg:600.90ms +step:10086/57344 train_time:6060320ms step_avg:600.86ms +step:10087/57344 train_time:6060856ms step_avg:600.86ms +grad accum step:2522/14336 +step:10088/57344 train_time:6062135ms step_avg:600.93ms +step:10089/57344 train_time:6062153ms step_avg:600.87ms +step:10090/57344 train_time:6062401ms step_avg:600.83ms +step:10091/57344 train_time:6062953ms step_avg:600.83ms +grad accum step:2523/14336 +step:10092/57344 train_time:6064223ms step_avg:600.89ms +step:10093/57344 train_time:6064240ms step_avg:600.84ms +step:10094/57344 train_time:6064489ms step_avg:600.80ms +step:10095/57344 train_time:6065035ms step_avg:600.80ms +grad accum step:2524/14336 +step:10096/57344 train_time:6066308ms step_avg:600.86ms +step:10097/57344 train_time:6066325ms step_avg:600.80ms +step:10098/57344 train_time:6066568ms step_avg:600.77ms +step:10099/57344 train_time:6067106ms step_avg:600.76ms +grad accum step:2525/14336 +step:10100/57344 train_time:6068363ms step_avg:600.83ms +step:10101/57344 train_time:6068380ms step_avg:600.77ms +step:10102/57344 train_time:6068623ms step_avg:600.73ms +step:10103/57344 train_time:6069160ms step_avg:600.73ms +grad accum step:2526/14336 +step:10104/57344 train_time:6070436ms step_avg:600.80ms +step:10105/57344 train_time:6070453ms step_avg:600.74ms +step:10106/57344 train_time:6070698ms step_avg:600.70ms +step:10107/57344 train_time:6071241ms step_avg:600.70ms +grad accum step:2527/14336 +step:10108/57344 train_time:6072513ms step_avg:600.76ms +step:10109/57344 train_time:6072530ms step_avg:600.71ms +step:10110/57344 train_time:6072775ms step_avg:600.67ms +step:10111/57344 train_time:6073308ms step_avg:600.66ms +grad accum step:2528/14336 +step:10112/57344 train_time:6074579ms step_avg:600.73ms +step:10112/57344 val_loss:6.795698 train_time:6074579ms step_avg:600.73ms +step:10113/57344 train_time:6074591ms step_avg:600.67ms +step:10114/57344 train_time:6074812ms step_avg:600.63ms +step:10115/57344 train_time:6075351ms step_avg:600.63ms +grad accum step:2529/14336 +step:10116/57344 train_time:6076635ms step_avg:600.70ms +step:10117/57344 train_time:6076652ms step_avg:600.64ms +step:10118/57344 train_time:6076896ms step_avg:600.60ms +step:10119/57344 train_time:6077436ms step_avg:600.60ms +grad accum step:2530/14336 +step:10120/57344 train_time:6078698ms step_avg:600.66ms +step:10121/57344 train_time:6078715ms step_avg:600.60ms +step:10122/57344 train_time:6078956ms step_avg:600.57ms +step:10123/57344 train_time:6079482ms step_avg:600.56ms +grad accum step:2531/14336 +step:10124/57344 train_time:6080742ms step_avg:600.63ms +step:10125/57344 train_time:6080759ms step_avg:600.57ms +step:10126/57344 train_time:6081001ms step_avg:600.53ms +step:10127/57344 train_time:6081529ms step_avg:600.53ms +grad accum step:2532/14336 +step:10128/57344 train_time:6082817ms step_avg:600.59ms +step:10129/57344 train_time:6082834ms step_avg:600.54ms +step:10130/57344 train_time:6083080ms step_avg:600.50ms +step:10131/57344 train_time:6083626ms step_avg:600.50ms +grad accum step:2533/14336 +step:10132/57344 train_time:6084902ms step_avg:600.56ms +step:10133/57344 train_time:6084919ms step_avg:600.51ms +step:10134/57344 train_time:6085167ms step_avg:600.47ms +step:10135/57344 train_time:6085713ms step_avg:600.47ms +grad accum step:2534/14336 +step:10136/57344 train_time:6086976ms step_avg:600.53ms +step:10137/57344 train_time:6086993ms step_avg:600.47ms +step:10138/57344 train_time:6087235ms step_avg:600.44ms +step:10139/57344 train_time:6087763ms step_avg:600.43ms +grad accum step:2535/14336 +step:10140/57344 train_time:6090333ms step_avg:600.62ms +step:10141/57344 train_time:6090562ms step_avg:600.59ms +step:10142/57344 train_time:6090695ms step_avg:600.54ms +step:10143/57344 train_time:6091222ms step_avg:600.53ms +grad accum step:2536/14336 +step:10144/57344 train_time:6092496ms step_avg:600.60ms +step:10145/57344 train_time:6092513ms step_avg:600.54ms +step:10146/57344 train_time:6092759ms step_avg:600.51ms +step:10147/57344 train_time:6093297ms step_avg:600.50ms +grad accum step:2537/14336 +step:10148/57344 train_time:6094576ms step_avg:600.57ms +step:10149/57344 train_time:6094593ms step_avg:600.51ms +step:10150/57344 train_time:6094838ms step_avg:600.48ms +step:10151/57344 train_time:6095379ms step_avg:600.47ms +grad accum step:2538/14336 +step:10152/57344 train_time:6096657ms step_avg:600.54ms +step:10153/57344 train_time:6096674ms step_avg:600.48ms +step:10154/57344 train_time:6096917ms step_avg:600.44ms +step:10155/57344 train_time:6097446ms step_avg:600.44ms +grad accum step:2539/14336 +step:10156/57344 train_time:6098706ms step_avg:600.50ms +step:10157/57344 train_time:6098723ms step_avg:600.45ms +step:10158/57344 train_time:6098972ms step_avg:600.41ms +step:10159/57344 train_time:6099517ms step_avg:600.41ms +grad accum step:2540/14336 +step:10160/57344 train_time:6100785ms step_avg:600.47ms +step:10161/57344 train_time:6100802ms step_avg:600.41ms +step:10162/57344 train_time:6101049ms step_avg:600.38ms +step:10163/57344 train_time:6101597ms step_avg:600.37ms +grad accum step:2541/14336 +step:10164/57344 train_time:6102867ms step_avg:600.44ms +step:10165/57344 train_time:6102884ms step_avg:600.38ms +step:10166/57344 train_time:6103129ms step_avg:600.35ms +step:10167/57344 train_time:6103671ms step_avg:600.34ms +grad accum step:2542/14336 +step:10168/57344 train_time:6104956ms step_avg:600.41ms +step:10169/57344 train_time:6104972ms step_avg:600.35ms +step:10170/57344 train_time:6105213ms step_avg:600.32ms +step:10171/57344 train_time:6105739ms step_avg:600.31ms +grad accum step:2543/14336 +step:10172/57344 train_time:6106997ms step_avg:600.37ms +step:10173/57344 train_time:6107014ms step_avg:600.32ms +step:10174/57344 train_time:6107258ms step_avg:600.28ms +step:10175/57344 train_time:6107795ms step_avg:600.27ms +grad accum step:2544/14336 +step:10176/57344 train_time:6109053ms step_avg:600.34ms +step:10176/57344 val_loss:6.764917 train_time:6109053ms step_avg:600.34ms +step:10177/57344 train_time:6109065ms step_avg:600.28ms +step:10178/57344 train_time:6109287ms step_avg:600.24ms +step:10179/57344 train_time:6109823ms step_avg:600.24ms +grad accum step:2545/14336 +step:10180/57344 train_time:6111088ms step_avg:600.30ms +step:10181/57344 train_time:6111106ms step_avg:600.25ms +step:10182/57344 train_time:6111349ms step_avg:600.21ms +step:10183/57344 train_time:6111890ms step_avg:600.21ms +grad accum step:2546/14336 +step:10184/57344 train_time:6113159ms step_avg:600.27ms +step:10185/57344 train_time:6113176ms step_avg:600.21ms +step:10186/57344 train_time:6113419ms step_avg:600.18ms +step:10187/57344 train_time:6113961ms step_avg:600.17ms +grad accum step:2547/14336 +step:10188/57344 train_time:6115253ms step_avg:600.24ms +step:10189/57344 train_time:6115270ms step_avg:600.18ms +step:10190/57344 train_time:6115514ms step_avg:600.15ms +step:10191/57344 train_time:6116057ms step_avg:600.14ms +grad accum step:2548/14336 +step:10192/57344 train_time:6117315ms step_avg:600.21ms +step:10193/57344 train_time:6117332ms step_avg:600.15ms +step:10194/57344 train_time:6117578ms step_avg:600.12ms +step:10195/57344 train_time:6118125ms step_avg:600.11ms +grad accum step:2549/14336 +step:10196/57344 train_time:6119408ms step_avg:600.18ms +step:10197/57344 train_time:6119424ms step_avg:600.12ms +step:10198/57344 train_time:6119672ms step_avg:600.09ms +step:10199/57344 train_time:6120212ms step_avg:600.08ms +grad accum step:2550/14336 +step:10200/57344 train_time:6121471ms step_avg:600.14ms +step:10201/57344 train_time:6121488ms step_avg:600.09ms +step:10202/57344 train_time:6121730ms step_avg:600.05ms +step:10203/57344 train_time:6122259ms step_avg:600.05ms +grad accum step:2551/14336 +step:10204/57344 train_time:6123541ms step_avg:600.11ms +step:10205/57344 train_time:6123558ms step_avg:600.05ms +step:10206/57344 train_time:6123800ms step_avg:600.02ms +step:10207/57344 train_time:6124338ms step_avg:600.01ms +grad accum step:2552/14336 +step:10208/57344 train_time:6125614ms step_avg:600.08ms +step:10209/57344 train_time:6125631ms step_avg:600.02ms +step:10210/57344 train_time:6125876ms step_avg:599.99ms +step:10211/57344 train_time:6126414ms step_avg:599.98ms +grad accum step:2553/14336 +step:10212/57344 train_time:6127679ms step_avg:600.05ms +step:10213/57344 train_time:6127696ms step_avg:599.99ms +step:10214/57344 train_time:6127941ms step_avg:599.96ms +step:10215/57344 train_time:6128470ms step_avg:599.95ms +grad accum step:2554/14336 +step:10216/57344 train_time:6129728ms step_avg:600.01ms +step:10217/57344 train_time:6129745ms step_avg:599.96ms +step:10218/57344 train_time:6129988ms step_avg:599.92ms +step:10219/57344 train_time:6130514ms step_avg:599.91ms +grad accum step:2555/14336 +step:10220/57344 train_time:6131773ms step_avg:599.98ms +step:10221/57344 train_time:6131790ms step_avg:599.92ms +step:10222/57344 train_time:6132034ms step_avg:599.89ms +step:10223/57344 train_time:6132561ms step_avg:599.88ms +grad accum step:2556/14336 +step:10224/57344 train_time:6133834ms step_avg:599.94ms +step:10225/57344 train_time:6133851ms step_avg:599.89ms +step:10226/57344 train_time:6134098ms step_avg:599.85ms +step:10227/57344 train_time:6134646ms step_avg:599.85ms +grad accum step:2557/14336 +step:10228/57344 train_time:6135919ms step_avg:599.91ms +step:10229/57344 train_time:6135936ms step_avg:599.86ms +step:10230/57344 train_time:6136183ms step_avg:599.82ms +step:10231/57344 train_time:6136726ms step_avg:599.82ms +grad accum step:2558/14336 +step:10232/57344 train_time:6137988ms step_avg:599.88ms +step:10233/57344 train_time:6138005ms step_avg:599.82ms +step:10234/57344 train_time:6138251ms step_avg:599.79ms +step:10235/57344 train_time:6138791ms step_avg:599.78ms +grad accum step:2559/14336 +step:10236/57344 train_time:6140053ms step_avg:599.85ms +step:10237/57344 train_time:6140069ms step_avg:599.79ms +step:10238/57344 train_time:6140312ms step_avg:599.76ms +step:10239/57344 train_time:6140849ms step_avg:599.75ms +grad accum step:2560/14336 +step:10240/57344 train_time:6142110ms step_avg:599.82ms +step:10240/57344 val_loss:6.742457 train_time:6142110ms step_avg:599.82ms +step:10241/57344 train_time:6142122ms step_avg:599.76ms +step:10242/57344 train_time:6142342ms step_avg:599.72ms +step:10243/57344 train_time:6142871ms step_avg:599.71ms +grad accum step:2561/14336 +step:10244/57344 train_time:6144133ms step_avg:599.78ms +step:10245/57344 train_time:6144150ms step_avg:599.72ms +step:10246/57344 train_time:6144393ms step_avg:599.69ms +step:10247/57344 train_time:6144924ms step_avg:599.68ms +grad accum step:2562/14336 +step:10248/57344 train_time:6146185ms step_avg:599.74ms +step:10249/57344 train_time:6146202ms step_avg:599.69ms +step:10250/57344 train_time:6146445ms step_avg:599.65ms +step:10251/57344 train_time:6146977ms step_avg:599.65ms +grad accum step:2563/14336 +step:10252/57344 train_time:6148260ms step_avg:599.71ms +step:10253/57344 train_time:6148277ms step_avg:599.66ms +step:10254/57344 train_time:6148520ms step_avg:599.62ms +step:10255/57344 train_time:6149049ms step_avg:599.61ms +grad accum step:2564/14336 +step:10256/57344 train_time:6150310ms step_avg:599.68ms +step:10257/57344 train_time:6150328ms step_avg:599.62ms +step:10258/57344 train_time:6150575ms step_avg:599.59ms +step:10259/57344 train_time:6151120ms step_avg:599.58ms +grad accum step:2565/14336 +step:10260/57344 train_time:6152397ms step_avg:599.65ms +step:10261/57344 train_time:6152415ms step_avg:599.59ms +step:10262/57344 train_time:6152656ms step_avg:599.56ms +step:10263/57344 train_time:6153185ms step_avg:599.55ms +grad accum step:2566/14336 +step:10264/57344 train_time:6154460ms step_avg:599.62ms +step:10265/57344 train_time:6154477ms step_avg:599.56ms +step:10266/57344 train_time:6154718ms step_avg:599.52ms +step:10267/57344 train_time:6155246ms step_avg:599.52ms +grad accum step:2567/14336 +step:10268/57344 train_time:6156501ms step_avg:599.58ms +step:10269/57344 train_time:6156518ms step_avg:599.52ms +step:10270/57344 train_time:6156759ms step_avg:599.49ms +step:10271/57344 train_time:6157290ms step_avg:599.48ms +grad accum step:2568/14336 +step:10272/57344 train_time:6158568ms step_avg:599.55ms +step:10273/57344 train_time:6158585ms step_avg:599.49ms +step:10274/57344 train_time:6158829ms step_avg:599.46ms +step:10275/57344 train_time:6159367ms step_avg:599.45ms +grad accum step:2569/14336 +step:10276/57344 train_time:6160633ms step_avg:599.52ms +step:10277/57344 train_time:6160650ms step_avg:599.46ms +step:10278/57344 train_time:6160895ms step_avg:599.43ms +step:10279/57344 train_time:6161435ms step_avg:599.42ms +grad accum step:2570/14336 +step:10280/57344 train_time:6162699ms step_avg:599.48ms +step:10281/57344 train_time:6162716ms step_avg:599.43ms +step:10282/57344 train_time:6162957ms step_avg:599.39ms +step:10283/57344 train_time:6163495ms step_avg:599.39ms +grad accum step:2571/14336 +step:10284/57344 train_time:6164756ms step_avg:599.45ms +step:10285/57344 train_time:6164773ms step_avg:599.39ms +step:10286/57344 train_time:6165016ms step_avg:599.36ms +step:10287/57344 train_time:6165545ms step_avg:599.35ms +grad accum step:2572/14336 +step:10288/57344 train_time:6166821ms step_avg:599.42ms +step:10289/57344 train_time:6166838ms step_avg:599.36ms +step:10290/57344 train_time:6167082ms step_avg:599.33ms +step:10291/57344 train_time:6167612ms step_avg:599.32ms +grad accum step:2573/14336 +step:10292/57344 train_time:6168886ms step_avg:599.39ms +step:10293/57344 train_time:6168903ms step_avg:599.33ms +step:10294/57344 train_time:6169146ms step_avg:599.30ms +step:10295/57344 train_time:6169681ms step_avg:599.29ms +grad accum step:2574/14336 +step:10296/57344 train_time:6170950ms step_avg:599.35ms +step:10297/57344 train_time:6170967ms step_avg:599.30ms +step:10298/57344 train_time:6171209ms step_avg:599.26ms +step:10299/57344 train_time:6171744ms step_avg:599.26ms +grad accum step:2575/14336 +step:10300/57344 train_time:6173008ms step_avg:599.32ms +step:10301/57344 train_time:6173025ms step_avg:599.26ms +step:10302/57344 train_time:6173268ms step_avg:599.23ms +step:10303/57344 train_time:6173792ms step_avg:599.22ms +grad accum step:2576/14336 +step:10304/57344 train_time:6175052ms step_avg:599.29ms +step:10304/57344 val_loss:6.716063 train_time:6176059ms step_avg:599.38ms +step:10305/57344 train_time:6176071ms step_avg:599.33ms +step:10306/57344 train_time:6176295ms step_avg:599.29ms +step:10307/57344 train_time:6176838ms step_avg:599.29ms +grad accum step:2577/14336 +step:10308/57344 train_time:6178130ms step_avg:599.35ms +step:10309/57344 train_time:6178142ms step_avg:599.30ms +step:10310/57344 train_time:6178377ms step_avg:599.26ms +step:10311/57344 train_time:6178906ms step_avg:599.25ms +grad accum step:2578/14336 +step:10312/57344 train_time:6180191ms step_avg:599.32ms +step:10313/57344 train_time:6180208ms step_avg:599.26ms +step:10314/57344 train_time:6180451ms step_avg:599.23ms +step:10315/57344 train_time:6180983ms step_avg:599.22ms +grad accum step:2579/14336 +step:10316/57344 train_time:6182254ms step_avg:599.29ms +step:10317/57344 train_time:6182271ms step_avg:599.23ms +step:10318/57344 train_time:6182516ms step_avg:599.20ms +step:10319/57344 train_time:6183047ms step_avg:599.19ms +grad accum step:2580/14336 +step:10320/57344 train_time:6184298ms step_avg:599.25ms +step:10321/57344 train_time:6184316ms step_avg:599.20ms +step:10322/57344 train_time:6184558ms step_avg:599.16ms +step:10323/57344 train_time:6185088ms step_avg:599.16ms +grad accum step:2581/14336 +step:10324/57344 train_time:6186348ms step_avg:599.22ms +step:10325/57344 train_time:6186365ms step_avg:599.16ms +step:10326/57344 train_time:6186607ms step_avg:599.13ms +step:10327/57344 train_time:6187135ms step_avg:599.12ms +grad accum step:2582/14336 +step:10328/57344 train_time:6188413ms step_avg:599.19ms +step:10329/57344 train_time:6188430ms step_avg:599.13ms +step:10330/57344 train_time:6188678ms step_avg:599.10ms +step:10331/57344 train_time:6189223ms step_avg:599.09ms +grad accum step:2583/14336 +step:10332/57344 train_time:6190500ms step_avg:599.16ms +step:10333/57344 train_time:6190518ms step_avg:599.10ms +step:10334/57344 train_time:6190767ms step_avg:599.07ms +step:10335/57344 train_time:6191308ms step_avg:599.06ms +grad accum step:2584/14336 +step:10336/57344 train_time:6192587ms step_avg:599.13ms +step:10337/57344 train_time:6192604ms step_avg:599.07ms +step:10338/57344 train_time:6192849ms step_avg:599.04ms +step:10339/57344 train_time:6193388ms step_avg:599.03ms +grad accum step:2585/14336 +step:10340/57344 train_time:6194644ms step_avg:599.10ms +step:10341/57344 train_time:6194661ms step_avg:599.04ms +step:10342/57344 train_time:6194905ms step_avg:599.00ms +step:10343/57344 train_time:6195439ms step_avg:599.00ms +grad accum step:2586/14336 +step:10344/57344 train_time:6196713ms step_avg:599.06ms +step:10345/57344 train_time:6196730ms step_avg:599.01ms +step:10346/57344 train_time:6196973ms step_avg:598.97ms +step:10347/57344 train_time:6197513ms step_avg:598.97ms +grad accum step:2587/14336 +step:10348/57344 train_time:6198776ms step_avg:599.03ms +step:10349/57344 train_time:6198793ms step_avg:598.98ms +step:10350/57344 train_time:6199036ms step_avg:598.94ms +step:10351/57344 train_time:6199581ms step_avg:598.94ms +grad accum step:2588/14336 +step:10352/57344 train_time:6200856ms step_avg:599.00ms +step:10353/57344 train_time:6200873ms step_avg:598.94ms +step:10354/57344 train_time:6201115ms step_avg:598.91ms +step:10355/57344 train_time:6201642ms step_avg:598.90ms +grad accum step:2589/14336 +step:10356/57344 train_time:6202923ms step_avg:598.97ms +step:10357/57344 train_time:6202940ms step_avg:598.91ms +step:10358/57344 train_time:6203184ms step_avg:598.88ms +step:10359/57344 train_time:6203729ms step_avg:598.87ms +grad accum step:2590/14336 +step:10360/57344 train_time:6204999ms step_avg:598.94ms +step:10361/57344 train_time:6205016ms step_avg:598.88ms +step:10362/57344 train_time:6205258ms step_avg:598.85ms +step:10363/57344 train_time:6205786ms step_avg:598.84ms +grad accum step:2591/14336 +step:10364/57344 train_time:6207048ms step_avg:598.90ms +step:10365/57344 train_time:6207065ms step_avg:598.85ms +step:10366/57344 train_time:6207312ms step_avg:598.81ms +step:10367/57344 train_time:6207856ms step_avg:598.81ms +grad accum step:2592/14336 +step:10368/57344 train_time:6209129ms step_avg:598.87ms +step:10368/57344 val_loss:6.749411 train_time:6209130ms step_avg:598.87ms +step:10369/57344 train_time:6209142ms step_avg:598.82ms +step:10370/57344 train_time:6209363ms step_avg:598.78ms +step:10371/57344 train_time:6209905ms step_avg:598.78ms +grad accum step:2593/14336 +step:10372/57344 train_time:6211191ms step_avg:598.84ms +step:10373/57344 train_time:6211208ms step_avg:598.79ms +step:10374/57344 train_time:6211450ms step_avg:598.75ms +step:10375/57344 train_time:6211987ms step_avg:598.75ms +grad accum step:2594/14336 +step:10376/57344 train_time:6213237ms step_avg:598.81ms +step:10377/57344 train_time:6213254ms step_avg:598.75ms +step:10378/57344 train_time:6213500ms step_avg:598.72ms +step:10379/57344 train_time:6214041ms step_avg:598.71ms +grad accum step:2595/14336 +step:10380/57344 train_time:6215328ms step_avg:598.78ms +step:10381/57344 train_time:6215344ms step_avg:598.72ms +step:10382/57344 train_time:6215586ms step_avg:598.69ms +step:10383/57344 train_time:6216126ms step_avg:598.68ms +grad accum step:2596/14336 +step:10384/57344 train_time:6217404ms step_avg:598.75ms +step:10385/57344 train_time:6217421ms step_avg:598.69ms +step:10386/57344 train_time:6217663ms step_avg:598.66ms +step:10387/57344 train_time:6218190ms step_avg:598.65ms +grad accum step:2597/14336 +step:10388/57344 train_time:6219468ms step_avg:598.72ms +step:10389/57344 train_time:6219485ms step_avg:598.66ms +step:10390/57344 train_time:6219732ms step_avg:598.63ms +step:10391/57344 train_time:6220277ms step_avg:598.62ms +grad accum step:2598/14336 +step:10392/57344 train_time:6221553ms step_avg:598.69ms +step:10393/57344 train_time:6221738ms step_avg:598.65ms +step:10394/57344 train_time:6221959ms step_avg:598.61ms +step:10395/57344 train_time:6222514ms step_avg:598.61ms +grad accum step:2599/14336 +step:10396/57344 train_time:6223797ms step_avg:598.67ms +step:10397/57344 train_time:6223814ms step_avg:598.62ms +step:10398/57344 train_time:6224057ms step_avg:598.58ms +step:10399/57344 train_time:6224586ms step_avg:598.58ms +grad accum step:2600/14336 +step:10400/57344 train_time:6225860ms step_avg:598.64ms +step:10401/57344 train_time:6225877ms step_avg:598.58ms +step:10402/57344 train_time:6226121ms step_avg:598.55ms +step:10403/57344 train_time:6226649ms step_avg:598.54ms +grad accum step:2601/14336 +step:10404/57344 train_time:6227924ms step_avg:598.61ms +step:10405/57344 train_time:6227941ms step_avg:598.55ms +step:10406/57344 train_time:6228184ms step_avg:598.52ms +step:10407/57344 train_time:6228715ms step_avg:598.51ms +grad accum step:2602/14336 +step:10408/57344 train_time:6229985ms step_avg:598.58ms +step:10409/57344 train_time:6230002ms step_avg:598.52ms +step:10410/57344 train_time:6230251ms step_avg:598.49ms +step:10411/57344 train_time:6230801ms step_avg:598.48ms +grad accum step:2603/14336 +step:10412/57344 train_time:6232074ms step_avg:598.55ms +step:10413/57344 train_time:6232091ms step_avg:598.49ms +step:10414/57344 train_time:6232334ms step_avg:598.46ms +step:10415/57344 train_time:6232869ms step_avg:598.45ms +grad accum step:2604/14336 +step:10416/57344 train_time:6234136ms step_avg:598.52ms +step:10417/57344 train_time:6234154ms step_avg:598.46ms +step:10418/57344 train_time:6234394ms step_avg:598.43ms +step:10419/57344 train_time:6234921ms step_avg:598.42ms +grad accum step:2605/14336 +step:10420/57344 train_time:6236197ms step_avg:598.48ms +step:10421/57344 train_time:6236214ms step_avg:598.43ms +step:10422/57344 train_time:6236461ms step_avg:598.39ms +step:10423/57344 train_time:6237005ms step_avg:598.39ms +grad accum step:2606/14336 +step:10424/57344 train_time:6238280ms step_avg:598.45ms +step:10425/57344 train_time:6238297ms step_avg:598.40ms +step:10426/57344 train_time:6238541ms step_avg:598.36ms +step:10427/57344 train_time:6239076ms step_avg:598.36ms +grad accum step:2607/14336 +step:10428/57344 train_time:6240346ms step_avg:598.42ms +step:10429/57344 train_time:6240363ms step_avg:598.37ms +step:10430/57344 train_time:6240607ms step_avg:598.33ms +step:10431/57344 train_time:6241149ms step_avg:598.33ms +grad accum step:2608/14336 +step:10432/57344 train_time:6242411ms step_avg:598.39ms +step:10432/57344 val_loss:6.760437 train_time:6242412ms step_avg:598.39ms +step:10433/57344 train_time:6242424ms step_avg:598.33ms +step:10434/57344 train_time:6242643ms step_avg:598.30ms +step:10435/57344 train_time:6243172ms step_avg:598.29ms +grad accum step:2609/14336 +step:10436/57344 train_time:6244445ms step_avg:598.36ms +step:10437/57344 train_time:6244462ms step_avg:598.30ms +step:10438/57344 train_time:6244704ms step_avg:598.27ms +step:10439/57344 train_time:6245241ms step_avg:598.26ms +grad accum step:2610/14336 +step:10440/57344 train_time:6246500ms step_avg:598.32ms +step:10441/57344 train_time:6246517ms step_avg:598.27ms +step:10442/57344 train_time:6246764ms step_avg:598.23ms +step:10443/57344 train_time:6247306ms step_avg:598.23ms +grad accum step:2611/14336 +step:10444/57344 train_time:6248583ms step_avg:598.29ms +step:10445/57344 train_time:6248600ms step_avg:598.24ms +step:10446/57344 train_time:6248842ms step_avg:598.20ms +step:10447/57344 train_time:6249367ms step_avg:598.20ms +grad accum step:2612/14336 +step:10448/57344 train_time:6250643ms step_avg:598.26ms +step:10449/57344 train_time:6250660ms step_avg:598.21ms +step:10450/57344 train_time:6250900ms step_avg:598.17ms +step:10451/57344 train_time:6251426ms step_avg:598.17ms +grad accum step:2613/14336 +step:10452/57344 train_time:6252685ms step_avg:598.23ms +step:10453/57344 train_time:6252702ms step_avg:598.17ms +step:10454/57344 train_time:6252947ms step_avg:598.14ms +step:10455/57344 train_time:6253476ms step_avg:598.13ms +grad accum step:2614/14336 +step:10456/57344 train_time:6254751ms step_avg:598.20ms +step:10457/57344 train_time:6254769ms step_avg:598.14ms +step:10458/57344 train_time:6255012ms step_avg:598.11ms +step:10459/57344 train_time:6255544ms step_avg:598.10ms +grad accum step:2615/14336 +step:10460/57344 train_time:6256813ms step_avg:598.17ms +step:10461/57344 train_time:6256830ms step_avg:598.11ms +step:10462/57344 train_time:6257077ms step_avg:598.08ms +step:10463/57344 train_time:6257626ms step_avg:598.07ms +grad accum step:2616/14336 +step:10464/57344 train_time:6258900ms step_avg:598.14ms +step:10465/57344 train_time:6258917ms step_avg:598.08ms +step:10466/57344 train_time:6259163ms step_avg:598.05ms +step:10467/57344 train_time:6259704ms step_avg:598.04ms +grad accum step:2617/14336 +step:10468/57344 train_time:6260971ms step_avg:598.11ms +step:10469/57344 train_time:6260988ms step_avg:598.05ms +step:10470/57344 train_time:6261234ms step_avg:598.02ms +step:10471/57344 train_time:6261766ms step_avg:598.01ms +grad accum step:2618/14336 +step:10472/57344 train_time:6263027ms step_avg:598.07ms +step:10473/57344 train_time:6263044ms step_avg:598.02ms +step:10474/57344 train_time:6263287ms step_avg:597.98ms +step:10475/57344 train_time:6263827ms step_avg:597.98ms +grad accum step:2619/14336 +step:10476/57344 train_time:6265116ms step_avg:598.04ms +step:10477/57344 train_time:6265133ms step_avg:597.99ms +step:10478/57344 train_time:6265382ms step_avg:597.96ms +step:10479/57344 train_time:6265927ms step_avg:597.95ms +grad accum step:2620/14336 +step:10480/57344 train_time:6267213ms step_avg:598.02ms +step:10481/57344 train_time:6267231ms step_avg:597.96ms +step:10482/57344 train_time:6267470ms step_avg:597.93ms +step:10483/57344 train_time:6268001ms step_avg:597.92ms +grad accum step:2621/14336 +step:10484/57344 train_time:6269302ms step_avg:597.99ms +step:10485/57344 train_time:6269319ms step_avg:597.93ms +step:10486/57344 train_time:6269563ms step_avg:597.90ms +step:10487/57344 train_time:6270102ms step_avg:597.89ms +grad accum step:2622/14336 +step:10488/57344 train_time:6271379ms step_avg:597.96ms +step:10489/57344 train_time:6271396ms step_avg:597.90ms +step:10490/57344 train_time:6271636ms step_avg:597.87ms +step:10491/57344 train_time:6272166ms step_avg:597.86ms +grad accum step:2623/14336 +step:10492/57344 train_time:6273446ms step_avg:597.93ms +step:10493/57344 train_time:6273463ms step_avg:597.87ms +step:10494/57344 train_time:6273707ms step_avg:597.84ms +step:10495/57344 train_time:6274246ms step_avg:597.83ms +grad accum step:2624/14336 +step:10496/57344 train_time:6275531ms step_avg:597.90ms +step:10496/57344 val_loss:6.781706 train_time:6275531ms step_avg:597.90ms +step:10497/57344 train_time:6275543ms step_avg:597.84ms +step:10498/57344 train_time:6275766ms step_avg:597.81ms +step:10499/57344 train_time:6276308ms step_avg:597.80ms +grad accum step:2625/14336 +step:10500/57344 train_time:6277584ms step_avg:597.87ms +step:10501/57344 train_time:6277601ms step_avg:597.81ms +step:10502/57344 train_time:6277850ms step_avg:597.78ms +step:10503/57344 train_time:6278396ms step_avg:597.77ms +grad accum step:2626/14336 +step:10504/57344 train_time:6279666ms step_avg:597.84ms +step:10505/57344 train_time:6279684ms step_avg:597.78ms +step:10506/57344 train_time:6279929ms step_avg:597.75ms +step:10507/57344 train_time:6280462ms step_avg:597.74ms +grad accum step:2627/14336 +step:10508/57344 train_time:6281714ms step_avg:597.80ms +step:10509/57344 train_time:6281731ms step_avg:597.75ms +step:10510/57344 train_time:6281974ms step_avg:597.71ms +step:10511/57344 train_time:6282503ms step_avg:597.71ms +grad accum step:2628/14336 +step:10512/57344 train_time:6283779ms step_avg:597.77ms +step:10513/57344 train_time:6283796ms step_avg:597.72ms +step:10514/57344 train_time:6284041ms step_avg:597.68ms +step:10515/57344 train_time:6284579ms step_avg:597.68ms +grad accum step:2629/14336 +step:10516/57344 train_time:6285863ms step_avg:597.74ms +step:10517/57344 train_time:6285880ms step_avg:597.69ms +step:10518/57344 train_time:6286121ms step_avg:597.65ms +step:10519/57344 train_time:6286648ms step_avg:597.65ms +grad accum step:2630/14336 +step:10520/57344 train_time:6287941ms step_avg:597.71ms +step:10521/57344 train_time:6287958ms step_avg:597.66ms +step:10522/57344 train_time:6288204ms step_avg:597.62ms +step:10523/57344 train_time:6288750ms step_avg:597.62ms +grad accum step:2631/14336 +step:10524/57344 train_time:6290026ms step_avg:597.68ms +step:10525/57344 train_time:6290043ms step_avg:597.63ms +step:10526/57344 train_time:6290285ms step_avg:597.60ms +step:10527/57344 train_time:6290815ms step_avg:597.59ms +grad accum step:2632/14336 +step:10528/57344 train_time:6292077ms step_avg:597.65ms +step:10529/57344 train_time:6292094ms step_avg:597.60ms +step:10530/57344 train_time:6292343ms step_avg:597.56ms +step:10531/57344 train_time:6292882ms step_avg:597.56ms +grad accum step:2633/14336 +step:10532/57344 train_time:6294154ms step_avg:597.62ms +step:10533/57344 train_time:6294171ms step_avg:597.57ms +step:10534/57344 train_time:6294417ms step_avg:597.53ms +step:10535/57344 train_time:6294965ms step_avg:597.53ms +grad accum step:2634/14336 +step:10536/57344 train_time:6296255ms step_avg:597.59ms +step:10537/57344 train_time:6296272ms step_avg:597.54ms +step:10538/57344 train_time:6296515ms step_avg:597.51ms +step:10539/57344 train_time:6297049ms step_avg:597.50ms +grad accum step:2635/14336 +step:10540/57344 train_time:6298320ms step_avg:597.56ms +step:10541/57344 train_time:6298337ms step_avg:597.51ms +step:10542/57344 train_time:6298582ms step_avg:597.48ms +step:10543/57344 train_time:6299117ms step_avg:597.47ms +grad accum step:2636/14336 +step:10544/57344 train_time:6300379ms step_avg:597.53ms +step:10545/57344 train_time:6300396ms step_avg:597.48ms +step:10546/57344 train_time:6300641ms step_avg:597.44ms +step:10547/57344 train_time:6301189ms step_avg:597.44ms +grad accum step:2637/14336 +step:10548/57344 train_time:6302464ms step_avg:597.50ms +step:10549/57344 train_time:6302481ms step_avg:597.45ms +step:10550/57344 train_time:6302729ms step_avg:597.42ms +step:10551/57344 train_time:6303274ms step_avg:597.41ms +grad accum step:2638/14336 +step:10552/57344 train_time:6304544ms step_avg:597.47ms +step:10553/57344 train_time:6304561ms step_avg:597.42ms +step:10554/57344 train_time:6304804ms step_avg:597.39ms +step:10555/57344 train_time:6305333ms step_avg:597.38ms +grad accum step:2639/14336 +step:10556/57344 train_time:6320793ms step_avg:598.79ms +step:10557/57344 train_time:6320806ms step_avg:598.73ms +step:10558/57344 train_time:6321088ms step_avg:598.70ms +step:10559/57344 train_time:6321620ms step_avg:598.69ms +grad accum step:2640/14336 +step:10560/57344 train_time:6322879ms step_avg:598.76ms +step:10560/57344 val_loss:6.785400 train_time:6322879ms step_avg:598.76ms +step:10561/57344 train_time:6322891ms step_avg:598.70ms +step:10562/57344 train_time:6323111ms step_avg:598.67ms +step:10563/57344 train_time:6323650ms step_avg:598.66ms +grad accum step:2641/14336 +step:10564/57344 train_time:6324931ms step_avg:598.73ms +step:10565/57344 train_time:6324948ms step_avg:598.67ms +step:10566/57344 train_time:6325190ms step_avg:598.64ms +step:10567/57344 train_time:6325716ms step_avg:598.63ms +grad accum step:2642/14336 +step:10568/57344 train_time:6326987ms step_avg:598.69ms +step:10569/57344 train_time:6327004ms step_avg:598.64ms +step:10570/57344 train_time:6327246ms step_avg:598.60ms +step:10571/57344 train_time:6327777ms step_avg:598.60ms +grad accum step:2643/14336 +step:10572/57344 train_time:6329058ms step_avg:598.66ms +step:10573/57344 train_time:6329075ms step_avg:598.61ms +step:10574/57344 train_time:6329317ms step_avg:598.57ms +step:10575/57344 train_time:6329846ms step_avg:598.57ms +grad accum step:2644/14336 +step:10576/57344 train_time:6331111ms step_avg:598.63ms +step:10577/57344 train_time:6331128ms step_avg:598.58ms +step:10578/57344 train_time:6331373ms step_avg:598.54ms +step:10579/57344 train_time:6331910ms step_avg:598.54ms +grad accum step:2645/14336 +step:10580/57344 train_time:6333251ms step_avg:598.61ms +step:10581/57344 train_time:6333268ms step_avg:598.55ms +step:10582/57344 train_time:6333518ms step_avg:598.52ms +step:10583/57344 train_time:6334071ms step_avg:598.51ms +grad accum step:2646/14336 +step:10584/57344 train_time:6335330ms step_avg:598.58ms +step:10585/57344 train_time:6335347ms step_avg:598.52ms +step:10586/57344 train_time:6335588ms step_avg:598.49ms +step:10587/57344 train_time:6336124ms step_avg:598.48ms +grad accum step:2647/14336 +step:10588/57344 train_time:6337408ms step_avg:598.55ms +step:10589/57344 train_time:6337425ms step_avg:598.49ms +step:10590/57344 train_time:6337667ms step_avg:598.46ms +step:10591/57344 train_time:6338194ms step_avg:598.45ms +grad accum step:2648/14336 +step:10592/57344 train_time:6339475ms step_avg:598.52ms +step:10593/57344 train_time:6339493ms step_avg:598.46ms +step:10594/57344 train_time:6339734ms step_avg:598.43ms +step:10595/57344 train_time:6340261ms step_avg:598.42ms +grad accum step:2649/14336 +step:10596/57344 train_time:6341520ms step_avg:598.48ms +step:10597/57344 train_time:6341537ms step_avg:598.43ms +step:10598/57344 train_time:6341779ms step_avg:598.39ms +step:10599/57344 train_time:6342308ms step_avg:598.39ms +grad accum step:2650/14336 +step:10600/57344 train_time:6343582ms step_avg:598.45ms +step:10601/57344 train_time:6343600ms step_avg:598.40ms +step:10602/57344 train_time:6343842ms step_avg:598.36ms +step:10603/57344 train_time:6344368ms step_avg:598.36ms +grad accum step:2651/14336 +step:10604/57344 train_time:6345631ms step_avg:598.42ms +step:10605/57344 train_time:6345648ms step_avg:598.36ms +step:10606/57344 train_time:6345889ms step_avg:598.33ms +step:10607/57344 train_time:6346416ms step_avg:598.32ms +grad accum step:2652/14336 +step:10608/57344 train_time:6347690ms step_avg:598.39ms +step:10609/57344 train_time:6347708ms step_avg:598.33ms +step:10610/57344 train_time:6347948ms step_avg:598.30ms +step:10611/57344 train_time:6348475ms step_avg:598.29ms +grad accum step:2653/14336 +step:10612/57344 train_time:6349741ms step_avg:598.35ms +step:10613/57344 train_time:6349758ms step_avg:598.30ms +step:10614/57344 train_time:6350005ms step_avg:598.27ms +step:10615/57344 train_time:6350551ms step_avg:598.26ms +grad accum step:2654/14336 +step:10616/57344 train_time:6351830ms step_avg:598.33ms +step:10617/57344 train_time:6351847ms step_avg:598.27ms +step:10618/57344 train_time:6352088ms step_avg:598.24ms +step:10619/57344 train_time:6352617ms step_avg:598.23ms +grad accum step:2655/14336 +step:10620/57344 train_time:6353884ms step_avg:598.29ms +step:10621/57344 train_time:6353901ms step_avg:598.24ms +step:10622/57344 train_time:6354141ms step_avg:598.21ms +step:10623/57344 train_time:6354682ms step_avg:598.20ms +grad accum step:2656/14336 +step:10624/57344 train_time:6356619ms step_avg:598.33ms +step:10624/57344 val_loss:6.800114 train_time:6356619ms step_avg:598.33ms +step:10625/57344 train_time:6356730ms step_avg:598.28ms +step:10626/57344 train_time:6356856ms step_avg:598.24ms +step:10627/57344 train_time:6357387ms step_avg:598.23ms +grad accum step:2657/14336 +step:10628/57344 train_time:6358659ms step_avg:598.29ms +step:10629/57344 train_time:6358676ms step_avg:598.24ms +step:10630/57344 train_time:6358920ms step_avg:598.21ms +step:10631/57344 train_time:6359460ms step_avg:598.20ms +grad accum step:2658/14336 +step:10632/57344 train_time:6360740ms step_avg:598.26ms +step:10633/57344 train_time:6360757ms step_avg:598.21ms +step:10634/57344 train_time:6361006ms step_avg:598.18ms +step:10635/57344 train_time:6361554ms step_avg:598.17ms +grad accum step:2659/14336 +step:10636/57344 train_time:6362824ms step_avg:598.23ms +step:10637/57344 train_time:6362841ms step_avg:598.18ms +step:10638/57344 train_time:6363082ms step_avg:598.15ms +step:10639/57344 train_time:6363610ms step_avg:598.14ms +grad accum step:2660/14336 +step:10640/57344 train_time:6364877ms step_avg:598.20ms +step:10641/57344 train_time:6364894ms step_avg:598.15ms +step:10642/57344 train_time:6365137ms step_avg:598.11ms +step:10643/57344 train_time:6365678ms step_avg:598.11ms +grad accum step:2661/14336 +step:10644/57344 train_time:6366954ms step_avg:598.17ms +step:10645/57344 train_time:6366971ms step_avg:598.12ms +step:10646/57344 train_time:6367215ms step_avg:598.09ms +step:10647/57344 train_time:6367746ms step_avg:598.08ms +grad accum step:2662/14336 +step:10648/57344 train_time:6369016ms step_avg:598.14ms +step:10649/57344 train_time:6369033ms step_avg:598.09ms +step:10650/57344 train_time:6369278ms step_avg:598.05ms +step:10651/57344 train_time:6369811ms step_avg:598.05ms +grad accum step:2663/14336 +step:10652/57344 train_time:6371083ms step_avg:598.11ms +step:10653/57344 train_time:6371100ms step_avg:598.06ms +step:10654/57344 train_time:6371342ms step_avg:598.02ms +step:10655/57344 train_time:6371873ms step_avg:598.02ms +grad accum step:2664/14336 +step:10656/57344 train_time:6373158ms step_avg:598.08ms +step:10657/57344 train_time:6373175ms step_avg:598.03ms +step:10658/57344 train_time:6373415ms step_avg:597.99ms +step:10659/57344 train_time:6373959ms step_avg:597.99ms +grad accum step:2665/14336 +step:10660/57344 train_time:6375222ms step_avg:598.05ms +step:10661/57344 train_time:6375239ms step_avg:598.00ms +step:10662/57344 train_time:6375481ms step_avg:597.96ms +step:10663/57344 train_time:6376016ms step_avg:597.96ms +grad accum step:2666/14336 +step:10664/57344 train_time:6377290ms step_avg:598.02ms +step:10665/57344 train_time:6377307ms step_avg:597.97ms +step:10666/57344 train_time:6377551ms step_avg:597.93ms +step:10667/57344 train_time:6378093ms step_avg:597.93ms +grad accum step:2667/14336 +step:10668/57344 train_time:6379354ms step_avg:597.99ms +step:10669/57344 train_time:6379371ms step_avg:597.94ms +step:10670/57344 train_time:6379612ms step_avg:597.90ms +step:10671/57344 train_time:6380143ms step_avg:597.90ms +grad accum step:2668/14336 +step:10672/57344 train_time:6381416ms step_avg:597.96ms +step:10673/57344 train_time:6381433ms step_avg:597.90ms +step:10674/57344 train_time:6381673ms step_avg:597.87ms +step:10675/57344 train_time:6382204ms step_avg:597.86ms +grad accum step:2669/14336 +step:10676/57344 train_time:6383483ms step_avg:597.93ms +step:10677/57344 train_time:6383500ms step_avg:597.87ms +step:10678/57344 train_time:6383741ms step_avg:597.84ms +step:10679/57344 train_time:6384275ms step_avg:597.83ms +grad accum step:2670/14336 +step:10680/57344 train_time:6385570ms step_avg:597.90ms +step:10681/57344 train_time:6385587ms step_avg:597.85ms +step:10682/57344 train_time:6385832ms step_avg:597.81ms +step:10683/57344 train_time:6386375ms step_avg:597.81ms +grad accum step:2671/14336 +step:10684/57344 train_time:6387660ms step_avg:597.87ms +step:10685/57344 train_time:6387677ms step_avg:597.82ms +step:10686/57344 train_time:6387919ms step_avg:597.78ms +step:10687/57344 train_time:6388448ms step_avg:597.78ms +grad accum step:2672/14336 +step:10688/57344 train_time:6389724ms step_avg:597.84ms +step:10688/57344 val_loss:6.797913 train_time:6389724ms step_avg:597.84ms +step:10689/57344 train_time:6389736ms step_avg:597.79ms +step:10690/57344 train_time:6389958ms step_avg:597.75ms +step:10691/57344 train_time:6390491ms step_avg:597.74ms +grad accum step:2673/14336 +step:10692/57344 train_time:6391770ms step_avg:597.81ms +step:10693/57344 train_time:6391787ms step_avg:597.75ms +step:10694/57344 train_time:6392032ms step_avg:597.72ms +step:10695/57344 train_time:6392574ms step_avg:597.72ms +grad accum step:2674/14336 +step:10696/57344 train_time:6393855ms step_avg:597.78ms +step:10697/57344 train_time:6393872ms step_avg:597.73ms +step:10698/57344 train_time:6394120ms step_avg:597.69ms +step:10699/57344 train_time:6394661ms step_avg:597.69ms +grad accum step:2675/14336 +step:10700/57344 train_time:6395942ms step_avg:597.75ms +step:10701/57344 train_time:6395959ms step_avg:597.70ms +step:10702/57344 train_time:6396204ms step_avg:597.66ms +step:10703/57344 train_time:6396745ms step_avg:597.66ms +grad accum step:2676/14336 +step:10704/57344 train_time:6398004ms step_avg:597.72ms +step:10705/57344 train_time:6398020ms step_avg:597.67ms +step:10706/57344 train_time:6398266ms step_avg:597.63ms +step:10707/57344 train_time:6398812ms step_avg:597.63ms +grad accum step:2677/14336 +step:10708/57344 train_time:6400101ms step_avg:597.69ms +step:10709/57344 train_time:6400118ms step_avg:597.64ms +step:10710/57344 train_time:6400363ms step_avg:597.61ms +step:10711/57344 train_time:6400899ms step_avg:597.60ms +grad accum step:2678/14336 +step:10712/57344 train_time:6402165ms step_avg:597.66ms +step:10713/57344 train_time:6402183ms step_avg:597.61ms +step:10714/57344 train_time:6402428ms step_avg:597.58ms +step:10715/57344 train_time:6402962ms step_avg:597.57ms +grad accum step:2679/14336 +step:10716/57344 train_time:6404233ms step_avg:597.63ms +step:10717/57344 train_time:6404250ms step_avg:597.58ms +step:10718/57344 train_time:6404494ms step_avg:597.55ms +step:10719/57344 train_time:6405026ms step_avg:597.54ms +grad accum step:2680/14336 +step:10720/57344 train_time:6406303ms step_avg:597.60ms +step:10721/57344 train_time:6406320ms step_avg:597.55ms +step:10722/57344 train_time:6406563ms step_avg:597.52ms +step:10723/57344 train_time:6407099ms step_avg:597.51ms +grad accum step:2681/14336 +step:10724/57344 train_time:6408370ms step_avg:597.57ms +step:10725/57344 train_time:6408387ms step_avg:597.52ms +step:10726/57344 train_time:6408632ms step_avg:597.49ms +step:10727/57344 train_time:6409175ms step_avg:597.48ms +grad accum step:2682/14336 +step:10728/57344 train_time:6410456ms step_avg:597.54ms +step:10729/57344 train_time:6410473ms step_avg:597.49ms +step:10730/57344 train_time:6410714ms step_avg:597.46ms +step:10731/57344 train_time:6411244ms step_avg:597.45ms +grad accum step:2683/14336 +step:10732/57344 train_time:6412520ms step_avg:597.51ms +step:10733/57344 train_time:6412537ms step_avg:597.46ms +step:10734/57344 train_time:6412785ms step_avg:597.43ms +step:10735/57344 train_time:6413327ms step_avg:597.42ms +grad accum step:2684/14336 +step:10736/57344 train_time:6414605ms step_avg:597.49ms +step:10737/57344 train_time:6414622ms step_avg:597.43ms +step:10738/57344 train_time:6414865ms step_avg:597.40ms +step:10739/57344 train_time:6415406ms step_avg:597.39ms +grad accum step:2685/14336 +step:10740/57344 train_time:6416673ms step_avg:597.46ms +step:10741/57344 train_time:6416690ms step_avg:597.40ms +step:10742/57344 train_time:6416934ms step_avg:597.37ms +step:10743/57344 train_time:6417478ms step_avg:597.36ms +grad accum step:2686/14336 +step:10744/57344 train_time:6418762ms step_avg:597.43ms +step:10745/57344 train_time:6418779ms step_avg:597.37ms +step:10746/57344 train_time:6419025ms step_avg:597.34ms +step:10747/57344 train_time:6419570ms step_avg:597.34ms +grad accum step:2687/14336 +step:10748/57344 train_time:6420837ms step_avg:597.40ms +step:10749/57344 train_time:6420854ms step_avg:597.34ms +step:10750/57344 train_time:6421097ms step_avg:597.31ms +step:10751/57344 train_time:6421639ms step_avg:597.31ms +grad accum step:2688/14336 +step:10752/57344 train_time:6422918ms step_avg:597.37ms +step:10752/57344 val_loss:6.798851 train_time:6422918ms step_avg:597.37ms +step:10753/57344 train_time:6422930ms step_avg:597.32ms +step:10754/57344 train_time:6423148ms step_avg:597.28ms +step:10755/57344 train_time:6423678ms step_avg:597.27ms +grad accum step:2689/14336 +step:10756/57344 train_time:6424941ms step_avg:597.34ms +step:10757/57344 train_time:6424959ms step_avg:597.28ms +step:10758/57344 train_time:6425203ms step_avg:597.25ms +step:10759/57344 train_time:6425733ms step_avg:597.24ms +grad accum step:2690/14336 +step:10760/57344 train_time:6427009ms step_avg:597.31ms +step:10761/57344 train_time:6427026ms step_avg:597.25ms +step:10762/57344 train_time:6427272ms step_avg:597.22ms +step:10763/57344 train_time:6427814ms step_avg:597.21ms +grad accum step:2691/14336 +step:10764/57344 train_time:6429091ms step_avg:597.28ms +step:10765/57344 train_time:6429108ms step_avg:597.22ms +step:10766/57344 train_time:6429351ms step_avg:597.19ms +step:10767/57344 train_time:6429878ms step_avg:597.18ms +grad accum step:2692/14336 +step:10768/57344 train_time:6431139ms step_avg:597.25ms +step:10769/57344 train_time:6431156ms step_avg:597.19ms +step:10770/57344 train_time:6431399ms step_avg:597.16ms +step:10771/57344 train_time:6431942ms step_avg:597.15ms +grad accum step:2693/14336 +step:10772/57344 train_time:6433223ms step_avg:597.22ms +step:10773/57344 train_time:6433240ms step_avg:597.16ms +step:10774/57344 train_time:6433483ms step_avg:597.13ms +step:10775/57344 train_time:6434012ms step_avg:597.12ms +grad accum step:2694/14336 +step:10776/57344 train_time:6435290ms step_avg:597.19ms +step:10777/57344 train_time:6435307ms step_avg:597.13ms +step:10778/57344 train_time:6435550ms step_avg:597.10ms +step:10779/57344 train_time:6436089ms step_avg:597.10ms +grad accum step:2695/14336 +step:10780/57344 train_time:6437353ms step_avg:597.16ms +step:10781/57344 train_time:6437370ms step_avg:597.10ms +step:10782/57344 train_time:6437613ms step_avg:597.07ms +step:10783/57344 train_time:6438143ms step_avg:597.06ms +grad accum step:2696/14336 +step:10784/57344 train_time:6439423ms step_avg:597.13ms +step:10785/57344 train_time:6439440ms step_avg:597.07ms +step:10786/57344 train_time:6439684ms step_avg:597.04ms +step:10787/57344 train_time:6440225ms step_avg:597.04ms +grad accum step:2697/14336 +step:10788/57344 train_time:6441505ms step_avg:597.10ms +step:10789/57344 train_time:6441522ms step_avg:597.05ms +step:10790/57344 train_time:6441765ms step_avg:597.01ms +step:10791/57344 train_time:6442295ms step_avg:597.01ms +grad accum step:2698/14336 +step:10792/57344 train_time:6443571ms step_avg:597.07ms +step:10793/57344 train_time:6443588ms step_avg:597.02ms +step:10794/57344 train_time:6443832ms step_avg:596.98ms +step:10795/57344 train_time:6444358ms step_avg:596.98ms +grad accum step:2699/14336 +step:10796/57344 train_time:6445634ms step_avg:597.04ms +step:10797/57344 train_time:6445651ms step_avg:596.99ms +step:10798/57344 train_time:6445896ms step_avg:596.95ms +step:10799/57344 train_time:6446426ms step_avg:596.95ms +grad accum step:2700/14336 +step:10800/57344 train_time:6447705ms step_avg:597.01ms +step:10801/57344 train_time:6447722ms step_avg:596.96ms +step:10802/57344 train_time:6447971ms step_avg:596.92ms +step:10803/57344 train_time:6448518ms step_avg:596.92ms +grad accum step:2701/14336 +step:10804/57344 train_time:6449789ms step_avg:596.98ms +step:10805/57344 train_time:6449807ms step_avg:596.93ms +step:10806/57344 train_time:6450049ms step_avg:596.90ms +step:10807/57344 train_time:6450583ms step_avg:596.89ms +grad accum step:2702/14336 +step:10808/57344 train_time:6451880ms step_avg:596.95ms +step:10809/57344 train_time:6451897ms step_avg:596.90ms +step:10810/57344 train_time:6452138ms step_avg:596.87ms +step:10811/57344 train_time:6452671ms step_avg:596.86ms +grad accum step:2703/14336 +step:10812/57344 train_time:6453954ms step_avg:596.93ms +step:10813/57344 train_time:6453971ms step_avg:596.87ms +step:10814/57344 train_time:6454214ms step_avg:596.84ms +step:10815/57344 train_time:6454745ms step_avg:596.83ms +grad accum step:2704/14336 +step:10816/57344 train_time:6456026ms step_avg:596.90ms +step:10816/57344 val_loss:6.835824 train_time:6456027ms step_avg:596.90ms +step:10817/57344 train_time:6456039ms step_avg:596.84ms +step:10818/57344 train_time:6456260ms step_avg:596.81ms +step:10819/57344 train_time:6456800ms step_avg:596.80ms +grad accum step:2705/14336 +step:10820/57344 train_time:6458100ms step_avg:596.87ms +step:10821/57344 train_time:6458117ms step_avg:596.81ms +step:10822/57344 train_time:6458358ms step_avg:596.78ms +step:10823/57344 train_time:6458897ms step_avg:596.78ms +grad accum step:2706/14336 +step:10824/57344 train_time:6460158ms step_avg:596.84ms +step:10825/57344 train_time:6460175ms step_avg:596.78ms +step:10826/57344 train_time:6460420ms step_avg:596.75ms +step:10827/57344 train_time:6460950ms step_avg:596.74ms +grad accum step:2707/14336 +step:10828/57344 train_time:6462224ms step_avg:596.81ms +step:10829/57344 train_time:6462241ms step_avg:596.75ms +step:10830/57344 train_time:6462483ms step_avg:596.72ms +step:10831/57344 train_time:6463009ms step_avg:596.71ms +grad accum step:2708/14336 +step:10832/57344 train_time:6464270ms step_avg:596.78ms +step:10833/57344 train_time:6464288ms step_avg:596.72ms +step:10834/57344 train_time:6464531ms step_avg:596.69ms +step:10835/57344 train_time:6465058ms step_avg:596.68ms +grad accum step:2709/14336 +step:10836/57344 train_time:6466340ms step_avg:596.75ms +step:10837/57344 train_time:6466357ms step_avg:596.69ms +step:10838/57344 train_time:6466606ms step_avg:596.66ms +step:10839/57344 train_time:6467155ms step_avg:596.66ms +grad accum step:2710/14336 +step:10840/57344 train_time:6468428ms step_avg:596.72ms +step:10841/57344 train_time:6468445ms step_avg:596.66ms +step:10842/57344 train_time:6468692ms step_avg:596.63ms +step:10843/57344 train_time:6469231ms step_avg:596.63ms +grad accum step:2711/14336 +step:10844/57344 train_time:6470491ms step_avg:596.69ms +step:10845/57344 train_time:6470508ms step_avg:596.64ms +step:10846/57344 train_time:6470752ms step_avg:596.60ms +step:10847/57344 train_time:6471285ms step_avg:596.60ms +grad accum step:2712/14336 +step:10848/57344 train_time:6472562ms step_avg:596.66ms +step:10849/57344 train_time:6472579ms step_avg:596.61ms +step:10850/57344 train_time:6472833ms step_avg:596.57ms +step:10851/57344 train_time:6473398ms step_avg:596.57ms +grad accum step:2713/14336 +step:10852/57344 train_time:6474688ms step_avg:596.64ms +step:10853/57344 train_time:6474705ms step_avg:596.58ms +step:10854/57344 train_time:6474950ms step_avg:596.55ms +step:10855/57344 train_time:6475491ms step_avg:596.54ms +grad accum step:2714/14336 +step:10856/57344 train_time:6476764ms step_avg:596.61ms +step:10857/57344 train_time:6476782ms step_avg:596.55ms +step:10858/57344 train_time:6477026ms step_avg:596.52ms +step:10859/57344 train_time:6477560ms step_avg:596.52ms +grad accum step:2715/14336 +step:10860/57344 train_time:6478971ms step_avg:596.59ms +step:10861/57344 train_time:6478983ms step_avg:596.54ms +step:10862/57344 train_time:6479191ms step_avg:596.50ms +step:10863/57344 train_time:6479725ms step_avg:596.49ms +grad accum step:2716/14336 +step:10864/57344 train_time:6480971ms step_avg:596.55ms +step:10865/57344 train_time:6480988ms step_avg:596.50ms +step:10866/57344 train_time:6481233ms step_avg:596.47ms +step:10867/57344 train_time:6481768ms step_avg:596.46ms +grad accum step:2717/14336 +step:10868/57344 train_time:6483037ms step_avg:596.53ms +step:10869/57344 train_time:6483054ms step_avg:596.47ms +step:10870/57344 train_time:6483298ms step_avg:596.44ms +step:10871/57344 train_time:6483829ms step_avg:596.43ms +grad accum step:2718/14336 +step:10872/57344 train_time:6485099ms step_avg:596.50ms +step:10873/57344 train_time:6485115ms step_avg:596.44ms +step:10874/57344 train_time:6485355ms step_avg:596.41ms +step:10875/57344 train_time:6485881ms step_avg:596.40ms +grad accum step:2719/14336 +step:10876/57344 train_time:6487169ms step_avg:596.47ms +step:10877/57344 train_time:6487186ms step_avg:596.41ms +step:10878/57344 train_time:6487429ms step_avg:596.38ms +step:10879/57344 train_time:6487964ms step_avg:596.38ms +grad accum step:2720/14336 +step:10880/57344 train_time:6489231ms step_avg:596.44ms +step:10880/57344 val_loss:6.815319 train_time:6489232ms step_avg:596.44ms +step:10881/57344 train_time:6489244ms step_avg:596.38ms +step:10882/57344 train_time:6489462ms step_avg:596.35ms +step:10883/57344 train_time:6489991ms step_avg:596.34ms +grad accum step:2721/14336 +step:10884/57344 train_time:6491250ms step_avg:596.40ms +step:10885/57344 train_time:6491267ms step_avg:596.35ms +step:10886/57344 train_time:6491511ms step_avg:596.32ms +step:10887/57344 train_time:6492042ms step_avg:596.31ms +grad accum step:2722/14336 +step:10888/57344 train_time:6493323ms step_avg:596.37ms +step:10889/57344 train_time:6493339ms step_avg:596.32ms +step:10890/57344 train_time:6493584ms step_avg:596.29ms +step:10891/57344 train_time:6494128ms step_avg:596.28ms +grad accum step:2723/14336 +step:10892/57344 train_time:6495408ms step_avg:596.35ms +step:10893/57344 train_time:6495425ms step_avg:596.29ms +step:10894/57344 train_time:6495667ms step_avg:596.26ms +step:10895/57344 train_time:6496201ms step_avg:596.26ms +grad accum step:2724/14336 +step:10896/57344 train_time:6497466ms step_avg:596.32ms +step:10897/57344 train_time:6497484ms step_avg:596.26ms +step:10898/57344 train_time:6497724ms step_avg:596.23ms +step:10899/57344 train_time:6498248ms step_avg:596.22ms +grad accum step:2725/14336 +step:10900/57344 train_time:6499514ms step_avg:596.29ms +step:10901/57344 train_time:6499531ms step_avg:596.23ms +step:10902/57344 train_time:6499775ms step_avg:596.20ms +step:10903/57344 train_time:6500317ms step_avg:596.20ms +grad accum step:2726/14336 +step:10904/57344 train_time:6501587ms step_avg:596.26ms +step:10905/57344 train_time:6501604ms step_avg:596.20ms +step:10906/57344 train_time:6501845ms step_avg:596.17ms +step:10907/57344 train_time:6502378ms step_avg:596.17ms +grad accum step:2727/14336 +step:10908/57344 train_time:6503648ms step_avg:596.23ms +step:10909/57344 train_time:6503665ms step_avg:596.17ms +step:10910/57344 train_time:6503910ms step_avg:596.14ms +step:10911/57344 train_time:6504452ms step_avg:596.14ms +grad accum step:2728/14336 +step:10912/57344 train_time:6505715ms step_avg:596.20ms +step:10913/57344 train_time:6505732ms step_avg:596.15ms +step:10914/57344 train_time:6505975ms step_avg:596.11ms +step:10915/57344 train_time:6506513ms step_avg:596.11ms +grad accum step:2729/14336 +step:10916/57344 train_time:6507779ms step_avg:596.17ms +step:10917/57344 train_time:6507796ms step_avg:596.12ms +step:10918/57344 train_time:6508044ms step_avg:596.08ms +step:10919/57344 train_time:6508586ms step_avg:596.08ms +grad accum step:2730/14336 +step:10920/57344 train_time:6509841ms step_avg:596.14ms +step:10921/57344 train_time:6509858ms step_avg:596.09ms +step:10922/57344 train_time:6510102ms step_avg:596.05ms +step:10923/57344 train_time:6510635ms step_avg:596.05ms +grad accum step:2731/14336 +step:10924/57344 train_time:6511920ms step_avg:596.11ms +step:10925/57344 train_time:6511937ms step_avg:596.06ms +step:10926/57344 train_time:6512179ms step_avg:596.03ms +step:10927/57344 train_time:6512709ms step_avg:596.02ms +grad accum step:2732/14336 +step:10928/57344 train_time:6514002ms step_avg:596.08ms +step:10929/57344 train_time:6514019ms step_avg:596.03ms +step:10930/57344 train_time:6514264ms step_avg:596.00ms +step:10931/57344 train_time:6514801ms step_avg:595.99ms +grad accum step:2733/14336 +step:10932/57344 train_time:6516079ms step_avg:596.06ms +step:10933/57344 train_time:6516096ms step_avg:596.00ms +step:10934/57344 train_time:6516337ms step_avg:595.97ms +step:10935/57344 train_time:6516863ms step_avg:595.96ms +grad accum step:2734/14336 +step:10936/57344 train_time:6518125ms step_avg:596.02ms +step:10937/57344 train_time:6518142ms step_avg:595.97ms +step:10938/57344 train_time:6518390ms step_avg:595.94ms +step:10939/57344 train_time:6518935ms step_avg:595.94ms +grad accum step:2735/14336 +step:10940/57344 train_time:6520201ms step_avg:596.00ms +step:10941/57344 train_time:6520218ms step_avg:595.94ms +step:10942/57344 train_time:6520459ms step_avg:595.91ms +step:10943/57344 train_time:6520987ms step_avg:595.90ms +grad accum step:2736/14336 +step:10944/57344 train_time:6522261ms step_avg:595.97ms +step:10944/57344 val_loss:6.821844 train_time:6522262ms step_avg:595.97ms +step:10945/57344 train_time:6522274ms step_avg:595.91ms +step:10946/57344 train_time:6522494ms step_avg:595.88ms +step:10947/57344 train_time:6523035ms step_avg:595.87ms +grad accum step:2737/14336 +step:10948/57344 train_time:6524316ms step_avg:595.94ms +step:10949/57344 train_time:6524333ms step_avg:595.88ms +step:10950/57344 train_time:6524575ms step_avg:595.85ms +step:10951/57344 train_time:6525105ms step_avg:595.85ms +grad accum step:2738/14336 +step:10952/57344 train_time:6526387ms step_avg:595.91ms +step:10953/57344 train_time:6526404ms step_avg:595.86ms +step:10954/57344 train_time:6526647ms step_avg:595.82ms +step:10955/57344 train_time:6527185ms step_avg:595.82ms +grad accum step:2739/14336 +step:10956/57344 train_time:6528454ms step_avg:595.88ms +step:10957/57344 train_time:6528472ms step_avg:595.83ms +step:10958/57344 train_time:6528714ms step_avg:595.79ms +step:10959/57344 train_time:6529246ms step_avg:595.79ms +grad accum step:2740/14336 +step:10960/57344 train_time:6530519ms step_avg:595.85ms +step:10961/57344 train_time:6530536ms step_avg:595.80ms +step:10962/57344 train_time:6530781ms step_avg:595.77ms +step:10963/57344 train_time:6531330ms step_avg:595.76ms +grad accum step:2741/14336 +step:10964/57344 train_time:6532600ms step_avg:595.82ms +step:10965/57344 train_time:6532617ms step_avg:595.77ms +step:10966/57344 train_time:6532861ms step_avg:595.74ms +step:10967/57344 train_time:6533394ms step_avg:595.73ms +grad accum step:2742/14336 +step:10968/57344 train_time:6534681ms step_avg:595.80ms +step:10969/57344 train_time:6534698ms step_avg:595.74ms +step:10970/57344 train_time:6534937ms step_avg:595.71ms +step:10971/57344 train_time:6535470ms step_avg:595.70ms +grad accum step:2743/14336 +step:10972/57344 train_time:6536741ms step_avg:595.77ms +step:10973/57344 train_time:6536759ms step_avg:595.71ms +step:10974/57344 train_time:6537001ms step_avg:595.68ms +step:10975/57344 train_time:6537531ms step_avg:595.67ms +grad accum step:2744/14336 +step:10976/57344 train_time:6538813ms step_avg:595.74ms +step:10977/57344 train_time:6538829ms step_avg:595.68ms +step:10978/57344 train_time:6539071ms step_avg:595.65ms +step:10979/57344 train_time:6539610ms step_avg:595.65ms +grad accum step:2745/14336 +step:10980/57344 train_time:6540896ms step_avg:595.71ms +step:10981/57344 train_time:6540913ms step_avg:595.66ms +step:10982/57344 train_time:6541163ms step_avg:595.63ms +step:10983/57344 train_time:6541707ms step_avg:595.62ms +grad accum step:2746/14336 +step:10984/57344 train_time:6542975ms step_avg:595.68ms +step:10985/57344 train_time:6542993ms step_avg:595.63ms +step:10986/57344 train_time:6543236ms step_avg:595.60ms +step:10987/57344 train_time:6543767ms step_avg:595.59ms +grad accum step:2747/14336 +step:10988/57344 train_time:6545025ms step_avg:595.65ms +step:10989/57344 train_time:6545042ms step_avg:595.60ms +step:10990/57344 train_time:6545285ms step_avg:595.57ms +step:10991/57344 train_time:6545813ms step_avg:595.56ms +grad accum step:2748/14336 +step:10992/57344 train_time:6547083ms step_avg:595.62ms +step:10993/57344 train_time:6547100ms step_avg:595.57ms +step:10994/57344 train_time:6547342ms step_avg:595.54ms +step:10995/57344 train_time:6547873ms step_avg:595.53ms +grad accum step:2749/14336 +step:10996/57344 train_time:6549144ms step_avg:595.59ms +step:10997/57344 train_time:6549161ms step_avg:595.54ms +step:10998/57344 train_time:6549404ms step_avg:595.51ms +step:10999/57344 train_time:6549935ms step_avg:595.50ms +grad accum step:2750/14336 +step:11000/57344 train_time:6551211ms step_avg:595.56ms +step:11001/57344 train_time:6551228ms step_avg:595.51ms +step:11002/57344 train_time:6551471ms step_avg:595.48ms +step:11003/57344 train_time:6552007ms step_avg:595.47ms +grad accum step:2751/14336 +step:11004/57344 train_time:6553278ms step_avg:595.54ms +step:11005/57344 train_time:6553295ms step_avg:595.48ms +step:11006/57344 train_time:6553536ms step_avg:595.45ms +step:11007/57344 train_time:6554070ms step_avg:595.45ms +grad accum step:2752/14336 +step:11008/57344 train_time:6555342ms step_avg:595.51ms +step:11008/57344 val_loss:6.818879 train_time:6555343ms step_avg:595.51ms +step:11009/57344 train_time:6555355ms step_avg:595.45ms +step:11010/57344 train_time:6555573ms step_avg:595.42ms +step:11011/57344 train_time:6556104ms step_avg:595.41ms +grad accum step:2753/14336 +step:11012/57344 train_time:6557376ms step_avg:595.48ms +step:11013/57344 train_time:6557394ms step_avg:595.42ms +step:11014/57344 train_time:6557636ms step_avg:595.39ms +step:11015/57344 train_time:6558163ms step_avg:595.38ms +grad accum step:2754/14336 +step:11016/57344 train_time:6559425ms step_avg:595.45ms +step:11017/57344 train_time:6559442ms step_avg:595.39ms +step:11018/57344 train_time:6559687ms step_avg:595.36ms +step:11019/57344 train_time:6560225ms step_avg:595.36ms +grad accum step:2755/14336 +step:11020/57344 train_time:6561472ms step_avg:595.41ms +step:11021/57344 train_time:6561489ms step_avg:595.36ms +step:11022/57344 train_time:6561734ms step_avg:595.33ms +step:11023/57344 train_time:6562285ms step_avg:595.33ms +grad accum step:2756/14336 +step:11024/57344 train_time:6563573ms step_avg:595.39ms +step:11025/57344 train_time:6563591ms step_avg:595.34ms +step:11026/57344 train_time:6563832ms step_avg:595.30ms +step:11027/57344 train_time:6564361ms step_avg:595.30ms +grad accum step:2757/14336 +step:11028/57344 train_time:6565647ms step_avg:595.36ms +step:11029/57344 train_time:6565664ms step_avg:595.31ms +step:11030/57344 train_time:6565906ms step_avg:595.28ms +step:11031/57344 train_time:6566444ms step_avg:595.27ms +grad accum step:2758/14336 +step:11032/57344 train_time:6567711ms step_avg:595.33ms +step:11033/57344 train_time:6567728ms step_avg:595.28ms +step:11034/57344 train_time:6567972ms step_avg:595.25ms +step:11035/57344 train_time:6568504ms step_avg:595.24ms +grad accum step:2759/14336 +step:11036/57344 train_time:6569785ms step_avg:595.30ms +step:11037/57344 train_time:6569802ms step_avg:595.25ms +step:11038/57344 train_time:6570049ms step_avg:595.22ms +step:11039/57344 train_time:6570590ms step_avg:595.22ms +grad accum step:2760/14336 +step:11040/57344 train_time:6571862ms step_avg:595.28ms +step:11041/57344 train_time:6571878ms step_avg:595.22ms +step:11042/57344 train_time:6572124ms step_avg:595.19ms +step:11043/57344 train_time:6572654ms step_avg:595.19ms +grad accum step:2761/14336 +step:11044/57344 train_time:6573922ms step_avg:595.25ms +step:11045/57344 train_time:6573939ms step_avg:595.20ms +step:11046/57344 train_time:6574184ms step_avg:595.16ms +step:11047/57344 train_time:6574727ms step_avg:595.16ms +grad accum step:2762/14336 +step:11048/57344 train_time:6575991ms step_avg:595.22ms +step:11049/57344 train_time:6576008ms step_avg:595.17ms +step:11050/57344 train_time:6576252ms step_avg:595.14ms +step:11051/57344 train_time:6576798ms step_avg:595.13ms +grad accum step:2763/14336 +step:11052/57344 train_time:6578073ms step_avg:595.19ms +step:11053/57344 train_time:6578090ms step_avg:595.14ms +step:11054/57344 train_time:6578330ms step_avg:595.11ms +step:11055/57344 train_time:6578862ms step_avg:595.10ms +grad accum step:2764/14336 +step:11056/57344 train_time:6580123ms step_avg:595.16ms +step:11057/57344 train_time:6580140ms step_avg:595.11ms +step:11058/57344 train_time:6580379ms step_avg:595.08ms +step:11059/57344 train_time:6580902ms step_avg:595.07ms +grad accum step:2765/14336 +step:11060/57344 train_time:6582168ms step_avg:595.13ms +step:11061/57344 train_time:6582186ms step_avg:595.08ms +step:11062/57344 train_time:6582426ms step_avg:595.05ms +step:11063/57344 train_time:6582955ms step_avg:595.04ms +grad accum step:2766/14336 +step:11064/57344 train_time:6584237ms step_avg:595.10ms +step:11065/57344 train_time:6584255ms step_avg:595.05ms +step:11066/57344 train_time:6584496ms step_avg:595.02ms +step:11067/57344 train_time:6585028ms step_avg:595.01ms +grad accum step:2767/14336 +step:11068/57344 train_time:6592593ms step_avg:595.64ms +step:11069/57344 train_time:6592605ms step_avg:595.59ms +step:11070/57344 train_time:6592861ms step_avg:595.56ms +step:11071/57344 train_time:6593397ms step_avg:595.56ms +grad accum step:2768/14336 +step:11072/57344 train_time:6594659ms step_avg:595.62ms +step:11072/57344 val_loss:6.813581 train_time:6594659ms step_avg:595.62ms +step:11073/57344 train_time:6594671ms step_avg:595.56ms +step:11074/57344 train_time:6594891ms step_avg:595.53ms +step:11075/57344 train_time:6595429ms step_avg:595.52ms +grad accum step:2769/14336 +step:11076/57344 train_time:6596759ms step_avg:595.59ms +step:11077/57344 train_time:6596775ms step_avg:595.54ms +step:11078/57344 train_time:6597022ms step_avg:595.51ms +step:11079/57344 train_time:6597560ms step_avg:595.50ms +grad accum step:2770/14336 +step:11080/57344 train_time:6598820ms step_avg:595.56ms +step:11081/57344 train_time:6598838ms step_avg:595.51ms +step:11082/57344 train_time:6599080ms step_avg:595.48ms +step:11083/57344 train_time:6599608ms step_avg:595.47ms +grad accum step:2771/14336 +step:11084/57344 train_time:6600893ms step_avg:595.53ms +step:11085/57344 train_time:6600910ms step_avg:595.48ms +step:11086/57344 train_time:6601154ms step_avg:595.45ms +step:11087/57344 train_time:6601691ms step_avg:595.44ms +grad accum step:2772/14336 +step:11088/57344 train_time:6602962ms step_avg:595.51ms +step:11089/57344 train_time:6602979ms step_avg:595.45ms +step:11090/57344 train_time:6603219ms step_avg:595.42ms +step:11091/57344 train_time:6603744ms step_avg:595.41ms +grad accum step:2773/14336 +step:11092/57344 train_time:6605018ms step_avg:595.48ms +step:11093/57344 train_time:6605035ms step_avg:595.42ms +step:11094/57344 train_time:6605277ms step_avg:595.39ms +step:11095/57344 train_time:6605808ms step_avg:595.39ms +grad accum step:2774/14336 +step:11096/57344 train_time:6607080ms step_avg:595.45ms +step:11097/57344 train_time:6607097ms step_avg:595.39ms +step:11098/57344 train_time:6607342ms step_avg:595.36ms +step:11099/57344 train_time:6607874ms step_avg:595.36ms +grad accum step:2775/14336 +step:11100/57344 train_time:6609149ms step_avg:595.42ms +step:11101/57344 train_time:6609166ms step_avg:595.37ms +step:11102/57344 train_time:6609410ms step_avg:595.34ms +step:11103/57344 train_time:6609945ms step_avg:595.33ms +grad accum step:2776/14336 +step:11104/57344 train_time:6611201ms step_avg:595.39ms +step:11105/57344 train_time:6611217ms step_avg:595.34ms +step:11106/57344 train_time:6611462ms step_avg:595.31ms +step:11107/57344 train_time:6612002ms step_avg:595.30ms +grad accum step:2777/14336 +step:11108/57344 train_time:6613284ms step_avg:595.36ms +step:11109/57344 train_time:6613301ms step_avg:595.31ms +step:11110/57344 train_time:6613542ms step_avg:595.28ms +step:11111/57344 train_time:6614080ms step_avg:595.27ms +grad accum step:2778/14336 +step:11112/57344 train_time:6615410ms step_avg:595.34ms +step:11113/57344 train_time:6615427ms step_avg:595.29ms +step:11114/57344 train_time:6615670ms step_avg:595.26ms +step:11115/57344 train_time:6616211ms step_avg:595.25ms +grad accum step:2779/14336 +step:11116/57344 train_time:6617486ms step_avg:595.31ms +step:11117/57344 train_time:6617503ms step_avg:595.26ms +step:11118/57344 train_time:6617746ms step_avg:595.23ms +step:11119/57344 train_time:6618276ms step_avg:595.22ms +grad accum step:2780/14336 +step:11120/57344 train_time:6619555ms step_avg:595.28ms +step:11121/57344 train_time:6619572ms step_avg:595.23ms +step:11122/57344 train_time:6619815ms step_avg:595.20ms +step:11123/57344 train_time:6620354ms step_avg:595.20ms +grad accum step:2781/14336 +step:11124/57344 train_time:6621615ms step_avg:595.25ms +step:11125/57344 train_time:6621632ms step_avg:595.20ms +step:11126/57344 train_time:6621874ms step_avg:595.17ms +step:11127/57344 train_time:6622404ms step_avg:595.17ms +grad accum step:2782/14336 +step:11128/57344 train_time:6623681ms step_avg:595.23ms +step:11129/57344 train_time:6623698ms step_avg:595.17ms +step:11130/57344 train_time:6623939ms step_avg:595.14ms +step:11131/57344 train_time:6624476ms step_avg:595.14ms +grad accum step:2783/14336 +step:11132/57344 train_time:6630454ms step_avg:595.62ms +step:11133/57344 train_time:6630466ms step_avg:595.57ms +step:11134/57344 train_time:6630745ms step_avg:595.54ms +step:11135/57344 train_time:6631278ms step_avg:595.53ms +grad accum step:2784/14336 +step:11136/57344 train_time:6632542ms step_avg:595.59ms +step:11136/57344 val_loss:6.821119 train_time:6632542ms step_avg:595.59ms +step:11137/57344 train_time:6632554ms step_avg:595.54ms +step:11138/57344 train_time:6632774ms step_avg:595.51ms +step:11139/57344 train_time:6633311ms step_avg:595.50ms +grad accum step:2785/14336 +step:11140/57344 train_time:6634584ms step_avg:595.56ms +step:11141/57344 train_time:6634601ms step_avg:595.51ms +step:11142/57344 train_time:6634844ms step_avg:595.48ms +step:11143/57344 train_time:6635384ms step_avg:595.48ms +grad accum step:2786/14336 +step:11144/57344 train_time:6636674ms step_avg:595.54ms +step:11145/57344 train_time:6636691ms step_avg:595.49ms +step:11146/57344 train_time:6636933ms step_avg:595.45ms +step:11147/57344 train_time:6637465ms step_avg:595.45ms +grad accum step:2787/14336 +step:11148/57344 train_time:6638745ms step_avg:595.51ms +step:11149/57344 train_time:6638762ms step_avg:595.46ms +step:11150/57344 train_time:6639007ms step_avg:595.43ms +step:11151/57344 train_time:6639547ms step_avg:595.42ms +grad accum step:2788/14336 +step:11152/57344 train_time:6640806ms step_avg:595.48ms +step:11153/57344 train_time:6640823ms step_avg:595.43ms +step:11154/57344 train_time:6641064ms step_avg:595.40ms +step:11155/57344 train_time:6641592ms step_avg:595.39ms +grad accum step:2789/14336 +step:11156/57344 train_time:6642868ms step_avg:595.45ms +step:11157/57344 train_time:6642885ms step_avg:595.40ms +step:11158/57344 train_time:6643127ms step_avg:595.37ms +step:11159/57344 train_time:6643656ms step_avg:595.36ms +grad accum step:2790/14336 +step:11160/57344 train_time:6644916ms step_avg:595.42ms +step:11161/57344 train_time:6644934ms step_avg:595.37ms +step:11162/57344 train_time:6645174ms step_avg:595.34ms +step:11163/57344 train_time:6645700ms step_avg:595.33ms +grad accum step:2791/14336 +step:11164/57344 train_time:6646979ms step_avg:595.39ms +step:11165/57344 train_time:6646996ms step_avg:595.34ms +step:11166/57344 train_time:6647240ms step_avg:595.31ms +step:11167/57344 train_time:6647780ms step_avg:595.31ms +grad accum step:2792/14336 +step:11168/57344 train_time:6649061ms step_avg:595.37ms +step:11169/57344 train_time:6649078ms step_avg:595.32ms +step:11170/57344 train_time:6649324ms step_avg:595.28ms +step:11171/57344 train_time:6649870ms step_avg:595.28ms +grad accum step:2793/14336 +step:11172/57344 train_time:6651130ms step_avg:595.34ms +step:11173/57344 train_time:6651146ms step_avg:595.29ms +step:11174/57344 train_time:6651398ms step_avg:595.26ms +step:11175/57344 train_time:6651955ms step_avg:595.25ms +grad accum step:2794/14336 +step:11176/57344 train_time:6653232ms step_avg:595.31ms +step:11177/57344 train_time:6653249ms step_avg:595.26ms +step:11178/57344 train_time:6653490ms step_avg:595.23ms +step:11179/57344 train_time:6654023ms step_avg:595.23ms +grad accum step:2795/14336 +step:11180/57344 train_time:6655292ms step_avg:595.29ms +step:11181/57344 train_time:6655310ms step_avg:595.23ms +step:11182/57344 train_time:6655553ms step_avg:595.20ms +step:11183/57344 train_time:6656092ms step_avg:595.20ms +grad accum step:2796/14336 +step:11184/57344 train_time:6657357ms step_avg:595.26ms +step:11185/57344 train_time:6657374ms step_avg:595.21ms +step:11186/57344 train_time:6657618ms step_avg:595.17ms +step:11187/57344 train_time:6658161ms step_avg:595.17ms +grad accum step:2797/14336 +step:11188/57344 train_time:6659450ms step_avg:595.23ms +step:11189/57344 train_time:6659466ms step_avg:595.18ms +step:11190/57344 train_time:6659709ms step_avg:595.15ms +step:11191/57344 train_time:6660246ms step_avg:595.14ms +grad accum step:2798/14336 +step:11192/57344 train_time:6661505ms step_avg:595.20ms +step:11193/57344 train_time:6661522ms step_avg:595.15ms +step:11194/57344 train_time:6661763ms step_avg:595.12ms +step:11195/57344 train_time:6662292ms step_avg:595.11ms +grad accum step:2799/14336 +step:11196/57344 train_time:6663569ms step_avg:595.17ms +step:11197/57344 train_time:6663586ms step_avg:595.12ms +step:11198/57344 train_time:6663828ms step_avg:595.09ms +step:11199/57344 train_time:6664363ms step_avg:595.09ms +grad accum step:2800/14336 +step:11200/57344 train_time:6665654ms step_avg:595.15ms +step:11200/57344 val_loss:6.829227 train_time:6665655ms step_avg:595.15ms +step:11201/57344 train_time:6665667ms step_avg:595.10ms +step:11202/57344 train_time:6665888ms step_avg:595.06ms +step:11203/57344 train_time:6666420ms step_avg:595.06ms +grad accum step:2801/14336 +step:11204/57344 train_time:6667682ms step_avg:595.12ms +step:11205/57344 train_time:6667699ms step_avg:595.06ms +step:11206/57344 train_time:6667941ms step_avg:595.03ms +step:11207/57344 train_time:6668471ms step_avg:595.03ms +grad accum step:2802/14336 +step:11208/57344 train_time:6669749ms step_avg:595.09ms +step:11209/57344 train_time:6669766ms step_avg:595.04ms +step:11210/57344 train_time:6670011ms step_avg:595.01ms +step:11211/57344 train_time:6670547ms step_avg:595.00ms +grad accum step:2803/14336 +step:11212/57344 train_time:6671811ms step_avg:595.06ms +step:11213/57344 train_time:6671828ms step_avg:595.01ms +step:11214/57344 train_time:6672068ms step_avg:594.98ms +step:11215/57344 train_time:6672598ms step_avg:594.97ms +grad accum step:2804/14336 +step:11216/57344 train_time:6673891ms step_avg:595.03ms +step:11217/57344 train_time:6673908ms step_avg:594.98ms +step:11218/57344 train_time:6674153ms step_avg:594.95ms +step:11219/57344 train_time:6674692ms step_avg:594.95ms +grad accum step:2805/14336 +step:11220/57344 train_time:6675964ms step_avg:595.01ms +step:11221/57344 train_time:6675982ms step_avg:594.95ms +step:11222/57344 train_time:6676222ms step_avg:594.92ms +step:11223/57344 train_time:6676751ms step_avg:594.92ms +grad accum step:2806/14336 +step:11224/57344 train_time:6678058ms step_avg:594.98ms +step:11225/57344 train_time:6678075ms step_avg:594.93ms +step:11226/57344 train_time:6678323ms step_avg:594.90ms +step:11227/57344 train_time:6678863ms step_avg:594.89ms +grad accum step:2807/14336 +step:11228/57344 train_time:6680139ms step_avg:594.95ms +step:11229/57344 train_time:6680156ms step_avg:594.90ms +step:11230/57344 train_time:6680397ms step_avg:594.87ms +step:11231/57344 train_time:6680923ms step_avg:594.86ms +grad accum step:2808/14336 +step:11232/57344 train_time:6682200ms step_avg:594.93ms +step:11233/57344 train_time:6682217ms step_avg:594.87ms +step:11234/57344 train_time:6682458ms step_avg:594.84ms +step:11235/57344 train_time:6682982ms step_avg:594.84ms +grad accum step:2809/14336 +step:11236/57344 train_time:6684269ms step_avg:594.90ms +step:11237/57344 train_time:6684286ms step_avg:594.85ms +step:11238/57344 train_time:6684530ms step_avg:594.81ms +step:11239/57344 train_time:6685071ms step_avg:594.81ms +grad accum step:2810/14336 +step:11240/57344 train_time:6686342ms step_avg:594.87ms +step:11241/57344 train_time:6686359ms step_avg:594.82ms +step:11242/57344 train_time:6686603ms step_avg:594.79ms +step:11243/57344 train_time:6687133ms step_avg:594.78ms +grad accum step:2811/14336 +step:11244/57344 train_time:6688409ms step_avg:594.84ms +step:11245/57344 train_time:6688426ms step_avg:594.79ms +step:11246/57344 train_time:6688670ms step_avg:594.76ms +step:11247/57344 train_time:6689214ms step_avg:594.76ms +grad accum step:2812/14336 +step:11248/57344 train_time:6690488ms step_avg:594.82ms +step:11249/57344 train_time:6690505ms step_avg:594.76ms +step:11250/57344 train_time:6690744ms step_avg:594.73ms +step:11251/57344 train_time:6691271ms step_avg:594.73ms +grad accum step:2813/14336 +step:11252/57344 train_time:6692544ms step_avg:594.79ms +step:11253/57344 train_time:6692562ms step_avg:594.74ms +step:11254/57344 train_time:6692803ms step_avg:594.70ms +step:11255/57344 train_time:6693330ms step_avg:594.70ms +grad accum step:2814/14336 +step:11256/57344 train_time:6694619ms step_avg:594.76ms +step:11257/57344 train_time:6694635ms step_avg:594.71ms +step:11258/57344 train_time:6694883ms step_avg:594.68ms +step:11259/57344 train_time:6695424ms step_avg:594.67ms +grad accum step:2815/14336 +step:11260/57344 train_time:6696697ms step_avg:594.73ms +step:11261/57344 train_time:6696714ms step_avg:594.68ms +step:11262/57344 train_time:6696954ms step_avg:594.65ms +step:11263/57344 train_time:6697486ms step_avg:594.64ms +grad accum step:2816/14336 +step:11264/57344 train_time:6698758ms step_avg:594.71ms +step:11264/57344 val_loss:6.825552 train_time:6698758ms step_avg:594.71ms +step:11265/57344 train_time:6698770ms step_avg:594.65ms +step:11266/57344 train_time:6698988ms step_avg:594.62ms +step:11267/57344 train_time:6699514ms step_avg:594.61ms +grad accum step:2817/14336 +step:11268/57344 train_time:6700778ms step_avg:594.67ms +step:11269/57344 train_time:6700796ms step_avg:594.62ms +step:11270/57344 train_time:6701039ms step_avg:594.59ms +step:11271/57344 train_time:6701567ms step_avg:594.58ms +grad accum step:2818/14336 +step:11272/57344 train_time:6702840ms step_avg:594.65ms +step:11273/57344 train_time:6702857ms step_avg:594.59ms +step:11274/57344 train_time:6703099ms step_avg:594.56ms +step:11275/57344 train_time:6703627ms step_avg:594.56ms +grad accum step:2819/14336 +step:11276/57344 train_time:6704880ms step_avg:594.62ms +step:11277/57344 train_time:6704897ms step_avg:594.56ms +step:11278/57344 train_time:6705143ms step_avg:594.53ms +step:11279/57344 train_time:6705673ms step_avg:594.53ms +grad accum step:2820/14336 +step:11280/57344 train_time:6706949ms step_avg:594.59ms +step:11281/57344 train_time:6706966ms step_avg:594.54ms +step:11282/57344 train_time:6707213ms step_avg:594.51ms +step:11283/57344 train_time:6707760ms step_avg:594.50ms +grad accum step:2821/14336 +step:11284/57344 train_time:6709034ms step_avg:594.56ms +step:11285/57344 train_time:6709051ms step_avg:594.51ms +step:11286/57344 train_time:6709292ms step_avg:594.48ms +step:11287/57344 train_time:6709816ms step_avg:594.47ms +grad accum step:2822/14336 +step:11288/57344 train_time:6711099ms step_avg:594.53ms +step:11289/57344 train_time:6711116ms step_avg:594.48ms +step:11290/57344 train_time:6711361ms step_avg:594.45ms +step:11291/57344 train_time:6711900ms step_avg:594.45ms +grad accum step:2823/14336 +step:11292/57344 train_time:6713165ms step_avg:594.51ms +step:11293/57344 train_time:6713182ms step_avg:594.46ms +step:11294/57344 train_time:6713424ms step_avg:594.42ms +step:11295/57344 train_time:6713956ms step_avg:594.42ms +grad accum step:2824/14336 +step:11296/57344 train_time:6715249ms step_avg:594.48ms +step:11297/57344 train_time:6715266ms step_avg:594.43ms +step:11298/57344 train_time:6715511ms step_avg:594.40ms +step:11299/57344 train_time:6716050ms step_avg:594.39ms +grad accum step:2825/14336 +step:11300/57344 train_time:6717327ms step_avg:594.45ms +step:11301/57344 train_time:6717344ms step_avg:594.40ms +step:11302/57344 train_time:6717585ms step_avg:594.37ms +step:11303/57344 train_time:6718113ms step_avg:594.37ms +grad accum step:2826/14336 +step:11304/57344 train_time:6719389ms step_avg:594.43ms +step:11305/57344 train_time:6719406ms step_avg:594.37ms +step:11306/57344 train_time:6719648ms step_avg:594.34ms +step:11307/57344 train_time:6720177ms step_avg:594.34ms +grad accum step:2827/14336 +step:11308/57344 train_time:6721453ms step_avg:594.40ms +step:11309/57344 train_time:6721470ms step_avg:594.35ms +step:11310/57344 train_time:6721715ms step_avg:594.32ms +step:11311/57344 train_time:6722258ms step_avg:594.31ms +grad accum step:2828/14336 +step:11312/57344 train_time:6723538ms step_avg:594.37ms +step:11313/57344 train_time:6723555ms step_avg:594.32ms +step:11314/57344 train_time:6723798ms step_avg:594.29ms +step:11315/57344 train_time:6724332ms step_avg:594.28ms +grad accum step:2829/14336 +step:11316/57344 train_time:6725607ms step_avg:594.34ms +step:11317/57344 train_time:6725624ms step_avg:594.29ms +step:11318/57344 train_time:6725868ms step_avg:594.26ms +step:11319/57344 train_time:6726404ms step_avg:594.26ms +grad accum step:2830/14336 +step:11320/57344 train_time:6727668ms step_avg:594.32ms +step:11321/57344 train_time:6727685ms step_avg:594.27ms +step:11322/57344 train_time:6727934ms step_avg:594.24ms +step:11323/57344 train_time:6728475ms step_avg:594.23ms +grad accum step:2831/14336 +step:11324/57344 train_time:6729739ms step_avg:594.29ms +step:11325/57344 train_time:6729756ms step_avg:594.24ms +step:11326/57344 train_time:6729999ms step_avg:594.21ms +step:11327/57344 train_time:6730533ms step_avg:594.20ms +grad accum step:2832/14336 +step:11328/57344 train_time:6731798ms step_avg:594.26ms +step:11328/57344 val_loss:6.823633 train_time:6731799ms step_avg:594.26ms +step:11329/57344 train_time:6731811ms step_avg:594.21ms +step:11330/57344 train_time:6732034ms step_avg:594.18ms +step:11331/57344 train_time:6732569ms step_avg:594.17ms +grad accum step:2833/14336 +step:11332/57344 train_time:6733850ms step_avg:594.23ms +step:11333/57344 train_time:6733867ms step_avg:594.18ms +step:11334/57344 train_time:6734108ms step_avg:594.15ms +step:11335/57344 train_time:6734641ms step_avg:594.15ms +grad accum step:2834/14336 +step:11336/57344 train_time:6735910ms step_avg:594.21ms +step:11337/57344 train_time:6735927ms step_avg:594.15ms +step:11338/57344 train_time:6736171ms step_avg:594.12ms +step:11339/57344 train_time:6736715ms step_avg:594.12ms +grad accum step:2835/14336 +step:11340/57344 train_time:6737979ms step_avg:594.18ms +step:11341/57344 train_time:6737996ms step_avg:594.13ms +step:11342/57344 train_time:6738243ms step_avg:594.10ms +step:11343/57344 train_time:6738785ms step_avg:594.09ms +grad accum step:2836/14336 +step:11344/57344 train_time:6740062ms step_avg:594.15ms +step:11345/57344 train_time:6740078ms step_avg:594.10ms +step:11346/57344 train_time:6740324ms step_avg:594.07ms +step:11347/57344 train_time:6740864ms step_avg:594.07ms +grad accum step:2837/14336 +step:11348/57344 train_time:6742138ms step_avg:594.13ms +step:11349/57344 train_time:6742155ms step_avg:594.07ms +step:11350/57344 train_time:6742397ms step_avg:594.04ms +step:11351/57344 train_time:6742926ms step_avg:594.04ms +grad accum step:2838/14336 +step:11352/57344 train_time:6744205ms step_avg:594.10ms +step:11353/57344 train_time:6744223ms step_avg:594.05ms +step:11354/57344 train_time:6744468ms step_avg:594.02ms +step:11355/57344 train_time:6745011ms step_avg:594.01ms +grad accum step:2839/14336 +step:11356/57344 train_time:6746290ms step_avg:594.07ms +step:11357/57344 train_time:6746307ms step_avg:594.02ms +step:11358/57344 train_time:6746549ms step_avg:593.99ms +step:11359/57344 train_time:6747084ms step_avg:593.99ms +grad accum step:2840/14336 +step:11360/57344 train_time:6748347ms step_avg:594.04ms +step:11361/57344 train_time:6748363ms step_avg:593.99ms +step:11362/57344 train_time:6748607ms step_avg:593.96ms +step:11363/57344 train_time:6749138ms step_avg:593.96ms +grad accum step:2841/14336 +step:11364/57344 train_time:6750417ms step_avg:594.02ms +step:11365/57344 train_time:6750434ms step_avg:593.97ms +step:11366/57344 train_time:6750680ms step_avg:593.94ms +step:11367/57344 train_time:6751221ms step_avg:593.93ms +grad accum step:2842/14336 +step:11368/57344 train_time:6752501ms step_avg:593.99ms +step:11369/57344 train_time:6752518ms step_avg:593.94ms +step:11370/57344 train_time:6752761ms step_avg:593.91ms +step:11371/57344 train_time:6753301ms step_avg:593.91ms +grad accum step:2843/14336 +step:11372/57344 train_time:6754559ms step_avg:593.96ms +step:11373/57344 train_time:6754577ms step_avg:593.91ms +step:11374/57344 train_time:6754817ms step_avg:593.88ms +step:11375/57344 train_time:6755342ms step_avg:593.88ms +grad accum step:2844/14336 +step:11376/57344 train_time:6756619ms step_avg:593.94ms +step:11377/57344 train_time:6756635ms step_avg:593.89ms +step:11378/57344 train_time:6756877ms step_avg:593.85ms +step:11379/57344 train_time:6757406ms step_avg:593.85ms +grad accum step:2845/14336 +step:11380/57344 train_time:6758678ms step_avg:593.91ms +step:11381/57344 train_time:6758694ms step_avg:593.86ms +step:11382/57344 train_time:6758935ms step_avg:593.83ms +step:11383/57344 train_time:6759472ms step_avg:593.82ms +grad accum step:2846/14336 +step:11384/57344 train_time:6760747ms step_avg:593.88ms +step:11385/57344 train_time:6760764ms step_avg:593.83ms +step:11386/57344 train_time:6761010ms step_avg:593.80ms +step:11387/57344 train_time:6761555ms step_avg:593.80ms +grad accum step:2847/14336 +step:11388/57344 train_time:6764434ms step_avg:594.00ms +step:11389/57344 train_time:6764446ms step_avg:593.95ms +step:11390/57344 train_time:6764655ms step_avg:593.91ms +step:11391/57344 train_time:6765186ms step_avg:593.91ms +grad accum step:2848/14336 +step:11392/57344 train_time:6766457ms step_avg:593.97ms +step:11392/57344 val_loss:6.831137 train_time:6766457ms step_avg:593.97ms +step:11393/57344 train_time:6766826ms step_avg:593.95ms +step:11394/57344 train_time:6766891ms step_avg:593.90ms +step:11395/57344 train_time:6767331ms step_avg:593.89ms +grad accum step:2849/14336 +step:11396/57344 train_time:6768657ms step_avg:593.95ms +step:11397/57344 train_time:6768669ms step_avg:593.90ms +step:11398/57344 train_time:6768883ms step_avg:593.87ms +step:11399/57344 train_time:6769416ms step_avg:593.86ms +grad accum step:2850/14336 +step:11400/57344 train_time:6770683ms step_avg:593.92ms +step:11401/57344 train_time:6770700ms step_avg:593.87ms +step:11402/57344 train_time:6770947ms step_avg:593.84ms +step:11403/57344 train_time:6771492ms step_avg:593.83ms +grad accum step:2851/14336 +step:11404/57344 train_time:6772771ms step_avg:593.89ms +step:11405/57344 train_time:6772788ms step_avg:593.84ms +step:11406/57344 train_time:6773031ms step_avg:593.81ms +step:11407/57344 train_time:6773570ms step_avg:593.81ms +grad accum step:2852/14336 +step:11408/57344 train_time:6774849ms step_avg:593.87ms +step:11409/57344 train_time:6774866ms step_avg:593.82ms +step:11410/57344 train_time:6775110ms step_avg:593.79ms +step:11411/57344 train_time:6775642ms step_avg:593.78ms +grad accum step:2853/14336 +step:11412/57344 train_time:6776926ms step_avg:593.84ms +step:11413/57344 train_time:6776943ms step_avg:593.79ms +step:11414/57344 train_time:6777185ms step_avg:593.76ms +step:11415/57344 train_time:6777720ms step_avg:593.76ms +grad accum step:2854/14336 +step:11416/57344 train_time:6778995ms step_avg:593.82ms +step:11417/57344 train_time:6779012ms step_avg:593.76ms +step:11418/57344 train_time:6779254ms step_avg:593.73ms +step:11419/57344 train_time:6779783ms step_avg:593.73ms +grad accum step:2855/14336 +step:11420/57344 train_time:6781062ms step_avg:593.79ms +step:11421/57344 train_time:6781079ms step_avg:593.74ms +step:11422/57344 train_time:6781320ms step_avg:593.71ms +step:11423/57344 train_time:6781849ms step_avg:593.70ms +grad accum step:2856/14336 +step:11424/57344 train_time:6783125ms step_avg:593.76ms +step:11425/57344 train_time:6783142ms step_avg:593.71ms +step:11426/57344 train_time:6783383ms step_avg:593.68ms +step:11427/57344 train_time:6783913ms step_avg:593.67ms +grad accum step:2857/14336 +step:11428/57344 train_time:6785180ms step_avg:593.73ms +step:11429/57344 train_time:6785197ms step_avg:593.68ms +step:11430/57344 train_time:6785443ms step_avg:593.65ms +step:11431/57344 train_time:6785974ms step_avg:593.65ms +grad accum step:2858/14336 +step:11432/57344 train_time:6787248ms step_avg:593.71ms +step:11433/57344 train_time:6787266ms step_avg:593.66ms +step:11434/57344 train_time:6787510ms step_avg:593.63ms +step:11435/57344 train_time:6788050ms step_avg:593.62ms +grad accum step:2859/14336 +step:11436/57344 train_time:6789312ms step_avg:593.68ms +step:11437/57344 train_time:6789329ms step_avg:593.63ms +step:11438/57344 train_time:6789578ms step_avg:593.60ms +step:11439/57344 train_time:6790133ms step_avg:593.59ms +grad accum step:2860/14336 +step:11440/57344 train_time:6791415ms step_avg:593.66ms +step:11441/57344 train_time:6791432ms step_avg:593.60ms +step:11442/57344 train_time:6791676ms step_avg:593.57ms +step:11443/57344 train_time:6792203ms step_avg:593.57ms +grad accum step:2861/14336 +step:11444/57344 train_time:6793484ms step_avg:593.63ms +step:11445/57344 train_time:6793501ms step_avg:593.58ms +step:11446/57344 train_time:6793742ms step_avg:593.55ms +step:11447/57344 train_time:6794278ms step_avg:593.54ms +grad accum step:2862/14336 +step:11448/57344 train_time:6795540ms step_avg:593.60ms +step:11449/57344 train_time:6795557ms step_avg:593.55ms +step:11450/57344 train_time:6795798ms step_avg:593.52ms +step:11451/57344 train_time:6796328ms step_avg:593.51ms +grad accum step:2863/14336 +step:11452/57344 train_time:6797612ms step_avg:593.57ms +step:11453/57344 train_time:6797629ms step_avg:593.52ms +step:11454/57344 train_time:6797872ms step_avg:593.49ms +step:11455/57344 train_time:6798411ms step_avg:593.49ms +grad accum step:2864/14336 +step:11456/57344 train_time:6799691ms step_avg:593.55ms +step:11456/57344 val_loss:6.834339 train_time:6799691ms step_avg:593.55ms +step:11457/57344 train_time:6799703ms step_avg:593.50ms +step:11458/57344 train_time:6799923ms step_avg:593.47ms +step:11459/57344 train_time:6800459ms step_avg:593.46ms +grad accum step:2865/14336 +step:11460/57344 train_time:6801719ms step_avg:593.52ms +step:11461/57344 train_time:6801737ms step_avg:593.47ms +step:11462/57344 train_time:6801976ms step_avg:593.44ms +step:11463/57344 train_time:6802509ms step_avg:593.43ms +grad accum step:2866/14336 +step:11464/57344 train_time:6803787ms step_avg:593.49ms +step:11465/57344 train_time:6803804ms step_avg:593.44ms +step:11466/57344 train_time:6804044ms step_avg:593.41ms +step:11467/57344 train_time:6804567ms step_avg:593.40ms +grad accum step:2867/14336 +step:11468/57344 train_time:6805832ms step_avg:593.46ms +step:11469/57344 train_time:6805849ms step_avg:593.41ms +step:11470/57344 train_time:6806092ms step_avg:593.38ms +step:11471/57344 train_time:6806627ms step_avg:593.38ms +grad accum step:2868/14336 +step:11472/57344 train_time:6807942ms step_avg:593.44ms +step:11473/57344 train_time:6807958ms step_avg:593.39ms +step:11474/57344 train_time:6808201ms step_avg:593.36ms +step:11475/57344 train_time:6808740ms step_avg:593.35ms +grad accum step:2869/14336 +step:11476/57344 train_time:6810017ms step_avg:593.41ms +step:11477/57344 train_time:6810035ms step_avg:593.36ms +step:11478/57344 train_time:6810276ms step_avg:593.33ms +step:11479/57344 train_time:6810803ms step_avg:593.33ms +grad accum step:2870/14336 +step:11480/57344 train_time:6812079ms step_avg:593.39ms +step:11481/57344 train_time:6812096ms step_avg:593.34ms +step:11482/57344 train_time:6812339ms step_avg:593.31ms +step:11483/57344 train_time:6812881ms step_avg:593.30ms +grad accum step:2871/14336 +step:11484/57344 train_time:6814157ms step_avg:593.36ms +step:11485/57344 train_time:6814174ms step_avg:593.31ms +step:11486/57344 train_time:6814415ms step_avg:593.28ms +step:11487/57344 train_time:6814944ms step_avg:593.27ms +grad accum step:2872/14336 +step:11488/57344 train_time:6816226ms step_avg:593.33ms +step:11489/57344 train_time:6816243ms step_avg:593.28ms +step:11490/57344 train_time:6816487ms step_avg:593.25ms +step:11491/57344 train_time:6817027ms step_avg:593.25ms +grad accum step:2873/14336 +step:11492/57344 train_time:6818306ms step_avg:593.31ms +step:11493/57344 train_time:6818323ms step_avg:593.26ms +step:11494/57344 train_time:6818565ms step_avg:593.23ms +step:11495/57344 train_time:6819096ms step_avg:593.22ms +grad accum step:2874/14336 +step:11496/57344 train_time:6820373ms step_avg:593.28ms +step:11497/57344 train_time:6820389ms step_avg:593.23ms +step:11498/57344 train_time:6820632ms step_avg:593.20ms +step:11499/57344 train_time:6821173ms step_avg:593.20ms +grad accum step:2875/14336 +step:11500/57344 train_time:6822576ms step_avg:593.27ms +step:11501/57344 train_time:6822593ms step_avg:593.22ms +step:11502/57344 train_time:6822837ms step_avg:593.19ms +step:11503/57344 train_time:6823366ms step_avg:593.18ms +grad accum step:2876/14336 +step:11504/57344 train_time:6824627ms step_avg:593.24ms +step:11505/57344 train_time:6824644ms step_avg:593.19ms +step:11506/57344 train_time:6824887ms step_avg:593.16ms +step:11507/57344 train_time:6825421ms step_avg:593.15ms +grad accum step:2877/14336 +step:11508/57344 train_time:6826683ms step_avg:593.21ms +step:11509/57344 train_time:6826700ms step_avg:593.16ms +step:11510/57344 train_time:6826943ms step_avg:593.13ms +step:11511/57344 train_time:6827478ms step_avg:593.13ms +grad accum step:2878/14336 +step:11512/57344 train_time:6828772ms step_avg:593.19ms +step:11513/57344 train_time:6828789ms step_avg:593.14ms +step:11514/57344 train_time:6829032ms step_avg:593.11ms +step:11515/57344 train_time:6829571ms step_avg:593.10ms +grad accum step:2879/14336 +step:11516/57344 train_time:6830836ms step_avg:593.16ms +step:11517/57344 train_time:6830853ms step_avg:593.11ms +step:11518/57344 train_time:6831095ms step_avg:593.08ms +step:11519/57344 train_time:6831624ms step_avg:593.07ms +grad accum step:2880/14336 +step:11520/57344 train_time:6832886ms step_avg:593.13ms +step:11520/57344 val_loss:6.832121 train_time:6832887ms step_avg:593.13ms +step:11521/57344 train_time:6832899ms step_avg:593.08ms +step:11522/57344 train_time:6833118ms step_avg:593.05ms +step:11523/57344 train_time:6833646ms step_avg:593.04ms +grad accum step:2881/14336 +step:11524/57344 train_time:6834956ms step_avg:593.11ms +step:11525/57344 train_time:6834973ms step_avg:593.06ms +step:11526/57344 train_time:6835214ms step_avg:593.03ms +step:11527/57344 train_time:6835754ms step_avg:593.02ms +grad accum step:2882/14336 +step:11528/57344 train_time:6837034ms step_avg:593.08ms +step:11529/57344 train_time:6837051ms step_avg:593.03ms +step:11530/57344 train_time:6837291ms step_avg:593.00ms +step:11531/57344 train_time:6837821ms step_avg:592.99ms +grad accum step:2883/14336 +step:11532/57344 train_time:6839099ms step_avg:593.05ms +step:11533/57344 train_time:6839116ms step_avg:593.00ms +step:11534/57344 train_time:6839359ms step_avg:592.97ms +step:11535/57344 train_time:6839900ms step_avg:592.97ms +grad accum step:2884/14336 +step:11536/57344 train_time:6841180ms step_avg:593.03ms +step:11537/57344 train_time:6841198ms step_avg:592.98ms +step:11538/57344 train_time:6841444ms step_avg:592.95ms +step:11539/57344 train_time:6841989ms step_avg:592.94ms +grad accum step:2885/14336 +step:11540/57344 train_time:6843289ms step_avg:593.01ms +step:11541/57344 train_time:6843305ms step_avg:592.96ms +step:11542/57344 train_time:6843548ms step_avg:592.93ms +step:11543/57344 train_time:6844083ms step_avg:592.92ms +grad accum step:2886/14336 +step:11544/57344 train_time:6845353ms step_avg:592.98ms +step:11545/57344 train_time:6845370ms step_avg:592.93ms +step:11546/57344 train_time:6845610ms step_avg:592.90ms +step:11547/57344 train_time:6846146ms step_avg:592.89ms +grad accum step:2887/14336 +step:11548/57344 train_time:6847427ms step_avg:592.95ms +step:11549/57344 train_time:6847444ms step_avg:592.90ms +step:11550/57344 train_time:6847690ms step_avg:592.87ms +step:11551/57344 train_time:6848236ms step_avg:592.87ms +grad accum step:2888/14336 +step:11552/57344 train_time:6849505ms step_avg:592.93ms +step:11553/57344 train_time:6849522ms step_avg:592.88ms +step:11554/57344 train_time:6849768ms step_avg:592.85ms +step:11555/57344 train_time:6850314ms step_avg:592.84ms +grad accum step:2889/14336 +step:11556/57344 train_time:6851596ms step_avg:592.90ms +step:11557/57344 train_time:6851613ms step_avg:592.85ms +step:11558/57344 train_time:6851865ms step_avg:592.82ms +step:11559/57344 train_time:6852421ms step_avg:592.82ms +grad accum step:2890/14336 +step:11560/57344 train_time:6853687ms step_avg:592.88ms +step:11561/57344 train_time:6853703ms step_avg:592.83ms +step:11562/57344 train_time:6853948ms step_avg:592.80ms +step:11563/57344 train_time:6854483ms step_avg:592.79ms +grad accum step:2891/14336 +step:11564/57344 train_time:6855748ms step_avg:592.85ms +step:11565/57344 train_time:6855765ms step_avg:592.80ms +step:11566/57344 train_time:6856008ms step_avg:592.77ms +step:11567/57344 train_time:6856546ms step_avg:592.77ms +grad accum step:2892/14336 +step:11568/57344 train_time:6857823ms step_avg:592.83ms +step:11569/57344 train_time:6857840ms step_avg:592.78ms +step:11570/57344 train_time:6858086ms step_avg:592.75ms +step:11571/57344 train_time:6858631ms step_avg:592.74ms +grad accum step:2893/14336 +step:11572/57344 train_time:6859914ms step_avg:592.80ms +step:11573/57344 train_time:6859930ms step_avg:592.75ms +step:11574/57344 train_time:6860171ms step_avg:592.72ms +step:11575/57344 train_time:6860708ms step_avg:592.72ms +grad accum step:2894/14336 +step:11576/57344 train_time:6861981ms step_avg:592.78ms +step:11577/57344 train_time:6861997ms step_avg:592.73ms +step:11578/57344 train_time:6862244ms step_avg:592.70ms +step:11579/57344 train_time:6862783ms step_avg:592.69ms +grad accum step:2895/14336 +step:11580/57344 train_time:6864059ms step_avg:592.75ms +step:11581/57344 train_time:6864075ms step_avg:592.70ms +step:11582/57344 train_time:6864321ms step_avg:592.67ms +step:11583/57344 train_time:6864871ms step_avg:592.67ms +grad accum step:2896/14336 +step:11584/57344 train_time:6866150ms step_avg:592.73ms +step:11584/57344 val_loss:6.843909 train_time:6866151ms step_avg:592.73ms +step:11585/57344 train_time:6866163ms step_avg:592.68ms +step:11586/57344 train_time:6866381ms step_avg:592.64ms +step:11587/57344 train_time:6866922ms step_avg:592.64ms +grad accum step:2897/14336 +step:11588/57344 train_time:6868216ms step_avg:592.70ms +step:11589/57344 train_time:6868234ms step_avg:592.65ms +step:11590/57344 train_time:6868480ms step_avg:592.62ms +step:11591/57344 train_time:6869020ms step_avg:592.62ms +grad accum step:2898/14336 +step:11592/57344 train_time:6870298ms step_avg:592.68ms +step:11593/57344 train_time:6870315ms step_avg:592.63ms +step:11594/57344 train_time:6870563ms step_avg:592.60ms +step:11595/57344 train_time:6871104ms step_avg:592.59ms +grad accum step:2899/14336 +step:11596/57344 train_time:6872364ms step_avg:592.65ms +step:11597/57344 train_time:6872381ms step_avg:592.60ms +step:11598/57344 train_time:6872622ms step_avg:592.57ms +step:11599/57344 train_time:6873145ms step_avg:592.56ms +grad accum step:2900/14336 +step:11600/57344 train_time:6874425ms step_avg:592.62ms +step:11601/57344 train_time:6874442ms step_avg:592.57ms +step:11602/57344 train_time:6874682ms step_avg:592.54ms +step:11603/57344 train_time:6875212ms step_avg:592.54ms +grad accum step:2901/14336 +step:11604/57344 train_time:6876492ms step_avg:592.60ms +step:11605/57344 train_time:6876509ms step_avg:592.55ms +step:11606/57344 train_time:6876750ms step_avg:592.52ms +step:11607/57344 train_time:6877279ms step_avg:592.51ms +grad accum step:2902/14336 +step:11608/57344 train_time:6878555ms step_avg:592.57ms +step:11609/57344 train_time:6878572ms step_avg:592.52ms +step:11610/57344 train_time:6878815ms step_avg:592.49ms +step:11611/57344 train_time:6879359ms step_avg:592.49ms +grad accum step:2903/14336 +step:11612/57344 train_time:6880657ms step_avg:592.55ms +step:11613/57344 train_time:6880674ms step_avg:592.50ms +step:11614/57344 train_time:6880921ms step_avg:592.47ms +step:11615/57344 train_time:6881470ms step_avg:592.46ms +grad accum step:2904/14336 +step:11616/57344 train_time:6882737ms step_avg:592.52ms +step:11617/57344 train_time:6882754ms step_avg:592.47ms +step:11618/57344 train_time:6882994ms step_avg:592.44ms +step:11619/57344 train_time:6883518ms step_avg:592.44ms +grad accum step:2905/14336 +step:11620/57344 train_time:6884788ms step_avg:592.49ms +step:11621/57344 train_time:6884805ms step_avg:592.45ms +step:11622/57344 train_time:6885050ms step_avg:592.42ms +step:11623/57344 train_time:6885588ms step_avg:592.41ms +grad accum step:2906/14336 +step:11624/57344 train_time:6886853ms step_avg:592.47ms +step:11625/57344 train_time:6886870ms step_avg:592.42ms +step:11626/57344 train_time:6887583ms step_avg:592.43ms +step:11627/57344 train_time:6887830ms step_avg:592.40ms +grad accum step:2907/14336 +step:11628/57344 train_time:6889097ms step_avg:592.46ms +step:11629/57344 train_time:6889114ms step_avg:592.41ms +step:11630/57344 train_time:6889359ms step_avg:592.38ms +step:11631/57344 train_time:6889898ms step_avg:592.37ms +grad accum step:2908/14336 +step:11632/57344 train_time:6891158ms step_avg:592.43ms +step:11633/57344 train_time:6891175ms step_avg:592.38ms +step:11634/57344 train_time:6891415ms step_avg:592.35ms +step:11635/57344 train_time:6891949ms step_avg:592.35ms +grad accum step:2909/14336 +step:11636/57344 train_time:6893228ms step_avg:592.41ms +step:11637/57344 train_time:6893245ms step_avg:592.36ms +step:11638/57344 train_time:6893488ms step_avg:592.33ms +step:11639/57344 train_time:6894020ms step_avg:592.32ms +grad accum step:2910/14336 +step:11640/57344 train_time:6895306ms step_avg:592.38ms +step:11641/57344 train_time:6895323ms step_avg:592.33ms +step:11642/57344 train_time:6895566ms step_avg:592.30ms +step:11643/57344 train_time:6896101ms step_avg:592.30ms +grad accum step:2911/14336 +step:11644/57344 train_time:6897373ms step_avg:592.35ms +step:11645/57344 train_time:6897389ms step_avg:592.30ms +step:11646/57344 train_time:6897634ms step_avg:592.27ms +step:11647/57344 train_time:6898170ms step_avg:592.27ms +grad accum step:2912/14336 +step:11648/57344 train_time:6899448ms step_avg:592.33ms +step:11648/57344 val_loss:6.840989 train_time:6899449ms step_avg:592.33ms +step:11649/57344 train_time:6899461ms step_avg:592.28ms +step:11650/57344 train_time:6899681ms step_avg:592.25ms +step:11651/57344 train_time:6900212ms step_avg:592.24ms +grad accum step:2913/14336 +step:11652/57344 train_time:6901482ms step_avg:592.30ms +step:11653/57344 train_time:6901499ms step_avg:592.25ms +step:11654/57344 train_time:6901742ms step_avg:592.22ms +step:11655/57344 train_time:6902275ms step_avg:592.22ms +grad accum step:2914/14336 +step:11656/57344 train_time:6903556ms step_avg:592.27ms +step:11657/57344 train_time:6903572ms step_avg:592.23ms +step:11658/57344 train_time:6903817ms step_avg:592.20ms +step:11659/57344 train_time:6904357ms step_avg:592.19ms +grad accum step:2915/14336 +step:11660/57344 train_time:6905635ms step_avg:592.25ms +step:11661/57344 train_time:6905652ms step_avg:592.20ms +step:11662/57344 train_time:6905896ms step_avg:592.17ms +step:11663/57344 train_time:6906437ms step_avg:592.17ms +grad accum step:2916/14336 +step:11664/57344 train_time:6907721ms step_avg:592.23ms +step:11665/57344 train_time:6907738ms step_avg:592.18ms +step:11666/57344 train_time:6907980ms step_avg:592.15ms +step:11667/57344 train_time:6908518ms step_avg:592.14ms +grad accum step:2917/14336 +step:11668/57344 train_time:6909783ms step_avg:592.20ms +step:11669/57344 train_time:6909800ms step_avg:592.15ms +step:11670/57344 train_time:6910043ms step_avg:592.12ms +step:11671/57344 train_time:6910569ms step_avg:592.11ms +grad accum step:2918/14336 +step:11672/57344 train_time:6911830ms step_avg:592.17ms +step:11673/57344 train_time:6911848ms step_avg:592.12ms +step:11674/57344 train_time:6912090ms step_avg:592.09ms +step:11675/57344 train_time:6912617ms step_avg:592.09ms +grad accum step:2919/14336 +step:11676/57344 train_time:6913892ms step_avg:592.15ms +step:11677/57344 train_time:6913909ms step_avg:592.10ms +step:11678/57344 train_time:6914150ms step_avg:592.07ms +step:11679/57344 train_time:6914683ms step_avg:592.06ms +grad accum step:2920/14336 +step:11680/57344 train_time:6915957ms step_avg:592.12ms +step:11681/57344 train_time:6915974ms step_avg:592.07ms +step:11682/57344 train_time:6916215ms step_avg:592.04ms +step:11683/57344 train_time:6916744ms step_avg:592.03ms +grad accum step:2921/14336 +step:11684/57344 train_time:6918058ms step_avg:592.10ms +step:11685/57344 train_time:6918075ms step_avg:592.05ms +step:11686/57344 train_time:6918316ms step_avg:592.02ms +step:11687/57344 train_time:6918846ms step_avg:592.01ms +grad accum step:2922/14336 +step:11688/57344 train_time:6920107ms step_avg:592.07ms +step:11689/57344 train_time:6920124ms step_avg:592.02ms +step:11690/57344 train_time:6920366ms step_avg:591.99ms +step:11691/57344 train_time:6920900ms step_avg:591.99ms +grad accum step:2923/14336 +step:11692/57344 train_time:6922175ms step_avg:592.04ms +step:11693/57344 train_time:6922192ms step_avg:591.99ms +step:11694/57344 train_time:6922435ms step_avg:591.96ms +step:11695/57344 train_time:6922965ms step_avg:591.96ms +grad accum step:2924/14336 +step:11696/57344 train_time:6924241ms step_avg:592.02ms +step:11697/57344 train_time:6924258ms step_avg:591.97ms +step:11698/57344 train_time:6924498ms step_avg:591.94ms +step:11699/57344 train_time:6925025ms step_avg:591.93ms +grad accum step:2925/14336 +step:11700/57344 train_time:6926307ms step_avg:591.99ms +step:11701/57344 train_time:6926324ms step_avg:591.94ms +step:11702/57344 train_time:6926570ms step_avg:591.91ms +step:11703/57344 train_time:6927116ms step_avg:591.91ms +grad accum step:2926/14336 +step:11704/57344 train_time:6928390ms step_avg:591.97ms +step:11705/57344 train_time:6928408ms step_avg:591.92ms +step:11706/57344 train_time:6928651ms step_avg:591.89ms +step:11707/57344 train_time:6929189ms step_avg:591.88ms +grad accum step:2927/14336 +step:11708/57344 train_time:6930481ms step_avg:591.94ms +step:11709/57344 train_time:6930498ms step_avg:591.89ms +step:11710/57344 train_time:6930742ms step_avg:591.87ms +step:11711/57344 train_time:6931284ms step_avg:591.86ms +grad accum step:2928/14336 +step:11712/57344 train_time:6932546ms step_avg:591.92ms +step:11712/57344 val_loss:6.845339 train_time:6932546ms step_avg:591.92ms +step:11713/57344 train_time:6932558ms step_avg:591.87ms +step:11714/57344 train_time:6932776ms step_avg:591.84ms +step:11715/57344 train_time:6933310ms step_avg:591.83ms +grad accum step:2929/14336 +step:11716/57344 train_time:6934587ms step_avg:591.89ms +step:11717/57344 train_time:6934604ms step_avg:591.84ms +step:11718/57344 train_time:6934847ms step_avg:591.81ms +step:11719/57344 train_time:6935377ms step_avg:591.81ms +grad accum step:2930/14336 +step:11720/57344 train_time:6936646ms step_avg:591.86ms +step:11721/57344 train_time:6936663ms step_avg:591.81ms +step:11722/57344 train_time:6936907ms step_avg:591.79ms +step:11723/57344 train_time:6937448ms step_avg:591.78ms +grad accum step:2931/14336 +step:11724/57344 train_time:6938743ms step_avg:591.84ms +step:11725/57344 train_time:6938760ms step_avg:591.79ms +step:11726/57344 train_time:6939005ms step_avg:591.76ms +step:11727/57344 train_time:6939545ms step_avg:591.76ms +grad accum step:2932/14336 +step:11728/57344 train_time:6940825ms step_avg:591.82ms +step:11729/57344 train_time:6940842ms step_avg:591.77ms +step:11730/57344 train_time:6941087ms step_avg:591.74ms +step:11731/57344 train_time:6941629ms step_avg:591.73ms +grad accum step:2933/14336 +step:11732/57344 train_time:6942907ms step_avg:591.79ms +step:11733/57344 train_time:6942924ms step_avg:591.74ms +step:11734/57344 train_time:6943168ms step_avg:591.71ms +step:11735/57344 train_time:6943711ms step_avg:591.71ms +grad accum step:2934/14336 +step:11736/57344 train_time:6944995ms step_avg:591.77ms +step:11737/57344 train_time:6945012ms step_avg:591.72ms +step:11738/57344 train_time:6945253ms step_avg:591.69ms +step:11739/57344 train_time:6945788ms step_avg:591.68ms +grad accum step:2935/14336 +step:11740/57344 train_time:6947056ms step_avg:591.74ms +step:11741/57344 train_time:6947073ms step_avg:591.69ms +step:11742/57344 train_time:6947316ms step_avg:591.66ms +step:11743/57344 train_time:6947859ms step_avg:591.66ms +grad accum step:2936/14336 +step:11744/57344 train_time:6949139ms step_avg:591.72ms +step:11745/57344 train_time:6949156ms step_avg:591.67ms +step:11746/57344 train_time:6949398ms step_avg:591.64ms +step:11747/57344 train_time:6949924ms step_avg:591.63ms +grad accum step:2937/14336 +step:11748/57344 train_time:6951183ms step_avg:591.69ms +step:11749/57344 train_time:6951200ms step_avg:591.64ms +step:11750/57344 train_time:6951446ms step_avg:591.61ms +step:11751/57344 train_time:6951994ms step_avg:591.61ms +grad accum step:2938/14336 +step:11752/57344 train_time:6953250ms step_avg:591.67ms +step:11753/57344 train_time:6953267ms step_avg:591.62ms +step:11754/57344 train_time:6953509ms step_avg:591.59ms +step:11755/57344 train_time:6954039ms step_avg:591.58ms +grad accum step:2939/14336 +step:11756/57344 train_time:6955316ms step_avg:591.64ms +step:11757/57344 train_time:6955333ms step_avg:591.59ms +step:11758/57344 train_time:6955579ms step_avg:591.56ms +step:11759/57344 train_time:6956126ms step_avg:591.56ms +grad accum step:2940/14336 +step:11760/57344 train_time:6957403ms step_avg:591.62ms +step:11761/57344 train_time:6957420ms step_avg:591.57ms +step:11762/57344 train_time:6957659ms step_avg:591.54ms +step:11763/57344 train_time:6958184ms step_avg:591.53ms +grad accum step:2941/14336 +step:11764/57344 train_time:6959433ms step_avg:591.59ms +step:11765/57344 train_time:6959450ms step_avg:591.54ms +step:11766/57344 train_time:6959695ms step_avg:591.51ms +step:11767/57344 train_time:6960237ms step_avg:591.50ms +grad accum step:2942/14336 +step:11768/57344 train_time:6961518ms step_avg:591.56ms +step:11769/57344 train_time:6961535ms step_avg:591.51ms +step:11770/57344 train_time:6961777ms step_avg:591.48ms +step:11771/57344 train_time:6962311ms step_avg:591.48ms +grad accum step:2943/14336 +step:11772/57344 train_time:6963639ms step_avg:591.54ms +step:11773/57344 train_time:6963656ms step_avg:591.49ms +step:11774/57344 train_time:6963898ms step_avg:591.46ms +step:11775/57344 train_time:6964427ms step_avg:591.46ms +grad accum step:2944/14336 +step:11776/57344 train_time:6965689ms step_avg:591.52ms +step:11776/57344 val_loss:6.858196 train_time:6965690ms step_avg:591.52ms +step:11777/57344 train_time:6965702ms step_avg:591.47ms +step:11778/57344 train_time:6965921ms step_avg:591.43ms +step:11779/57344 train_time:6966448ms step_avg:591.43ms +grad accum step:2945/14336 +step:11780/57344 train_time:6967695ms step_avg:591.49ms +step:11781/57344 train_time:6967712ms step_avg:591.44ms +step:11782/57344 train_time:6967954ms step_avg:591.41ms +step:11783/57344 train_time:6968492ms step_avg:591.40ms +grad accum step:2946/14336 +step:11784/57344 train_time:6969759ms step_avg:591.46ms +step:11785/57344 train_time:6969776ms step_avg:591.41ms +step:11786/57344 train_time:6970020ms step_avg:591.38ms +step:11787/57344 train_time:6970563ms step_avg:591.38ms +grad accum step:2947/14336 +step:11788/57344 train_time:6971838ms step_avg:591.44ms +step:11789/57344 train_time:6971856ms step_avg:591.39ms +step:11790/57344 train_time:6972100ms step_avg:591.36ms +step:11791/57344 train_time:6972644ms step_avg:591.35ms +grad accum step:2948/14336 +step:11792/57344 train_time:6973923ms step_avg:591.41ms +step:11793/57344 train_time:6973940ms step_avg:591.36ms +step:11794/57344 train_time:6974182ms step_avg:591.33ms +step:11795/57344 train_time:6974709ms step_avg:591.33ms +grad accum step:2949/14336 +step:11796/57344 train_time:6975982ms step_avg:591.39ms +step:11797/57344 train_time:6976000ms step_avg:591.34ms +step:11798/57344 train_time:6976240ms step_avg:591.31ms +step:11799/57344 train_time:6976770ms step_avg:591.30ms +grad accum step:2950/14336 +step:11800/57344 train_time:6978032ms step_avg:591.36ms +step:11801/57344 train_time:6978050ms step_avg:591.31ms +step:11802/57344 train_time:6978293ms step_avg:591.28ms +step:11803/57344 train_time:6978833ms step_avg:591.28ms +grad accum step:2951/14336 +step:11804/57344 train_time:6980102ms step_avg:591.33ms +step:11805/57344 train_time:6980119ms step_avg:591.28ms +step:11806/57344 train_time:6980365ms step_avg:591.26ms +step:11807/57344 train_time:6980904ms step_avg:591.25ms +grad accum step:2952/14336 +step:11808/57344 train_time:6982165ms step_avg:591.31ms +step:11809/57344 train_time:6982183ms step_avg:591.26ms +step:11810/57344 train_time:6982427ms step_avg:591.23ms +step:11811/57344 train_time:6982968ms step_avg:591.23ms +grad accum step:2953/14336 +step:11812/57344 train_time:6984254ms step_avg:591.28ms +step:11813/57344 train_time:6984271ms step_avg:591.24ms +step:11814/57344 train_time:6984514ms step_avg:591.21ms +step:11815/57344 train_time:6985056ms step_avg:591.20ms +grad accum step:2954/14336 +step:11816/57344 train_time:6986318ms step_avg:591.26ms +step:11817/57344 train_time:6986335ms step_avg:591.21ms +step:11818/57344 train_time:6986579ms step_avg:591.18ms +step:11819/57344 train_time:6987124ms step_avg:591.18ms +grad accum step:2955/14336 +step:11820/57344 train_time:6988409ms step_avg:591.24ms +step:11821/57344 train_time:6988426ms step_avg:591.19ms +step:11822/57344 train_time:6988670ms step_avg:591.16ms +step:11823/57344 train_time:6989208ms step_avg:591.15ms +grad accum step:2956/14336 +step:11824/57344 train_time:6990486ms step_avg:591.21ms +step:11825/57344 train_time:6990503ms step_avg:591.16ms +step:11826/57344 train_time:6990743ms step_avg:591.13ms +step:11827/57344 train_time:6991271ms step_avg:591.13ms +grad accum step:2957/14336 +step:11828/57344 train_time:6992515ms step_avg:591.18ms +step:11829/57344 train_time:6992532ms step_avg:591.13ms +step:11830/57344 train_time:6992777ms step_avg:591.11ms +step:11831/57344 train_time:6993313ms step_avg:591.10ms +grad accum step:2958/14336 +step:11832/57344 train_time:6994582ms step_avg:591.16ms +step:11833/57344 train_time:6994600ms step_avg:591.11ms +step:11834/57344 train_time:6994843ms step_avg:591.08ms +step:11835/57344 train_time:6995385ms step_avg:591.08ms +grad accum step:2959/14336 +step:11836/57344 train_time:6996660ms step_avg:591.13ms +step:11837/57344 train_time:6996677ms step_avg:591.09ms +step:11838/57344 train_time:6996919ms step_avg:591.06ms +step:11839/57344 train_time:6997460ms step_avg:591.05ms +grad accum step:2960/14336 +step:11840/57344 train_time:6998739ms step_avg:591.11ms +step:11840/57344 val_loss:6.831332 train_time:6998739ms step_avg:591.11ms +step:11841/57344 train_time:6998751ms step_avg:591.06ms +step:11842/57344 train_time:6998975ms step_avg:591.03ms +step:11843/57344 train_time:6999520ms step_avg:591.03ms +grad accum step:2961/14336 +step:11844/57344 train_time:7000801ms step_avg:591.08ms +step:11845/57344 train_time:7000818ms step_avg:591.04ms +step:11846/57344 train_time:7001063ms step_avg:591.01ms +step:11847/57344 train_time:7001598ms step_avg:591.00ms +grad accum step:2962/14336 +step:11848/57344 train_time:7002867ms step_avg:591.06ms +step:11849/57344 train_time:7002885ms step_avg:591.01ms +step:11850/57344 train_time:7003127ms step_avg:590.98ms +step:11851/57344 train_time:7003672ms step_avg:590.98ms +grad accum step:2963/14336 +step:11852/57344 train_time:7004933ms step_avg:591.03ms +step:11853/57344 train_time:7004950ms step_avg:590.99ms +step:11854/57344 train_time:7005190ms step_avg:590.96ms +step:11855/57344 train_time:7005725ms step_avg:590.95ms +grad accum step:2964/14336 +step:11856/57344 train_time:7006990ms step_avg:591.01ms +step:11857/57344 train_time:7007007ms step_avg:590.96ms +step:11858/57344 train_time:7007252ms step_avg:590.93ms +step:11859/57344 train_time:7007794ms step_avg:590.93ms +grad accum step:2965/14336 +step:11860/57344 train_time:7009056ms step_avg:590.98ms +step:11861/57344 train_time:7009073ms step_avg:590.93ms +step:11862/57344 train_time:7009316ms step_avg:590.91ms +step:11863/57344 train_time:7009847ms step_avg:590.90ms +grad accum step:2966/14336 +step:11864/57344 train_time:7011125ms step_avg:590.96ms +step:11865/57344 train_time:7011142ms step_avg:590.91ms +step:11866/57344 train_time:7011386ms step_avg:590.88ms +step:11867/57344 train_time:7011930ms step_avg:590.88ms +grad accum step:2967/14336 +step:11868/57344 train_time:7013204ms step_avg:590.93ms +step:11869/57344 train_time:7013221ms step_avg:590.89ms +step:11870/57344 train_time:7013463ms step_avg:590.86ms +step:11871/57344 train_time:7013989ms step_avg:590.85ms +grad accum step:2968/14336 +step:11872/57344 train_time:7015256ms step_avg:590.91ms +step:11873/57344 train_time:7015273ms step_avg:590.86ms +step:11874/57344 train_time:7015516ms step_avg:590.83ms +step:11875/57344 train_time:7016054ms step_avg:590.83ms +grad accum step:2969/14336 +step:11876/57344 train_time:7017318ms step_avg:590.88ms +step:11877/57344 train_time:7017335ms step_avg:590.83ms +step:11878/57344 train_time:7017577ms step_avg:590.80ms +step:11879/57344 train_time:7018109ms step_avg:590.80ms +grad accum step:2970/14336 +step:11880/57344 train_time:7019389ms step_avg:590.86ms +step:11881/57344 train_time:7019406ms step_avg:590.81ms +step:11882/57344 train_time:7019647ms step_avg:590.78ms +step:11883/57344 train_time:7020185ms step_avg:590.78ms +grad accum step:2971/14336 +step:11884/57344 train_time:7021449ms step_avg:590.83ms +step:11885/57344 train_time:7021466ms step_avg:590.78ms +step:11886/57344 train_time:7021712ms step_avg:590.75ms +step:11887/57344 train_time:7022258ms step_avg:590.75ms +grad accum step:2972/14336 +step:11888/57344 train_time:7023536ms step_avg:590.81ms +step:11889/57344 train_time:7023553ms step_avg:590.76ms +step:11890/57344 train_time:7023794ms step_avg:590.73ms +step:11891/57344 train_time:7024325ms step_avg:590.73ms +grad accum step:2973/14336 +step:11892/57344 train_time:7025609ms step_avg:590.78ms +step:11893/57344 train_time:7025626ms step_avg:590.74ms +step:11894/57344 train_time:7025871ms step_avg:590.71ms +step:11895/57344 train_time:7026411ms step_avg:590.70ms +grad accum step:2974/14336 +step:11896/57344 train_time:7027684ms step_avg:590.76ms +step:11897/57344 train_time:7027699ms step_avg:590.71ms +step:11898/57344 train_time:7027942ms step_avg:590.68ms +step:11899/57344 train_time:7028476ms step_avg:590.68ms +grad accum step:2975/14336 +step:11900/57344 train_time:7029743ms step_avg:590.73ms +step:11901/57344 train_time:7029761ms step_avg:590.69ms +step:11902/57344 train_time:7030002ms step_avg:590.66ms +step:11903/57344 train_time:7030531ms step_avg:590.65ms +grad accum step:2976/14336 +step:11904/57344 train_time:7031792ms step_avg:590.71ms +step:11904/57344 val_loss:6.828391 train_time:7031793ms step_avg:590.71ms +step:11905/57344 train_time:7031805ms step_avg:590.66ms +step:11906/57344 train_time:7032027ms step_avg:590.63ms +step:11907/57344 train_time:7032562ms step_avg:590.62ms +grad accum step:2977/14336 +step:11908/57344 train_time:7033839ms step_avg:590.68ms +step:11909/57344 train_time:7033856ms step_avg:590.63ms +step:11910/57344 train_time:7034101ms step_avg:590.60ms +step:11911/57344 train_time:7034632ms step_avg:590.60ms +grad accum step:2978/14336 +step:11912/57344 train_time:7035908ms step_avg:590.66ms +step:11913/57344 train_time:7035925ms step_avg:590.61ms +step:11914/57344 train_time:7036171ms step_avg:590.58ms +step:11915/57344 train_time:7036712ms step_avg:590.58ms +grad accum step:2979/14336 +step:11916/57344 train_time:7037992ms step_avg:590.63ms +step:11917/57344 train_time:7038009ms step_avg:590.59ms +step:11918/57344 train_time:7038248ms step_avg:590.56ms +step:11919/57344 train_time:7038772ms step_avg:590.55ms +grad accum step:2980/14336 +step:11920/57344 train_time:7040057ms step_avg:590.61ms +step:11921/57344 train_time:7040074ms step_avg:590.56ms +step:11922/57344 train_time:7040316ms step_avg:590.53ms +step:11923/57344 train_time:7040850ms step_avg:590.53ms +grad accum step:2981/14336 +step:11924/57344 train_time:7042123ms step_avg:590.58ms +step:11925/57344 train_time:7042140ms step_avg:590.54ms +step:11926/57344 train_time:7042384ms step_avg:590.51ms +step:11927/57344 train_time:7042929ms step_avg:590.50ms +grad accum step:2982/14336 +step:11928/57344 train_time:7044208ms step_avg:590.56ms +step:11929/57344 train_time:7044225ms step_avg:590.51ms +step:11930/57344 train_time:7044466ms step_avg:590.48ms +step:11931/57344 train_time:7044996ms step_avg:590.48ms +grad accum step:2983/14336 +step:11932/57344 train_time:7046273ms step_avg:590.54ms +step:11933/57344 train_time:7046290ms step_avg:590.49ms +step:11934/57344 train_time:7046538ms step_avg:590.46ms +step:11935/57344 train_time:7047081ms step_avg:590.46ms +grad accum step:2984/14336 +step:11936/57344 train_time:7048363ms step_avg:590.51ms +step:11937/57344 train_time:7048380ms step_avg:590.46ms +step:11938/57344 train_time:7048623ms step_avg:590.44ms +step:11939/57344 train_time:7049165ms step_avg:590.43ms +grad accum step:2985/14336 +step:11940/57344 train_time:7050424ms step_avg:590.49ms +step:11941/57344 train_time:7050441ms step_avg:590.44ms +step:11942/57344 train_time:7050684ms step_avg:590.41ms +step:11943/57344 train_time:7051210ms step_avg:590.41ms +grad accum step:2986/14336 +step:11944/57344 train_time:7052491ms step_avg:590.46ms +step:11945/57344 train_time:7052509ms step_avg:590.42ms +step:11946/57344 train_time:7052753ms step_avg:590.39ms +step:11947/57344 train_time:7053294ms step_avg:590.38ms +grad accum step:2987/14336 +step:11948/57344 train_time:7054556ms step_avg:590.44ms +step:11949/57344 train_time:7054573ms step_avg:590.39ms +step:11950/57344 train_time:7054818ms step_avg:590.36ms +step:11951/57344 train_time:7055359ms step_avg:590.36ms +grad accum step:2988/14336 +step:11952/57344 train_time:7056626ms step_avg:590.41ms +step:11953/57344 train_time:7056644ms step_avg:590.37ms +step:11954/57344 train_time:7056888ms step_avg:590.34ms +step:11955/57344 train_time:7057426ms step_avg:590.33ms +grad accum step:2989/14336 +step:11956/57344 train_time:7058698ms step_avg:590.39ms +step:11957/57344 train_time:7058715ms step_avg:590.34ms +step:11958/57344 train_time:7058957ms step_avg:590.31ms +step:11959/57344 train_time:7059487ms step_avg:590.31ms +grad accum step:2990/14336 +step:11960/57344 train_time:7060742ms step_avg:590.36ms +step:11961/57344 train_time:7060760ms step_avg:590.32ms +step:11962/57344 train_time:7061001ms step_avg:590.29ms +step:11963/57344 train_time:7061529ms step_avg:590.28ms +grad accum step:2991/14336 +step:11964/57344 train_time:7062794ms step_avg:590.34ms +step:11965/57344 train_time:7062811ms step_avg:590.29ms +step:11966/57344 train_time:7063055ms step_avg:590.26ms +step:11967/57344 train_time:7063595ms step_avg:590.26ms +grad accum step:2992/14336 +step:11968/57344 train_time:7064864ms step_avg:590.31ms +step:11968/57344 val_loss:6.826741 train_time:7064865ms step_avg:590.31ms +step:11969/57344 train_time:7064876ms step_avg:590.26ms +step:11970/57344 train_time:7065099ms step_avg:590.23ms +step:11971/57344 train_time:7065643ms step_avg:590.23ms +grad accum step:2993/14336 +step:11972/57344 train_time:7066909ms step_avg:590.29ms +step:11973/57344 train_time:7066926ms step_avg:590.24ms +step:11974/57344 train_time:7067171ms step_avg:590.21ms +step:11975/57344 train_time:7067709ms step_avg:590.21ms +grad accum step:2994/14336 +step:11976/57344 train_time:7068979ms step_avg:590.26ms +step:11977/57344 train_time:7068997ms step_avg:590.21ms +step:11978/57344 train_time:7069241ms step_avg:590.19ms +step:11979/57344 train_time:7069774ms step_avg:590.18ms +grad accum step:2995/14336 +step:11980/57344 train_time:7071049ms step_avg:590.24ms +step:11981/57344 train_time:7071067ms step_avg:590.19ms +step:11982/57344 train_time:7071310ms step_avg:590.16ms +step:11983/57344 train_time:7071845ms step_avg:590.16ms +grad accum step:2996/14336 +step:11984/57344 train_time:7073118ms step_avg:590.21ms +step:11985/57344 train_time:7073135ms step_avg:590.17ms +step:11986/57344 train_time:7073378ms step_avg:590.14ms +step:11987/57344 train_time:7073917ms step_avg:590.13ms +grad accum step:2997/14336 +step:11988/57344 train_time:7075178ms step_avg:590.19ms +step:11989/57344 train_time:7075195ms step_avg:590.14ms +step:11990/57344 train_time:7075437ms step_avg:590.11ms +step:11991/57344 train_time:7075967ms step_avg:590.11ms +grad accum step:2998/14336 +step:11992/57344 train_time:7077224ms step_avg:590.16ms +step:11993/57344 train_time:7077241ms step_avg:590.11ms +step:11994/57344 train_time:7077483ms step_avg:590.09ms +step:11995/57344 train_time:7078012ms step_avg:590.08ms +grad accum step:2999/14336 +step:11996/57344 train_time:7079294ms step_avg:590.14ms +step:11997/57344 train_time:7079311ms step_avg:590.09ms +step:11998/57344 train_time:7079555ms step_avg:590.06ms +step:11999/57344 train_time:7080096ms step_avg:590.06ms +grad accum step:3000/14336 +step:12000/57344 train_time:7081379ms step_avg:590.11ms +step:12001/57344 train_time:7081396ms step_avg:590.07ms +step:12002/57344 train_time:7081636ms step_avg:590.04ms +step:12003/57344 train_time:7082164ms step_avg:590.03ms +grad accum step:3001/14336 +step:12004/57344 train_time:7083436ms step_avg:590.09ms +step:12005/57344 train_time:7083453ms step_avg:590.04ms +step:12006/57344 train_time:7083696ms step_avg:590.01ms +step:12007/57344 train_time:7084227ms step_avg:590.01ms +grad accum step:3002/14336 +step:12008/57344 train_time:7085471ms step_avg:590.06ms +step:12009/57344 train_time:7085489ms step_avg:590.01ms +step:12010/57344 train_time:7085731ms step_avg:589.99ms +step:12011/57344 train_time:7086258ms step_avg:589.98ms +grad accum step:3003/14336 +step:12012/57344 train_time:7087520ms step_avg:590.04ms +step:12013/57344 train_time:7087537ms step_avg:589.99ms +step:12014/57344 train_time:7087781ms step_avg:589.96ms +step:12015/57344 train_time:7088313ms step_avg:589.96ms +grad accum step:3004/14336 +step:12016/57344 train_time:7089594ms step_avg:590.01ms +step:12017/57344 train_time:7089611ms step_avg:589.97ms +step:12018/57344 train_time:7089855ms step_avg:589.94ms +step:12019/57344 train_time:7090400ms step_avg:589.93ms +grad accum step:3005/14336 +step:12020/57344 train_time:7091678ms step_avg:589.99ms +step:12021/57344 train_time:7091695ms step_avg:589.94ms +step:12022/57344 train_time:7091938ms step_avg:589.91ms +step:12023/57344 train_time:7092467ms step_avg:589.91ms +grad accum step:3006/14336 +step:12024/57344 train_time:7093747ms step_avg:589.97ms +step:12025/57344 train_time:7093764ms step_avg:589.92ms +step:12026/57344 train_time:7094003ms step_avg:589.89ms +step:12027/57344 train_time:7094528ms step_avg:589.88ms +grad accum step:3007/14336 +step:12028/57344 train_time:7095799ms step_avg:589.94ms +step:12029/57344 train_time:7095816ms step_avg:589.89ms +step:12030/57344 train_time:7096057ms step_avg:589.86ms +step:12031/57344 train_time:7096587ms step_avg:589.86ms +grad accum step:3008/14336 +step:12032/57344 train_time:7097864ms step_avg:589.92ms +step:12032/57344 val_loss:6.813456 train_time:7097864ms step_avg:589.92ms +step:12033/57344 train_time:7097876ms step_avg:589.87ms +step:12034/57344 train_time:7098094ms step_avg:589.84ms +step:12035/57344 train_time:7098629ms step_avg:589.83ms +grad accum step:3009/14336 +step:12036/57344 train_time:7099898ms step_avg:589.89ms +step:12037/57344 train_time:7099915ms step_avg:589.84ms +step:12038/57344 train_time:7100161ms step_avg:589.81ms +step:12039/57344 train_time:7100703ms step_avg:589.81ms +grad accum step:3010/14336 +step:12040/57344 train_time:7101987ms step_avg:589.87ms +step:12041/57344 train_time:7102004ms step_avg:589.82ms +step:12042/57344 train_time:7102248ms step_avg:589.79ms +step:12043/57344 train_time:7102782ms step_avg:589.79ms +grad accum step:3011/14336 +step:12044/57344 train_time:7128082ms step_avg:591.84ms +step:12045/57344 train_time:7128093ms step_avg:591.79ms +step:12046/57344 train_time:7128354ms step_avg:591.76ms +step:12047/57344 train_time:7128878ms step_avg:591.76ms +grad accum step:3012/14336 +step:12048/57344 train_time:7130340ms step_avg:591.83ms +step:12049/57344 train_time:7130357ms step_avg:591.78ms +step:12050/57344 train_time:7130593ms step_avg:591.75ms +step:12051/57344 train_time:7131131ms step_avg:591.75ms +grad accum step:3013/14336 +step:12052/57344 train_time:7132414ms step_avg:591.80ms +step:12053/57344 train_time:7132431ms step_avg:591.76ms +step:12054/57344 train_time:7132675ms step_avg:591.73ms +step:12055/57344 train_time:7133215ms step_avg:591.72ms +grad accum step:3014/14336 +step:12056/57344 train_time:7134499ms step_avg:591.78ms +step:12057/57344 train_time:7134515ms step_avg:591.73ms +step:12058/57344 train_time:7134757ms step_avg:591.70ms +step:12059/57344 train_time:7135285ms step_avg:591.70ms +grad accum step:3015/14336 +step:12060/57344 train_time:7136521ms step_avg:591.75ms +step:12061/57344 train_time:7136538ms step_avg:591.70ms +step:12062/57344 train_time:7136776ms step_avg:591.67ms +step:12063/57344 train_time:7137294ms step_avg:591.67ms +grad accum step:3016/14336 +step:12064/57344 train_time:7138541ms step_avg:591.72ms +step:12065/57344 train_time:7138557ms step_avg:591.67ms +step:12066/57344 train_time:7138796ms step_avg:591.65ms +step:12067/57344 train_time:7139318ms step_avg:591.64ms +grad accum step:3017/14336 +step:12068/57344 train_time:7140583ms step_avg:591.70ms +step:12069/57344 train_time:7140600ms step_avg:591.65ms +step:12070/57344 train_time:7140842ms step_avg:591.62ms +step:12071/57344 train_time:7141368ms step_avg:591.61ms +grad accum step:3018/14336 +step:12072/57344 train_time:7142633ms step_avg:591.67ms +step:12073/57344 train_time:7142650ms step_avg:591.62ms +step:12074/57344 train_time:7142894ms step_avg:591.59ms +step:12075/57344 train_time:7143435ms step_avg:591.59ms +grad accum step:3019/14336 +step:12076/57344 train_time:7144717ms step_avg:591.65ms +step:12077/57344 train_time:7144734ms step_avg:591.60ms +step:12078/57344 train_time:7144975ms step_avg:591.57ms +step:12079/57344 train_time:7145503ms step_avg:591.56ms +grad accum step:3020/14336 +step:12080/57344 train_time:7146761ms step_avg:591.62ms +step:12081/57344 train_time:7146778ms step_avg:591.57ms +step:12082/57344 train_time:7147019ms step_avg:591.54ms +step:12083/57344 train_time:7147546ms step_avg:591.54ms +grad accum step:3021/14336 +step:12084/57344 train_time:7148806ms step_avg:591.59ms +step:12085/57344 train_time:7148823ms step_avg:591.55ms +step:12086/57344 train_time:7149063ms step_avg:591.52ms +step:12087/57344 train_time:7149591ms step_avg:591.51ms +grad accum step:3022/14336 +step:12088/57344 train_time:7150862ms step_avg:591.57ms +step:12089/57344 train_time:7150879ms step_avg:591.52ms +step:12090/57344 train_time:7151122ms step_avg:591.49ms +step:12091/57344 train_time:7151648ms step_avg:591.49ms +grad accum step:3023/14336 +step:12092/57344 train_time:7152926ms step_avg:591.54ms +step:12093/57344 train_time:7152943ms step_avg:591.49ms +step:12094/57344 train_time:7153182ms step_avg:591.47ms +step:12095/57344 train_time:7153706ms step_avg:591.46ms +grad accum step:3024/14336 +step:12096/57344 train_time:7154950ms step_avg:591.51ms +step:12096/57344 val_loss:6.816004 train_time:7154951ms step_avg:591.51ms +step:12097/57344 train_time:7154962ms step_avg:591.47ms +step:12098/57344 train_time:7155188ms step_avg:591.44ms +step:12099/57344 train_time:7155728ms step_avg:591.43ms +grad accum step:3025/14336 +step:12100/57344 train_time:7157011ms step_avg:591.49ms +step:12101/57344 train_time:7157028ms step_avg:591.44ms +step:12102/57344 train_time:7157281ms step_avg:591.41ms +step:12103/57344 train_time:7157837ms step_avg:591.41ms +grad accum step:3026/14336 +step:12104/57344 train_time:7159096ms step_avg:591.47ms +step:12105/57344 train_time:7159113ms step_avg:591.42ms +step:12106/57344 train_time:7159354ms step_avg:591.39ms +step:12107/57344 train_time:7159882ms step_avg:591.38ms +grad accum step:3027/14336 +step:12108/57344 train_time:7161161ms step_avg:591.44ms +step:12109/57344 train_time:7161178ms step_avg:591.39ms +step:12110/57344 train_time:7161422ms step_avg:591.36ms +step:12111/57344 train_time:7161962ms step_avg:591.36ms +grad accum step:3028/14336 +step:12112/57344 train_time:7163245ms step_avg:591.42ms +step:12113/57344 train_time:7163261ms step_avg:591.37ms +step:12114/57344 train_time:7163503ms step_avg:591.34ms +step:12115/57344 train_time:7164034ms step_avg:591.34ms +grad accum step:3029/14336 +step:12116/57344 train_time:7165284ms step_avg:591.39ms +step:12117/57344 train_time:7165301ms step_avg:591.34ms +step:12118/57344 train_time:7165546ms step_avg:591.31ms +step:12119/57344 train_time:7166099ms step_avg:591.31ms +grad accum step:3030/14336 +step:12120/57344 train_time:7167392ms step_avg:591.37ms +step:12121/57344 train_time:7167409ms step_avg:591.32ms +step:12122/57344 train_time:7167650ms step_avg:591.29ms +step:12123/57344 train_time:7168180ms step_avg:591.29ms +grad accum step:3031/14336 +step:12124/57344 train_time:7169453ms step_avg:591.34ms +step:12125/57344 train_time:7169470ms step_avg:591.30ms +step:12126/57344 train_time:7169715ms step_avg:591.27ms +step:12127/57344 train_time:7170255ms step_avg:591.26ms +grad accum step:3032/14336 +step:12128/57344 train_time:7171523ms step_avg:591.32ms +step:12129/57344 train_time:7171540ms step_avg:591.27ms +step:12130/57344 train_time:7171781ms step_avg:591.24ms +step:12131/57344 train_time:7172317ms step_avg:591.24ms +grad accum step:3033/14336 +step:12132/57344 train_time:7173647ms step_avg:591.30ms +step:12133/57344 train_time:7173664ms step_avg:591.25ms +step:12134/57344 train_time:7173912ms step_avg:591.22ms +step:12135/57344 train_time:7174453ms step_avg:591.22ms +grad accum step:3034/14336 +step:12136/57344 train_time:7175710ms step_avg:591.27ms +step:12137/57344 train_time:7175727ms step_avg:591.23ms +step:12138/57344 train_time:7175969ms step_avg:591.20ms +step:12139/57344 train_time:7176510ms step_avg:591.19ms +grad accum step:3035/14336 +step:12140/57344 train_time:7177786ms step_avg:591.25ms +step:12141/57344 train_time:7177804ms step_avg:591.20ms +step:12142/57344 train_time:7178045ms step_avg:591.17ms +step:12143/57344 train_time:7178574ms step_avg:591.17ms +grad accum step:3036/14336 +step:12144/57344 train_time:7179851ms step_avg:591.23ms +step:12145/57344 train_time:7179868ms step_avg:591.18ms +step:12146/57344 train_time:7180112ms step_avg:591.15ms +step:12147/57344 train_time:7180652ms step_avg:591.15ms +grad accum step:3037/14336 +step:12148/57344 train_time:7181929ms step_avg:591.20ms +step:12149/57344 train_time:7181947ms step_avg:591.16ms +step:12150/57344 train_time:7182191ms step_avg:591.13ms +step:12151/57344 train_time:7182725ms step_avg:591.12ms +grad accum step:3038/14336 +step:12152/57344 train_time:7183990ms step_avg:591.18ms +step:12153/57344 train_time:7184008ms step_avg:591.13ms +step:12154/57344 train_time:7184251ms step_avg:591.10ms +step:12155/57344 train_time:7184778ms step_avg:591.10ms +grad accum step:3039/14336 +step:12156/57344 train_time:7202721ms step_avg:592.52ms +step:12157/57344 train_time:7202733ms step_avg:592.48ms +step:12158/57344 train_time:7202991ms step_avg:592.45ms +step:12159/57344 train_time:7203515ms step_avg:592.44ms +grad accum step:3040/14336 +step:12160/57344 train_time:7204778ms step_avg:592.50ms +step:12160/57344 val_loss:6.811965 train_time:7204778ms step_avg:592.50ms +step:12161/57344 train_time:7204790ms step_avg:592.45ms +step:12162/57344 train_time:7205084ms step_avg:592.43ms +step:12163/57344 train_time:7205614ms step_avg:592.42ms +grad accum step:3041/14336 +step:12164/57344 train_time:7206868ms step_avg:592.48ms +step:12165/57344 train_time:7206885ms step_avg:592.43ms +step:12166/57344 train_time:7207125ms step_avg:592.40ms +step:12167/57344 train_time:7207656ms step_avg:592.39ms +grad accum step:3042/14336 +step:12168/57344 train_time:7208916ms step_avg:592.45ms +step:12169/57344 train_time:7208933ms step_avg:592.40ms +step:12170/57344 train_time:7209174ms step_avg:592.37ms +step:12171/57344 train_time:7209706ms step_avg:592.37ms +grad accum step:3043/14336 +step:12172/57344 train_time:7210969ms step_avg:592.42ms +step:12173/57344 train_time:7210986ms step_avg:592.38ms +step:12174/57344 train_time:7211231ms step_avg:592.35ms +step:12175/57344 train_time:7211765ms step_avg:592.34ms +grad accum step:3044/14336 +step:12176/57344 train_time:7213015ms step_avg:592.40ms +step:12177/57344 train_time:7213032ms step_avg:592.35ms +step:12178/57344 train_time:7213274ms step_avg:592.32ms +step:12179/57344 train_time:7213800ms step_avg:592.31ms +grad accum step:3045/14336 +step:12180/57344 train_time:7215080ms step_avg:592.37ms +step:12181/57344 train_time:7215097ms step_avg:592.32ms +step:12182/57344 train_time:7215336ms step_avg:592.29ms +step:12183/57344 train_time:7215864ms step_avg:592.29ms +grad accum step:3046/14336 +step:12184/57344 train_time:7217125ms step_avg:592.34ms +step:12185/57344 train_time:7217142ms step_avg:592.30ms +step:12186/57344 train_time:7217379ms step_avg:592.27ms +step:12187/57344 train_time:7217902ms step_avg:592.26ms +grad accum step:3047/14336 +step:12188/57344 train_time:7219167ms step_avg:592.32ms +step:12189/57344 train_time:7219184ms step_avg:592.27ms +step:12190/57344 train_time:7219424ms step_avg:592.24ms +step:12191/57344 train_time:7219950ms step_avg:592.24ms +grad accum step:3048/14336 +step:12192/57344 train_time:7221220ms step_avg:592.29ms +step:12193/57344 train_time:7221237ms step_avg:592.24ms +step:12194/57344 train_time:7221480ms step_avg:592.22ms +step:12195/57344 train_time:7222014ms step_avg:592.21ms +grad accum step:3049/14336 +step:12196/57344 train_time:7223271ms step_avg:592.27ms +step:12197/57344 train_time:7223287ms step_avg:592.22ms +step:12198/57344 train_time:7223530ms step_avg:592.19ms +step:12199/57344 train_time:7224068ms step_avg:592.19ms +grad accum step:3050/14336 +step:12200/57344 train_time:7225332ms step_avg:592.24ms +step:12201/57344 train_time:7225349ms step_avg:592.19ms +step:12202/57344 train_time:7225589ms step_avg:592.16ms +step:12203/57344 train_time:7226114ms step_avg:592.16ms +grad accum step:3051/14336 +step:12204/57344 train_time:7227385ms step_avg:592.21ms +step:12205/57344 train_time:7227402ms step_avg:592.17ms +step:12206/57344 train_time:7227645ms step_avg:592.14ms +step:12207/57344 train_time:7228186ms step_avg:592.13ms +grad accum step:3052/14336 +step:12208/57344 train_time:7229468ms step_avg:592.19ms +step:12209/57344 train_time:7229485ms step_avg:592.14ms +step:12210/57344 train_time:7229725ms step_avg:592.12ms +step:12211/57344 train_time:7230252ms step_avg:592.11ms +grad accum step:3053/14336 +step:12212/57344 train_time:7231513ms step_avg:592.16ms +step:12213/57344 train_time:7231530ms step_avg:592.12ms +step:12214/57344 train_time:7231772ms step_avg:592.09ms +step:12215/57344 train_time:7232303ms step_avg:592.08ms +grad accum step:3054/14336 +step:12216/57344 train_time:7233575ms step_avg:592.14ms +step:12217/57344 train_time:7233593ms step_avg:592.09ms +step:12218/57344 train_time:7233835ms step_avg:592.06ms +step:12219/57344 train_time:7234368ms step_avg:592.06ms +grad accum step:3055/14336 +step:12220/57344 train_time:7235641ms step_avg:592.11ms +step:12221/57344 train_time:7235658ms step_avg:592.07ms +step:12222/57344 train_time:7235902ms step_avg:592.04ms +step:12223/57344 train_time:7236439ms step_avg:592.03ms +grad accum step:3056/14336 +step:12224/57344 train_time:7237713ms step_avg:592.09ms +step:12224/57344 val_loss:6.795401 train_time:7237714ms step_avg:592.09ms +step:12225/57344 train_time:7237726ms step_avg:592.04ms +step:12226/57344 train_time:7237948ms step_avg:592.01ms +step:12227/57344 train_time:7238479ms step_avg:592.01ms +grad accum step:3057/14336 +step:12228/57344 train_time:7239758ms step_avg:592.06ms +step:12229/57344 train_time:7239775ms step_avg:592.02ms +step:12230/57344 train_time:7240019ms step_avg:591.99ms +step:12231/57344 train_time:7240556ms step_avg:591.98ms +grad accum step:3058/14336 +step:12232/57344 train_time:7241823ms step_avg:592.04ms +step:12233/57344 train_time:7241840ms step_avg:591.99ms +step:12234/57344 train_time:7242085ms step_avg:591.96ms +step:12235/57344 train_time:7242626ms step_avg:591.96ms +grad accum step:3059/14336 +step:12236/57344 train_time:7243904ms step_avg:592.02ms +step:12237/57344 train_time:7243921ms step_avg:591.97ms +step:12238/57344 train_time:7244165ms step_avg:591.94ms +step:12239/57344 train_time:7244697ms step_avg:591.94ms +grad accum step:3060/14336 +step:12240/57344 train_time:7245971ms step_avg:591.99ms +step:12241/57344 train_time:7245985ms step_avg:591.94ms +step:12242/57344 train_time:7246232ms step_avg:591.92ms +step:12243/57344 train_time:7246781ms step_avg:591.91ms +grad accum step:3061/14336 +step:12244/57344 train_time:7248058ms step_avg:591.97ms +step:12245/57344 train_time:7248075ms step_avg:591.92ms +step:12246/57344 train_time:7248322ms step_avg:591.89ms +step:12247/57344 train_time:7248864ms step_avg:591.89ms +grad accum step:3062/14336 +step:12248/57344 train_time:7250133ms step_avg:591.94ms +step:12249/57344 train_time:7250150ms step_avg:591.90ms +step:12250/57344 train_time:7250399ms step_avg:591.87ms +step:12251/57344 train_time:7250938ms step_avg:591.86ms +grad accum step:3063/14336 +step:12252/57344 train_time:7252207ms step_avg:591.92ms +step:12253/57344 train_time:7253571ms step_avg:591.98ms +step:12254/57344 train_time:7253786ms step_avg:591.95ms +step:12255/57344 train_time:7254330ms step_avg:591.95ms +grad accum step:3064/14336 +step:12256/57344 train_time:7255624ms step_avg:592.01ms +step:12257/57344 train_time:7255641ms step_avg:591.96ms +step:12258/57344 train_time:7255882ms step_avg:591.93ms +step:12259/57344 train_time:7256412ms step_avg:591.93ms +grad accum step:3065/14336 +step:12260/57344 train_time:7257678ms step_avg:591.98ms +step:12261/57344 train_time:7257695ms step_avg:591.93ms +step:12262/57344 train_time:7257939ms step_avg:591.90ms +step:12263/57344 train_time:7258480ms step_avg:591.90ms +grad accum step:3066/14336 +step:12264/57344 train_time:7259761ms step_avg:591.96ms +step:12265/57344 train_time:7259779ms step_avg:591.91ms +step:12266/57344 train_time:7260024ms step_avg:591.88ms +step:12267/57344 train_time:7260569ms step_avg:591.88ms +grad accum step:3067/14336 +step:12268/57344 train_time:7261853ms step_avg:591.93ms +step:12269/57344 train_time:7261870ms step_avg:591.89ms +step:12270/57344 train_time:7262115ms step_avg:591.86ms +step:12271/57344 train_time:7262661ms step_avg:591.86ms +grad accum step:3068/14336 +step:12272/57344 train_time:7263936ms step_avg:591.91ms +step:12273/57344 train_time:7263954ms step_avg:591.86ms +step:12274/57344 train_time:7264204ms step_avg:591.84ms +step:12275/57344 train_time:7264754ms step_avg:591.83ms +grad accum step:3069/14336 +step:12276/57344 train_time:7266025ms step_avg:591.89ms +step:12277/57344 train_time:7266042ms step_avg:591.84ms +step:12278/57344 train_time:7266286ms step_avg:591.81ms +step:12279/57344 train_time:7266823ms step_avg:591.81ms +grad accum step:3070/14336 +step:12280/57344 train_time:7268094ms step_avg:591.86ms +step:12281/57344 train_time:7268111ms step_avg:591.82ms +step:12282/57344 train_time:7268362ms step_avg:591.79ms +step:12283/57344 train_time:7268911ms step_avg:591.79ms +grad accum step:3071/14336 +step:12284/57344 train_time:7270186ms step_avg:591.84ms +step:12285/57344 train_time:7270203ms step_avg:591.80ms +step:12286/57344 train_time:7270444ms step_avg:591.77ms +step:12287/57344 train_time:7270974ms step_avg:591.76ms +grad accum step:3072/14336 +step:12288/57344 train_time:7272256ms step_avg:591.82ms +step:12288/57344 val_loss:6.807929 train_time:7272256ms step_avg:591.82ms +step:12289/57344 train_time:7272268ms step_avg:591.77ms +step:12290/57344 train_time:7272490ms step_avg:591.74ms +step:12291/57344 train_time:7273027ms step_avg:591.74ms +grad accum step:3073/14336 +step:12292/57344 train_time:7274297ms step_avg:591.79ms +step:12293/57344 train_time:7274315ms step_avg:591.74ms +step:12294/57344 train_time:7274558ms step_avg:591.72ms +step:12295/57344 train_time:7275085ms step_avg:591.71ms +grad accum step:3074/14336 +step:12296/57344 train_time:7276344ms step_avg:591.77ms +step:12297/57344 train_time:7276361ms step_avg:591.72ms +step:12298/57344 train_time:7276607ms step_avg:591.69ms +step:12299/57344 train_time:7277139ms step_avg:591.69ms +grad accum step:3075/14336 +step:12300/57344 train_time:7278413ms step_avg:591.74ms +step:12301/57344 train_time:7278430ms step_avg:591.69ms +step:12302/57344 train_time:7278672ms step_avg:591.67ms +step:12303/57344 train_time:7279200ms step_avg:591.66ms +grad accum step:3076/14336 +step:12304/57344 train_time:7280483ms step_avg:591.72ms +step:12305/57344 train_time:7280500ms step_avg:591.67ms +step:12306/57344 train_time:7280745ms step_avg:591.64ms +step:12307/57344 train_time:7281283ms step_avg:591.64ms +grad accum step:3077/14336 +step:12308/57344 train_time:7282554ms step_avg:591.69ms +step:12309/57344 train_time:7282571ms step_avg:591.65ms +step:12310/57344 train_time:7282814ms step_avg:591.62ms +step:12311/57344 train_time:7283354ms step_avg:591.61ms +grad accum step:3078/14336 +step:12312/57344 train_time:7284634ms step_avg:591.67ms +step:12313/57344 train_time:7284651ms step_avg:591.62ms +step:12314/57344 train_time:7284896ms step_avg:591.59ms +step:12315/57344 train_time:7285441ms step_avg:591.59ms +grad accum step:3079/14336 +step:12316/57344 train_time:7286715ms step_avg:591.65ms +step:12317/57344 train_time:7286732ms step_avg:591.60ms +step:12318/57344 train_time:7286976ms step_avg:591.57ms +step:12319/57344 train_time:7287509ms step_avg:591.57ms +grad accum step:3080/14336 +step:12320/57344 train_time:7288786ms step_avg:591.62ms +step:12321/57344 train_time:7288803ms step_avg:591.58ms +step:12322/57344 train_time:7289049ms step_avg:591.55ms +step:12323/57344 train_time:7289591ms step_avg:591.54ms +grad accum step:3081/14336 +step:12324/57344 train_time:7290864ms step_avg:591.60ms +step:12325/57344 train_time:7290882ms step_avg:591.55ms +step:12326/57344 train_time:7291124ms step_avg:591.52ms +step:12327/57344 train_time:7291661ms step_avg:591.52ms +grad accum step:3082/14336 +step:12328/57344 train_time:7292978ms step_avg:591.58ms +step:12329/57344 train_time:7292995ms step_avg:591.53ms +step:12330/57344 train_time:7293238ms step_avg:591.50ms +step:12331/57344 train_time:7293777ms step_avg:591.50ms +grad accum step:3083/14336 +step:12332/57344 train_time:7295040ms step_avg:591.55ms +step:12333/57344 train_time:7295058ms step_avg:591.51ms +step:12334/57344 train_time:7295310ms step_avg:591.48ms +step:12335/57344 train_time:7295865ms step_avg:591.48ms +grad accum step:3084/14336 +step:12336/57344 train_time:7297154ms step_avg:591.53ms +step:12337/57344 train_time:7297171ms step_avg:591.49ms +step:12338/57344 train_time:7297418ms step_avg:591.46ms +step:12339/57344 train_time:7297966ms step_avg:591.46ms +grad accum step:3085/14336 +step:12340/57344 train_time:7299249ms step_avg:591.51ms +step:12341/57344 train_time:7299266ms step_avg:591.46ms +step:12342/57344 train_time:7299514ms step_avg:591.44ms +step:12343/57344 train_time:7300057ms step_avg:591.43ms +grad accum step:3086/14336 +step:12344/57344 train_time:7301359ms step_avg:591.49ms +step:12345/57344 train_time:7301375ms step_avg:591.44ms +step:12346/57344 train_time:7301625ms step_avg:591.42ms +step:12347/57344 train_time:7302166ms step_avg:591.41ms +grad accum step:3087/14336 +step:12348/57344 train_time:7303447ms step_avg:591.47ms +step:12349/57344 train_time:7303463ms step_avg:591.42ms +step:12350/57344 train_time:7303708ms step_avg:591.39ms +step:12351/57344 train_time:7304257ms step_avg:591.39ms +grad accum step:3088/14336 +step:12352/57344 train_time:7305552ms step_avg:591.45ms +step:12352/57344 val_loss:6.793258 train_time:7305552ms step_avg:591.45ms +step:12353/57344 train_time:7305564ms step_avg:591.40ms +step:12354/57344 train_time:7305787ms step_avg:591.37ms +step:12355/57344 train_time:7306334ms step_avg:591.37ms +grad accum step:3089/14336 +step:12356/57344 train_time:7307634ms step_avg:591.42ms +step:12357/57344 train_time:7307651ms step_avg:591.38ms +step:12358/57344 train_time:7307892ms step_avg:591.35ms +step:12359/57344 train_time:7308432ms step_avg:591.34ms +grad accum step:3090/14336 +step:12360/57344 train_time:7309711ms step_avg:591.40ms +step:12361/57344 train_time:7309728ms step_avg:591.35ms +step:12362/57344 train_time:7309977ms step_avg:591.33ms +step:12363/57344 train_time:7310522ms step_avg:591.32ms +grad accum step:3091/14336 +step:12364/57344 train_time:7311802ms step_avg:591.38ms +step:12365/57344 train_time:7311819ms step_avg:591.33ms +step:12366/57344 train_time:7312063ms step_avg:591.30ms +step:12367/57344 train_time:7312607ms step_avg:591.30ms +grad accum step:3092/14336 +step:12368/57344 train_time:7313901ms step_avg:591.36ms +step:12369/57344 train_time:7313918ms step_avg:591.31ms +step:12370/57344 train_time:7314163ms step_avg:591.28ms +step:12371/57344 train_time:7314707ms step_avg:591.28ms +grad accum step:3093/14336 +step:12372/57344 train_time:7316049ms step_avg:591.34ms +step:12373/57344 train_time:7316066ms step_avg:591.29ms +step:12374/57344 train_time:7316314ms step_avg:591.27ms +step:12375/57344 train_time:7316871ms step_avg:591.26ms +grad accum step:3094/14336 +step:12376/57344 train_time:7318178ms step_avg:591.32ms +step:12377/57344 train_time:7318195ms step_avg:591.27ms +step:12378/57344 train_time:7318437ms step_avg:591.25ms +step:12379/57344 train_time:7318977ms step_avg:591.24ms +grad accum step:3095/14336 +step:12380/57344 train_time:7320265ms step_avg:591.30ms +step:12381/57344 train_time:7320282ms step_avg:591.25ms +step:12382/57344 train_time:7320527ms step_avg:591.22ms +step:12383/57344 train_time:7321065ms step_avg:591.22ms +grad accum step:3096/14336 +step:12384/57344 train_time:7322348ms step_avg:591.27ms +step:12385/57344 train_time:7322365ms step_avg:591.23ms +step:12386/57344 train_time:7322614ms step_avg:591.20ms +step:12387/57344 train_time:7323160ms step_avg:591.20ms +grad accum step:3097/14336 +step:12388/57344 train_time:7324436ms step_avg:591.25ms +step:12389/57344 train_time:7324453ms step_avg:591.21ms +step:12390/57344 train_time:7324702ms step_avg:591.18ms +step:12391/57344 train_time:7325246ms step_avg:591.17ms +grad accum step:3098/14336 +step:12392/57344 train_time:7326533ms step_avg:591.23ms +step:12393/57344 train_time:7326550ms step_avg:591.18ms +step:12394/57344 train_time:7326796ms step_avg:591.16ms +step:12395/57344 train_time:7327337ms step_avg:591.15ms +grad accum step:3099/14336 +step:12396/57344 train_time:7328618ms step_avg:591.21ms +step:12397/57344 train_time:7328635ms step_avg:591.16ms +step:12398/57344 train_time:7328880ms step_avg:591.13ms +step:12399/57344 train_time:7329423ms step_avg:591.13ms +grad accum step:3100/14336 +step:12400/57344 train_time:7330713ms step_avg:591.19ms +step:12401/57344 train_time:7330730ms step_avg:591.14ms +step:12402/57344 train_time:7330975ms step_avg:591.11ms +step:12403/57344 train_time:7331518ms step_avg:591.11ms +grad accum step:3101/14336 +step:12404/57344 train_time:7332796ms step_avg:591.16ms +step:12405/57344 train_time:7332814ms step_avg:591.12ms +step:12406/57344 train_time:7333059ms step_avg:591.09ms +step:12407/57344 train_time:7333609ms step_avg:591.09ms +grad accum step:3102/14336 +step:12408/57344 train_time:7334901ms step_avg:591.14ms +step:12409/57344 train_time:7334918ms step_avg:591.10ms +step:12410/57344 train_time:7335162ms step_avg:591.07ms +step:12411/57344 train_time:7335698ms step_avg:591.06ms +grad accum step:3103/14336 +step:12412/57344 train_time:7336978ms step_avg:591.12ms +step:12413/57344 train_time:7336995ms step_avg:591.07ms +step:12414/57344 train_time:7337243ms step_avg:591.05ms +step:12415/57344 train_time:7337788ms step_avg:591.04ms +grad accum step:3104/14336 +step:12416/57344 train_time:7339061ms step_avg:591.10ms +step:12416/57344 val_loss:6.786059 train_time:7339062ms step_avg:591.10ms +step:12417/57344 train_time:7339074ms step_avg:591.05ms +step:12418/57344 train_time:7339296ms step_avg:591.02ms +step:12419/57344 train_time:7339841ms step_avg:591.02ms +grad accum step:3105/14336 +step:12420/57344 train_time:7341153ms step_avg:591.08ms +step:12421/57344 train_time:7341170ms step_avg:591.03ms +step:12422/57344 train_time:7341417ms step_avg:591.00ms +step:12423/57344 train_time:7341968ms step_avg:591.00ms +grad accum step:3106/14336 +step:12424/57344 train_time:7343250ms step_avg:591.05ms +step:12425/57344 train_time:7343267ms step_avg:591.01ms +step:12426/57344 train_time:7343512ms step_avg:590.98ms +step:12427/57344 train_time:7344055ms step_avg:590.98ms +grad accum step:3107/14336 +step:12428/57344 train_time:7345336ms step_avg:591.03ms +step:12429/57344 train_time:7345352ms step_avg:590.98ms +step:12430/57344 train_time:7345601ms step_avg:590.96ms +step:12431/57344 train_time:7346142ms step_avg:590.95ms +grad accum step:3108/14336 +step:12432/57344 train_time:7347436ms step_avg:591.01ms +step:12433/57344 train_time:7347453ms step_avg:590.96ms +step:12434/57344 train_time:7347712ms step_avg:590.94ms +step:12435/57344 train_time:7348276ms step_avg:590.93ms +grad accum step:3109/14336 +step:12436/57344 train_time:7349544ms step_avg:590.99ms +step:12437/57344 train_time:7349562ms step_avg:590.94ms +step:12438/57344 train_time:7349806ms step_avg:590.92ms +step:12439/57344 train_time:7350355ms step_avg:590.91ms +grad accum step:3110/14336 +step:12440/57344 train_time:7351635ms step_avg:590.97ms +step:12441/57344 train_time:7351652ms step_avg:590.92ms +step:12442/57344 train_time:7351897ms step_avg:590.89ms +step:12443/57344 train_time:7352439ms step_avg:590.89ms +grad accum step:3111/14336 +step:12444/57344 train_time:7353714ms step_avg:590.94ms +step:12445/57344 train_time:7353731ms step_avg:590.90ms +step:12446/57344 train_time:7353976ms step_avg:590.87ms +step:12447/57344 train_time:7354527ms step_avg:590.87ms +grad accum step:3112/14336 +step:12448/57344 train_time:7355868ms step_avg:590.93ms +step:12449/57344 train_time:7355884ms step_avg:590.88ms +step:12450/57344 train_time:7356132ms step_avg:590.85ms +step:12451/57344 train_time:7356684ms step_avg:590.85ms +grad accum step:3113/14336 +step:12452/57344 train_time:7358001ms step_avg:590.91ms +step:12453/57344 train_time:7358018ms step_avg:590.86ms +step:12454/57344 train_time:7358262ms step_avg:590.84ms +step:12455/57344 train_time:7358812ms step_avg:590.83ms +grad accum step:3114/14336 +step:12456/57344 train_time:7360104ms step_avg:590.89ms +step:12457/57344 train_time:7360121ms step_avg:590.84ms +step:12458/57344 train_time:7360372ms step_avg:590.81ms +step:12459/57344 train_time:7360925ms step_avg:590.81ms +grad accum step:3115/14336 +step:12460/57344 train_time:7362217ms step_avg:590.87ms +step:12461/57344 train_time:7362234ms step_avg:590.82ms +step:12462/57344 train_time:7362476ms step_avg:590.79ms +step:12463/57344 train_time:7363009ms step_avg:590.79ms +grad accum step:3116/14336 +step:12464/57344 train_time:7364293ms step_avg:590.85ms +step:12465/57344 train_time:7364310ms step_avg:590.80ms +step:12466/57344 train_time:7364555ms step_avg:590.77ms +step:12467/57344 train_time:7365104ms step_avg:590.77ms +grad accum step:3117/14336 +step:12468/57344 train_time:7366439ms step_avg:590.83ms +step:12469/57344 train_time:7366456ms step_avg:590.78ms +step:12470/57344 train_time:7366699ms step_avg:590.75ms +step:12471/57344 train_time:7367243ms step_avg:590.75ms +grad accum step:3118/14336 +step:12472/57344 train_time:7368519ms step_avg:590.80ms +step:12473/57344 train_time:7368536ms step_avg:590.76ms +step:12474/57344 train_time:7368786ms step_avg:590.73ms +step:12475/57344 train_time:7369334ms step_avg:590.73ms +grad accum step:3119/14336 +step:12476/57344 train_time:7370625ms step_avg:590.78ms +step:12477/57344 train_time:7370642ms step_avg:590.74ms +step:12478/57344 train_time:7370887ms step_avg:590.71ms +step:12479/57344 train_time:7371420ms step_avg:590.71ms +grad accum step:3120/14336 +step:12480/57344 train_time:7372690ms step_avg:590.76ms +step:12480/57344 val_loss:6.784020 train_time:7372691ms step_avg:590.76ms +step:12481/57344 train_time:7372848ms step_avg:590.73ms +step:12482/57344 train_time:7372961ms step_avg:590.69ms +step:12483/57344 train_time:7373488ms step_avg:590.68ms +grad accum step:3121/14336 +step:12484/57344 train_time:7374801ms step_avg:590.74ms +step:12485/57344 train_time:7374813ms step_avg:590.69ms +step:12486/57344 train_time:7375040ms step_avg:590.66ms +step:12487/57344 train_time:7375588ms step_avg:590.66ms +grad accum step:3122/14336 +step:12488/57344 train_time:7376868ms step_avg:590.72ms +step:12489/57344 train_time:7376885ms step_avg:590.67ms +step:12490/57344 train_time:7377129ms step_avg:590.64ms +step:12491/57344 train_time:7377666ms step_avg:590.64ms +grad accum step:3123/14336 +step:12492/57344 train_time:7378932ms step_avg:590.69ms +step:12493/57344 train_time:7378949ms step_avg:590.65ms +step:12494/57344 train_time:7379195ms step_avg:590.62ms +step:12495/57344 train_time:7379742ms step_avg:590.62ms +grad accum step:3124/14336 +step:12496/57344 train_time:7381014ms step_avg:590.67ms +step:12497/57344 train_time:7381032ms step_avg:590.62ms +step:12498/57344 train_time:7381277ms step_avg:590.60ms +step:12499/57344 train_time:7381825ms step_avg:590.59ms +grad accum step:3125/14336 +step:12500/57344 train_time:7383162ms step_avg:590.65ms +step:12501/57344 train_time:7383179ms step_avg:590.61ms +step:12502/57344 train_time:7383426ms step_avg:590.58ms +step:12503/57344 train_time:7383983ms step_avg:590.58ms +grad accum step:3126/14336 +step:12504/57344 train_time:7385293ms step_avg:590.63ms +step:12505/57344 train_time:7385310ms step_avg:590.59ms +step:12506/57344 train_time:7385553ms step_avg:590.56ms +step:12507/57344 train_time:7386088ms step_avg:590.56ms +grad accum step:3127/14336 +step:12508/57344 train_time:7387367ms step_avg:590.61ms +step:12509/57344 train_time:7387384ms step_avg:590.57ms +step:12510/57344 train_time:7387630ms step_avg:590.54ms +step:12511/57344 train_time:7388169ms step_avg:590.53ms +grad accum step:3128/14336 +step:12512/57344 train_time:7389456ms step_avg:590.59ms +step:12513/57344 train_time:7389473ms step_avg:590.54ms +step:12514/57344 train_time:7389725ms step_avg:590.52ms +step:12515/57344 train_time:7390285ms step_avg:590.51ms +grad accum step:3129/14336 +step:12516/57344 train_time:7391563ms step_avg:590.57ms +step:12517/57344 train_time:7391580ms step_avg:590.52ms +step:12518/57344 train_time:7391832ms step_avg:590.50ms +step:12519/57344 train_time:7392395ms step_avg:590.49ms +grad accum step:3130/14336 +step:12520/57344 train_time:7393694ms step_avg:590.55ms +step:12521/57344 train_time:7393711ms step_avg:590.50ms +step:12522/57344 train_time:7393961ms step_avg:590.48ms +step:12523/57344 train_time:7394505ms step_avg:590.47ms +grad accum step:3131/14336 +step:12524/57344 train_time:7395807ms step_avg:590.53ms +step:12525/57344 train_time:7395824ms step_avg:590.48ms +step:12526/57344 train_time:7396068ms step_avg:590.46ms +step:12527/57344 train_time:7396611ms step_avg:590.45ms +grad accum step:3132/14336 +step:12528/57344 train_time:7397914ms step_avg:590.51ms +step:12529/57344 train_time:7397931ms step_avg:590.46ms +step:12530/57344 train_time:7398175ms step_avg:590.44ms +step:12531/57344 train_time:7398717ms step_avg:590.43ms +grad accum step:3133/14336 +step:12532/57344 train_time:7399991ms step_avg:590.49ms +step:12533/57344 train_time:7400008ms step_avg:590.44ms +step:12534/57344 train_time:7400256ms step_avg:590.41ms +step:12535/57344 train_time:7400804ms step_avg:590.41ms +grad accum step:3134/14336 +step:12536/57344 train_time:7402079ms step_avg:590.47ms +step:12537/57344 train_time:7402096ms step_avg:590.42ms +step:12538/57344 train_time:7402342ms step_avg:590.39ms +step:12539/57344 train_time:7402892ms step_avg:590.39ms +grad accum step:3135/14336 +step:12540/57344 train_time:7404186ms step_avg:590.45ms +step:12541/57344 train_time:7404203ms step_avg:590.40ms +step:12542/57344 train_time:7404448ms step_avg:590.37ms +step:12543/57344 train_time:7404992ms step_avg:590.37ms +grad accum step:3136/14336 +step:12544/57344 train_time:7406288ms step_avg:590.42ms +step:12544/57344 val_loss:6.784947 train_time:7406289ms step_avg:590.42ms +step:12545/57344 train_time:7406300ms step_avg:590.38ms +step:12546/57344 train_time:7406525ms step_avg:590.35ms +step:12547/57344 train_time:7407071ms step_avg:590.35ms +grad accum step:3137/14336 +step:12548/57344 train_time:7408350ms step_avg:590.40ms +step:12549/57344 train_time:7408367ms step_avg:590.36ms +step:12550/57344 train_time:7408616ms step_avg:590.33ms +step:12551/57344 train_time:7409162ms step_avg:590.32ms +grad accum step:3138/14336 +step:12552/57344 train_time:7410443ms step_avg:590.38ms +step:12553/57344 train_time:7410460ms step_avg:590.33ms +step:12554/57344 train_time:7410705ms step_avg:590.31ms +step:12555/57344 train_time:7411251ms step_avg:590.30ms +grad accum step:3139/14336 +step:12556/57344 train_time:7412571ms step_avg:590.36ms +step:12557/57344 train_time:7412588ms step_avg:590.32ms +step:12558/57344 train_time:7412831ms step_avg:590.29ms +step:12559/57344 train_time:7413370ms step_avg:590.28ms +grad accum step:3140/14336 +step:12560/57344 train_time:7414660ms step_avg:590.34ms +step:12561/57344 train_time:7414677ms step_avg:590.29ms +step:12562/57344 train_time:7414932ms step_avg:590.27ms +step:12563/57344 train_time:7415490ms step_avg:590.26ms +grad accum step:3141/14336 +step:12564/57344 train_time:7416791ms step_avg:590.32ms +step:12565/57344 train_time:7416807ms step_avg:590.28ms +step:12566/57344 train_time:7417054ms step_avg:590.25ms +step:12567/57344 train_time:7417614ms step_avg:590.25ms +grad accum step:3142/14336 +step:12568/57344 train_time:7418939ms step_avg:590.30ms +step:12569/57344 train_time:7418955ms step_avg:590.26ms +step:12570/57344 train_time:7419200ms step_avg:590.23ms +step:12571/57344 train_time:7419745ms step_avg:590.23ms +grad accum step:3143/14336 +step:12572/57344 train_time:7421052ms step_avg:590.28ms +step:12573/57344 train_time:7421068ms step_avg:590.24ms +step:12574/57344 train_time:7421315ms step_avg:590.21ms +step:12575/57344 train_time:7421862ms step_avg:590.21ms +grad accum step:3144/14336 +step:12576/57344 train_time:7423153ms step_avg:590.26ms +step:12577/57344 train_time:7423170ms step_avg:590.22ms +step:12578/57344 train_time:7423414ms step_avg:590.19ms +step:12579/57344 train_time:7423964ms step_avg:590.19ms +grad accum step:3145/14336 +step:12580/57344 train_time:7425265ms step_avg:590.24ms +step:12581/57344 train_time:7425282ms step_avg:590.20ms +step:12582/57344 train_time:7425532ms step_avg:590.17ms +step:12583/57344 train_time:7426089ms step_avg:590.17ms +grad accum step:3146/14336 +step:12584/57344 train_time:7427372ms step_avg:590.22ms +step:12585/57344 train_time:7427389ms step_avg:590.18ms +step:12586/57344 train_time:7427633ms step_avg:590.15ms +step:12587/57344 train_time:7428184ms step_avg:590.15ms +grad accum step:3147/14336 +step:12588/57344 train_time:7429476ms step_avg:590.20ms +step:12589/57344 train_time:7429493ms step_avg:590.16ms +step:12590/57344 train_time:7429740ms step_avg:590.13ms +step:12591/57344 train_time:7430285ms step_avg:590.13ms +grad accum step:3148/14336 +step:12592/57344 train_time:7431563ms step_avg:590.18ms +step:12593/57344 train_time:7431580ms step_avg:590.14ms +step:12594/57344 train_time:7431827ms step_avg:590.11ms +step:12595/57344 train_time:7432371ms step_avg:590.10ms +grad accum step:3149/14336 +step:12596/57344 train_time:7433653ms step_avg:590.16ms +step:12597/57344 train_time:7433670ms step_avg:590.11ms +step:12598/57344 train_time:7433915ms step_avg:590.09ms +step:12599/57344 train_time:7434452ms step_avg:590.08ms +grad accum step:3150/14336 +step:12600/57344 train_time:7435725ms step_avg:590.14ms +step:12601/57344 train_time:7435742ms step_avg:590.09ms +step:12602/57344 train_time:7435989ms step_avg:590.06ms +step:12603/57344 train_time:7436534ms step_avg:590.06ms +grad accum step:3151/14336 +step:12604/57344 train_time:7437815ms step_avg:590.12ms +step:12605/57344 train_time:7437832ms step_avg:590.07ms +step:12606/57344 train_time:7438076ms step_avg:590.04ms +step:12607/57344 train_time:7438619ms step_avg:590.04ms +grad accum step:3152/14336 +step:12608/57344 train_time:7439897ms step_avg:590.09ms +step:12608/57344 val_loss:6.778395 train_time:7439898ms step_avg:590.09ms +step:12609/57344 train_time:7439910ms step_avg:590.05ms +step:12610/57344 train_time:7440129ms step_avg:590.02ms +step:12611/57344 train_time:7440675ms step_avg:590.01ms +grad accum step:3153/14336 +step:12612/57344 train_time:7441969ms step_avg:590.07ms +step:12613/57344 train_time:7441987ms step_avg:590.03ms +step:12614/57344 train_time:7442233ms step_avg:590.00ms +step:12615/57344 train_time:7442781ms step_avg:589.99ms +grad accum step:3154/14336 +step:12616/57344 train_time:7444063ms step_avg:590.05ms +step:12617/57344 train_time:7444080ms step_avg:590.00ms +step:12618/57344 train_time:7444324ms step_avg:589.98ms +step:12619/57344 train_time:7444871ms step_avg:589.97ms +grad accum step:3155/14336 +step:12620/57344 train_time:7446154ms step_avg:590.03ms +step:12621/57344 train_time:7446171ms step_avg:589.98ms +step:12622/57344 train_time:7446419ms step_avg:589.96ms +step:12623/57344 train_time:7446964ms step_avg:589.95ms +grad accum step:3156/14336 +step:12624/57344 train_time:7448262ms step_avg:590.01ms +step:12625/57344 train_time:7448280ms step_avg:589.96ms +step:12626/57344 train_time:7448525ms step_avg:589.94ms +step:12627/57344 train_time:7449070ms step_avg:589.93ms +grad accum step:3157/14336 +step:12628/57344 train_time:7450379ms step_avg:589.99ms +step:12629/57344 train_time:7450396ms step_avg:589.94ms +step:12630/57344 train_time:7450649ms step_avg:589.92ms +step:12631/57344 train_time:7451203ms step_avg:589.91ms +grad accum step:3158/14336 +step:12632/57344 train_time:7452504ms step_avg:589.97ms +step:12633/57344 train_time:7452521ms step_avg:589.92ms +step:12634/57344 train_time:7452764ms step_avg:589.90ms +step:12635/57344 train_time:7453311ms step_avg:589.89ms +grad accum step:3159/14336 +step:12636/57344 train_time:7454593ms step_avg:589.95ms +step:12637/57344 train_time:7454610ms step_avg:589.90ms +step:12638/57344 train_time:7454857ms step_avg:589.88ms +step:12639/57344 train_time:7455403ms step_avg:589.87ms +grad accum step:3160/14336 +step:12640/57344 train_time:7456689ms step_avg:589.93ms +step:12641/57344 train_time:7456706ms step_avg:589.88ms +step:12642/57344 train_time:7456957ms step_avg:589.86ms +step:12643/57344 train_time:7457514ms step_avg:589.85ms +grad accum step:3161/14336 +step:12644/57344 train_time:7458831ms step_avg:589.91ms +step:12645/57344 train_time:7458848ms step_avg:589.87ms +step:12646/57344 train_time:7459093ms step_avg:589.84ms +step:12647/57344 train_time:7459638ms step_avg:589.83ms +grad accum step:3162/14336 +step:12648/57344 train_time:7460939ms step_avg:589.89ms +step:12649/57344 train_time:7460955ms step_avg:589.85ms +step:12650/57344 train_time:7461205ms step_avg:589.82ms +step:12651/57344 train_time:7461753ms step_avg:589.82ms +grad accum step:3163/14336 +step:12652/57344 train_time:7463036ms step_avg:589.87ms +step:12653/57344 train_time:7463054ms step_avg:589.82ms +step:12654/57344 train_time:7463298ms step_avg:589.80ms +step:12655/57344 train_time:7463843ms step_avg:589.79ms +grad accum step:3164/14336 +step:12656/57344 train_time:7465146ms step_avg:589.85ms +step:12657/57344 train_time:7465162ms step_avg:589.81ms +step:12658/57344 train_time:7465406ms step_avg:589.78ms +step:12659/57344 train_time:7465943ms step_avg:589.77ms +grad accum step:3165/14336 +step:12660/57344 train_time:7467227ms step_avg:589.83ms +step:12661/57344 train_time:7467243ms step_avg:589.78ms +step:12662/57344 train_time:7467490ms step_avg:589.76ms +step:12663/57344 train_time:7468032ms step_avg:589.75ms +grad accum step:3166/14336 +step:12664/57344 train_time:7469325ms step_avg:589.81ms +step:12665/57344 train_time:7469343ms step_avg:589.76ms +step:12666/57344 train_time:7469589ms step_avg:589.74ms +step:12667/57344 train_time:7470141ms step_avg:589.73ms +grad accum step:3167/14336 +step:12668/57344 train_time:7471435ms step_avg:589.79ms +step:12669/57344 train_time:7471452ms step_avg:589.74ms +step:12670/57344 train_time:7471694ms step_avg:589.72ms +step:12671/57344 train_time:7472240ms step_avg:589.71ms +grad accum step:3168/14336 +step:12672/57344 train_time:7473535ms step_avg:589.77ms +step:12672/57344 val_loss:6.777520 train_time:7473535ms step_avg:589.77ms +step:12673/57344 train_time:7473547ms step_avg:589.72ms +step:12674/57344 train_time:7473778ms step_avg:589.69ms +step:12675/57344 train_time:7474342ms step_avg:589.69ms +grad accum step:3169/14336 +step:12676/57344 train_time:7475627ms step_avg:589.75ms +step:12677/57344 train_time:7475644ms step_avg:589.70ms +step:12678/57344 train_time:7475893ms step_avg:589.67ms +step:12679/57344 train_time:7476449ms step_avg:589.67ms +grad accum step:3170/14336 +step:12680/57344 train_time:7477742ms step_avg:589.73ms +step:12681/57344 train_time:7477759ms step_avg:589.68ms +step:12682/57344 train_time:7478005ms step_avg:589.66ms +step:12683/57344 train_time:7478548ms step_avg:589.65ms +grad accum step:3171/14336 +step:12684/57344 train_time:7479848ms step_avg:589.71ms +step:12685/57344 train_time:7479865ms step_avg:589.66ms +step:12686/57344 train_time:7480109ms step_avg:589.63ms +step:12687/57344 train_time:7480654ms step_avg:589.63ms +grad accum step:3172/14336 +step:12688/57344 train_time:7481942ms step_avg:589.69ms +step:12689/57344 train_time:7481959ms step_avg:589.64ms +step:12690/57344 train_time:7482205ms step_avg:589.61ms +step:12691/57344 train_time:7482748ms step_avg:589.61ms +grad accum step:3173/14336 +step:12692/57344 train_time:7484030ms step_avg:589.67ms +step:12693/57344 train_time:7484047ms step_avg:589.62ms +step:12694/57344 train_time:7484294ms step_avg:589.59ms +step:12695/57344 train_time:7484840ms step_avg:589.59ms +grad accum step:3174/14336 +step:12696/57344 train_time:7486119ms step_avg:589.64ms +step:12697/57344 train_time:7486136ms step_avg:589.60ms +step:12698/57344 train_time:7486385ms step_avg:589.57ms +step:12699/57344 train_time:7486927ms step_avg:589.57ms +grad accum step:3175/14336 +step:12700/57344 train_time:7488220ms step_avg:589.62ms +step:12701/57344 train_time:7488237ms step_avg:589.58ms +step:12702/57344 train_time:7488483ms step_avg:589.55ms +step:12703/57344 train_time:7489031ms step_avg:589.55ms +grad accum step:3176/14336 +step:12704/57344 train_time:7490318ms step_avg:589.60ms +step:12705/57344 train_time:7490335ms step_avg:589.56ms +step:12706/57344 train_time:7490580ms step_avg:589.53ms +step:12707/57344 train_time:7491117ms step_avg:589.53ms +grad accum step:3177/14336 +step:12708/57344 train_time:7492390ms step_avg:589.58ms +step:12709/57344 train_time:7492408ms step_avg:589.54ms +step:12710/57344 train_time:7492662ms step_avg:589.51ms +step:12711/57344 train_time:7493219ms step_avg:589.51ms +grad accum step:3178/14336 +step:12712/57344 train_time:7494498ms step_avg:589.56ms +step:12713/57344 train_time:7494515ms step_avg:589.52ms +step:12714/57344 train_time:7494760ms step_avg:589.49ms +step:12715/57344 train_time:7495302ms step_avg:589.49ms +grad accum step:3179/14336 +step:12716/57344 train_time:7496586ms step_avg:589.54ms +step:12717/57344 train_time:7496602ms step_avg:589.49ms +step:12718/57344 train_time:7496852ms step_avg:589.47ms +step:12719/57344 train_time:7497393ms step_avg:589.46ms +grad accum step:3180/14336 +step:12720/57344 train_time:7498676ms step_avg:589.52ms +step:12721/57344 train_time:7498693ms step_avg:589.47ms +step:12722/57344 train_time:7498940ms step_avg:589.45ms +step:12723/57344 train_time:7499487ms step_avg:589.44ms +grad accum step:3181/14336 +step:12724/57344 train_time:7500775ms step_avg:589.50ms +step:12725/57344 train_time:7500792ms step_avg:589.45ms +step:12726/57344 train_time:7501037ms step_avg:589.43ms +step:12727/57344 train_time:7501575ms step_avg:589.42ms +grad accum step:3182/14336 +step:12728/57344 train_time:7502872ms step_avg:589.48ms +step:12729/57344 train_time:7502889ms step_avg:589.43ms +step:12730/57344 train_time:7503131ms step_avg:589.41ms +step:12731/57344 train_time:7503670ms step_avg:589.40ms +grad accum step:3183/14336 +step:12732/57344 train_time:7508882ms step_avg:589.76ms +step:12733/57344 train_time:7508894ms step_avg:589.72ms +step:12734/57344 train_time:7509167ms step_avg:589.69ms +step:12735/57344 train_time:7509711ms step_avg:589.69ms +grad accum step:3184/14336 +step:12736/57344 train_time:7510995ms step_avg:589.75ms +step:12736/57344 val_loss:6.763353 train_time:7510995ms step_avg:589.75ms +step:12737/57344 train_time:7511007ms step_avg:589.70ms +step:12738/57344 train_time:7511295ms step_avg:589.68ms +step:12739/57344 train_time:7511837ms step_avg:589.67ms +grad accum step:3185/14336 +step:12740/57344 train_time:7513116ms step_avg:589.73ms +step:12741/57344 train_time:7513133ms step_avg:589.68ms +step:12742/57344 train_time:7513372ms step_avg:589.65ms +step:12743/57344 train_time:7513899ms step_avg:589.65ms +grad accum step:3186/14336 +step:12744/57344 train_time:7515207ms step_avg:589.71ms +step:12745/57344 train_time:7515224ms step_avg:589.66ms +step:12746/57344 train_time:7515469ms step_avg:589.63ms +step:12747/57344 train_time:7516011ms step_avg:589.63ms +grad accum step:3187/14336 +step:12748/57344 train_time:7517289ms step_avg:589.68ms +step:12749/57344 train_time:7517306ms step_avg:589.64ms +step:12750/57344 train_time:7517549ms step_avg:589.61ms +step:12751/57344 train_time:7518091ms step_avg:589.61ms +grad accum step:3188/14336 +step:12752/57344 train_time:7519393ms step_avg:589.66ms +step:12753/57344 train_time:7519410ms step_avg:589.62ms +step:12754/57344 train_time:7519655ms step_avg:589.59ms +step:12755/57344 train_time:7520200ms step_avg:589.59ms +grad accum step:3189/14336 +step:12756/57344 train_time:7521492ms step_avg:589.64ms +step:12757/57344 train_time:7521509ms step_avg:589.60ms +step:12758/57344 train_time:7521761ms step_avg:589.57ms +step:12759/57344 train_time:7522313ms step_avg:589.57ms +grad accum step:3190/14336 +step:12760/57344 train_time:7523579ms step_avg:589.62ms +step:12761/57344 train_time:7523596ms step_avg:589.58ms +step:12762/57344 train_time:7523841ms step_avg:589.55ms +step:12763/57344 train_time:7524389ms step_avg:589.55ms +grad accum step:3191/14336 +step:12764/57344 train_time:7525709ms step_avg:589.60ms +step:12765/57344 train_time:7525726ms step_avg:589.56ms +step:12766/57344 train_time:7525976ms step_avg:589.53ms +step:12767/57344 train_time:7526524ms step_avg:589.53ms +grad accum step:3192/14336 +step:12768/57344 train_time:7527826ms step_avg:589.59ms +step:12769/57344 train_time:7527843ms step_avg:589.54ms +step:12770/57344 train_time:7528092ms step_avg:589.51ms +step:12771/57344 train_time:7528637ms step_avg:589.51ms +grad accum step:3193/14336 +step:12772/57344 train_time:7529945ms step_avg:589.57ms +step:12773/57344 train_time:7529962ms step_avg:589.52ms +step:12774/57344 train_time:7530212ms step_avg:589.50ms +step:12775/57344 train_time:7530767ms step_avg:589.49ms +grad accum step:3194/14336 +step:12776/57344 train_time:7532055ms step_avg:589.55ms +step:12777/57344 train_time:7532072ms step_avg:589.50ms +step:12778/57344 train_time:7532314ms step_avg:589.48ms +step:12779/57344 train_time:7532859ms step_avg:589.47ms +grad accum step:3195/14336 +step:12780/57344 train_time:7534161ms step_avg:589.53ms +step:12781/57344 train_time:7534178ms step_avg:589.48ms +step:12782/57344 train_time:7534423ms step_avg:589.46ms +step:12783/57344 train_time:7534967ms step_avg:589.45ms +grad accum step:3196/14336 +step:12784/57344 train_time:7536241ms step_avg:589.51ms +step:12785/57344 train_time:7536258ms step_avg:589.46ms +step:12786/57344 train_time:7536504ms step_avg:589.43ms +step:12787/57344 train_time:7537049ms step_avg:589.43ms +grad accum step:3197/14336 +step:12788/57344 train_time:7538327ms step_avg:589.48ms +step:12789/57344 train_time:7538343ms step_avg:589.44ms +step:12790/57344 train_time:7538588ms step_avg:589.41ms +step:12791/57344 train_time:7539131ms step_avg:589.41ms +grad accum step:3198/14336 +step:12792/57344 train_time:7561110ms step_avg:591.08ms +step:12793/57344 train_time:7566208ms step_avg:591.43ms +step:12794/57344 train_time:7566466ms step_avg:591.41ms +step:12795/57344 train_time:7567007ms step_avg:591.40ms +grad accum step:3199/14336 +step:12796/57344 train_time:7568277ms step_avg:591.46ms +step:12797/57344 train_time:7568293ms step_avg:591.41ms +step:12798/57344 train_time:7568537ms step_avg:591.38ms +step:12799/57344 train_time:7569071ms step_avg:591.38ms +grad accum step:3200/14336 +step:12800/57344 train_time:7570352ms step_avg:591.43ms +step:12800/57344 val_loss:6.765983 train_time:7570352ms step_avg:591.43ms +step:12801/57344 train_time:7570364ms step_avg:591.39ms +step:12802/57344 train_time:7570589ms step_avg:591.36ms +step:12803/57344 train_time:7571139ms step_avg:591.36ms +grad accum step:3201/14336 +step:12804/57344 train_time:7572444ms step_avg:591.41ms +step:12805/57344 train_time:7572460ms step_avg:591.37ms +step:12806/57344 train_time:7572709ms step_avg:591.34ms +step:12807/57344 train_time:7573258ms step_avg:591.34ms +grad accum step:3202/14336 +step:12808/57344 train_time:7574546ms step_avg:591.39ms +step:12809/57344 train_time:7574562ms step_avg:591.35ms +step:12810/57344 train_time:7574808ms step_avg:591.32ms +step:12811/57344 train_time:7575351ms step_avg:591.32ms +grad accum step:3203/14336 +step:12812/57344 train_time:7576625ms step_avg:591.37ms +step:12813/57344 train_time:7576642ms step_avg:591.32ms +step:12814/57344 train_time:7576888ms step_avg:591.30ms +step:12815/57344 train_time:7577436ms step_avg:591.29ms +grad accum step:3204/14336 +step:12816/57344 train_time:7578730ms step_avg:591.35ms +step:12817/57344 train_time:7578747ms step_avg:591.30ms +step:12818/57344 train_time:7578988ms step_avg:591.28ms +step:12819/57344 train_time:7579520ms step_avg:591.27ms +grad accum step:3205/14336 +step:12820/57344 train_time:7580816ms step_avg:591.33ms +step:12821/57344 train_time:7580832ms step_avg:591.28ms +step:12822/57344 train_time:7581078ms step_avg:591.26ms +step:12823/57344 train_time:7581626ms step_avg:591.25ms +grad accum step:3206/14336 +step:12824/57344 train_time:7582902ms step_avg:591.31ms +step:12825/57344 train_time:7582919ms step_avg:591.26ms +step:12826/57344 train_time:7583166ms step_avg:591.23ms +step:12827/57344 train_time:7583719ms step_avg:591.23ms +grad accum step:3207/14336 +step:12828/57344 train_time:7585010ms step_avg:591.29ms +step:12829/57344 train_time:7585027ms step_avg:591.24ms +step:12830/57344 train_time:7585271ms step_avg:591.21ms +step:12831/57344 train_time:7585817ms step_avg:591.21ms +grad accum step:3208/14336 +step:12832/57344 train_time:7587102ms step_avg:591.26ms +step:12833/57344 train_time:7587119ms step_avg:591.22ms +step:12834/57344 train_time:7587364ms step_avg:591.19ms +step:12835/57344 train_time:7587907ms step_avg:591.19ms +grad accum step:3209/14336 +step:12836/57344 train_time:7589227ms step_avg:591.25ms +step:12837/57344 train_time:7589244ms step_avg:591.20ms +step:12838/57344 train_time:7589489ms step_avg:591.17ms +step:12839/57344 train_time:7590044ms step_avg:591.17ms +grad accum step:3210/14336 +step:12840/57344 train_time:7593155ms step_avg:591.37ms +step:12841/57344 train_time:7593172ms step_avg:591.32ms +step:12842/57344 train_time:7593415ms step_avg:591.30ms +step:12843/57344 train_time:7593953ms step_avg:591.29ms +grad accum step:3211/14336 +step:12844/57344 train_time:7595240ms step_avg:591.35ms +step:12845/57344 train_time:7595257ms step_avg:591.30ms +step:12846/57344 train_time:7595501ms step_avg:591.27ms +step:12847/57344 train_time:7596041ms step_avg:591.27ms +grad accum step:3212/14336 +step:12848/57344 train_time:7597325ms step_avg:591.32ms +step:12849/57344 train_time:7597342ms step_avg:591.28ms +step:12850/57344 train_time:7597587ms step_avg:591.25ms +step:12851/57344 train_time:7598133ms step_avg:591.25ms +grad accum step:3213/14336 +step:12852/57344 train_time:7599415ms step_avg:591.30ms +step:12853/57344 train_time:7599432ms step_avg:591.26ms +step:12854/57344 train_time:7599677ms step_avg:591.23ms +step:12855/57344 train_time:7600226ms step_avg:591.23ms +grad accum step:3214/14336 +step:12856/57344 train_time:7601504ms step_avg:591.28ms +step:12857/57344 train_time:7601521ms step_avg:591.24ms +step:12858/57344 train_time:7601769ms step_avg:591.21ms +step:12859/57344 train_time:7602320ms step_avg:591.21ms +grad accum step:3215/14336 +step:12860/57344 train_time:7603613ms step_avg:591.26ms +step:12861/57344 train_time:7603630ms step_avg:591.22ms +step:12862/57344 train_time:7603874ms step_avg:591.19ms +step:12863/57344 train_time:7604421ms step_avg:591.19ms +grad accum step:3216/14336 +step:12864/57344 train_time:7605703ms step_avg:591.24ms +step:12864/57344 val_loss:6.761808 train_time:7605704ms step_avg:591.24ms +step:12865/57344 train_time:7605716ms step_avg:591.19ms +step:12866/57344 train_time:7605940ms step_avg:591.17ms +step:12867/57344 train_time:7606480ms step_avg:591.16ms +grad accum step:3217/14336 +step:12868/57344 train_time:7607756ms step_avg:591.22ms +step:12869/57344 train_time:7607773ms step_avg:591.17ms +step:12870/57344 train_time:7608017ms step_avg:591.14ms +step:12871/57344 train_time:7608565ms step_avg:591.14ms +grad accum step:3218/14336 +step:12872/57344 train_time:7609862ms step_avg:591.19ms +step:12873/57344 train_time:7609879ms step_avg:591.15ms +step:12874/57344 train_time:7610127ms step_avg:591.12ms +step:12875/57344 train_time:7610669ms step_avg:591.12ms +grad accum step:3219/14336 +step:12876/57344 train_time:7611969ms step_avg:591.17ms +step:12877/57344 train_time:7611986ms step_avg:591.13ms +step:12878/57344 train_time:7612230ms step_avg:591.10ms +step:12879/57344 train_time:7612771ms step_avg:591.10ms +grad accum step:3220/14336 +step:12880/57344 train_time:7614055ms step_avg:591.15ms +step:12881/57344 train_time:7614070ms step_avg:591.11ms +step:12882/57344 train_time:7614321ms step_avg:591.08ms +step:12883/57344 train_time:7614883ms step_avg:591.08ms +grad accum step:3221/14336 +step:12884/57344 train_time:7616154ms step_avg:591.13ms +step:12885/57344 train_time:7616171ms step_avg:591.09ms +step:12886/57344 train_time:7616419ms step_avg:591.06ms +step:12887/57344 train_time:7616968ms step_avg:591.06ms +grad accum step:3222/14336 +step:12888/57344 train_time:7618246ms step_avg:591.11ms +step:12889/57344 train_time:7618264ms step_avg:591.07ms +step:12890/57344 train_time:7618514ms step_avg:591.04ms +step:12891/57344 train_time:7619060ms step_avg:591.04ms +grad accum step:3223/14336 +step:12892/57344 train_time:7620356ms step_avg:591.09ms +step:12893/57344 train_time:7620373ms step_avg:591.05ms +step:12894/57344 train_time:7620621ms step_avg:591.02ms +step:12895/57344 train_time:7621168ms step_avg:591.02ms +grad accum step:3224/14336 +step:12896/57344 train_time:7622443ms step_avg:591.07ms +step:12897/57344 train_time:7622460ms step_avg:591.03ms +step:12898/57344 train_time:7622702ms step_avg:591.00ms +step:12899/57344 train_time:7623245ms step_avg:591.00ms +grad accum step:3225/14336 +step:12900/57344 train_time:7624546ms step_avg:591.05ms +step:12901/57344 train_time:7624563ms step_avg:591.01ms +step:12902/57344 train_time:7624810ms step_avg:590.98ms +step:12903/57344 train_time:7625355ms step_avg:590.98ms +grad accum step:3226/14336 +step:12904/57344 train_time:7626629ms step_avg:591.03ms +step:12905/57344 train_time:7626646ms step_avg:590.98ms +step:12906/57344 train_time:7626893ms step_avg:590.96ms +step:12907/57344 train_time:7627443ms step_avg:590.95ms +grad accum step:3227/14336 +step:12908/57344 train_time:7628724ms step_avg:591.01ms +step:12909/57344 train_time:7628741ms step_avg:590.96ms +step:12910/57344 train_time:7628989ms step_avg:590.94ms +step:12911/57344 train_time:7629534ms step_avg:590.93ms +grad accum step:3228/14336 +step:12912/57344 train_time:7630813ms step_avg:590.99ms +step:12913/57344 train_time:7630830ms step_avg:590.94ms +step:12914/57344 train_time:7631082ms step_avg:590.92ms +step:12915/57344 train_time:7631631ms step_avg:590.91ms +grad accum step:3229/14336 +step:12916/57344 train_time:7632904ms step_avg:590.96ms +step:12917/57344 train_time:7632920ms step_avg:590.92ms +step:12918/57344 train_time:7633170ms step_avg:590.89ms +step:12919/57344 train_time:7633721ms step_avg:590.89ms +grad accum step:3230/14336 +step:12920/57344 train_time:7635018ms step_avg:590.95ms +step:12921/57344 train_time:7635035ms step_avg:590.90ms +step:12922/57344 train_time:7635282ms step_avg:590.87ms +step:12923/57344 train_time:7635825ms step_avg:590.87ms +grad accum step:3231/14336 +step:12924/57344 train_time:7637108ms step_avg:590.92ms +step:12925/57344 train_time:7637125ms step_avg:590.88ms +step:12926/57344 train_time:7637381ms step_avg:590.85ms +step:12927/57344 train_time:7637944ms step_avg:590.85ms +grad accum step:3232/14336 +step:12928/57344 train_time:7639263ms step_avg:590.91ms +step:12928/57344 val_loss:6.779835 train_time:7639263ms step_avg:590.91ms +step:12929/57344 train_time:7639275ms step_avg:590.86ms +step:12930/57344 train_time:7639505ms step_avg:590.84ms +step:12931/57344 train_time:7640062ms step_avg:590.83ms +grad accum step:3233/14336 +step:12932/57344 train_time:7641378ms step_avg:590.89ms +step:12933/57344 train_time:7641395ms step_avg:590.84ms +step:12934/57344 train_time:7641650ms step_avg:590.82ms +step:12935/57344 train_time:7642211ms step_avg:590.82ms +grad accum step:3234/14336 +step:12936/57344 train_time:7643507ms step_avg:590.87ms +step:12937/57344 train_time:7643524ms step_avg:590.83ms +step:12938/57344 train_time:7643769ms step_avg:590.80ms +step:12939/57344 train_time:7644306ms step_avg:590.80ms +grad accum step:3235/14336 +step:12940/57344 train_time:7645611ms step_avg:590.85ms +step:12941/57344 train_time:7645627ms step_avg:590.81ms +step:12942/57344 train_time:7645872ms step_avg:590.78ms +step:12943/57344 train_time:7646413ms step_avg:590.78ms +grad accum step:3236/14336 +step:12944/57344 train_time:7647731ms step_avg:590.83ms +step:12945/57344 train_time:7647748ms step_avg:590.79ms +step:12946/57344 train_time:7647997ms step_avg:590.76ms +step:12947/57344 train_time:7648543ms step_avg:590.76ms +grad accum step:3237/14336 +step:12948/57344 train_time:7649838ms step_avg:590.81ms +step:12949/57344 train_time:7649855ms step_avg:590.77ms +step:12950/57344 train_time:7650104ms step_avg:590.74ms +step:12951/57344 train_time:7650648ms step_avg:590.74ms +grad accum step:3238/14336 +step:12952/57344 train_time:7651925ms step_avg:590.79ms +step:12953/57344 train_time:7651942ms step_avg:590.75ms +step:12954/57344 train_time:7652187ms step_avg:590.72ms +step:12955/57344 train_time:7652728ms step_avg:590.72ms +grad accum step:3239/14336 +step:12956/57344 train_time:7654010ms step_avg:590.77ms +step:12957/57344 train_time:7654027ms step_avg:590.73ms +step:12958/57344 train_time:7654275ms step_avg:590.70ms +step:12959/57344 train_time:7654825ms step_avg:590.70ms +grad accum step:3240/14336 +step:12960/57344 train_time:7656119ms step_avg:590.75ms +step:12961/57344 train_time:7656136ms step_avg:590.71ms +step:12962/57344 train_time:7656383ms step_avg:590.68ms +step:12963/57344 train_time:7656929ms step_avg:590.68ms +grad accum step:3241/14336 +step:12964/57344 train_time:7658207ms step_avg:590.73ms +step:12965/57344 train_time:7658224ms step_avg:590.68ms +step:12966/57344 train_time:7658472ms step_avg:590.66ms +step:12967/57344 train_time:7659021ms step_avg:590.65ms +grad accum step:3242/14336 +step:12968/57344 train_time:7660297ms step_avg:590.71ms +step:12969/57344 train_time:7660313ms step_avg:590.66ms +step:12970/57344 train_time:7660564ms step_avg:590.64ms +step:12971/57344 train_time:7661120ms step_avg:590.63ms +grad accum step:3243/14336 +step:12972/57344 train_time:7662413ms step_avg:590.69ms +step:12973/57344 train_time:7662430ms step_avg:590.64ms +step:12974/57344 train_time:7662678ms step_avg:590.62ms +step:12975/57344 train_time:7663226ms step_avg:590.61ms +grad accum step:3244/14336 +step:12976/57344 train_time:7664521ms step_avg:590.67ms +step:12977/57344 train_time:7664538ms step_avg:590.62ms +step:12978/57344 train_time:7664788ms step_avg:590.60ms +step:12979/57344 train_time:7665334ms step_avg:590.60ms +grad accum step:3245/14336 +step:12980/57344 train_time:7666611ms step_avg:590.65ms +step:12981/57344 train_time:7666628ms step_avg:590.60ms +step:12982/57344 train_time:7666876ms step_avg:590.58ms +step:12983/57344 train_time:7667420ms step_avg:590.57ms +grad accum step:3246/14336 +step:12984/57344 train_time:7668697ms step_avg:590.63ms +step:12985/57344 train_time:7668714ms step_avg:590.58ms +step:12986/57344 train_time:7668962ms step_avg:590.56ms +step:12987/57344 train_time:7669511ms step_avg:590.55ms +grad accum step:3247/14336 +step:12988/57344 train_time:7670809ms step_avg:590.61ms +step:12989/57344 train_time:7670826ms step_avg:590.56ms +step:12990/57344 train_time:7671070ms step_avg:590.54ms +step:12991/57344 train_time:7671610ms step_avg:590.53ms +grad accum step:3248/14336 +step:12992/57344 train_time:7672891ms step_avg:590.59ms +step:12992/57344 val_loss:6.764882 train_time:7672891ms step_avg:590.59ms +step:12993/57344 train_time:7672903ms step_avg:590.54ms +step:12994/57344 train_time:7673123ms step_avg:590.51ms +step:12995/57344 train_time:7673661ms step_avg:590.51ms +grad accum step:3249/14336 +step:12996/57344 train_time:7674956ms step_avg:590.56ms +step:12997/57344 train_time:7674973ms step_avg:590.52ms +step:12998/57344 train_time:7675220ms step_avg:590.49ms +step:12999/57344 train_time:7675766ms step_avg:590.49ms +grad accum step:3250/14336 +step:13000/57344 train_time:7677062ms step_avg:590.54ms +step:13001/57344 train_time:7677079ms step_avg:590.50ms +step:13002/57344 train_time:7677326ms step_avg:590.47ms +step:13003/57344 train_time:7677874ms step_avg:590.47ms +grad accum step:3251/14336 +step:13004/57344 train_time:7679146ms step_avg:590.52ms +step:13005/57344 train_time:7679163ms step_avg:590.48ms +step:13006/57344 train_time:7679409ms step_avg:590.45ms +step:13007/57344 train_time:7679954ms step_avg:590.45ms +grad accum step:3252/14336 +step:13008/57344 train_time:7681235ms step_avg:590.50ms +step:13009/57344 train_time:7681252ms step_avg:590.46ms +step:13010/57344 train_time:7681496ms step_avg:590.43ms +step:13011/57344 train_time:7682041ms step_avg:590.43ms +grad accum step:3253/14336 +step:13012/57344 train_time:7683339ms step_avg:590.48ms +step:13013/57344 train_time:7683356ms step_avg:590.44ms +step:13014/57344 train_time:7683605ms step_avg:590.41ms +step:13015/57344 train_time:7684155ms step_avg:590.41ms +grad accum step:3254/14336 +step:13016/57344 train_time:7685464ms step_avg:590.46ms +step:13017/57344 train_time:7685481ms step_avg:590.42ms +step:13018/57344 train_time:7685728ms step_avg:590.39ms +step:13019/57344 train_time:7686274ms step_avg:590.39ms +grad accum step:3255/14336 +step:13020/57344 train_time:7687580ms step_avg:590.44ms +step:13021/57344 train_time:7687597ms step_avg:590.40ms +step:13022/57344 train_time:7687841ms step_avg:590.37ms +step:13023/57344 train_time:7688395ms step_avg:590.37ms +grad accum step:3256/14336 +step:13024/57344 train_time:7689713ms step_avg:590.43ms +step:13025/57344 train_time:7689730ms step_avg:590.38ms +step:13026/57344 train_time:7689973ms step_avg:590.36ms +step:13027/57344 train_time:7690520ms step_avg:590.35ms +grad accum step:3257/14336 +step:13028/57344 train_time:7691800ms step_avg:590.41ms +step:13029/57344 train_time:7691817ms step_avg:590.36ms +step:13030/57344 train_time:7692065ms step_avg:590.33ms +step:13031/57344 train_time:7692609ms step_avg:590.33ms +grad accum step:3258/14336 +step:13032/57344 train_time:7693889ms step_avg:590.38ms +step:13033/57344 train_time:7693906ms step_avg:590.34ms +step:13034/57344 train_time:7694157ms step_avg:590.31ms +step:13035/57344 train_time:7694713ms step_avg:590.31ms +grad accum step:3259/14336 +step:13036/57344 train_time:7696029ms step_avg:590.37ms +step:13037/57344 train_time:7696046ms step_avg:590.32ms +step:13038/57344 train_time:7696292ms step_avg:590.30ms +step:13039/57344 train_time:7696832ms step_avg:590.29ms +grad accum step:3260/14336 +step:13040/57344 train_time:7698115ms step_avg:590.35ms +step:13041/57344 train_time:7698132ms step_avg:590.30ms +step:13042/57344 train_time:7698384ms step_avg:590.28ms +step:13043/57344 train_time:7698942ms step_avg:590.27ms +grad accum step:3261/14336 +step:13044/57344 train_time:7700217ms step_avg:590.33ms +step:13045/57344 train_time:7700234ms step_avg:590.28ms +step:13046/57344 train_time:7700480ms step_avg:590.26ms +step:13047/57344 train_time:7701024ms step_avg:590.25ms +grad accum step:3262/14336 +step:13048/57344 train_time:7702300ms step_avg:590.30ms +step:13049/57344 train_time:7702316ms step_avg:590.26ms +step:13050/57344 train_time:7702562ms step_avg:590.23ms +step:13051/57344 train_time:7703107ms step_avg:590.23ms +grad accum step:3263/14336 +step:13052/57344 train_time:7704457ms step_avg:590.29ms +step:13053/57344 train_time:7704469ms step_avg:590.25ms +step:13054/57344 train_time:7704686ms step_avg:590.22ms +step:13055/57344 train_time:7705232ms step_avg:590.21ms +grad accum step:3264/14336 +step:13056/57344 train_time:7706508ms step_avg:590.27ms +step:13056/57344 val_loss:6.759225 train_time:7706508ms step_avg:590.27ms +step:13057/57344 train_time:7706520ms step_avg:590.22ms +step:13058/57344 train_time:7706746ms step_avg:590.19ms +step:13059/57344 train_time:7707309ms step_avg:590.19ms +grad accum step:3265/14336 +step:13060/57344 train_time:7708629ms step_avg:590.25ms +step:13061/57344 train_time:7708646ms step_avg:590.20ms +step:13062/57344 train_time:7708891ms step_avg:590.18ms +step:13063/57344 train_time:7709427ms step_avg:590.17ms +grad accum step:3266/14336 +step:13064/57344 train_time:7710701ms step_avg:590.23ms +step:13065/57344 train_time:7710716ms step_avg:590.18ms +step:13066/57344 train_time:7710966ms step_avg:590.16ms +step:13067/57344 train_time:7711511ms step_avg:590.15ms +grad accum step:3267/14336 +step:13068/57344 train_time:7712806ms step_avg:590.21ms +step:13069/57344 train_time:7712823ms step_avg:590.16ms +step:13070/57344 train_time:7713069ms step_avg:590.14ms +step:13071/57344 train_time:7713614ms step_avg:590.13ms +grad accum step:3268/14336 +step:13072/57344 train_time:7714892ms step_avg:590.18ms +step:13073/57344 train_time:7714909ms step_avg:590.14ms +step:13074/57344 train_time:7715160ms step_avg:590.11ms +step:13075/57344 train_time:7715706ms step_avg:590.11ms +grad accum step:3269/14336 +step:13076/57344 train_time:7716984ms step_avg:590.16ms +step:13077/57344 train_time:7717002ms step_avg:590.12ms +step:13078/57344 train_time:7717247ms step_avg:590.09ms +step:13079/57344 train_time:7717788ms step_avg:590.09ms +grad accum step:3270/14336 +step:13080/57344 train_time:7719071ms step_avg:590.14ms +step:13081/57344 train_time:7719088ms step_avg:590.10ms +step:13082/57344 train_time:7719335ms step_avg:590.07ms +step:13083/57344 train_time:7719883ms step_avg:590.07ms +grad accum step:3271/14336 +step:13084/57344 train_time:7721171ms step_avg:590.12ms +step:13085/57344 train_time:7721187ms step_avg:590.08ms +step:13086/57344 train_time:7721446ms step_avg:590.05ms +step:13087/57344 train_time:7722025ms step_avg:590.05ms +grad accum step:3272/14336 +step:13088/57344 train_time:7723314ms step_avg:590.11ms +step:13089/57344 train_time:7723331ms step_avg:590.06ms +step:13090/57344 train_time:7723578ms step_avg:590.04ms +step:13091/57344 train_time:7724123ms step_avg:590.03ms +grad accum step:3273/14336 +step:13092/57344 train_time:7725419ms step_avg:590.09ms +step:13093/57344 train_time:7725437ms step_avg:590.04ms +step:13094/57344 train_time:7725680ms step_avg:590.02ms +step:13095/57344 train_time:7726215ms step_avg:590.01ms +grad accum step:3274/14336 +step:13096/57344 train_time:7727491ms step_avg:590.06ms +step:13097/57344 train_time:7727508ms step_avg:590.02ms +step:13098/57344 train_time:7727754ms step_avg:589.99ms +step:13099/57344 train_time:7728297ms step_avg:589.99ms +grad accum step:3275/14336 +step:13100/57344 train_time:7729579ms step_avg:590.04ms +step:13101/57344 train_time:7729596ms step_avg:590.00ms +step:13102/57344 train_time:7729841ms step_avg:589.97ms +step:13103/57344 train_time:7730385ms step_avg:589.97ms +grad accum step:3276/14336 +step:13104/57344 train_time:7731663ms step_avg:590.02ms +step:13105/57344 train_time:7731680ms step_avg:589.98ms +step:13106/57344 train_time:7731930ms step_avg:589.95ms +step:13107/57344 train_time:7732475ms step_avg:589.95ms +grad accum step:3277/14336 +step:13108/57344 train_time:7733869ms step_avg:590.01ms +step:13109/57344 train_time:7733881ms step_avg:589.97ms +step:13110/57344 train_time:7734100ms step_avg:589.94ms +step:13111/57344 train_time:7734640ms step_avg:589.94ms +grad accum step:3278/14336 +step:13112/57344 train_time:7735912ms step_avg:589.99ms +step:13113/57344 train_time:7735929ms step_avg:589.94ms +step:13114/57344 train_time:7736175ms step_avg:589.92ms +step:13115/57344 train_time:7736716ms step_avg:589.91ms +grad accum step:3279/14336 +step:13116/57344 train_time:7737994ms step_avg:589.97ms +step:13117/57344 train_time:7738011ms step_avg:589.92ms +step:13118/57344 train_time:7738259ms step_avg:589.90ms +step:13119/57344 train_time:7738809ms step_avg:589.89ms +grad accum step:3280/14336 +step:13120/57344 train_time:7740094ms step_avg:589.95ms +step:13120/57344 val_loss:6.753849 train_time:7740094ms step_avg:589.95ms +step:13121/57344 train_time:7740106ms step_avg:589.90ms +step:13122/57344 train_time:7740333ms step_avg:589.87ms +step:13123/57344 train_time:7740883ms step_avg:589.87ms +grad accum step:3281/14336 +step:13124/57344 train_time:7742159ms step_avg:589.92ms +step:13125/57344 train_time:7742176ms step_avg:589.88ms +step:13126/57344 train_time:7742420ms step_avg:589.85ms +step:13127/57344 train_time:7742961ms step_avg:589.85ms +grad accum step:3282/14336 +step:13128/57344 train_time:7744299ms step_avg:589.91ms +step:13129/57344 train_time:7744316ms step_avg:589.86ms +step:13130/57344 train_time:7744560ms step_avg:589.84ms +step:13131/57344 train_time:7745098ms step_avg:589.83ms +grad accum step:3283/14336 +step:13132/57344 train_time:7746377ms step_avg:589.89ms +step:13133/57344 train_time:7746394ms step_avg:589.84ms +step:13134/57344 train_time:7746644ms step_avg:589.82ms +step:13135/57344 train_time:7747191ms step_avg:589.81ms +grad accum step:3284/14336 +step:13136/57344 train_time:7748468ms step_avg:589.87ms +step:13137/57344 train_time:7748485ms step_avg:589.82ms +step:13138/57344 train_time:7748730ms step_avg:589.80ms +step:13139/57344 train_time:7749275ms step_avg:589.79ms +grad accum step:3285/14336 +step:13140/57344 train_time:7750572ms step_avg:589.85ms +step:13141/57344 train_time:7750589ms step_avg:589.80ms +step:13142/57344 train_time:7750840ms step_avg:589.78ms +step:13143/57344 train_time:7751390ms step_avg:589.77ms +grad accum step:3286/14336 +step:13144/57344 train_time:7752671ms step_avg:589.83ms +step:13145/57344 train_time:7752687ms step_avg:589.78ms +step:13146/57344 train_time:7752932ms step_avg:589.76ms +step:13147/57344 train_time:7753473ms step_avg:589.75ms +grad accum step:3287/14336 +step:13148/57344 train_time:7754755ms step_avg:589.80ms +step:13149/57344 train_time:7754773ms step_avg:589.76ms +step:13150/57344 train_time:7755020ms step_avg:589.74ms +step:13151/57344 train_time:7755570ms step_avg:589.73ms +grad accum step:3288/14336 +step:13152/57344 train_time:7756850ms step_avg:589.78ms +step:13153/57344 train_time:7756867ms step_avg:589.74ms +step:13154/57344 train_time:7757115ms step_avg:589.72ms +step:13155/57344 train_time:7757657ms step_avg:589.71ms +grad accum step:3289/14336 +step:13156/57344 train_time:7758931ms step_avg:589.76ms +step:13157/57344 train_time:7758948ms step_avg:589.72ms +step:13158/57344 train_time:7759193ms step_avg:589.69ms +step:13159/57344 train_time:7759738ms step_avg:589.69ms +grad accum step:3290/14336 +step:13160/57344 train_time:7761017ms step_avg:589.74ms +step:13161/57344 train_time:7761034ms step_avg:589.70ms +step:13162/57344 train_time:7761278ms step_avg:589.67ms +step:13163/57344 train_time:7761823ms step_avg:589.67ms +grad accum step:3291/14336 +step:13164/57344 train_time:7763098ms step_avg:589.72ms +step:13165/57344 train_time:7763116ms step_avg:589.68ms +step:13166/57344 train_time:7763361ms step_avg:589.65ms +step:13167/57344 train_time:7763904ms step_avg:589.65ms +grad accum step:3292/14336 +step:13168/57344 train_time:7765208ms step_avg:589.70ms +step:13169/57344 train_time:7765225ms step_avg:589.66ms +step:13170/57344 train_time:7765470ms step_avg:589.63ms +step:13171/57344 train_time:7766013ms step_avg:589.63ms +grad accum step:3293/14336 +step:13172/57344 train_time:7767309ms step_avg:589.68ms +step:13173/57344 train_time:7767326ms step_avg:589.64ms +step:13174/57344 train_time:7767568ms step_avg:589.61ms +step:13175/57344 train_time:7768125ms step_avg:589.61ms +grad accum step:3294/14336 +step:13176/57344 train_time:7769475ms step_avg:589.67ms +step:13177/57344 train_time:7769492ms step_avg:589.63ms +step:13178/57344 train_time:7769737ms step_avg:589.60ms +step:13179/57344 train_time:7770280ms step_avg:589.60ms +grad accum step:3295/14336 +step:13180/57344 train_time:7771554ms step_avg:589.65ms +step:13181/57344 train_time:7771571ms step_avg:589.60ms +step:13182/57344 train_time:7771817ms step_avg:589.58ms +step:13183/57344 train_time:7772366ms step_avg:589.57ms +grad accum step:3296/14336 +step:13184/57344 train_time:7773638ms step_avg:589.63ms +step:13184/57344 val_loss:6.770496 train_time:7773638ms step_avg:589.63ms +step:13185/57344 train_time:7773650ms step_avg:589.58ms +step:13186/57344 train_time:7773873ms step_avg:589.56ms +step:13187/57344 train_time:7774408ms step_avg:589.55ms +grad accum step:3297/14336 +step:13188/57344 train_time:7775700ms step_avg:589.60ms +step:13189/57344 train_time:7775717ms step_avg:589.56ms +step:13190/57344 train_time:7775962ms step_avg:589.53ms +step:13191/57344 train_time:7776500ms step_avg:589.53ms +grad accum step:3298/14336 +step:13192/57344 train_time:7777806ms step_avg:589.59ms +step:13193/57344 train_time:7777823ms step_avg:589.54ms +step:13194/57344 train_time:7778069ms step_avg:589.52ms +step:13195/57344 train_time:7778612ms step_avg:589.51ms +grad accum step:3299/14336 +step:13196/57344 train_time:7779893ms step_avg:589.56ms +step:13197/57344 train_time:7779910ms step_avg:589.52ms +step:13198/57344 train_time:7780156ms step_avg:589.50ms +step:13199/57344 train_time:7780704ms step_avg:589.49ms +grad accum step:3300/14336 +step:13200/57344 train_time:7782006ms step_avg:589.55ms +step:13201/57344 train_time:7782023ms step_avg:589.50ms +step:13202/57344 train_time:7782276ms step_avg:589.48ms +step:13203/57344 train_time:7782836ms step_avg:589.47ms +grad accum step:3301/14336 +step:13204/57344 train_time:7784111ms step_avg:589.53ms +step:13205/57344 train_time:7784128ms step_avg:589.48ms +step:13206/57344 train_time:7784373ms step_avg:589.46ms +step:13207/57344 train_time:7784920ms step_avg:589.45ms +grad accum step:3302/14336 +step:13208/57344 train_time:7786217ms step_avg:589.51ms +step:13209/57344 train_time:7786234ms step_avg:589.46ms +step:13210/57344 train_time:7786482ms step_avg:589.44ms +step:13211/57344 train_time:7787028ms step_avg:589.44ms +grad accum step:3303/14336 +step:13212/57344 train_time:7788301ms step_avg:589.49ms +step:13213/57344 train_time:7788318ms step_avg:589.44ms +step:13214/57344 train_time:7788567ms step_avg:589.42ms +step:13215/57344 train_time:7789116ms step_avg:589.41ms +grad accum step:3304/14336 +step:13216/57344 train_time:7790391ms step_avg:589.47ms +step:13217/57344 train_time:7790409ms step_avg:589.42ms +step:13218/57344 train_time:7790653ms step_avg:589.40ms +step:13219/57344 train_time:7791196ms step_avg:589.39ms +grad accum step:3305/14336 +step:13220/57344 train_time:7792475ms step_avg:589.45ms +step:13221/57344 train_time:7792492ms step_avg:589.40ms +step:13222/57344 train_time:7792735ms step_avg:589.38ms +step:13223/57344 train_time:7793282ms step_avg:589.37ms +grad accum step:3306/14336 +step:13224/57344 train_time:7794584ms step_avg:589.43ms +step:13225/57344 train_time:7794601ms step_avg:589.38ms +step:13226/57344 train_time:7794850ms step_avg:589.36ms +step:13227/57344 train_time:7795402ms step_avg:589.36ms +grad accum step:3307/14336 +step:13228/57344 train_time:7796703ms step_avg:589.41ms +step:13229/57344 train_time:7796720ms step_avg:589.37ms +step:13230/57344 train_time:7796966ms step_avg:589.34ms +step:13231/57344 train_time:7797510ms step_avg:589.34ms +grad accum step:3308/14336 +step:13232/57344 train_time:7798786ms step_avg:589.39ms +step:13233/57344 train_time:7798804ms step_avg:589.35ms +step:13234/57344 train_time:7799048ms step_avg:589.32ms +step:13235/57344 train_time:7799592ms step_avg:589.32ms +grad accum step:3309/14336 +step:13236/57344 train_time:7800876ms step_avg:589.37ms +step:13237/57344 train_time:7800893ms step_avg:589.32ms +step:13238/57344 train_time:7801137ms step_avg:589.30ms +step:13239/57344 train_time:7801677ms step_avg:589.30ms +grad accum step:3310/14336 +step:13240/57344 train_time:7802976ms step_avg:589.35ms +step:13241/57344 train_time:7802993ms step_avg:589.31ms +step:13242/57344 train_time:7803239ms step_avg:589.28ms +step:13243/57344 train_time:7803782ms step_avg:589.28ms +grad accum step:3311/14336 +step:13244/57344 train_time:7805069ms step_avg:589.33ms +step:13245/57344 train_time:7805086ms step_avg:589.29ms +step:13246/57344 train_time:7805333ms step_avg:589.26ms +step:13247/57344 train_time:7805890ms step_avg:589.26ms +grad accum step:3312/14336 +step:13248/57344 train_time:7807221ms step_avg:589.31ms +step:13248/57344 val_loss:6.761217 train_time:7807221ms step_avg:589.31ms +step:13249/57344 train_time:7807328ms step_avg:589.28ms +step:13250/57344 train_time:7807520ms step_avg:589.25ms +step:13251/57344 train_time:7808078ms step_avg:589.24ms +grad accum step:3313/14336 +step:13252/57344 train_time:7809600ms step_avg:589.31ms +step:13253/57344 train_time:7809612ms step_avg:589.27ms +step:13254/57344 train_time:7809829ms step_avg:589.24ms +step:13255/57344 train_time:7810375ms step_avg:589.24ms +grad accum step:3314/14336 +step:13256/57344 train_time:7811679ms step_avg:589.29ms +step:13257/57344 train_time:7811696ms step_avg:589.25ms +step:13258/57344 train_time:7811943ms step_avg:589.22ms +step:13259/57344 train_time:7812490ms step_avg:589.22ms +grad accum step:3315/14336 +step:13260/57344 train_time:7813769ms step_avg:589.27ms +step:13261/57344 train_time:7813786ms step_avg:589.23ms +step:13262/57344 train_time:7814035ms step_avg:589.20ms +step:13263/57344 train_time:7814585ms step_avg:589.20ms +grad accum step:3316/14336 +step:13264/57344 train_time:7815866ms step_avg:589.25ms +step:13265/57344 train_time:7815881ms step_avg:589.21ms +step:13266/57344 train_time:7816128ms step_avg:589.18ms +step:13267/57344 train_time:7816680ms step_avg:589.18ms +grad accum step:3317/14336 +step:13268/57344 train_time:7817973ms step_avg:589.24ms +step:13269/57344 train_time:7817989ms step_avg:589.19ms +step:13270/57344 train_time:7818233ms step_avg:589.17ms +step:13271/57344 train_time:7818780ms step_avg:589.16ms +grad accum step:3318/14336 +step:13272/57344 train_time:7820081ms step_avg:589.22ms +step:13273/57344 train_time:7820097ms step_avg:589.17ms +step:13274/57344 train_time:7820347ms step_avg:589.15ms +step:13275/57344 train_time:7820892ms step_avg:589.14ms +grad accum step:3319/14336 +step:13276/57344 train_time:7822182ms step_avg:589.20ms +step:13277/57344 train_time:7822198ms step_avg:589.15ms +step:13278/57344 train_time:7822442ms step_avg:589.13ms +step:13279/57344 train_time:7822989ms step_avg:589.12ms +grad accum step:3320/14336 +step:13280/57344 train_time:7824296ms step_avg:589.18ms +step:13281/57344 train_time:7824312ms step_avg:589.14ms +step:13282/57344 train_time:7824558ms step_avg:589.11ms +step:13283/57344 train_time:7825091ms step_avg:589.11ms +grad accum step:3321/14336 +step:13284/57344 train_time:7826364ms step_avg:589.16ms +step:13285/57344 train_time:7826381ms step_avg:589.11ms +step:13286/57344 train_time:7826628ms step_avg:589.09ms +step:13287/57344 train_time:7827179ms step_avg:589.09ms +grad accum step:3322/14336 +step:13288/57344 train_time:7828473ms step_avg:589.14ms +step:13289/57344 train_time:7828490ms step_avg:589.10ms +step:13290/57344 train_time:7828737ms step_avg:589.07ms +step:13291/57344 train_time:7829278ms step_avg:589.07ms +grad accum step:3323/14336 +step:13292/57344 train_time:7830568ms step_avg:589.12ms +step:13293/57344 train_time:7830585ms step_avg:589.08ms +step:13294/57344 train_time:7830835ms step_avg:589.05ms +step:13295/57344 train_time:7831384ms step_avg:589.05ms +grad accum step:3324/14336 +step:13296/57344 train_time:7832672ms step_avg:589.10ms +step:13297/57344 train_time:7832689ms step_avg:589.06ms +step:13298/57344 train_time:7832933ms step_avg:589.03ms +step:13299/57344 train_time:7833465ms step_avg:589.03ms +grad accum step:3325/14336 +step:13300/57344 train_time:7834756ms step_avg:589.08ms +step:13301/57344 train_time:7834773ms step_avg:589.04ms +step:13302/57344 train_time:7835028ms step_avg:589.01ms +step:13303/57344 train_time:7835595ms step_avg:589.01ms +grad accum step:3326/14336 +step:13304/57344 train_time:7836912ms step_avg:589.06ms +step:13305/57344 train_time:7836928ms step_avg:589.02ms +step:13306/57344 train_time:7837175ms step_avg:589.00ms +step:13307/57344 train_time:7837718ms step_avg:588.99ms +grad accum step:3327/14336 +step:13308/57344 train_time:7838989ms step_avg:589.04ms +step:13309/57344 train_time:7839006ms step_avg:589.00ms +step:13310/57344 train_time:7839252ms step_avg:588.97ms +step:13311/57344 train_time:7839800ms step_avg:588.97ms +grad accum step:3328/14336 +step:13312/57344 train_time:7841095ms step_avg:589.02ms +step:13312/57344 val_loss:6.756414 train_time:7841096ms step_avg:589.02ms +step:13313/57344 train_time:7841108ms step_avg:588.98ms +step:13314/57344 train_time:7841341ms step_avg:588.95ms +step:13315/57344 train_time:7841910ms step_avg:588.95ms +grad accum step:3329/14336 +step:13316/57344 train_time:7843209ms step_avg:589.01ms +step:13317/57344 train_time:7843226ms step_avg:588.96ms +step:13318/57344 train_time:7843472ms step_avg:588.94ms +step:13319/57344 train_time:7844016ms step_avg:588.93ms +grad accum step:3330/14336 +step:13320/57344 train_time:7845320ms step_avg:588.99ms +step:13321/57344 train_time:7845337ms step_avg:588.95ms +step:13322/57344 train_time:7845585ms step_avg:588.92ms +step:13323/57344 train_time:7846139ms step_avg:588.92ms +grad accum step:3331/14336 +step:13324/57344 train_time:7847445ms step_avg:588.97ms +step:13325/57344 train_time:7847461ms step_avg:588.93ms +step:13326/57344 train_time:7847705ms step_avg:588.90ms +step:13327/57344 train_time:7848243ms step_avg:588.90ms +grad accum step:3332/14336 +step:13328/57344 train_time:7849534ms step_avg:588.95ms +step:13329/57344 train_time:7849551ms step_avg:588.91ms +step:13330/57344 train_time:7849793ms step_avg:588.88ms +step:13331/57344 train_time:7850340ms step_avg:588.88ms +grad accum step:3333/14336 +step:13332/57344 train_time:7851638ms step_avg:588.93ms +step:13333/57344 train_time:7851655ms step_avg:588.89ms +step:13334/57344 train_time:7851900ms step_avg:588.86ms +step:13335/57344 train_time:7852445ms step_avg:588.86ms +grad accum step:3334/14336 +step:13336/57344 train_time:7853742ms step_avg:588.91ms +step:13337/57344 train_time:7853759ms step_avg:588.87ms +step:13338/57344 train_time:7854007ms step_avg:588.84ms +step:13339/57344 train_time:7854550ms step_avg:588.84ms +grad accum step:3335/14336 +step:13340/57344 train_time:7855837ms step_avg:588.89ms +step:13341/57344 train_time:7855854ms step_avg:588.85ms +step:13342/57344 train_time:7856099ms step_avg:588.82ms +step:13343/57344 train_time:7856633ms step_avg:588.82ms +grad accum step:3336/14336 +step:13344/57344 train_time:7857948ms step_avg:588.87ms +step:13345/57344 train_time:7857965ms step_avg:588.83ms +step:13346/57344 train_time:7858208ms step_avg:588.81ms +step:13347/57344 train_time:7858745ms step_avg:588.80ms +grad accum step:3337/14336 +step:13348/57344 train_time:7860016ms step_avg:588.85ms +step:13349/57344 train_time:7860033ms step_avg:588.81ms +step:13350/57344 train_time:7860280ms step_avg:588.79ms +step:13351/57344 train_time:7860828ms step_avg:588.78ms +grad accum step:3338/14336 +step:13352/57344 train_time:7862123ms step_avg:588.83ms +step:13353/57344 train_time:7862139ms step_avg:588.79ms +step:13354/57344 train_time:7862385ms step_avg:588.77ms +step:13355/57344 train_time:7862929ms step_avg:588.76ms +grad accum step:3339/14336 +step:13356/57344 train_time:7864206ms step_avg:588.81ms +step:13357/57344 train_time:7864222ms step_avg:588.77ms +step:13358/57344 train_time:7864466ms step_avg:588.75ms +step:13359/57344 train_time:7865022ms step_avg:588.74ms +grad accum step:3340/14336 +step:13360/57344 train_time:7866327ms step_avg:588.80ms +step:13361/57344 train_time:7866344ms step_avg:588.75ms +step:13362/57344 train_time:7866589ms step_avg:588.73ms +step:13363/57344 train_time:7867131ms step_avg:588.72ms +grad accum step:3341/14336 +step:13364/57344 train_time:7868414ms step_avg:588.78ms +step:13365/57344 train_time:7868431ms step_avg:588.73ms +step:13366/57344 train_time:7868680ms step_avg:588.71ms +step:13367/57344 train_time:7869227ms step_avg:588.71ms +grad accum step:3342/14336 +step:13368/57344 train_time:7870517ms step_avg:588.76ms +step:13369/57344 train_time:7870534ms step_avg:588.72ms +step:13370/57344 train_time:7870782ms step_avg:588.69ms +step:13371/57344 train_time:7871326ms step_avg:588.69ms +grad accum step:3343/14336 +step:13372/57344 train_time:7872607ms step_avg:588.74ms +step:13373/57344 train_time:7872624ms step_avg:588.70ms +step:13374/57344 train_time:7872869ms step_avg:588.67ms +step:13375/57344 train_time:7873409ms step_avg:588.67ms +grad accum step:3344/14336 +step:13376/57344 train_time:7874707ms step_avg:588.72ms +step:13376/57344 val_loss:6.762049 train_time:7874707ms step_avg:588.72ms +step:13377/57344 train_time:7874719ms step_avg:588.68ms +step:13378/57344 train_time:7874938ms step_avg:588.65ms +step:13379/57344 train_time:7875466ms step_avg:588.64ms +grad accum step:3345/14336 +step:13380/57344 train_time:7876745ms step_avg:588.70ms +step:13381/57344 train_time:7876762ms step_avg:588.65ms +step:13382/57344 train_time:7877004ms step_avg:588.63ms +step:13383/57344 train_time:7877536ms step_avg:588.62ms +grad accum step:3346/14336 +step:13384/57344 train_time:7878810ms step_avg:588.67ms +step:13385/57344 train_time:7878827ms step_avg:588.63ms +step:13386/57344 train_time:7879072ms step_avg:588.61ms +step:13387/57344 train_time:7879614ms step_avg:588.60ms +grad accum step:3347/14336 +step:13388/57344 train_time:7880939ms step_avg:588.66ms +step:13389/57344 train_time:7880956ms step_avg:588.61ms +step:13390/57344 train_time:7881210ms step_avg:588.59ms +step:13391/57344 train_time:7881776ms step_avg:588.59ms +grad accum step:3348/14336 +step:13392/57344 train_time:7883080ms step_avg:588.64ms +step:13393/57344 train_time:7883096ms step_avg:588.60ms +step:13394/57344 train_time:7883342ms step_avg:588.57ms +step:13395/57344 train_time:7883882ms step_avg:588.57ms +grad accum step:3349/14336 +step:13396/57344 train_time:7885182ms step_avg:588.62ms +step:13397/57344 train_time:7885198ms step_avg:588.58ms +step:13398/57344 train_time:7885444ms step_avg:588.55ms +step:13399/57344 train_time:7885985ms step_avg:588.55ms +grad accum step:3350/14336 +step:13400/57344 train_time:7887262ms step_avg:588.60ms +step:13401/57344 train_time:7887279ms step_avg:588.56ms +step:13402/57344 train_time:7887525ms step_avg:588.53ms +step:13403/57344 train_time:7888070ms step_avg:588.53ms +grad accum step:3351/14336 +step:13404/57344 train_time:7889383ms step_avg:588.58ms +step:13405/57344 train_time:7889400ms step_avg:588.54ms +step:13406/57344 train_time:7889649ms step_avg:588.52ms +step:13407/57344 train_time:7890196ms step_avg:588.51ms +grad accum step:3352/14336 +step:13408/57344 train_time:7891470ms step_avg:588.56ms +step:13409/57344 train_time:7891487ms step_avg:588.52ms +step:13410/57344 train_time:7891733ms step_avg:588.50ms +step:13411/57344 train_time:7892276ms step_avg:588.49ms +grad accum step:3353/14336 +step:13412/57344 train_time:7893553ms step_avg:588.54ms +step:13413/57344 train_time:7893570ms step_avg:588.50ms +step:13414/57344 train_time:7893821ms step_avg:588.48ms +step:13415/57344 train_time:7894379ms step_avg:588.47ms +grad accum step:3354/14336 +step:13416/57344 train_time:7895655ms step_avg:588.53ms +step:13417/57344 train_time:7895672ms step_avg:588.48ms +step:13418/57344 train_time:7895920ms step_avg:588.46ms +step:13419/57344 train_time:7896467ms step_avg:588.45ms +grad accum step:3355/14336 +step:13420/57344 train_time:7897784ms step_avg:588.51ms +step:13421/57344 train_time:7897801ms step_avg:588.47ms +step:13422/57344 train_time:7898043ms step_avg:588.44ms +step:13423/57344 train_time:7898582ms step_avg:588.44ms +grad accum step:3356/14336 +step:13424/57344 train_time:7899865ms step_avg:588.49ms +step:13425/57344 train_time:7899882ms step_avg:588.45ms +step:13426/57344 train_time:7900126ms step_avg:588.42ms +step:13427/57344 train_time:7900665ms step_avg:588.42ms +grad accum step:3357/14336 +step:13428/57344 train_time:7901943ms step_avg:588.47ms +step:13429/57344 train_time:7901960ms step_avg:588.43ms +step:13430/57344 train_time:7902210ms step_avg:588.40ms +step:13431/57344 train_time:7902757ms step_avg:588.40ms +grad accum step:3358/14336 +step:13432/57344 train_time:7904049ms step_avg:588.45ms +step:13433/57344 train_time:7904065ms step_avg:588.41ms +step:13434/57344 train_time:7904308ms step_avg:588.38ms +step:13435/57344 train_time:7904850ms step_avg:588.38ms +grad accum step:3359/14336 +step:13436/57344 train_time:7906132ms step_avg:588.43ms +step:13437/57344 train_time:7906148ms step_avg:588.39ms +step:13438/57344 train_time:7906392ms step_avg:588.36ms +step:13439/57344 train_time:7906935ms step_avg:588.36ms +grad accum step:3360/14336 +step:13440/57344 train_time:7908216ms step_avg:588.41ms +step:13440/57344 val_loss:6.750922 train_time:7908217ms step_avg:588.41ms +step:13441/57344 train_time:7908229ms step_avg:588.37ms +step:13442/57344 train_time:7908455ms step_avg:588.34ms +step:13443/57344 train_time:7909005ms step_avg:588.34ms +grad accum step:3361/14336 +step:13444/57344 train_time:7910278ms step_avg:588.39ms +step:13445/57344 train_time:7910295ms step_avg:588.34ms +step:13446/57344 train_time:7910536ms step_avg:588.32ms +step:13447/57344 train_time:7911063ms step_avg:588.31ms +grad accum step:3362/14336 +step:13448/57344 train_time:7912342ms step_avg:588.37ms +step:13449/57344 train_time:7912359ms step_avg:588.32ms +step:13450/57344 train_time:7912605ms step_avg:588.30ms +step:13451/57344 train_time:7913153ms step_avg:588.29ms +grad accum step:3363/14336 +step:13452/57344 train_time:7914455ms step_avg:588.35ms +step:13453/57344 train_time:7914472ms step_avg:588.31ms +step:13454/57344 train_time:7914722ms step_avg:588.28ms +step:13455/57344 train_time:7915264ms step_avg:588.28ms +grad accum step:3364/14336 +step:13456/57344 train_time:7916562ms step_avg:588.33ms +step:13457/57344 train_time:7916579ms step_avg:588.29ms +step:13458/57344 train_time:7916824ms step_avg:588.26ms +step:13459/57344 train_time:7917360ms step_avg:588.26ms +grad accum step:3365/14336 +step:13460/57344 train_time:7918659ms step_avg:588.31ms +step:13461/57344 train_time:7918676ms step_avg:588.27ms +step:13462/57344 train_time:7918922ms step_avg:588.24ms +step:13463/57344 train_time:7919465ms step_avg:588.24ms +grad accum step:3366/14336 +step:13464/57344 train_time:7920739ms step_avg:588.29ms +step:13465/57344 train_time:7920756ms step_avg:588.25ms +step:13466/57344 train_time:7921007ms step_avg:588.22ms +step:13467/57344 train_time:7921567ms step_avg:588.22ms +grad accum step:3367/14336 +step:13468/57344 train_time:7922877ms step_avg:588.27ms +step:13469/57344 train_time:7922895ms step_avg:588.23ms +step:13470/57344 train_time:7923135ms step_avg:588.21ms +step:13471/57344 train_time:7923670ms step_avg:588.20ms +grad accum step:3368/14336 +step:13472/57344 train_time:7924983ms step_avg:588.26ms +step:13473/57344 train_time:7925000ms step_avg:588.21ms +step:13474/57344 train_time:7925243ms step_avg:588.19ms +step:13475/57344 train_time:7925778ms step_avg:588.18ms +grad accum step:3369/14336 +step:13476/57344 train_time:7927082ms step_avg:588.24ms +step:13477/57344 train_time:7927099ms step_avg:588.19ms +step:13478/57344 train_time:7927347ms step_avg:588.17ms +step:13479/57344 train_time:7927892ms step_avg:588.17ms +grad accum step:3370/14336 +step:13480/57344 train_time:7929195ms step_avg:588.22ms +step:13481/57344 train_time:7929212ms step_avg:588.18ms +step:13482/57344 train_time:7929452ms step_avg:588.15ms +step:13483/57344 train_time:7929982ms step_avg:588.15ms +grad accum step:3371/14336 +step:13484/57344 train_time:7931280ms step_avg:588.20ms +step:13485/57344 train_time:7931297ms step_avg:588.16ms +step:13486/57344 train_time:7931539ms step_avg:588.13ms +step:13487/57344 train_time:7932078ms step_avg:588.13ms +grad accum step:3372/14336 +step:13488/57344 train_time:7933358ms step_avg:588.18ms +step:13489/57344 train_time:7933375ms step_avg:588.14ms +step:13490/57344 train_time:7933619ms step_avg:588.11ms +step:13491/57344 train_time:7934165ms step_avg:588.11ms +grad accum step:3373/14336 +step:13492/57344 train_time:7935454ms step_avg:588.16ms +step:13493/57344 train_time:7935471ms step_avg:588.12ms +step:13494/57344 train_time:7935714ms step_avg:588.09ms +step:13495/57344 train_time:7936253ms step_avg:588.09ms +grad accum step:3374/14336 +step:13496/57344 train_time:7937545ms step_avg:588.14ms +step:13497/57344 train_time:7937562ms step_avg:588.10ms +step:13498/57344 train_time:7937807ms step_avg:588.07ms +step:13499/57344 train_time:7938349ms step_avg:588.07ms +grad accum step:3375/14336 +step:13500/57344 train_time:7939680ms step_avg:588.12ms +step:13501/57344 train_time:7939696ms step_avg:588.08ms +step:13502/57344 train_time:7939943ms step_avg:588.06ms +step:13503/57344 train_time:7940478ms step_avg:588.05ms +grad accum step:3376/14336 +step:13504/57344 train_time:7941775ms step_avg:588.11ms +step:13504/57344 val_loss:6.759320 train_time:7941775ms step_avg:588.11ms +step:13505/57344 train_time:7941787ms step_avg:588.06ms +step:13506/57344 train_time:7942009ms step_avg:588.04ms +step:13507/57344 train_time:7942547ms step_avg:588.03ms +grad accum step:3377/14336 +step:13508/57344 train_time:7943857ms step_avg:588.09ms +step:13509/57344 train_time:7943874ms step_avg:588.04ms +step:13510/57344 train_time:7944116ms step_avg:588.02ms +step:13511/57344 train_time:7944647ms step_avg:588.01ms +grad accum step:3378/14336 +step:13512/57344 train_time:7945926ms step_avg:588.06ms +step:13513/57344 train_time:7945943ms step_avg:588.02ms +step:13514/57344 train_time:7946189ms step_avg:588.00ms +step:13515/57344 train_time:7946736ms step_avg:587.99ms +grad accum step:3379/14336 +step:13516/57344 train_time:7948050ms step_avg:588.05ms +step:13517/57344 train_time:7948067ms step_avg:588.01ms +step:13518/57344 train_time:7948318ms step_avg:587.98ms +step:13519/57344 train_time:7948870ms step_avg:587.98ms +grad accum step:3380/14336 +step:13520/57344 train_time:7950151ms step_avg:588.03ms +step:13521/57344 train_time:7950168ms step_avg:587.99ms +step:13522/57344 train_time:7950419ms step_avg:587.96ms +step:13523/57344 train_time:7950974ms step_avg:587.96ms +grad accum step:3381/14336 +step:13524/57344 train_time:7952263ms step_avg:588.01ms +step:13525/57344 train_time:7952280ms step_avg:587.97ms +step:13526/57344 train_time:7952524ms step_avg:587.94ms +step:13527/57344 train_time:7953062ms step_avg:587.94ms +grad accum step:3382/14336 +step:13528/57344 train_time:7954341ms step_avg:587.99ms +step:13529/57344 train_time:7954358ms step_avg:587.95ms +step:13530/57344 train_time:7954603ms step_avg:587.92ms +step:13531/57344 train_time:7955145ms step_avg:587.92ms +grad accum step:3383/14336 +step:13532/57344 train_time:7956435ms step_avg:587.97ms +step:13533/57344 train_time:7956452ms step_avg:587.93ms +step:13534/57344 train_time:7956700ms step_avg:587.90ms +step:13535/57344 train_time:7957250ms step_avg:587.90ms +grad accum step:3384/14336 +step:13536/57344 train_time:7958549ms step_avg:587.95ms +step:13537/57344 train_time:7958566ms step_avg:587.91ms +step:13538/57344 train_time:7958826ms step_avg:587.89ms +step:13539/57344 train_time:7959400ms step_avg:587.89ms +grad accum step:3385/14336 +step:13540/57344 train_time:7960670ms step_avg:587.94ms +step:13541/57344 train_time:7960687ms step_avg:587.90ms +step:13542/57344 train_time:7960931ms step_avg:587.87ms +step:13543/57344 train_time:7961475ms step_avg:587.87ms +grad accum step:3386/14336 +step:13544/57344 train_time:7962750ms step_avg:587.92ms +step:13545/57344 train_time:7962767ms step_avg:587.88ms +step:13546/57344 train_time:7963015ms step_avg:587.85ms +step:13547/57344 train_time:7963559ms step_avg:587.85ms +grad accum step:3387/14336 +step:13548/57344 train_time:7964832ms step_avg:587.90ms +step:13549/57344 train_time:7964849ms step_avg:587.86ms +step:13550/57344 train_time:7965096ms step_avg:587.83ms +step:13551/57344 train_time:7965643ms step_avg:587.83ms +grad accum step:3388/14336 +step:13552/57344 train_time:7966914ms step_avg:587.88ms +step:13553/57344 train_time:7966931ms step_avg:587.84ms +step:13554/57344 train_time:7967178ms step_avg:587.81ms +step:13555/57344 train_time:7967725ms step_avg:587.81ms +grad accum step:3389/14336 +step:13556/57344 train_time:7969002ms step_avg:587.86ms +step:13557/57344 train_time:7969018ms step_avg:587.82ms +step:13558/57344 train_time:7969262ms step_avg:587.79ms +step:13559/57344 train_time:7969808ms step_avg:587.79ms +grad accum step:3390/14336 +step:13560/57344 train_time:7971106ms step_avg:587.84ms +step:13561/57344 train_time:7971123ms step_avg:587.80ms +step:13562/57344 train_time:7971375ms step_avg:587.77ms +step:13563/57344 train_time:7971934ms step_avg:587.77ms +grad accum step:3391/14336 +step:13564/57344 train_time:7973230ms step_avg:587.82ms +step:13565/57344 train_time:7973247ms step_avg:587.78ms +step:13566/57344 train_time:7973492ms step_avg:587.76ms +step:13567/57344 train_time:7974036ms step_avg:587.75ms +grad accum step:3392/14336 +step:13568/57344 train_time:7975314ms step_avg:587.80ms +step:13568/57344 val_loss:6.764692 train_time:7975314ms step_avg:587.80ms +step:13569/57344 train_time:7975614ms step_avg:587.78ms +step:13570/57344 train_time:7975661ms step_avg:587.74ms +step:13571/57344 train_time:7976206ms step_avg:587.74ms +grad accum step:3393/14336 +step:13572/57344 train_time:7977597ms step_avg:587.80ms +step:13573/57344 train_time:7977609ms step_avg:587.76ms +step:13574/57344 train_time:7977826ms step_avg:587.73ms +step:13575/57344 train_time:7978367ms step_avg:587.73ms +grad accum step:3394/14336 +step:13576/57344 train_time:7979660ms step_avg:587.78ms +step:13577/57344 train_time:7979678ms step_avg:587.73ms +step:13578/57344 train_time:7979920ms step_avg:587.71ms +step:13579/57344 train_time:7980460ms step_avg:587.71ms +grad accum step:3395/14336 +step:13580/57344 train_time:7981745ms step_avg:587.76ms +step:13581/57344 train_time:7981762ms step_avg:587.72ms +step:13582/57344 train_time:7982006ms step_avg:587.69ms +step:13583/57344 train_time:7982553ms step_avg:587.69ms +grad accum step:3396/14336 +step:13584/57344 train_time:7983844ms step_avg:587.74ms +step:13585/57344 train_time:7983861ms step_avg:587.70ms +step:13586/57344 train_time:7984113ms step_avg:587.67ms +step:13587/57344 train_time:7984669ms step_avg:587.67ms +grad accum step:3397/14336 +step:13588/57344 train_time:7985951ms step_avg:587.72ms +step:13589/57344 train_time:7985968ms step_avg:587.68ms +step:13590/57344 train_time:7986211ms step_avg:587.65ms +step:13591/57344 train_time:7986755ms step_avg:587.65ms +grad accum step:3398/14336 +step:13592/57344 train_time:7988035ms step_avg:587.70ms +step:13593/57344 train_time:7988052ms step_avg:587.66ms +step:13594/57344 train_time:7988297ms step_avg:587.63ms +step:13595/57344 train_time:7988838ms step_avg:587.63ms +grad accum step:3399/14336 +step:13596/57344 train_time:7990117ms step_avg:587.68ms +step:13597/57344 train_time:7990135ms step_avg:587.64ms +step:13598/57344 train_time:7990380ms step_avg:587.61ms +step:13599/57344 train_time:7990921ms step_avg:587.61ms +grad accum step:3400/14336 +step:13600/57344 train_time:7992211ms step_avg:587.66ms +step:13601/57344 train_time:7992228ms step_avg:587.62ms +step:13602/57344 train_time:7992470ms step_avg:587.60ms +step:13603/57344 train_time:7993008ms step_avg:587.59ms +grad accum step:3401/14336 +step:13604/57344 train_time:7994315ms step_avg:587.64ms +step:13605/57344 train_time:7994332ms step_avg:587.60ms +step:13606/57344 train_time:7994578ms step_avg:587.58ms +step:13607/57344 train_time:7995116ms step_avg:587.57ms +grad accum step:3402/14336 +step:13608/57344 train_time:7996409ms step_avg:587.63ms +step:13609/57344 train_time:7996426ms step_avg:587.58ms +step:13610/57344 train_time:7996670ms step_avg:587.56ms +step:13611/57344 train_time:7997215ms step_avg:587.56ms +grad accum step:3403/14336 +step:13612/57344 train_time:7998513ms step_avg:587.61ms +step:13613/57344 train_time:7998530ms step_avg:587.57ms +step:13614/57344 train_time:7998778ms step_avg:587.54ms +step:13615/57344 train_time:7999325ms step_avg:587.54ms +grad accum step:3404/14336 +step:13616/57344 train_time:8000598ms step_avg:587.59ms +step:13617/57344 train_time:8000615ms step_avg:587.55ms +step:13618/57344 train_time:8000866ms step_avg:587.52ms +step:13619/57344 train_time:8001425ms step_avg:587.52ms +grad accum step:3405/14336 +step:13620/57344 train_time:8002697ms step_avg:587.57ms +step:13621/57344 train_time:8002714ms step_avg:587.53ms +step:13622/57344 train_time:8002957ms step_avg:587.50ms +step:13623/57344 train_time:8003489ms step_avg:587.50ms +grad accum step:3406/14336 +step:13624/57344 train_time:8004766ms step_avg:587.55ms +step:13625/57344 train_time:8004783ms step_avg:587.51ms +step:13626/57344 train_time:8005029ms step_avg:587.48ms +step:13627/57344 train_time:8005579ms step_avg:587.48ms +grad accum step:3407/14336 +step:13628/57344 train_time:8006894ms step_avg:587.53ms +step:13629/57344 train_time:8006911ms step_avg:587.49ms +step:13630/57344 train_time:8007158ms step_avg:587.47ms +step:13631/57344 train_time:8007707ms step_avg:587.46ms +grad accum step:3408/14336 +step:13632/57344 train_time:8008979ms step_avg:587.51ms +step:13632/57344 val_loss:6.766222 train_time:8008979ms step_avg:587.51ms +step:13633/57344 train_time:8008991ms step_avg:587.47ms +step:13634/57344 train_time:8009214ms step_avg:587.44ms +step:13635/57344 train_time:8009752ms step_avg:587.44ms +grad accum step:3409/14336 +step:13636/57344 train_time:8011021ms step_avg:587.49ms +step:13637/57344 train_time:8011038ms step_avg:587.45ms +step:13638/57344 train_time:8011282ms step_avg:587.42ms +step:13639/57344 train_time:8011822ms step_avg:587.42ms +grad accum step:3410/14336 +step:13640/57344 train_time:8013087ms step_avg:587.47ms +step:13641/57344 train_time:8013104ms step_avg:587.43ms +step:13642/57344 train_time:8013349ms step_avg:587.40ms +step:13643/57344 train_time:8013894ms step_avg:587.40ms +grad accum step:3411/14336 +step:13644/57344 train_time:8015216ms step_avg:587.45ms +step:13645/57344 train_time:8015232ms step_avg:587.41ms +step:13646/57344 train_time:8015480ms step_avg:587.39ms +step:13647/57344 train_time:8016018ms step_avg:587.38ms +grad accum step:3412/14336 +step:13648/57344 train_time:8017328ms step_avg:587.44ms +step:13649/57344 train_time:8017345ms step_avg:587.39ms +step:13650/57344 train_time:8017593ms step_avg:587.37ms +step:13651/57344 train_time:8018142ms step_avg:587.37ms +grad accum step:3413/14336 +step:13652/57344 train_time:8019489ms step_avg:587.42ms +step:13653/57344 train_time:8019505ms step_avg:587.38ms +step:13654/57344 train_time:8019757ms step_avg:587.36ms +step:13655/57344 train_time:8020314ms step_avg:587.35ms +grad accum step:3414/14336 +step:13656/57344 train_time:8021609ms step_avg:587.41ms +step:13657/57344 train_time:8021626ms step_avg:587.36ms +step:13658/57344 train_time:8021872ms step_avg:587.34ms +step:13659/57344 train_time:8022405ms step_avg:587.33ms +grad accum step:3415/14336 +step:13660/57344 train_time:8023758ms step_avg:587.39ms +step:13661/57344 train_time:8023775ms step_avg:587.35ms +step:13662/57344 train_time:8024027ms step_avg:587.32ms +step:13663/57344 train_time:8024582ms step_avg:587.32ms +grad accum step:3416/14336 +step:13664/57344 train_time:8025860ms step_avg:587.37ms +step:13665/57344 train_time:8025877ms step_avg:587.33ms +step:13666/57344 train_time:8026121ms step_avg:587.31ms +step:13667/57344 train_time:8026656ms step_avg:587.30ms +grad accum step:3417/14336 +step:13668/57344 train_time:8027963ms step_avg:587.35ms +step:13669/57344 train_time:8027980ms step_avg:587.31ms +step:13670/57344 train_time:8028225ms step_avg:587.29ms +step:13671/57344 train_time:8028760ms step_avg:587.28ms +grad accum step:3418/14336 +step:13672/57344 train_time:8030027ms step_avg:587.33ms +step:13673/57344 train_time:8030044ms step_avg:587.29ms +step:13674/57344 train_time:8030289ms step_avg:587.27ms +step:13675/57344 train_time:8030833ms step_avg:587.26ms +grad accum step:3419/14336 +step:13676/57344 train_time:8032115ms step_avg:587.31ms +step:13677/57344 train_time:8032132ms step_avg:587.27ms +step:13678/57344 train_time:8032381ms step_avg:587.25ms +step:13679/57344 train_time:8032926ms step_avg:587.25ms +grad accum step:3420/14336 +step:13680/57344 train_time:8034207ms step_avg:587.30ms +step:13681/57344 train_time:8034223ms step_avg:587.25ms +step:13682/57344 train_time:8034470ms step_avg:587.23ms +step:13683/57344 train_time:8035009ms step_avg:587.23ms +grad accum step:3421/14336 +step:13684/57344 train_time:8036288ms step_avg:587.28ms +step:13685/57344 train_time:8036305ms step_avg:587.23ms +step:13686/57344 train_time:8036546ms step_avg:587.21ms +step:13687/57344 train_time:8037078ms step_avg:587.21ms +grad accum step:3422/14336 +step:13688/57344 train_time:8038376ms step_avg:587.26ms +step:13689/57344 train_time:8038393ms step_avg:587.22ms +step:13690/57344 train_time:8038642ms step_avg:587.19ms +step:13691/57344 train_time:8039188ms step_avg:587.19ms +grad accum step:3423/14336 +step:13692/57344 train_time:8040480ms step_avg:587.24ms +step:13693/57344 train_time:8040497ms step_avg:587.20ms +step:13694/57344 train_time:8040742ms step_avg:587.17ms +step:13695/57344 train_time:8041286ms step_avg:587.17ms +grad accum step:3424/14336 +step:13696/57344 train_time:8042564ms step_avg:587.22ms +step:13696/57344 val_loss:6.771569 train_time:8042565ms step_avg:587.22ms +step:13697/57344 train_time:8042576ms step_avg:587.18ms +step:13698/57344 train_time:8042797ms step_avg:587.15ms +step:13699/57344 train_time:8043332ms step_avg:587.15ms +grad accum step:3425/14336 +step:13700/57344 train_time:8044614ms step_avg:587.20ms +step:13701/57344 train_time:8044630ms step_avg:587.16ms +step:13702/57344 train_time:8044876ms step_avg:587.13ms +step:13703/57344 train_time:8045417ms step_avg:587.13ms +grad accum step:3426/14336 +step:13704/57344 train_time:8046697ms step_avg:587.18ms +step:13705/57344 train_time:8046713ms step_avg:587.14ms +step:13706/57344 train_time:8046960ms step_avg:587.11ms +step:13707/57344 train_time:8047504ms step_avg:587.11ms +grad accum step:3427/14336 +step:13708/57344 train_time:8048772ms step_avg:587.16ms +step:13709/57344 train_time:8048789ms step_avg:587.12ms +step:13710/57344 train_time:8049035ms step_avg:587.09ms +step:13711/57344 train_time:8049580ms step_avg:587.09ms +grad accum step:3428/14336 +step:13712/57344 train_time:8050859ms step_avg:587.14ms +step:13713/57344 train_time:8050876ms step_avg:587.10ms +step:13714/57344 train_time:8051121ms step_avg:587.07ms +step:13715/57344 train_time:8051667ms step_avg:587.07ms +grad accum step:3429/14336 +step:13716/57344 train_time:8052943ms step_avg:587.12ms +step:13717/57344 train_time:8052960ms step_avg:587.08ms +step:13718/57344 train_time:8053205ms step_avg:587.05ms +step:13719/57344 train_time:8053744ms step_avg:587.05ms +grad accum step:3430/14336 +step:13720/57344 train_time:8055044ms step_avg:587.10ms +step:13721/57344 train_time:8055062ms step_avg:587.06ms +step:13722/57344 train_time:8055310ms step_avg:587.04ms +step:13723/57344 train_time:8055860ms step_avg:587.03ms +grad accum step:3431/14336 +step:13724/57344 train_time:8057150ms step_avg:587.08ms +step:13725/57344 train_time:8057167ms step_avg:587.04ms +step:13726/57344 train_time:8057414ms step_avg:587.02ms +step:13727/57344 train_time:8057960ms step_avg:587.02ms +grad accum step:3432/14336 +step:13728/57344 train_time:8059233ms step_avg:587.07ms +step:13729/57344 train_time:8059250ms step_avg:587.02ms +step:13730/57344 train_time:8059500ms step_avg:587.00ms +step:13731/57344 train_time:8060060ms step_avg:587.00ms +grad accum step:3433/14336 +step:13732/57344 train_time:8061360ms step_avg:587.05ms +step:13733/57344 train_time:8061377ms step_avg:587.01ms +step:13734/57344 train_time:8061623ms step_avg:586.98ms +step:13735/57344 train_time:8062167ms step_avg:586.98ms +grad accum step:3434/14336 +step:13736/57344 train_time:8063446ms step_avg:587.03ms +step:13737/57344 train_time:8063463ms step_avg:586.99ms +step:13738/57344 train_time:8063708ms step_avg:586.96ms +step:13739/57344 train_time:8064250ms step_avg:586.96ms +grad accum step:3435/14336 +step:13740/57344 train_time:8065533ms step_avg:587.01ms +step:13741/57344 train_time:8065550ms step_avg:586.97ms +step:13742/57344 train_time:8065796ms step_avg:586.94ms +step:13743/57344 train_time:8066343ms step_avg:586.94ms +grad accum step:3436/14336 +step:13744/57344 train_time:8067617ms step_avg:586.99ms +step:13745/57344 train_time:8067634ms step_avg:586.95ms +step:13746/57344 train_time:8067879ms step_avg:586.93ms +step:13747/57344 train_time:8068420ms step_avg:586.92ms +grad accum step:3437/14336 +step:13748/57344 train_time:8069725ms step_avg:586.97ms +step:13749/57344 train_time:8069742ms step_avg:586.93ms +step:13750/57344 train_time:8069989ms step_avg:586.91ms +step:13751/57344 train_time:8070535ms step_avg:586.91ms +grad accum step:3438/14336 +step:13752/57344 train_time:8071837ms step_avg:586.96ms +step:13753/57344 train_time:8071854ms step_avg:586.92ms +step:13754/57344 train_time:8072101ms step_avg:586.89ms +step:13755/57344 train_time:8072642ms step_avg:586.89ms +grad accum step:3439/14336 +step:13756/57344 train_time:8073961ms step_avg:586.94ms +step:13757/57344 train_time:8073977ms step_avg:586.90ms +step:13758/57344 train_time:8074227ms step_avg:586.88ms +step:13759/57344 train_time:8074771ms step_avg:586.87ms +grad accum step:3440/14336 +step:13760/57344 train_time:8076044ms step_avg:586.92ms +step:13760/57344 val_loss:6.767988 train_time:8076045ms step_avg:586.92ms +step:13761/57344 train_time:8076057ms step_avg:586.88ms +step:13762/57344 train_time:8076279ms step_avg:586.85ms +step:13763/57344 train_time:8076824ms step_avg:586.85ms +grad accum step:3441/14336 +step:13764/57344 train_time:8078138ms step_avg:586.90ms +step:13765/57344 train_time:8078155ms step_avg:586.86ms +step:13766/57344 train_time:8078402ms step_avg:586.84ms +step:13767/57344 train_time:8078944ms step_avg:586.83ms +grad accum step:3442/14336 +step:13768/57344 train_time:8080224ms step_avg:586.88ms +step:13769/57344 train_time:8080241ms step_avg:586.84ms +step:13770/57344 train_time:8080489ms step_avg:586.82ms +step:13771/57344 train_time:8081032ms step_avg:586.82ms +grad accum step:3443/14336 +step:13772/57344 train_time:8082325ms step_avg:586.87ms +step:13773/57344 train_time:8082342ms step_avg:586.83ms +step:13774/57344 train_time:8082590ms step_avg:586.80ms +step:13775/57344 train_time:8083143ms step_avg:586.80ms +grad accum step:3444/14336 +step:13776/57344 train_time:8084444ms step_avg:586.85ms +step:13777/57344 train_time:8084461ms step_avg:586.81ms +step:13778/57344 train_time:8084708ms step_avg:586.78ms +step:13779/57344 train_time:8085253ms step_avg:586.78ms +grad accum step:3445/14336 +step:13780/57344 train_time:8086536ms step_avg:586.83ms +step:13781/57344 train_time:8086553ms step_avg:586.79ms +step:13782/57344 train_time:8086802ms step_avg:586.77ms +step:13783/57344 train_time:8087344ms step_avg:586.76ms +grad accum step:3446/14336 +step:13784/57344 train_time:8088614ms step_avg:586.81ms +step:13785/57344 train_time:8088631ms step_avg:586.77ms +step:13786/57344 train_time:8088876ms step_avg:586.75ms +step:13787/57344 train_time:8089420ms step_avg:586.74ms +grad accum step:3447/14336 +step:13788/57344 train_time:8090747ms step_avg:586.80ms +step:13789/57344 train_time:8090763ms step_avg:586.75ms +step:13790/57344 train_time:8091008ms step_avg:586.73ms +step:13791/57344 train_time:8091549ms step_avg:586.73ms +grad accum step:3448/14336 +step:13792/57344 train_time:8092843ms step_avg:586.78ms +step:13793/57344 train_time:8092860ms step_avg:586.74ms +step:13794/57344 train_time:8093105ms step_avg:586.71ms +step:13795/57344 train_time:8093651ms step_avg:586.71ms +grad accum step:3449/14336 +step:13796/57344 train_time:8094954ms step_avg:586.76ms +step:13797/57344 train_time:8094971ms step_avg:586.72ms +step:13798/57344 train_time:8095218ms step_avg:586.70ms +step:13799/57344 train_time:8095762ms step_avg:586.69ms +grad accum step:3450/14336 +step:13800/57344 train_time:8097055ms step_avg:586.74ms +step:13801/57344 train_time:8097072ms step_avg:586.70ms +step:13802/57344 train_time:8097318ms step_avg:586.68ms +step:13803/57344 train_time:8097862ms step_avg:586.67ms +grad accum step:3451/14336 +step:13804/57344 train_time:8099144ms step_avg:586.72ms +step:13805/57344 train_time:8099161ms step_avg:586.68ms +step:13806/57344 train_time:8099402ms step_avg:586.66ms +step:13807/57344 train_time:8099932ms step_avg:586.65ms +grad accum step:3452/14336 +step:13808/57344 train_time:8101214ms step_avg:586.70ms +step:13809/57344 train_time:8101231ms step_avg:586.66ms +step:13810/57344 train_time:8101477ms step_avg:586.64ms +step:13811/57344 train_time:8102021ms step_avg:586.64ms +grad accum step:3453/14336 +step:13812/57344 train_time:8103339ms step_avg:586.69ms +step:13813/57344 train_time:8103356ms step_avg:586.65ms +step:13814/57344 train_time:8103602ms step_avg:586.62ms +step:13815/57344 train_time:8104150ms step_avg:586.62ms +grad accum step:3454/14336 +step:13816/57344 train_time:8105421ms step_avg:586.67ms +step:13817/57344 train_time:8105438ms step_avg:586.63ms +step:13818/57344 train_time:8105684ms step_avg:586.60ms +step:13819/57344 train_time:8106234ms step_avg:586.60ms +grad accum step:3455/14336 +step:13820/57344 train_time:8107520ms step_avg:586.65ms +step:13821/57344 train_time:8107537ms step_avg:586.61ms +step:13822/57344 train_time:8107786ms step_avg:586.59ms +step:13823/57344 train_time:8108337ms step_avg:586.58ms +grad accum step:3456/14336 +step:13824/57344 train_time:8109634ms step_avg:586.63ms +step:13824/57344 val_loss:6.757222 train_time:8109635ms step_avg:586.63ms +step:13825/57344 train_time:8109647ms step_avg:586.59ms +step:13826/57344 train_time:8109872ms step_avg:586.57ms +step:13827/57344 train_time:8110416ms step_avg:586.56ms +grad accum step:3457/14336 +step:13828/57344 train_time:8111689ms step_avg:586.61ms +step:13829/57344 train_time:8111706ms step_avg:586.57ms +step:13830/57344 train_time:8111956ms step_avg:586.55ms +step:13831/57344 train_time:8112505ms step_avg:586.55ms +grad accum step:3458/14336 +step:13832/57344 train_time:8113789ms step_avg:586.60ms +step:13833/57344 train_time:8113806ms step_avg:586.55ms +step:13834/57344 train_time:8114052ms step_avg:586.53ms +step:13835/57344 train_time:8114601ms step_avg:586.53ms +grad accum step:3459/14336 +step:13836/57344 train_time:8115876ms step_avg:586.58ms +step:13837/57344 train_time:8115893ms step_avg:586.54ms +step:13838/57344 train_time:8116139ms step_avg:586.51ms +step:13839/57344 train_time:8116682ms step_avg:586.51ms +grad accum step:3460/14336 +step:13840/57344 train_time:8117962ms step_avg:586.56ms +step:13841/57344 train_time:8117978ms step_avg:586.52ms +step:13842/57344 train_time:8118220ms step_avg:586.49ms +step:13843/57344 train_time:8118752ms step_avg:586.49ms +grad accum step:3461/14336 +step:13844/57344 train_time:8120026ms step_avg:586.54ms +step:13845/57344 train_time:8120043ms step_avg:586.50ms +step:13846/57344 train_time:8120299ms step_avg:586.47ms +step:13847/57344 train_time:8120864ms step_avg:586.47ms +grad accum step:3462/14336 +step:13848/57344 train_time:8122160ms step_avg:586.52ms +step:13849/57344 train_time:8122177ms step_avg:586.48ms +step:13850/57344 train_time:8122422ms step_avg:586.46ms +step:13851/57344 train_time:8122959ms step_avg:586.45ms +grad accum step:3463/14336 +step:13852/57344 train_time:8124240ms step_avg:586.50ms +step:13853/57344 train_time:8124256ms step_avg:586.46ms +step:13854/57344 train_time:8124506ms step_avg:586.44ms +step:13855/57344 train_time:8125052ms step_avg:586.43ms +grad accum step:3464/14336 +step:13856/57344 train_time:8126323ms step_avg:586.48ms +step:13857/57344 train_time:8126340ms step_avg:586.44ms +step:13858/57344 train_time:8126586ms step_avg:586.42ms +step:13859/57344 train_time:8127137ms step_avg:586.42ms +grad accum step:3465/14336 +step:13860/57344 train_time:8128411ms step_avg:586.47ms +step:13861/57344 train_time:8128428ms step_avg:586.42ms +step:13862/57344 train_time:8128676ms step_avg:586.40ms +step:13863/57344 train_time:8129221ms step_avg:586.40ms +grad accum step:3466/14336 +step:13864/57344 train_time:8130519ms step_avg:586.45ms +step:13865/57344 train_time:8130542ms step_avg:586.41ms +step:13866/57344 train_time:8130768ms step_avg:586.38ms +step:13867/57344 train_time:8131318ms step_avg:586.38ms +grad accum step:3467/14336 +step:13868/57344 train_time:8132610ms step_avg:586.43ms +step:13869/57344 train_time:8132626ms step_avg:586.39ms +step:13870/57344 train_time:8132871ms step_avg:586.36ms +step:13871/57344 train_time:8133410ms step_avg:586.36ms +grad accum step:3468/14336 +step:13872/57344 train_time:8134705ms step_avg:586.41ms +step:13873/57344 train_time:8134722ms step_avg:586.37ms +step:13874/57344 train_time:8134967ms step_avg:586.35ms +step:13875/57344 train_time:8135512ms step_avg:586.34ms +grad accum step:3469/14336 +step:13876/57344 train_time:8136821ms step_avg:586.40ms +step:13877/57344 train_time:8136837ms step_avg:586.35ms +step:13878/57344 train_time:8137084ms step_avg:586.33ms +step:13879/57344 train_time:8137641ms step_avg:586.33ms +grad accum step:3470/14336 +step:13880/57344 train_time:8138972ms step_avg:586.38ms +step:13881/57344 train_time:8138989ms step_avg:586.34ms +step:13882/57344 train_time:8139231ms step_avg:586.32ms +step:13883/57344 train_time:8139777ms step_avg:586.31ms +grad accum step:3471/14336 +step:13884/57344 train_time:8141075ms step_avg:586.36ms +step:13885/57344 train_time:8141092ms step_avg:586.32ms +step:13886/57344 train_time:8141337ms step_avg:586.30ms +step:13887/57344 train_time:8141882ms step_avg:586.30ms +grad accum step:3472/14336 +step:13888/57344 train_time:8143163ms step_avg:586.35ms +step:13888/57344 val_loss:6.789327 train_time:8143163ms step_avg:586.35ms +step:13889/57344 train_time:8143175ms step_avg:586.30ms +step:13890/57344 train_time:8143400ms step_avg:586.28ms +step:13891/57344 train_time:8143948ms step_avg:586.28ms +grad accum step:3473/14336 +step:13892/57344 train_time:8145228ms step_avg:586.33ms +step:13893/57344 train_time:8145245ms step_avg:586.28ms +step:13894/57344 train_time:8145488ms step_avg:586.26ms +step:13895/57344 train_time:8146031ms step_avg:586.26ms +grad accum step:3474/14336 +step:13896/57344 train_time:8147326ms step_avg:586.31ms +step:13897/57344 train_time:8147343ms step_avg:586.27ms +step:13898/57344 train_time:8147586ms step_avg:586.24ms +step:13899/57344 train_time:8148115ms step_avg:586.24ms +grad accum step:3475/14336 +step:13900/57344 train_time:8149396ms step_avg:586.29ms +step:13901/57344 train_time:8149413ms step_avg:586.25ms +step:13902/57344 train_time:8149667ms step_avg:586.22ms +step:13903/57344 train_time:8150228ms step_avg:586.22ms +grad accum step:3476/14336 +step:13904/57344 train_time:8151523ms step_avg:586.27ms +step:13905/57344 train_time:8151540ms step_avg:586.23ms +step:13906/57344 train_time:8151786ms step_avg:586.21ms +step:13907/57344 train_time:8152331ms step_avg:586.20ms +grad accum step:3477/14336 +step:13908/57344 train_time:8153607ms step_avg:586.25ms +step:13909/57344 train_time:8153624ms step_avg:586.21ms +step:13910/57344 train_time:8153869ms step_avg:586.19ms +step:13911/57344 train_time:8154412ms step_avg:586.18ms +grad accum step:3478/14336 +step:13912/57344 train_time:8155695ms step_avg:586.23ms +step:13913/57344 train_time:8155711ms step_avg:586.19ms +step:13914/57344 train_time:8155960ms step_avg:586.17ms +step:13915/57344 train_time:8156504ms step_avg:586.17ms +grad accum step:3479/14336 +step:13916/57344 train_time:8157784ms step_avg:586.22ms +step:13917/57344 train_time:8157801ms step_avg:586.18ms +step:13918/57344 train_time:8158052ms step_avg:586.15ms +step:13919/57344 train_time:8158607ms step_avg:586.15ms +grad accum step:3480/14336 +step:13920/57344 train_time:8159907ms step_avg:586.20ms +step:13921/57344 train_time:8159923ms step_avg:586.16ms +step:13922/57344 train_time:8160170ms step_avg:586.13ms +step:13923/57344 train_time:8160713ms step_avg:586.13ms +grad accum step:3481/14336 +step:13924/57344 train_time:8162125ms step_avg:586.19ms +step:13925/57344 train_time:8162536ms step_avg:586.18ms +step:13926/57344 train_time:8162560ms step_avg:586.14ms +step:13927/57344 train_time:8163107ms step_avg:586.14ms +grad accum step:3482/14336 +step:13928/57344 train_time:8164410ms step_avg:586.19ms +step:13929/57344 train_time:8164426ms step_avg:586.15ms +step:13930/57344 train_time:8164670ms step_avg:586.12ms +step:13931/57344 train_time:8165206ms step_avg:586.12ms +grad accum step:3483/14336 +step:13932/57344 train_time:8166488ms step_avg:586.17ms +step:13933/57344 train_time:8166505ms step_avg:586.13ms +step:13934/57344 train_time:8166748ms step_avg:586.10ms +step:13935/57344 train_time:8167288ms step_avg:586.10ms +grad accum step:3484/14336 +step:13936/57344 train_time:8168577ms step_avg:586.15ms +step:13937/57344 train_time:8168594ms step_avg:586.11ms +step:13938/57344 train_time:8168841ms step_avg:586.08ms +step:13939/57344 train_time:8169385ms step_avg:586.08ms +grad accum step:3485/14336 +step:13940/57344 train_time:8170662ms step_avg:586.13ms +step:13941/57344 train_time:8170679ms step_avg:586.09ms +step:13942/57344 train_time:8170923ms step_avg:586.07ms +step:13943/57344 train_time:8171468ms step_avg:586.06ms +grad accum step:3486/14336 +step:13944/57344 train_time:8172755ms step_avg:586.11ms +step:13945/57344 train_time:8172769ms step_avg:586.07ms +step:13946/57344 train_time:8173014ms step_avg:586.05ms +step:13947/57344 train_time:8173558ms step_avg:586.04ms +grad accum step:3487/14336 +step:13948/57344 train_time:8174856ms step_avg:586.10ms +step:13949/57344 train_time:8174873ms step_avg:586.05ms +step:13950/57344 train_time:8175119ms step_avg:586.03ms +step:13951/57344 train_time:8175733ms step_avg:586.03ms +grad accum step:3488/14336 +step:13952/57344 train_time:8176984ms step_avg:586.08ms +step:13952/57344 val_loss:6.772260 train_time:8176985ms step_avg:586.08ms +step:13953/57344 train_time:8176997ms step_avg:586.04ms +step:13954/57344 train_time:8177219ms step_avg:586.01ms +step:13955/57344 train_time:8177762ms step_avg:586.01ms +grad accum step:3489/14336 +step:13956/57344 train_time:8179055ms step_avg:586.06ms +step:13957/57344 train_time:8179072ms step_avg:586.02ms +step:13958/57344 train_time:8179319ms step_avg:586.00ms +step:13959/57344 train_time:8179868ms step_avg:585.99ms +grad accum step:3490/14336 +step:13960/57344 train_time:8181150ms step_avg:586.04ms +step:13961/57344 train_time:8181167ms step_avg:586.00ms +step:13962/57344 train_time:8181415ms step_avg:585.98ms +step:13963/57344 train_time:8181959ms step_avg:585.97ms +grad accum step:3491/14336 +step:13964/57344 train_time:8183233ms step_avg:586.02ms +step:13965/57344 train_time:8183250ms step_avg:585.98ms +step:13966/57344 train_time:8183493ms step_avg:585.96ms +step:13967/57344 train_time:8184040ms step_avg:585.96ms +grad accum step:3492/14336 +step:13968/57344 train_time:8185324ms step_avg:586.01ms +step:13969/57344 train_time:8185341ms step_avg:585.96ms +step:13970/57344 train_time:8185597ms step_avg:585.94ms +step:13971/57344 train_time:8186169ms step_avg:585.94ms +grad accum step:3493/14336 +step:13972/57344 train_time:8187457ms step_avg:585.99ms +step:13973/57344 train_time:8187474ms step_avg:585.95ms +step:13974/57344 train_time:8187715ms step_avg:585.92ms +step:13975/57344 train_time:8188259ms step_avg:585.92ms +grad accum step:3494/14336 +step:13976/57344 train_time:8189534ms step_avg:585.97ms +step:13977/57344 train_time:8189551ms step_avg:585.93ms +step:13978/57344 train_time:8189800ms step_avg:585.91ms +step:13979/57344 train_time:8190346ms step_avg:585.90ms +grad accum step:3495/14336 +step:13980/57344 train_time:8191617ms step_avg:585.95ms +step:13981/57344 train_time:8191634ms step_avg:585.91ms +step:13982/57344 train_time:8191886ms step_avg:585.89ms +step:13983/57344 train_time:8192439ms step_avg:585.89ms +grad accum step:3496/14336 +step:13984/57344 train_time:8193723ms step_avg:585.94ms +step:13985/57344 train_time:8193740ms step_avg:585.89ms +step:13986/57344 train_time:8193983ms step_avg:585.87ms +step:13987/57344 train_time:8194522ms step_avg:585.87ms +grad accum step:3497/14336 +step:13988/57344 train_time:8195813ms step_avg:585.92ms +step:13989/57344 train_time:8195830ms step_avg:585.88ms +step:13990/57344 train_time:8196072ms step_avg:585.85ms +step:13991/57344 train_time:8196622ms step_avg:585.85ms +grad accum step:3498/14336 +step:13992/57344 train_time:8197917ms step_avg:585.90ms +step:13993/57344 train_time:8197934ms step_avg:585.86ms +step:13994/57344 train_time:8198177ms step_avg:585.84ms +step:13995/57344 train_time:8198721ms step_avg:585.83ms +grad accum step:3499/14336 +step:13996/57344 train_time:8200005ms step_avg:585.88ms +step:13997/57344 train_time:8200023ms step_avg:585.84ms +step:13998/57344 train_time:8200271ms step_avg:585.82ms +step:13999/57344 train_time:8200822ms step_avg:585.81ms +grad accum step:3500/14336 +step:14000/57344 train_time:8202113ms step_avg:585.87ms +step:14001/57344 train_time:8202130ms step_avg:585.82ms +step:14002/57344 train_time:8202373ms step_avg:585.80ms +step:14003/57344 train_time:8202903ms step_avg:585.80ms +grad accum step:3501/14336 +step:14004/57344 train_time:8204184ms step_avg:585.85ms +step:14005/57344 train_time:8204201ms step_avg:585.81ms +step:14006/57344 train_time:8204446ms step_avg:585.78ms +step:14007/57344 train_time:8204999ms step_avg:585.78ms +grad accum step:3502/14336 +step:14008/57344 train_time:8206295ms step_avg:585.83ms +step:14009/57344 train_time:8206312ms step_avg:585.79ms +step:14010/57344 train_time:8206557ms step_avg:585.76ms +step:14011/57344 train_time:8207105ms step_avg:585.76ms +grad accum step:3503/14336 +step:14012/57344 train_time:8208428ms step_avg:585.81ms +step:14013/57344 train_time:8208445ms step_avg:585.77ms +step:14014/57344 train_time:8208689ms step_avg:585.75ms +step:14015/57344 train_time:8209230ms step_avg:585.75ms +grad accum step:3504/14336 +step:14016/57344 train_time:8210509ms step_avg:585.80ms +step:14016/57344 val_loss:6.775136 train_time:8210509ms step_avg:585.80ms +step:14017/57344 train_time:8210521ms step_avg:585.75ms +step:14018/57344 train_time:8210746ms step_avg:585.73ms +step:14019/57344 train_time:8211284ms step_avg:585.73ms +grad accum step:3505/14336 +step:14020/57344 train_time:8212558ms step_avg:585.77ms +step:14021/57344 train_time:8212575ms step_avg:585.73ms +step:14022/57344 train_time:8212821ms step_avg:585.71ms +step:14023/57344 train_time:8213370ms step_avg:585.71ms +grad accum step:3506/14336 +step:14024/57344 train_time:8214686ms step_avg:585.76ms +step:14025/57344 train_time:8214703ms step_avg:585.72ms +step:14026/57344 train_time:8214947ms step_avg:585.69ms +step:14027/57344 train_time:8215774ms step_avg:585.71ms +grad accum step:3507/14336 +step:14028/57344 train_time:8216797ms step_avg:585.74ms +step:14029/57344 train_time:8216814ms step_avg:585.70ms +step:14030/57344 train_time:8217059ms step_avg:585.68ms +step:14031/57344 train_time:8217606ms step_avg:585.67ms +grad accum step:3508/14336 +step:14032/57344 train_time:8218908ms step_avg:585.73ms +step:14033/57344 train_time:8218925ms step_avg:585.69ms +step:14034/57344 train_time:8219173ms step_avg:585.66ms +step:14035/57344 train_time:8219722ms step_avg:585.66ms +grad accum step:3509/14336 +step:14036/57344 train_time:8221006ms step_avg:585.71ms +step:14037/57344 train_time:8221023ms step_avg:585.67ms +step:14038/57344 train_time:8221264ms step_avg:585.64ms +step:14039/57344 train_time:8221805ms step_avg:585.64ms +grad accum step:3510/14336 +step:14040/57344 train_time:8223084ms step_avg:585.69ms +step:14041/57344 train_time:8223101ms step_avg:585.65ms +step:14042/57344 train_time:8223346ms step_avg:585.62ms +step:14043/57344 train_time:8223891ms step_avg:585.62ms +grad accum step:3511/14336 +step:14044/57344 train_time:8225175ms step_avg:585.67ms +step:14045/57344 train_time:8225192ms step_avg:585.63ms +step:14046/57344 train_time:8225440ms step_avg:585.61ms +step:14047/57344 train_time:8225981ms step_avg:585.60ms +grad accum step:3512/14336 +step:14048/57344 train_time:8227258ms step_avg:585.65ms +step:14049/57344 train_time:8227275ms step_avg:585.61ms +step:14050/57344 train_time:8227525ms step_avg:585.59ms +step:14051/57344 train_time:8228079ms step_avg:585.59ms +grad accum step:3513/14336 +step:14052/57344 train_time:8229371ms step_avg:585.64ms +step:14053/57344 train_time:8229387ms step_avg:585.60ms +step:14054/57344 train_time:8229636ms step_avg:585.57ms +step:14055/57344 train_time:8230180ms step_avg:585.57ms +grad accum step:3514/14336 +step:14056/57344 train_time:8231456ms step_avg:585.62ms +step:14057/57344 train_time:8231473ms step_avg:585.58ms +step:14058/57344 train_time:8231721ms step_avg:585.55ms +step:14059/57344 train_time:8232270ms step_avg:585.55ms +grad accum step:3515/14336 +step:14060/57344 train_time:8233551ms step_avg:585.60ms +step:14061/57344 train_time:8233567ms step_avg:585.56ms +step:14062/57344 train_time:8233811ms step_avg:585.54ms +step:14063/57344 train_time:8234355ms step_avg:585.53ms +grad accum step:3516/14336 +step:14064/57344 train_time:8235656ms step_avg:585.58ms +step:14065/57344 train_time:8235673ms step_avg:585.54ms +step:14066/57344 train_time:8235918ms step_avg:585.52ms +step:14067/57344 train_time:8236466ms step_avg:585.52ms +grad accum step:3517/14336 +step:14068/57344 train_time:8237787ms step_avg:585.57ms +step:14069/57344 train_time:8237804ms step_avg:585.53ms +step:14070/57344 train_time:8238054ms step_avg:585.50ms +step:14071/57344 train_time:8238603ms step_avg:585.50ms +grad accum step:3518/14336 +step:14072/57344 train_time:8239876ms step_avg:585.55ms +step:14073/57344 train_time:8239893ms step_avg:585.51ms +step:14074/57344 train_time:8240138ms step_avg:585.49ms +step:14075/57344 train_time:8240682ms step_avg:585.48ms +grad accum step:3519/14336 +step:14076/57344 train_time:8241959ms step_avg:585.53ms +step:14077/57344 train_time:8241977ms step_avg:585.49ms +step:14078/57344 train_time:8242223ms step_avg:585.47ms +step:14079/57344 train_time:8242768ms step_avg:585.47ms +grad accum step:3520/14336 +step:14080/57344 train_time:8244050ms step_avg:585.51ms +step:14080/57344 val_loss:6.769068 train_time:8244051ms step_avg:585.51ms +step:14081/57344 train_time:8244063ms step_avg:585.47ms +step:14082/57344 train_time:8244287ms step_avg:585.45ms +step:14083/57344 train_time:8244829ms step_avg:585.45ms +grad accum step:3521/14336 +step:14084/57344 train_time:8246124ms step_avg:585.50ms +step:14085/57344 train_time:8246141ms step_avg:585.46ms +step:14086/57344 train_time:8246389ms step_avg:585.43ms +step:14087/57344 train_time:8246950ms step_avg:585.43ms +grad accum step:3522/14336 +step:14088/57344 train_time:8248266ms step_avg:585.48ms +step:14089/57344 train_time:8248283ms step_avg:585.44ms +step:14090/57344 train_time:8248526ms step_avg:585.42ms +step:14091/57344 train_time:8249065ms step_avg:585.41ms +grad accum step:3523/14336 +step:14092/57344 train_time:8250331ms step_avg:585.46ms +step:14093/57344 train_time:8250348ms step_avg:585.42ms +step:14094/57344 train_time:8250595ms step_avg:585.40ms +step:14095/57344 train_time:8251139ms step_avg:585.39ms +grad accum step:3524/14336 +step:14096/57344 train_time:8252455ms step_avg:585.45ms +step:14097/57344 train_time:8252473ms step_avg:585.41ms +step:14098/57344 train_time:8252729ms step_avg:585.38ms +step:14099/57344 train_time:8253290ms step_avg:585.38ms +grad accum step:3525/14336 +step:14100/57344 train_time:8254567ms step_avg:585.43ms +step:14101/57344 train_time:8254584ms step_avg:585.39ms +step:14102/57344 train_time:8254832ms step_avg:585.37ms +step:14103/57344 train_time:8255376ms step_avg:585.36ms +grad accum step:3526/14336 +step:14104/57344 train_time:8256654ms step_avg:585.41ms +step:14105/57344 train_time:8256671ms step_avg:585.37ms +step:14106/57344 train_time:8256919ms step_avg:585.35ms +step:14107/57344 train_time:8257461ms step_avg:585.34ms +grad accum step:3527/14336 +step:14108/57344 train_time:8258744ms step_avg:585.39ms +step:14109/57344 train_time:8258761ms step_avg:585.35ms +step:14110/57344 train_time:8259008ms step_avg:585.33ms +step:14111/57344 train_time:8259553ms step_avg:585.33ms +grad accum step:3528/14336 +step:14112/57344 train_time:8260835ms step_avg:585.38ms +step:14113/57344 train_time:8260852ms step_avg:585.34ms +step:14114/57344 train_time:8261098ms step_avg:585.31ms +step:14115/57344 train_time:8261641ms step_avg:585.31ms +grad accum step:3529/14336 +step:14116/57344 train_time:8262919ms step_avg:585.36ms +step:14117/57344 train_time:8262936ms step_avg:585.32ms +step:14118/57344 train_time:8263183ms step_avg:585.29ms +step:14119/57344 train_time:8263730ms step_avg:585.29ms +grad accum step:3530/14336 +step:14120/57344 train_time:8265027ms step_avg:585.34ms +step:14121/57344 train_time:8265045ms step_avg:585.30ms +step:14122/57344 train_time:8265290ms step_avg:585.28ms +step:14123/57344 train_time:8265839ms step_avg:585.28ms +grad accum step:3531/14336 +step:14124/57344 train_time:8267205ms step_avg:585.33ms +step:14125/57344 train_time:8267222ms step_avg:585.29ms +step:14126/57344 train_time:8267470ms step_avg:585.27ms +step:14127/57344 train_time:8268011ms step_avg:585.26ms +grad accum step:3532/14336 +step:14128/57344 train_time:8269308ms step_avg:585.31ms +step:14129/57344 train_time:8269326ms step_avg:585.27ms +step:14130/57344 train_time:8269568ms step_avg:585.25ms +step:14131/57344 train_time:8270112ms step_avg:585.25ms +grad accum step:3533/14336 +step:14132/57344 train_time:8271391ms step_avg:585.30ms +step:14133/57344 train_time:8271409ms step_avg:585.25ms +step:14134/57344 train_time:8271657ms step_avg:585.23ms +step:14135/57344 train_time:8272204ms step_avg:585.23ms +grad accum step:3534/14336 +step:14136/57344 train_time:8273490ms step_avg:585.28ms +step:14137/57344 train_time:8273507ms step_avg:585.24ms +step:14138/57344 train_time:8273753ms step_avg:585.21ms +step:14139/57344 train_time:8274293ms step_avg:585.21ms +grad accum step:3535/14336 +step:14140/57344 train_time:8275607ms step_avg:585.26ms +step:14141/57344 train_time:8275624ms step_avg:585.22ms +step:14142/57344 train_time:8275871ms step_avg:585.20ms +step:14143/57344 train_time:8276413ms step_avg:585.20ms +grad accum step:3536/14336 +step:14144/57344 train_time:8277687ms step_avg:585.24ms +step:14144/57344 val_loss:6.781752 train_time:8277687ms step_avg:585.24ms +step:14145/57344 train_time:8277699ms step_avg:585.20ms +step:14146/57344 train_time:8277921ms step_avg:585.18ms +step:14147/57344 train_time:8278468ms step_avg:585.17ms +grad accum step:3537/14336 +step:14148/57344 train_time:8279742ms step_avg:585.22ms +step:14149/57344 train_time:8279759ms step_avg:585.18ms +step:14150/57344 train_time:8280006ms step_avg:585.16ms +step:14151/57344 train_time:8280552ms step_avg:585.16ms +grad accum step:3538/14336 +step:14152/57344 train_time:8281843ms step_avg:585.21ms +step:14153/57344 train_time:8281860ms step_avg:585.17ms +step:14154/57344 train_time:8282106ms step_avg:585.14ms +step:14155/57344 train_time:8282658ms step_avg:585.14ms +grad accum step:3539/14336 +step:14156/57344 train_time:8283955ms step_avg:585.19ms +step:14157/57344 train_time:8283973ms step_avg:585.15ms +step:14158/57344 train_time:8284218ms step_avg:585.13ms +step:14159/57344 train_time:8284763ms step_avg:585.12ms +grad accum step:3540/14336 +step:14160/57344 train_time:8286039ms step_avg:585.17ms +step:14161/57344 train_time:8286056ms step_avg:585.13ms +step:14162/57344 train_time:8286303ms step_avg:585.11ms +step:14163/57344 train_time:8286853ms step_avg:585.11ms +grad accum step:3541/14336 +step:14164/57344 train_time:8288152ms step_avg:585.16ms +step:14165/57344 train_time:8288169ms step_avg:585.12ms +step:14166/57344 train_time:8288415ms step_avg:585.09ms +step:14167/57344 train_time:8288963ms step_avg:585.09ms +grad accum step:3542/14336 +step:14168/57344 train_time:8290247ms step_avg:585.14ms +step:14169/57344 train_time:8290264ms step_avg:585.10ms +step:14170/57344 train_time:8290509ms step_avg:585.07ms +step:14171/57344 train_time:8291054ms step_avg:585.07ms +grad accum step:3543/14336 +step:14172/57344 train_time:8292339ms step_avg:585.12ms +step:14173/57344 train_time:8292355ms step_avg:585.08ms +step:14174/57344 train_time:8292598ms step_avg:585.06ms +step:14175/57344 train_time:8293137ms step_avg:585.05ms +grad accum step:3544/14336 +step:14176/57344 train_time:8294424ms step_avg:585.10ms +step:14177/57344 train_time:8294441ms step_avg:585.06ms +step:14178/57344 train_time:8294689ms step_avg:585.04ms +step:14179/57344 train_time:8295240ms step_avg:585.04ms +grad accum step:3545/14336 +step:14180/57344 train_time:8296544ms step_avg:585.09ms +step:14181/57344 train_time:8296559ms step_avg:585.05ms +step:14182/57344 train_time:8296804ms step_avg:585.02ms +step:14183/57344 train_time:8297347ms step_avg:585.02ms +grad accum step:3546/14336 +step:14184/57344 train_time:8298626ms step_avg:585.07ms +step:14185/57344 train_time:8298643ms step_avg:585.03ms +step:14186/57344 train_time:8298891ms step_avg:585.01ms +step:14187/57344 train_time:8299438ms step_avg:585.00ms +grad accum step:3547/14336 +step:14188/57344 train_time:8300719ms step_avg:585.05ms +step:14189/57344 train_time:8300736ms step_avg:585.01ms +step:14190/57344 train_time:8300981ms step_avg:584.99ms +step:14191/57344 train_time:8301521ms step_avg:584.98ms +grad accum step:3548/14336 +step:14192/57344 train_time:8302814ms step_avg:585.03ms +step:14193/57344 train_time:8302832ms step_avg:584.99ms +step:14194/57344 train_time:8303074ms step_avg:584.97ms +step:14195/57344 train_time:8303603ms step_avg:584.97ms +grad accum step:3549/14336 +step:14196/57344 train_time:8304925ms step_avg:585.02ms +step:14197/57344 train_time:8304941ms step_avg:584.98ms +step:14198/57344 train_time:8305186ms step_avg:584.95ms +step:14199/57344 train_time:8305728ms step_avg:584.95ms +grad accum step:3550/14336 +step:14200/57344 train_time:8307027ms step_avg:585.00ms +step:14201/57344 train_time:8307044ms step_avg:584.96ms +step:14202/57344 train_time:8307293ms step_avg:584.94ms +step:14203/57344 train_time:8307838ms step_avg:584.94ms +grad accum step:3551/14336 +step:14204/57344 train_time:8309110ms step_avg:584.98ms +step:14205/57344 train_time:8309127ms step_avg:584.94ms +step:14206/57344 train_time:8309371ms step_avg:584.92ms +step:14207/57344 train_time:8309915ms step_avg:584.92ms +grad accum step:3552/14336 +step:14208/57344 train_time:8311197ms step_avg:584.97ms +step:14208/57344 val_loss:6.777571 train_time:8311198ms step_avg:584.97ms +step:14209/57344 train_time:8311209ms step_avg:584.93ms +step:14210/57344 train_time:8311427ms step_avg:584.90ms +step:14211/57344 train_time:8311961ms step_avg:584.90ms +grad accum step:3553/14336 +step:14212/57344 train_time:8313242ms step_avg:584.95ms +step:14213/57344 train_time:8313260ms step_avg:584.91ms +step:14214/57344 train_time:8313503ms step_avg:584.88ms +step:14215/57344 train_time:8314047ms step_avg:584.88ms +grad accum step:3554/14336 +step:14216/57344 train_time:8315333ms step_avg:584.93ms +step:14217/57344 train_time:8315350ms step_avg:584.89ms +step:14218/57344 train_time:8315595ms step_avg:584.86ms +step:14219/57344 train_time:8316137ms step_avg:584.86ms +grad accum step:3555/14336 +step:14220/57344 train_time:8317440ms step_avg:584.91ms +step:14221/57344 train_time:8317457ms step_avg:584.87ms +step:14222/57344 train_time:8317705ms step_avg:584.85ms +step:14223/57344 train_time:8318261ms step_avg:584.85ms +grad accum step:3556/14336 +step:14224/57344 train_time:8319557ms step_avg:584.90ms +step:14225/57344 train_time:8319574ms step_avg:584.86ms +step:14226/57344 train_time:8319822ms step_avg:584.83ms +step:14227/57344 train_time:8320365ms step_avg:584.83ms +grad accum step:3557/14336 +step:14228/57344 train_time:8321639ms step_avg:584.88ms +step:14229/57344 train_time:8321657ms step_avg:584.84ms +step:14230/57344 train_time:8321904ms step_avg:584.81ms +step:14231/57344 train_time:8322449ms step_avg:584.81ms +grad accum step:3558/14336 +step:14232/57344 train_time:8323739ms step_avg:584.86ms +step:14233/57344 train_time:8323756ms step_avg:584.82ms +step:14234/57344 train_time:8324001ms step_avg:584.80ms +step:14235/57344 train_time:8324536ms step_avg:584.79ms +grad accum step:3559/14336 +step:14236/57344 train_time:8325819ms step_avg:584.84ms +step:14237/57344 train_time:8325836ms step_avg:584.80ms +step:14238/57344 train_time:8326085ms step_avg:584.78ms +step:14239/57344 train_time:8326630ms step_avg:584.78ms +grad accum step:3560/14336 +step:14240/57344 train_time:8327923ms step_avg:584.83ms +step:14241/57344 train_time:8327940ms step_avg:584.79ms +step:14242/57344 train_time:8328184ms step_avg:584.76ms +step:14243/57344 train_time:8328724ms step_avg:584.76ms +grad accum step:3561/14336 +step:14244/57344 train_time:8330012ms step_avg:584.81ms +step:14245/57344 train_time:8330030ms step_avg:584.77ms +step:14246/57344 train_time:8330277ms step_avg:584.74ms +step:14247/57344 train_time:8330822ms step_avg:584.74ms +grad accum step:3562/14336 +step:14248/57344 train_time:8332126ms step_avg:584.79ms +step:14249/57344 train_time:8332143ms step_avg:584.75ms +step:14250/57344 train_time:8332385ms step_avg:584.73ms +step:14251/57344 train_time:8332924ms step_avg:584.73ms +grad accum step:3563/14336 +step:14252/57344 train_time:8334224ms step_avg:584.78ms +step:14253/57344 train_time:8334241ms step_avg:584.74ms +step:14254/57344 train_time:8334489ms step_avg:584.71ms +step:14255/57344 train_time:8335032ms step_avg:584.71ms +grad accum step:3564/14336 +step:14256/57344 train_time:8336322ms step_avg:584.76ms +step:14257/57344 train_time:8336339ms step_avg:584.72ms +step:14258/57344 train_time:8336585ms step_avg:584.70ms +step:14259/57344 train_time:8337136ms step_avg:584.69ms +grad accum step:3565/14336 +step:14260/57344 train_time:8338416ms step_avg:584.74ms +step:14261/57344 train_time:8338432ms step_avg:584.70ms +step:14262/57344 train_time:8338674ms step_avg:584.68ms +step:14263/57344 train_time:8339213ms step_avg:584.67ms +grad accum step:3566/14336 +step:14264/57344 train_time:8340509ms step_avg:584.72ms +step:14265/57344 train_time:8340527ms step_avg:584.68ms +step:14266/57344 train_time:8340771ms step_avg:584.66ms +step:14267/57344 train_time:8341301ms step_avg:584.66ms +grad accum step:3567/14336 +step:14268/57344 train_time:8342584ms step_avg:584.71ms +step:14269/57344 train_time:8342600ms step_avg:584.67ms +step:14270/57344 train_time:8342842ms step_avg:584.64ms +step:14271/57344 train_time:8343473ms step_avg:584.65ms +grad accum step:3568/14336 +step:14272/57344 train_time:8344671ms step_avg:584.69ms +step:14272/57344 val_loss:6.772471 train_time:8344672ms step_avg:584.69ms +step:14273/57344 train_time:8344684ms step_avg:584.65ms +step:14274/57344 train_time:8344907ms step_avg:584.62ms +step:14275/57344 train_time:8345453ms step_avg:584.62ms +grad accum step:3569/14336 +step:14276/57344 train_time:8346767ms step_avg:584.67ms +step:14277/57344 train_time:8346784ms step_avg:584.63ms +step:14278/57344 train_time:8347039ms step_avg:584.61ms +step:14279/57344 train_time:8347602ms step_avg:584.61ms +grad accum step:3570/14336 +step:14280/57344 train_time:8348881ms step_avg:584.66ms +step:14281/57344 train_time:8348898ms step_avg:584.62ms +step:14282/57344 train_time:8349139ms step_avg:584.59ms +step:14283/57344 train_time:8349668ms step_avg:584.59ms +grad accum step:3571/14336 +step:14284/57344 train_time:8350954ms step_avg:584.64ms +step:14285/57344 train_time:8350970ms step_avg:584.60ms +step:14286/57344 train_time:8351220ms step_avg:584.57ms +step:14287/57344 train_time:8351764ms step_avg:584.57ms +grad accum step:3572/14336 +step:14288/57344 train_time:8353055ms step_avg:584.62ms +step:14289/57344 train_time:8353073ms step_avg:584.58ms +step:14290/57344 train_time:8353318ms step_avg:584.56ms +step:14291/57344 train_time:8353864ms step_avg:584.55ms +grad accum step:3573/14336 +step:14292/57344 train_time:8355151ms step_avg:584.60ms +step:14293/57344 train_time:8355168ms step_avg:584.56ms +step:14294/57344 train_time:8355414ms step_avg:584.54ms +step:14295/57344 train_time:8355964ms step_avg:584.54ms +grad accum step:3574/14336 +step:14296/57344 train_time:8357238ms step_avg:584.59ms +step:14297/57344 train_time:8357255ms step_avg:584.55ms +step:14298/57344 train_time:8357503ms step_avg:584.52ms +step:14299/57344 train_time:8358053ms step_avg:584.52ms +grad accum step:3575/14336 +step:14300/57344 train_time:8359329ms step_avg:584.57ms +step:14301/57344 train_time:8359346ms step_avg:584.53ms +step:14302/57344 train_time:8359588ms step_avg:584.50ms +step:14303/57344 train_time:8360133ms step_avg:584.50ms +grad accum step:3576/14336 +step:14304/57344 train_time:8361415ms step_avg:584.55ms +step:14305/57344 train_time:8361432ms step_avg:584.51ms +step:14306/57344 train_time:8361679ms step_avg:584.49ms +step:14307/57344 train_time:8362221ms step_avg:584.48ms +grad accum step:3577/14336 +step:14308/57344 train_time:8363515ms step_avg:584.53ms +step:14309/57344 train_time:8363532ms step_avg:584.49ms +step:14310/57344 train_time:8363778ms step_avg:584.47ms +step:14311/57344 train_time:8364324ms step_avg:584.47ms +grad accum step:3578/14336 +step:14312/57344 train_time:8365623ms step_avg:584.52ms +step:14313/57344 train_time:8365640ms step_avg:584.48ms +step:14314/57344 train_time:8365884ms step_avg:584.45ms +step:14315/57344 train_time:8366423ms step_avg:584.45ms +grad accum step:3579/14336 +step:14316/57344 train_time:8367701ms step_avg:584.50ms +step:14317/57344 train_time:8367718ms step_avg:584.46ms +step:14318/57344 train_time:8367964ms step_avg:584.44ms +step:14319/57344 train_time:8368511ms step_avg:584.43ms +grad accum step:3580/14336 +step:14320/57344 train_time:8369788ms step_avg:584.48ms +step:14321/57344 train_time:8369805ms step_avg:584.44ms +step:14322/57344 train_time:8370049ms step_avg:584.42ms +step:14323/57344 train_time:8370597ms step_avg:584.42ms +grad accum step:3581/14336 +step:14324/57344 train_time:8371890ms step_avg:584.47ms +step:14325/57344 train_time:8371907ms step_avg:584.43ms +step:14326/57344 train_time:8372148ms step_avg:584.40ms +step:14327/57344 train_time:8372683ms step_avg:584.40ms +grad accum step:3582/14336 +step:14328/57344 train_time:8373981ms step_avg:584.45ms +step:14329/57344 train_time:8373998ms step_avg:584.41ms +step:14330/57344 train_time:8374242ms step_avg:584.39ms +step:14331/57344 train_time:8374783ms step_avg:584.38ms +grad accum step:3583/14336 +step:14332/57344 train_time:8376058ms step_avg:584.43ms +step:14333/57344 train_time:8376075ms step_avg:584.39ms +step:14334/57344 train_time:8376322ms step_avg:584.37ms +step:14335/57344 train_time:8376872ms step_avg:584.37ms +grad accum step:3584/14336 +step:14336/57344 train_time:8381430ms step_avg:584.64ms +step:14336/57344 val_loss:6.780034 train_time:8381430ms step_avg:584.64ms +step:14337/57344 train_time:8381442ms step_avg:584.60ms +step:14338/57344 train_time:8381662ms step_avg:584.58ms +step:14339/57344 train_time:8382208ms step_avg:584.57ms +grad accum step:3585/14336 +step:14340/57344 train_time:8383493ms step_avg:584.62ms +step:14341/57344 train_time:8383510ms step_avg:584.58ms +step:14342/57344 train_time:8383754ms step_avg:584.56ms +step:14343/57344 train_time:8384301ms step_avg:584.56ms +grad accum step:3586/14336 +step:14344/57344 train_time:8385585ms step_avg:584.61ms +step:14345/57344 train_time:8385601ms step_avg:584.57ms +step:14346/57344 train_time:8385849ms step_avg:584.54ms +step:14347/57344 train_time:8386393ms step_avg:584.54ms +grad accum step:3587/14336 +step:14348/57344 train_time:8387722ms step_avg:584.59ms +step:14349/57344 train_time:8387739ms step_avg:584.55ms +step:14350/57344 train_time:8387999ms step_avg:584.53ms +step:14351/57344 train_time:8388580ms step_avg:584.53ms +grad accum step:3588/14336 +step:14352/57344 train_time:8389862ms step_avg:584.58ms +step:14353/57344 train_time:8389879ms step_avg:584.54ms +step:14354/57344 train_time:8390124ms step_avg:584.51ms +step:14355/57344 train_time:8390669ms step_avg:584.51ms +grad accum step:3589/14336 +step:14356/57344 train_time:8391981ms step_avg:584.56ms +step:14357/57344 train_time:8391998ms step_avg:584.52ms +step:14358/57344 train_time:8392242ms step_avg:584.50ms +step:14359/57344 train_time:8392784ms step_avg:584.50ms +grad accum step:3590/14336 +step:14360/57344 train_time:8394086ms step_avg:584.55ms +step:14361/57344 train_time:8394103ms step_avg:584.51ms +step:14362/57344 train_time:8394347ms step_avg:584.48ms +step:14363/57344 train_time:8394894ms step_avg:584.48ms +grad accum step:3591/14336 +step:14364/57344 train_time:8396173ms step_avg:584.53ms +step:14365/57344 train_time:8396190ms step_avg:584.49ms +step:14366/57344 train_time:8396434ms step_avg:584.47ms +step:14367/57344 train_time:8396978ms step_avg:584.46ms +grad accum step:3592/14336 +step:14368/57344 train_time:8398275ms step_avg:584.51ms +step:14369/57344 train_time:8398292ms step_avg:584.47ms +step:14370/57344 train_time:8398541ms step_avg:584.45ms +step:14371/57344 train_time:8399085ms step_avg:584.45ms +grad accum step:3593/14336 +step:14372/57344 train_time:8400407ms step_avg:584.50ms +step:14373/57344 train_time:8400424ms step_avg:584.46ms +step:14374/57344 train_time:8400671ms step_avg:584.44ms +step:14375/57344 train_time:8401211ms step_avg:584.43ms +grad accum step:3594/14336 +step:14376/57344 train_time:8402484ms step_avg:584.48ms +step:14377/57344 train_time:8402501ms step_avg:584.44ms +step:14378/57344 train_time:8402745ms step_avg:584.42ms +step:14379/57344 train_time:8403289ms step_avg:584.41ms +grad accum step:3595/14336 +step:14380/57344 train_time:8404582ms step_avg:584.46ms +step:14381/57344 train_time:8404599ms step_avg:584.42ms +step:14382/57344 train_time:8404846ms step_avg:584.40ms +step:14383/57344 train_time:8405407ms step_avg:584.40ms +grad accum step:3596/14336 +step:14384/57344 train_time:8406748ms step_avg:584.45ms +step:14385/57344 train_time:8406765ms step_avg:584.41ms +step:14386/57344 train_time:8407009ms step_avg:584.39ms +step:14387/57344 train_time:8407552ms step_avg:584.39ms +grad accum step:3597/14336 +step:14388/57344 train_time:8408855ms step_avg:584.44ms +step:14389/57344 train_time:8408872ms step_avg:584.40ms +step:14390/57344 train_time:8409117ms step_avg:584.37ms +step:14391/57344 train_time:8409663ms step_avg:584.37ms +grad accum step:3598/14336 +step:14392/57344 train_time:8410946ms step_avg:584.42ms +step:14393/57344 train_time:8410963ms step_avg:584.38ms +step:14394/57344 train_time:8411207ms step_avg:584.36ms +step:14395/57344 train_time:8411744ms step_avg:584.35ms +grad accum step:3599/14336 +step:14396/57344 train_time:8413041ms step_avg:584.40ms +step:14397/57344 train_time:8413058ms step_avg:584.36ms +step:14398/57344 train_time:8413304ms step_avg:584.34ms +step:14399/57344 train_time:8413851ms step_avg:584.34ms +grad accum step:3600/14336 +step:14400/57344 train_time:8415142ms step_avg:584.38ms +step:14400/57344 val_loss:6.792025 train_time:8415143ms step_avg:584.38ms +step:14401/57344 train_time:8415155ms step_avg:584.35ms +step:14402/57344 train_time:8415375ms step_avg:584.32ms +step:14403/57344 train_time:8415922ms step_avg:584.32ms +grad accum step:3601/14336 +step:14404/57344 train_time:8417203ms step_avg:584.37ms +step:14405/57344 train_time:8417220ms step_avg:584.33ms +step:14406/57344 train_time:8417463ms step_avg:584.30ms +step:14407/57344 train_time:8417998ms step_avg:584.30ms +grad accum step:3602/14336 +step:14408/57344 train_time:8419269ms step_avg:584.35ms +step:14409/57344 train_time:8419286ms step_avg:584.31ms +step:14410/57344 train_time:8419535ms step_avg:584.28ms +step:14411/57344 train_time:8420086ms step_avg:584.28ms +grad accum step:3603/14336 +step:14412/57344 train_time:8421354ms step_avg:584.33ms +step:14413/57344 train_time:8421371ms step_avg:584.29ms +step:14414/57344 train_time:8421618ms step_avg:584.27ms +step:14415/57344 train_time:8422160ms step_avg:584.26ms +grad accum step:3604/14336 +step:14416/57344 train_time:8423442ms step_avg:584.31ms +step:14417/57344 train_time:8423459ms step_avg:584.27ms +step:14418/57344 train_time:8423708ms step_avg:584.25ms +step:14419/57344 train_time:8424257ms step_avg:584.25ms +grad accum step:3605/14336 +step:14420/57344 train_time:8425585ms step_avg:584.30ms +step:14421/57344 train_time:8425602ms step_avg:584.26ms +step:14422/57344 train_time:8425851ms step_avg:584.24ms +step:14423/57344 train_time:8426397ms step_avg:584.23ms +grad accum step:3606/14336 +step:14424/57344 train_time:8427691ms step_avg:584.28ms +step:14425/57344 train_time:8427708ms step_avg:584.24ms +step:14426/57344 train_time:8427960ms step_avg:584.22ms +step:14427/57344 train_time:8428527ms step_avg:584.22ms +grad accum step:3607/14336 +step:14428/57344 train_time:8429855ms step_avg:584.27ms +step:14429/57344 train_time:8429872ms step_avg:584.23ms +step:14430/57344 train_time:8430120ms step_avg:584.21ms +step:14431/57344 train_time:8430669ms step_avg:584.21ms +grad accum step:3608/14336 +step:14432/57344 train_time:8431958ms step_avg:584.25ms +step:14433/57344 train_time:8431975ms step_avg:584.22ms +step:14434/57344 train_time:8432221ms step_avg:584.19ms +step:14435/57344 train_time:8432766ms step_avg:584.19ms +grad accum step:3609/14336 +step:14436/57344 train_time:8434061ms step_avg:584.24ms +step:14437/57344 train_time:8434078ms step_avg:584.20ms +step:14438/57344 train_time:8434324ms step_avg:584.18ms +step:14439/57344 train_time:8434868ms step_avg:584.17ms +grad accum step:3610/14336 +step:14440/57344 train_time:8436144ms step_avg:584.22ms +step:14441/57344 train_time:8436161ms step_avg:584.18ms +step:14442/57344 train_time:8436406ms step_avg:584.16ms +step:14443/57344 train_time:8436949ms step_avg:584.15ms +grad accum step:3611/14336 +step:14444/57344 train_time:8438244ms step_avg:584.20ms +step:14445/57344 train_time:8438261ms step_avg:584.16ms +step:14446/57344 train_time:8438507ms step_avg:584.14ms +step:14447/57344 train_time:8439053ms step_avg:584.14ms +grad accum step:3612/14336 +step:14448/57344 train_time:8440349ms step_avg:584.19ms +step:14449/57344 train_time:8440366ms step_avg:584.15ms +step:14450/57344 train_time:8440609ms step_avg:584.13ms +step:14451/57344 train_time:8441151ms step_avg:584.12ms +grad accum step:3613/14336 +step:14452/57344 train_time:8442459ms step_avg:584.17ms +step:14453/57344 train_time:8442476ms step_avg:584.13ms +step:14454/57344 train_time:8442723ms step_avg:584.11ms +step:14455/57344 train_time:8443265ms step_avg:584.11ms +grad accum step:3614/14336 +step:14456/57344 train_time:8444540ms step_avg:584.15ms +step:14457/57344 train_time:8444557ms step_avg:584.12ms +step:14458/57344 train_time:8444801ms step_avg:584.09ms +step:14459/57344 train_time:8445347ms step_avg:584.09ms +grad accum step:3615/14336 +step:14460/57344 train_time:8446625ms step_avg:584.14ms +step:14461/57344 train_time:8446642ms step_avg:584.10ms +step:14462/57344 train_time:8446897ms step_avg:584.08ms +step:14463/57344 train_time:8447468ms step_avg:584.07ms +grad accum step:3616/14336 +step:14464/57344 train_time:8448749ms step_avg:584.12ms +step:14464/57344 val_loss:6.779392 train_time:8448750ms step_avg:584.12ms +step:14465/57344 train_time:8448762ms step_avg:584.08ms +step:14466/57344 train_time:8448984ms step_avg:584.06ms +step:14467/57344 train_time:8449529ms step_avg:584.06ms +grad accum step:3617/14336 +step:14468/57344 train_time:8450810ms step_avg:584.10ms +step:14469/57344 train_time:8450827ms step_avg:584.06ms +step:14470/57344 train_time:8451070ms step_avg:584.04ms +step:14471/57344 train_time:8451621ms step_avg:584.04ms +grad accum step:3618/14336 +step:14472/57344 train_time:8452934ms step_avg:584.09ms +step:14473/57344 train_time:8452951ms step_avg:584.05ms +step:14474/57344 train_time:8453200ms step_avg:584.03ms +step:14475/57344 train_time:8453743ms step_avg:584.02ms +grad accum step:3619/14336 +step:14476/57344 train_time:8455015ms step_avg:584.07ms +step:14477/57344 train_time:8455032ms step_avg:584.03ms +step:14478/57344 train_time:8455281ms step_avg:584.01ms +step:14479/57344 train_time:8455829ms step_avg:584.01ms +grad accum step:3620/14336 +step:14480/57344 train_time:8457127ms step_avg:584.06ms +step:14481/57344 train_time:8457144ms step_avg:584.02ms +step:14482/57344 train_time:8457393ms step_avg:583.99ms +step:14483/57344 train_time:8457951ms step_avg:583.99ms +grad accum step:3621/14336 +step:14484/57344 train_time:8459248ms step_avg:584.04ms +step:14485/57344 train_time:8459265ms step_avg:584.00ms +step:14486/57344 train_time:8459509ms step_avg:583.98ms +step:14487/57344 train_time:8460049ms step_avg:583.98ms +grad accum step:3622/14336 +step:14488/57344 train_time:8461342ms step_avg:584.02ms +step:14489/57344 train_time:8461359ms step_avg:583.99ms +step:14490/57344 train_time:8461610ms step_avg:583.96ms +step:14491/57344 train_time:8462163ms step_avg:583.96ms +grad accum step:3623/14336 +step:14492/57344 train_time:8463450ms step_avg:584.01ms +step:14493/57344 train_time:8463467ms step_avg:583.97ms +step:14494/57344 train_time:8463711ms step_avg:583.95ms +step:14495/57344 train_time:8464250ms step_avg:583.94ms +grad accum step:3624/14336 +step:14496/57344 train_time:8465556ms step_avg:583.99ms +step:14497/57344 train_time:8465572ms step_avg:583.95ms +step:14498/57344 train_time:8465815ms step_avg:583.93ms +step:14499/57344 train_time:8466356ms step_avg:583.93ms +grad accum step:3625/14336 +step:14500/57344 train_time:8467641ms step_avg:583.98ms +step:14501/57344 train_time:8467658ms step_avg:583.94ms +step:14502/57344 train_time:8467904ms step_avg:583.91ms +step:14503/57344 train_time:8468449ms step_avg:583.91ms +grad accum step:3626/14336 +step:14504/57344 train_time:8469761ms step_avg:583.96ms +step:14505/57344 train_time:8469779ms step_avg:583.92ms +step:14506/57344 train_time:8470021ms step_avg:583.90ms +step:14507/57344 train_time:8470569ms step_avg:583.90ms +grad accum step:3627/14336 +step:14508/57344 train_time:8471969ms step_avg:583.95ms +step:14509/57344 train_time:8471986ms step_avg:583.91ms +step:14510/57344 train_time:8472228ms step_avg:583.89ms +step:14511/57344 train_time:8472777ms step_avg:583.89ms +grad accum step:3628/14336 +step:14512/57344 train_time:8474076ms step_avg:583.94ms +step:14513/57344 train_time:8474093ms step_avg:583.90ms +step:14514/57344 train_time:8474342ms step_avg:583.87ms +step:14515/57344 train_time:8474887ms step_avg:583.87ms +grad accum step:3629/14336 +step:14516/57344 train_time:8476174ms step_avg:583.92ms +step:14517/57344 train_time:8476191ms step_avg:583.88ms +step:14518/57344 train_time:8476442ms step_avg:583.86ms +step:14519/57344 train_time:8476987ms step_avg:583.85ms +grad accum step:3630/14336 +step:14520/57344 train_time:8478259ms step_avg:583.90ms +step:14521/57344 train_time:8478276ms step_avg:583.86ms +step:14522/57344 train_time:8478524ms step_avg:583.84ms +step:14523/57344 train_time:8479072ms step_avg:583.84ms +grad accum step:3631/14336 +step:14524/57344 train_time:8480362ms step_avg:583.89ms +step:14525/57344 train_time:8480379ms step_avg:583.85ms +step:14526/57344 train_time:8480622ms step_avg:583.82ms +step:14527/57344 train_time:8481166ms step_avg:583.82ms +grad accum step:3632/14336 +step:14528/57344 train_time:8482488ms step_avg:583.87ms +step:14528/57344 val_loss:6.787096 train_time:8482489ms step_avg:583.87ms +step:14529/57344 train_time:8482500ms step_avg:583.83ms +step:14530/57344 train_time:8482721ms step_avg:583.81ms +step:14531/57344 train_time:8483263ms step_avg:583.80ms +grad accum step:3633/14336 +step:14532/57344 train_time:8484563ms step_avg:583.85ms +step:14533/57344 train_time:8484580ms step_avg:583.81ms +step:14534/57344 train_time:8484825ms step_avg:583.79ms +step:14535/57344 train_time:8485366ms step_avg:583.79ms +grad accum step:3634/14336 +step:14536/57344 train_time:8486636ms step_avg:583.84ms +step:14537/57344 train_time:8486653ms step_avg:583.80ms +step:14538/57344 train_time:8486898ms step_avg:583.77ms +step:14539/57344 train_time:8487435ms step_avg:583.77ms +grad accum step:3635/14336 +step:14540/57344 train_time:8488719ms step_avg:583.82ms +step:14541/57344 train_time:8488736ms step_avg:583.78ms +step:14542/57344 train_time:8488980ms step_avg:583.76ms +step:14543/57344 train_time:8489521ms step_avg:583.75ms +grad accum step:3636/14336 +step:14544/57344 train_time:8490826ms step_avg:583.80ms +step:14545/57344 train_time:8490843ms step_avg:583.76ms +step:14546/57344 train_time:8491088ms step_avg:583.74ms +step:14547/57344 train_time:8491626ms step_avg:583.74ms +grad accum step:3637/14336 +step:14548/57344 train_time:8492900ms step_avg:583.78ms +step:14549/57344 train_time:8492917ms step_avg:583.75ms +step:14550/57344 train_time:8493161ms step_avg:583.72ms +step:14551/57344 train_time:8493703ms step_avg:583.72ms +grad accum step:3638/14336 +step:14552/57344 train_time:8494982ms step_avg:583.77ms +step:14553/57344 train_time:8494999ms step_avg:583.73ms +step:14554/57344 train_time:8495250ms step_avg:583.71ms +step:14555/57344 train_time:8495793ms step_avg:583.70ms +grad accum step:3639/14336 +step:14556/57344 train_time:8497092ms step_avg:583.75ms +step:14557/57344 train_time:8497109ms step_avg:583.71ms +step:14558/57344 train_time:8497356ms step_avg:583.69ms +step:14559/57344 train_time:8497898ms step_avg:583.69ms +grad accum step:3640/14336 +step:14560/57344 train_time:8499212ms step_avg:583.74ms +step:14561/57344 train_time:8499229ms step_avg:583.70ms +step:14562/57344 train_time:8499478ms step_avg:583.68ms +step:14563/57344 train_time:8500019ms step_avg:583.67ms +grad accum step:3641/14336 +step:14564/57344 train_time:8501292ms step_avg:583.72ms +step:14565/57344 train_time:8501309ms step_avg:583.68ms +step:14566/57344 train_time:8501558ms step_avg:583.66ms +step:14567/57344 train_time:8502103ms step_avg:583.66ms +grad accum step:3642/14336 +step:14568/57344 train_time:8503378ms step_avg:583.70ms +step:14569/57344 train_time:8503395ms step_avg:583.66ms +step:14570/57344 train_time:8503645ms step_avg:583.64ms +step:14571/57344 train_time:8504196ms step_avg:583.64ms +grad accum step:3643/14336 +step:14572/57344 train_time:8505482ms step_avg:583.69ms +step:14573/57344 train_time:8505498ms step_avg:583.65ms +step:14574/57344 train_time:8505742ms step_avg:583.62ms +step:14575/57344 train_time:8506289ms step_avg:583.62ms +grad accum step:3644/14336 +step:14576/57344 train_time:8507564ms step_avg:583.67ms +step:14577/57344 train_time:8507581ms step_avg:583.63ms +step:14578/57344 train_time:8507828ms step_avg:583.61ms +step:14579/57344 train_time:8508375ms step_avg:583.60ms +grad accum step:3645/14336 +step:14580/57344 train_time:8509664ms step_avg:583.65ms +step:14581/57344 train_time:8509681ms step_avg:583.61ms +step:14582/57344 train_time:8509926ms step_avg:583.59ms +step:14583/57344 train_time:8510469ms step_avg:583.59ms +grad accum step:3646/14336 +step:14584/57344 train_time:8511772ms step_avg:583.64ms +step:14585/57344 train_time:8511790ms step_avg:583.60ms +step:14586/57344 train_time:8512034ms step_avg:583.58ms +step:14587/57344 train_time:8512577ms step_avg:583.57ms +grad accum step:3647/14336 +step:14588/57344 train_time:8513864ms step_avg:583.62ms +step:14589/57344 train_time:8513880ms step_avg:583.58ms +step:14590/57344 train_time:8514128ms step_avg:583.56ms +step:14591/57344 train_time:8514671ms step_avg:583.56ms +grad accum step:3648/14336 +step:14592/57344 train_time:8515966ms step_avg:583.61ms +step:14592/57344 val_loss:6.773287 train_time:8515967ms step_avg:583.61ms +step:14593/57344 train_time:8515979ms step_avg:583.57ms +step:14594/57344 train_time:8516202ms step_avg:583.54ms +step:14595/57344 train_time:8516738ms step_avg:583.54ms +grad accum step:3649/14336 +step:14596/57344 train_time:8518002ms step_avg:583.58ms +step:14597/57344 train_time:8518019ms step_avg:583.55ms +step:14598/57344 train_time:8518265ms step_avg:583.52ms +step:14599/57344 train_time:8518802ms step_avg:583.52ms +grad accum step:3650/14336 +step:14600/57344 train_time:8520117ms step_avg:583.57ms +step:14601/57344 train_time:8520134ms step_avg:583.53ms +step:14602/57344 train_time:8520379ms step_avg:583.51ms +step:14603/57344 train_time:8520922ms step_avg:583.50ms +grad accum step:3651/14336 +step:14604/57344 train_time:8522207ms step_avg:583.55ms +step:14605/57344 train_time:8522224ms step_avg:583.51ms +step:14606/57344 train_time:8522467ms step_avg:583.49ms +step:14607/57344 train_time:8523009ms step_avg:583.49ms +grad accum step:3652/14336 +step:14608/57344 train_time:8524299ms step_avg:583.54ms +step:14609/57344 train_time:8524316ms step_avg:583.50ms +step:14610/57344 train_time:8524563ms step_avg:583.47ms +step:14611/57344 train_time:8525111ms step_avg:583.47ms +grad accum step:3653/14336 +step:14612/57344 train_time:8526407ms step_avg:583.52ms +step:14613/57344 train_time:8526424ms step_avg:583.48ms +step:14614/57344 train_time:8526672ms step_avg:583.46ms +step:14615/57344 train_time:8527215ms step_avg:583.46ms +grad accum step:3654/14336 +step:14616/57344 train_time:8528513ms step_avg:583.51ms +step:14617/57344 train_time:8528530ms step_avg:583.47ms +step:14618/57344 train_time:8528776ms step_avg:583.44ms +step:14619/57344 train_time:8529317ms step_avg:583.44ms +grad accum step:3655/14336 +step:14620/57344 train_time:8530600ms step_avg:583.49ms +step:14621/57344 train_time:8530617ms step_avg:583.45ms +step:14622/57344 train_time:8530865ms step_avg:583.43ms +step:14623/57344 train_time:8531405ms step_avg:583.42ms +grad accum step:3656/14336 +step:14624/57344 train_time:8532679ms step_avg:583.47ms +step:14625/57344 train_time:8532696ms step_avg:583.43ms +step:14626/57344 train_time:8532945ms step_avg:583.41ms +step:14627/57344 train_time:8533502ms step_avg:583.41ms +grad accum step:3657/14336 +step:14628/57344 train_time:8534804ms step_avg:583.46ms +step:14629/57344 train_time:8534821ms step_avg:583.42ms +step:14630/57344 train_time:8535067ms step_avg:583.39ms +step:14631/57344 train_time:8535610ms step_avg:583.39ms +grad accum step:3658/14336 +step:14632/57344 train_time:8536909ms step_avg:583.44ms +step:14633/57344 train_time:8536925ms step_avg:583.40ms +step:14634/57344 train_time:8537173ms step_avg:583.38ms +step:14635/57344 train_time:8537714ms step_avg:583.38ms +grad accum step:3659/14336 +step:14636/57344 train_time:8539046ms step_avg:583.43ms +step:14637/57344 train_time:8539063ms step_avg:583.39ms +step:14638/57344 train_time:8539310ms step_avg:583.37ms +step:14639/57344 train_time:8539864ms step_avg:583.36ms +grad accum step:3660/14336 +step:14640/57344 train_time:8541156ms step_avg:583.41ms +step:14641/57344 train_time:8541173ms step_avg:583.37ms +step:14642/57344 train_time:8541419ms step_avg:583.35ms +step:14643/57344 train_time:8541970ms step_avg:583.35ms +grad accum step:3661/14336 +step:14644/57344 train_time:8543285ms step_avg:583.40ms +step:14645/57344 train_time:8543302ms step_avg:583.36ms +step:14646/57344 train_time:8543550ms step_avg:583.34ms +step:14647/57344 train_time:8544102ms step_avg:583.33ms +grad accum step:3662/14336 +step:14648/57344 train_time:8545402ms step_avg:583.38ms +step:14649/57344 train_time:8545420ms step_avg:583.34ms +step:14650/57344 train_time:8545664ms step_avg:583.32ms +step:14651/57344 train_time:8546196ms step_avg:583.32ms +grad accum step:3663/14336 +step:14652/57344 train_time:8547496ms step_avg:583.37ms +step:14653/57344 train_time:8547513ms step_avg:583.33ms +step:14654/57344 train_time:8547757ms step_avg:583.31ms +step:14655/57344 train_time:8548307ms step_avg:583.30ms +grad accum step:3664/14336 +step:14656/57344 train_time:8549604ms step_avg:583.35ms +step:14656/57344 val_loss:6.783534 train_time:8549604ms step_avg:583.35ms +step:14657/57344 train_time:8549616ms step_avg:583.31ms +step:14658/57344 train_time:8549841ms step_avg:583.29ms +step:14659/57344 train_time:8550387ms step_avg:583.29ms +grad accum step:3665/14336 +step:14660/57344 train_time:8551686ms step_avg:583.33ms +step:14661/57344 train_time:8551702ms step_avg:583.30ms +step:14662/57344 train_time:8551945ms step_avg:583.27ms +step:14663/57344 train_time:8552486ms step_avg:583.27ms +grad accum step:3666/14336 +step:14664/57344 train_time:8553782ms step_avg:583.32ms +step:14665/57344 train_time:8553799ms step_avg:583.28ms +step:14666/57344 train_time:8554044ms step_avg:583.26ms +step:14667/57344 train_time:8554592ms step_avg:583.25ms +grad accum step:3667/14336 +step:14668/57344 train_time:8555860ms step_avg:583.30ms +step:14669/57344 train_time:8555877ms step_avg:583.26ms +step:14670/57344 train_time:8556123ms step_avg:583.24ms +step:14671/57344 train_time:8556668ms step_avg:583.24ms +grad accum step:3668/14336 +step:14672/57344 train_time:8557947ms step_avg:583.28ms +step:14673/57344 train_time:8557964ms step_avg:583.25ms +step:14674/57344 train_time:8558208ms step_avg:583.22ms +step:14675/57344 train_time:8558750ms step_avg:583.22ms +grad accum step:3669/14336 +step:14676/57344 train_time:8560032ms step_avg:583.27ms +step:14677/57344 train_time:8560049ms step_avg:583.23ms +step:14678/57344 train_time:8560297ms step_avg:583.21ms +step:14679/57344 train_time:8560838ms step_avg:583.20ms +grad accum step:3670/14336 +step:14680/57344 train_time:8562112ms step_avg:583.25ms +step:14681/57344 train_time:8562129ms step_avg:583.21ms +step:14682/57344 train_time:8562373ms step_avg:583.19ms +step:14683/57344 train_time:8562920ms step_avg:583.19ms +grad accum step:3671/14336 +step:14684/57344 train_time:8564218ms step_avg:583.23ms +step:14685/57344 train_time:8564235ms step_avg:583.20ms +step:14686/57344 train_time:8564481ms step_avg:583.17ms +step:14687/57344 train_time:8565024ms step_avg:583.17ms +grad accum step:3672/14336 +step:14688/57344 train_time:8566331ms step_avg:583.22ms +step:14689/57344 train_time:8566348ms step_avg:583.18ms +step:14690/57344 train_time:8566596ms step_avg:583.16ms +step:14691/57344 train_time:8567147ms step_avg:583.16ms +grad accum step:3673/14336 +step:14692/57344 train_time:8568425ms step_avg:583.20ms +step:14693/57344 train_time:8568442ms step_avg:583.16ms +step:14694/57344 train_time:8568702ms step_avg:583.14ms +step:14695/57344 train_time:8569294ms step_avg:583.14ms +grad accum step:3674/14336 +step:14696/57344 train_time:8570652ms step_avg:583.20ms +step:14697/57344 train_time:8570669ms step_avg:583.16ms +step:14698/57344 train_time:8570919ms step_avg:583.14ms +step:14699/57344 train_time:8571474ms step_avg:583.13ms +grad accum step:3675/14336 +step:14700/57344 train_time:8572765ms step_avg:583.18ms +step:14701/57344 train_time:8572781ms step_avg:583.14ms +step:14702/57344 train_time:8573026ms step_avg:583.12ms +step:14703/57344 train_time:8573560ms step_avg:583.12ms +grad accum step:3676/14336 +step:14704/57344 train_time:8574859ms step_avg:583.17ms +step:14705/57344 train_time:8574876ms step_avg:583.13ms +step:14706/57344 train_time:8575122ms step_avg:583.10ms +step:14707/57344 train_time:8575669ms step_avg:583.10ms +grad accum step:3677/14336 +step:14708/57344 train_time:8595156ms step_avg:584.39ms +step:14709/57344 train_time:8595168ms step_avg:584.35ms +step:14710/57344 train_time:8595474ms step_avg:584.33ms +step:14711/57344 train_time:8596015ms step_avg:584.33ms +grad accum step:3678/14336 +step:14712/57344 train_time:8597299ms step_avg:584.37ms +step:14713/57344 train_time:8597316ms step_avg:584.33ms +step:14714/57344 train_time:8597557ms step_avg:584.31ms +step:14715/57344 train_time:8598091ms step_avg:584.31ms +grad accum step:3679/14336 +step:14716/57344 train_time:8599366ms step_avg:584.35ms +step:14717/57344 train_time:8599383ms step_avg:584.32ms +step:14718/57344 train_time:8599621ms step_avg:584.29ms +step:14719/57344 train_time:8600145ms step_avg:584.29ms +grad accum step:3680/14336 +step:14720/57344 train_time:8601440ms step_avg:584.34ms +step:14720/57344 val_loss:6.806825 train_time:8601441ms step_avg:584.34ms +step:14721/57344 train_time:8601453ms step_avg:584.30ms +step:14722/57344 train_time:8601672ms step_avg:584.27ms +step:14723/57344 train_time:8602212ms step_avg:584.27ms +grad accum step:3681/14336 +step:14724/57344 train_time:8603513ms step_avg:584.32ms +step:14725/57344 train_time:8603530ms step_avg:584.28ms +step:14726/57344 train_time:8603774ms step_avg:584.26ms +step:14727/57344 train_time:8604319ms step_avg:584.25ms +grad accum step:3682/14336 +step:14728/57344 train_time:8605620ms step_avg:584.30ms +step:14729/57344 train_time:8605637ms step_avg:584.26ms +step:14730/57344 train_time:8605885ms step_avg:584.24ms +step:14731/57344 train_time:8606428ms step_avg:584.24ms +grad accum step:3683/14336 +step:14732/57344 train_time:8607713ms step_avg:584.29ms +step:14733/57344 train_time:8607729ms step_avg:584.25ms +step:14734/57344 train_time:8607972ms step_avg:584.23ms +step:14735/57344 train_time:8608514ms step_avg:584.22ms +grad accum step:3684/14336 +step:14736/57344 train_time:8609817ms step_avg:584.27ms +step:14737/57344 train_time:8609834ms step_avg:584.23ms +step:14738/57344 train_time:8610078ms step_avg:584.21ms +step:14739/57344 train_time:8610614ms step_avg:584.21ms +grad accum step:3685/14336 +step:14740/57344 train_time:8611895ms step_avg:584.25ms +step:14741/57344 train_time:8611912ms step_avg:584.21ms +step:14742/57344 train_time:8612166ms step_avg:584.19ms +step:14743/57344 train_time:8612721ms step_avg:584.19ms +grad accum step:3686/14336 +step:14744/57344 train_time:8614002ms step_avg:584.24ms +step:14745/57344 train_time:8614019ms step_avg:584.20ms +step:14746/57344 train_time:8614266ms step_avg:584.18ms +step:14747/57344 train_time:8614815ms step_avg:584.17ms +grad accum step:3687/14336 +step:14748/57344 train_time:8616122ms step_avg:584.22ms +step:14749/57344 train_time:8616138ms step_avg:584.18ms +step:14750/57344 train_time:8616384ms step_avg:584.16ms +step:14751/57344 train_time:8616920ms step_avg:584.16ms +grad accum step:3688/14336 +step:14752/57344 train_time:8618207ms step_avg:584.21ms +step:14753/57344 train_time:8618224ms step_avg:584.17ms +step:14754/57344 train_time:8618469ms step_avg:584.14ms +step:14755/57344 train_time:8619005ms step_avg:584.14ms +grad accum step:3689/14336 +step:14756/57344 train_time:8620287ms step_avg:584.19ms +step:14757/57344 train_time:8620303ms step_avg:584.15ms +step:14758/57344 train_time:8620548ms step_avg:584.13ms +step:14759/57344 train_time:8621095ms step_avg:584.12ms +grad accum step:3690/14336 +step:14760/57344 train_time:8622379ms step_avg:584.17ms +step:14761/57344 train_time:8622396ms step_avg:584.13ms +step:14762/57344 train_time:8622642ms step_avg:584.11ms +step:14763/57344 train_time:8623183ms step_avg:584.11ms +grad accum step:3691/14336 +step:14764/57344 train_time:8624464ms step_avg:584.15ms +step:14765/57344 train_time:8624481ms step_avg:584.12ms +step:14766/57344 train_time:8624726ms step_avg:584.09ms +step:14767/57344 train_time:8625275ms step_avg:584.09ms +grad accum step:3692/14336 +step:14768/57344 train_time:8626597ms step_avg:584.14ms +step:14769/57344 train_time:8626614ms step_avg:584.10ms +step:14770/57344 train_time:8626860ms step_avg:584.08ms +step:14771/57344 train_time:8627407ms step_avg:584.08ms +grad accum step:3693/14336 +step:14772/57344 train_time:8628713ms step_avg:584.13ms +step:14773/57344 train_time:8628730ms step_avg:584.09ms +step:14774/57344 train_time:8628973ms step_avg:584.06ms +step:14775/57344 train_time:8629512ms step_avg:584.06ms +grad accum step:3694/14336 +step:14776/57344 train_time:8630796ms step_avg:584.11ms +step:14777/57344 train_time:8630813ms step_avg:584.07ms +step:14778/57344 train_time:8631062ms step_avg:584.05ms +step:14779/57344 train_time:8631611ms step_avg:584.05ms +grad accum step:3695/14336 +step:14780/57344 train_time:8632902ms step_avg:584.09ms +step:14781/57344 train_time:8632919ms step_avg:584.06ms +step:14782/57344 train_time:8633167ms step_avg:584.03ms +step:14783/57344 train_time:8633714ms step_avg:584.03ms +grad accum step:3696/14336 +step:14784/57344 train_time:8635020ms step_avg:584.08ms +step:14784/57344 val_loss:6.786703 train_time:8635020ms step_avg:584.08ms +step:14785/57344 train_time:8635032ms step_avg:584.04ms +step:14786/57344 train_time:8635253ms step_avg:584.02ms +step:14787/57344 train_time:8635797ms step_avg:584.01ms +grad accum step:3697/14336 +step:14788/57344 train_time:8637133ms step_avg:584.06ms +step:14789/57344 train_time:8637150ms step_avg:584.03ms +step:14790/57344 train_time:8637396ms step_avg:584.00ms +step:14791/57344 train_time:8637935ms step_avg:584.00ms +grad accum step:3698/14336 +step:14792/57344 train_time:8639215ms step_avg:584.05ms +step:14793/57344 train_time:8639233ms step_avg:584.01ms +step:14794/57344 train_time:8639478ms step_avg:583.99ms +step:14795/57344 train_time:8640026ms step_avg:583.98ms +grad accum step:3699/14336 +step:14796/57344 train_time:8641368ms step_avg:584.03ms +step:14797/57344 train_time:8641385ms step_avg:584.00ms +step:14798/57344 train_time:8641632ms step_avg:583.97ms +step:14799/57344 train_time:8642172ms step_avg:583.97ms +grad accum step:3700/14336 +step:14800/57344 train_time:8643446ms step_avg:584.02ms +step:14801/57344 train_time:8643463ms step_avg:583.98ms +step:14802/57344 train_time:8643714ms step_avg:583.96ms +step:14803/57344 train_time:8644268ms step_avg:583.95ms +grad accum step:3701/14336 +step:14804/57344 train_time:8645727ms step_avg:584.01ms +step:14805/57344 train_time:8645913ms step_avg:583.99ms +step:14806/57344 train_time:8646127ms step_avg:583.96ms +step:14807/57344 train_time:8646665ms step_avg:583.96ms +grad accum step:3702/14336 +step:14808/57344 train_time:8647963ms step_avg:584.01ms +step:14809/57344 train_time:8647980ms step_avg:583.97ms +step:14810/57344 train_time:8648223ms step_avg:583.94ms +step:14811/57344 train_time:8648764ms step_avg:583.94ms +grad accum step:3703/14336 +step:14812/57344 train_time:8650053ms step_avg:583.99ms +step:14813/57344 train_time:8650069ms step_avg:583.95ms +step:14814/57344 train_time:8650319ms step_avg:583.93ms +step:14815/57344 train_time:8650869ms step_avg:583.93ms +grad accum step:3704/14336 +step:14816/57344 train_time:8652176ms step_avg:583.98ms +step:14817/57344 train_time:8652193ms step_avg:583.94ms +step:14818/57344 train_time:8652447ms step_avg:583.91ms +step:14819/57344 train_time:8652997ms step_avg:583.91ms +grad accum step:3705/14336 +step:14820/57344 train_time:8654287ms step_avg:583.96ms +step:14821/57344 train_time:8654305ms step_avg:583.92ms +step:14822/57344 train_time:8654553ms step_avg:583.90ms +step:14823/57344 train_time:8655099ms step_avg:583.90ms +grad accum step:3706/14336 +step:14824/57344 train_time:8656394ms step_avg:583.94ms +step:14825/57344 train_time:8656412ms step_avg:583.91ms +step:14826/57344 train_time:8656656ms step_avg:583.88ms +step:14827/57344 train_time:8657188ms step_avg:583.88ms +grad accum step:3707/14336 +step:14828/57344 train_time:8658475ms step_avg:583.93ms +step:14829/57344 train_time:8658492ms step_avg:583.89ms +step:14830/57344 train_time:8658744ms step_avg:583.87ms +step:14831/57344 train_time:8659306ms step_avg:583.87ms +grad accum step:3708/14336 +step:14832/57344 train_time:8660600ms step_avg:583.91ms +step:14833/57344 train_time:8660616ms step_avg:583.87ms +step:14834/57344 train_time:8660864ms step_avg:583.85ms +step:14835/57344 train_time:8661409ms step_avg:583.85ms +grad accum step:3709/14336 +step:14836/57344 train_time:8662704ms step_avg:583.90ms +step:14837/57344 train_time:8662721ms step_avg:583.86ms +step:14838/57344 train_time:8662971ms step_avg:583.84ms +step:14839/57344 train_time:8663517ms step_avg:583.83ms +grad accum step:3710/14336 +step:14840/57344 train_time:8664795ms step_avg:583.88ms +step:14841/57344 train_time:8664812ms step_avg:583.84ms +step:14842/57344 train_time:8665061ms step_avg:583.82ms +step:14843/57344 train_time:8665626ms step_avg:583.82ms +grad accum step:3711/14336 +step:14844/57344 train_time:8666963ms step_avg:583.87ms +step:14845/57344 train_time:8666980ms step_avg:583.83ms +step:14846/57344 train_time:8667226ms step_avg:583.81ms +step:14847/57344 train_time:8667764ms step_avg:583.81ms +grad accum step:3712/14336 +step:14848/57344 train_time:8669040ms step_avg:583.85ms +step:14848/57344 val_loss:6.803151 train_time:8669040ms step_avg:583.85ms +step:14849/57344 train_time:8669052ms step_avg:583.81ms +step:14850/57344 train_time:8669309ms step_avg:583.79ms +step:14851/57344 train_time:8669851ms step_avg:583.79ms +grad accum step:3713/14336 +step:14852/57344 train_time:8671136ms step_avg:583.84ms +step:14853/57344 train_time:8671153ms step_avg:583.80ms +step:14854/57344 train_time:8671398ms step_avg:583.78ms +step:14855/57344 train_time:8671938ms step_avg:583.77ms +grad accum step:3714/14336 +step:14856/57344 train_time:8673221ms step_avg:583.82ms +step:14857/57344 train_time:8673238ms step_avg:583.78ms +step:14858/57344 train_time:8673480ms step_avg:583.76ms +step:14859/57344 train_time:8674025ms step_avg:583.76ms +grad accum step:3715/14336 +step:14860/57344 train_time:8675304ms step_avg:583.80ms +step:14861/57344 train_time:8675321ms step_avg:583.76ms +step:14862/57344 train_time:8675565ms step_avg:583.74ms +step:14863/57344 train_time:8676105ms step_avg:583.74ms +grad accum step:3716/14336 +step:14864/57344 train_time:8677394ms step_avg:583.79ms +step:14865/57344 train_time:8677412ms step_avg:583.75ms +step:14866/57344 train_time:8677655ms step_avg:583.72ms +step:14867/57344 train_time:8678204ms step_avg:583.72ms +grad accum step:3717/14336 +step:14868/57344 train_time:8679527ms step_avg:583.77ms +step:14869/57344 train_time:8679544ms step_avg:583.73ms +step:14870/57344 train_time:8679791ms step_avg:583.71ms +step:14871/57344 train_time:8680337ms step_avg:583.71ms +grad accum step:3718/14336 +step:14872/57344 train_time:8681613ms step_avg:583.76ms +step:14873/57344 train_time:8681630ms step_avg:583.72ms +step:14874/57344 train_time:8681875ms step_avg:583.69ms +step:14875/57344 train_time:8682411ms step_avg:583.69ms +grad accum step:3719/14336 +step:14876/57344 train_time:8683663ms step_avg:583.74ms +step:14877/57344 train_time:8683680ms step_avg:583.70ms +step:14878/57344 train_time:8683924ms step_avg:583.68ms +step:14879/57344 train_time:8684469ms step_avg:583.67ms +grad accum step:3720/14336 +step:14880/57344 train_time:8685762ms step_avg:583.72ms +step:14881/57344 train_time:8685779ms step_avg:583.68ms +step:14882/57344 train_time:8686024ms step_avg:583.66ms +step:14883/57344 train_time:8686564ms step_avg:583.66ms +grad accum step:3721/14336 +step:14884/57344 train_time:8687860ms step_avg:583.70ms +step:14885/57344 train_time:8687878ms step_avg:583.67ms +step:14886/57344 train_time:8688124ms step_avg:583.64ms +step:14887/57344 train_time:8688670ms step_avg:583.64ms +grad accum step:3722/14336 +step:14888/57344 train_time:8689947ms step_avg:583.69ms +step:14889/57344 train_time:8690059ms step_avg:583.66ms +step:14890/57344 train_time:8690274ms step_avg:583.63ms +step:14891/57344 train_time:8690814ms step_avg:583.63ms +grad accum step:3723/14336 +step:14892/57344 train_time:8692093ms step_avg:583.68ms +step:14893/57344 train_time:8692110ms step_avg:583.64ms +step:14894/57344 train_time:8692356ms step_avg:583.61ms +step:14895/57344 train_time:8692902ms step_avg:583.61ms +grad accum step:3724/14336 +step:14896/57344 train_time:8694181ms step_avg:583.66ms +step:14897/57344 train_time:8694198ms step_avg:583.62ms +step:14898/57344 train_time:8694442ms step_avg:583.60ms +step:14899/57344 train_time:8694987ms step_avg:583.60ms +grad accum step:3725/14336 +step:14900/57344 train_time:8696266ms step_avg:583.64ms +step:14901/57344 train_time:8696283ms step_avg:583.60ms +step:14902/57344 train_time:8696529ms step_avg:583.58ms +step:14903/57344 train_time:8697073ms step_avg:583.58ms +grad accum step:3726/14336 +step:14904/57344 train_time:8708059ms step_avg:584.28ms +step:14905/57344 train_time:8709192ms step_avg:584.31ms +step:14906/57344 train_time:8709491ms step_avg:584.29ms +step:14907/57344 train_time:8710034ms step_avg:584.29ms +grad accum step:3727/14336 +step:14908/57344 train_time:8711309ms step_avg:584.34ms +step:14909/57344 train_time:8711326ms step_avg:584.30ms +step:14910/57344 train_time:8711574ms step_avg:584.28ms +step:14911/57344 train_time:8712125ms step_avg:584.28ms +grad accum step:3728/14336 +step:14912/57344 train_time:8713418ms step_avg:584.32ms +step:14912/57344 val_loss:6.791473 train_time:8713418ms step_avg:584.32ms +step:14913/57344 train_time:8713430ms step_avg:584.28ms +step:14914/57344 train_time:8720643ms step_avg:584.73ms +step:14915/57344 train_time:8720951ms step_avg:584.71ms +grad accum step:3729/14336 +step:14916/57344 train_time:8722241ms step_avg:584.76ms +step:14917/57344 train_time:8722258ms step_avg:584.72ms +step:14918/57344 train_time:8722501ms step_avg:584.70ms +step:14919/57344 train_time:8723040ms step_avg:584.69ms +grad accum step:3730/14336 +step:14920/57344 train_time:8724308ms step_avg:584.74ms +step:14921/57344 train_time:8724325ms step_avg:584.70ms +step:14922/57344 train_time:8724569ms step_avg:584.68ms +step:14923/57344 train_time:8725111ms step_avg:584.68ms +grad accum step:3731/14336 +step:14924/57344 train_time:8726388ms step_avg:584.72ms +step:14925/57344 train_time:8726405ms step_avg:584.68ms +step:14926/57344 train_time:8726654ms step_avg:584.66ms +step:14927/57344 train_time:8727203ms step_avg:584.66ms +grad accum step:3732/14336 +step:14928/57344 train_time:8728477ms step_avg:584.71ms +step:14929/57344 train_time:8728494ms step_avg:584.67ms +step:14930/57344 train_time:8728738ms step_avg:584.64ms +step:14931/57344 train_time:8729285ms step_avg:584.64ms +grad accum step:3733/14336 +step:14932/57344 train_time:8730584ms step_avg:584.69ms +step:14933/57344 train_time:8730601ms step_avg:584.65ms +step:14934/57344 train_time:8730844ms step_avg:584.63ms +step:14935/57344 train_time:8731386ms step_avg:584.63ms +grad accum step:3734/14336 +step:14936/57344 train_time:8732663ms step_avg:584.67ms +step:14937/57344 train_time:8732680ms step_avg:584.63ms +step:14938/57344 train_time:8732926ms step_avg:584.61ms +step:14939/57344 train_time:8733470ms step_avg:584.61ms +grad accum step:3735/14336 +step:14940/57344 train_time:8734750ms step_avg:584.66ms +step:14941/57344 train_time:8734767ms step_avg:584.62ms +step:14942/57344 train_time:8735010ms step_avg:584.59ms +step:14943/57344 train_time:8735548ms step_avg:584.59ms +grad accum step:3736/14336 +step:14944/57344 train_time:8736849ms step_avg:584.64ms +step:14945/57344 train_time:8736866ms step_avg:584.60ms +step:14946/57344 train_time:8737111ms step_avg:584.58ms +step:14947/57344 train_time:8737658ms step_avg:584.58ms +grad accum step:3737/14336 +step:14948/57344 train_time:8738938ms step_avg:584.62ms +step:14949/57344 train_time:8738955ms step_avg:584.58ms +step:14950/57344 train_time:8739201ms step_avg:584.56ms +step:14951/57344 train_time:8739743ms step_avg:584.56ms +grad accum step:3738/14336 +step:14952/57344 train_time:8741041ms step_avg:584.61ms +step:14953/57344 train_time:8741057ms step_avg:584.57ms +step:14954/57344 train_time:8741307ms step_avg:584.55ms +step:14955/57344 train_time:8741851ms step_avg:584.54ms +grad accum step:3739/14336 +step:14956/57344 train_time:8767879ms step_avg:586.24ms +step:14957/57344 train_time:8767896ms step_avg:586.21ms +step:14958/57344 train_time:8768142ms step_avg:586.18ms +step:14959/57344 train_time:8768688ms step_avg:586.18ms +grad accum step:3740/14336 +step:14960/57344 train_time:8769949ms step_avg:586.23ms +step:14961/57344 train_time:8769966ms step_avg:586.19ms +step:14962/57344 train_time:8770211ms step_avg:586.17ms +step:14963/57344 train_time:8770759ms step_avg:586.16ms +grad accum step:3741/14336 +step:14964/57344 train_time:8772071ms step_avg:586.21ms +step:14965/57344 train_time:8772088ms step_avg:586.17ms +step:14966/57344 train_time:8772333ms step_avg:586.15ms +step:14967/57344 train_time:8772886ms step_avg:586.15ms +grad accum step:3742/14336 +step:14968/57344 train_time:8774183ms step_avg:586.20ms +step:14969/57344 train_time:8774199ms step_avg:586.16ms +step:14970/57344 train_time:8774442ms step_avg:586.14ms +step:14971/57344 train_time:8774986ms step_avg:586.13ms +grad accum step:3743/14336 +step:14972/57344 train_time:8776312ms step_avg:586.18ms +step:14973/57344 train_time:8776329ms step_avg:586.14ms +step:14974/57344 train_time:8776573ms step_avg:586.12ms +step:14975/57344 train_time:8777110ms step_avg:586.12ms +grad accum step:3744/14336 +step:14976/57344 train_time:8778381ms step_avg:586.16ms +step:14976/57344 val_loss:6.801820 train_time:8778381ms step_avg:586.16ms +step:14977/57344 train_time:8778393ms step_avg:586.12ms +step:14978/57344 train_time:8778612ms step_avg:586.10ms +step:14979/57344 train_time:8779145ms step_avg:586.10ms +grad accum step:3745/14336 +step:14980/57344 train_time:8780427ms step_avg:586.14ms +step:14981/57344 train_time:8780444ms step_avg:586.11ms +step:14982/57344 train_time:8780688ms step_avg:586.08ms +step:14983/57344 train_time:8781229ms step_avg:586.08ms +grad accum step:3746/14336 +step:14984/57344 train_time:8782528ms step_avg:586.13ms +step:14985/57344 train_time:8782545ms step_avg:586.09ms +step:14986/57344 train_time:8782794ms step_avg:586.07ms +step:14987/57344 train_time:8783346ms step_avg:586.06ms +grad accum step:3747/14336 +step:14988/57344 train_time:8784657ms step_avg:586.11ms +step:14989/57344 train_time:8784674ms step_avg:586.07ms +step:14990/57344 train_time:8784921ms step_avg:586.05ms +step:14991/57344 train_time:8785461ms step_avg:586.05ms +grad accum step:3748/14336 +step:14992/57344 train_time:8786740ms step_avg:586.10ms +step:14993/57344 train_time:8786757ms step_avg:586.06ms +step:14994/57344 train_time:8787004ms step_avg:586.03ms +step:14995/57344 train_time:8787553ms step_avg:586.03ms +grad accum step:3749/14336 +step:14996/57344 train_time:8788862ms step_avg:586.08ms +step:14997/57344 train_time:8788879ms step_avg:586.04ms +step:14998/57344 train_time:8789123ms step_avg:586.02ms +step:14999/57344 train_time:8789659ms step_avg:586.02ms +grad accum step:3750/14336 +step:15000/57344 train_time:8790977ms step_avg:586.07ms +step:15001/57344 train_time:8790994ms step_avg:586.03ms +step:15002/57344 train_time:8791238ms step_avg:586.00ms +step:15003/57344 train_time:8791785ms step_avg:586.00ms +grad accum step:3751/14336 +step:15004/57344 train_time:8793100ms step_avg:586.05ms +step:15005/57344 train_time:8793117ms step_avg:586.01ms +step:15006/57344 train_time:8793361ms step_avg:585.99ms +step:15007/57344 train_time:8793906ms step_avg:585.99ms +grad accum step:3752/14336 +step:15008/57344 train_time:8795190ms step_avg:586.03ms +step:15009/57344 train_time:8795207ms step_avg:586.00ms +step:15010/57344 train_time:8795453ms step_avg:585.97ms +step:15011/57344 train_time:8795997ms step_avg:585.97ms +grad accum step:3753/14336 +step:15012/57344 train_time:8797708ms step_avg:586.05ms +step:15013/57344 train_time:8797720ms step_avg:586.01ms +step:15014/57344 train_time:8797940ms step_avg:585.98ms +step:15015/57344 train_time:8798491ms step_avg:585.98ms +grad accum step:3754/14336 +step:15016/57344 train_time:8799765ms step_avg:586.03ms +step:15017/57344 train_time:8799782ms step_avg:585.99ms +step:15018/57344 train_time:8800027ms step_avg:585.97ms +step:15019/57344 train_time:8800572ms step_avg:585.96ms +grad accum step:3755/14336 +step:15020/57344 train_time:8801852ms step_avg:586.01ms +step:15021/57344 train_time:8801869ms step_avg:585.97ms +step:15022/57344 train_time:8802117ms step_avg:585.95ms +step:15023/57344 train_time:8802662ms step_avg:585.95ms +grad accum step:3756/14336 +step:15024/57344 train_time:8803961ms step_avg:585.99ms +step:15025/57344 train_time:8803978ms step_avg:585.96ms +step:15026/57344 train_time:8804223ms step_avg:585.93ms +step:15027/57344 train_time:8804772ms step_avg:585.93ms +grad accum step:3757/14336 +step:15028/57344 train_time:8806055ms step_avg:585.98ms +step:15029/57344 train_time:8806072ms step_avg:585.94ms +step:15030/57344 train_time:8806324ms step_avg:585.92ms +step:15031/57344 train_time:8806885ms step_avg:585.91ms +grad accum step:3758/14336 +step:15032/57344 train_time:8808228ms step_avg:585.97ms +step:15033/57344 train_time:8808245ms step_avg:585.93ms +step:15034/57344 train_time:8808493ms step_avg:585.90ms +step:15035/57344 train_time:8809047ms step_avg:585.90ms +grad accum step:3759/14336 +step:15036/57344 train_time:8810376ms step_avg:585.95ms +step:15037/57344 train_time:8810393ms step_avg:585.91ms +step:15038/57344 train_time:8810644ms step_avg:585.89ms +step:15039/57344 train_time:8811197ms step_avg:585.89ms +grad accum step:3760/14336 +step:15040/57344 train_time:8812487ms step_avg:585.94ms +step:15040/57344 val_loss:6.810283 train_time:8812487ms step_avg:585.94ms +step:15041/57344 train_time:8812499ms step_avg:585.90ms +step:15042/57344 train_time:8812723ms step_avg:585.87ms +step:15043/57344 train_time:8813268ms step_avg:585.87ms +grad accum step:3761/14336 +step:15044/57344 train_time:8814553ms step_avg:585.92ms +step:15045/57344 train_time:8814570ms step_avg:585.88ms +step:15046/57344 train_time:8814819ms step_avg:585.86ms +step:15047/57344 train_time:8815361ms step_avg:585.86ms +grad accum step:3762/14336 +step:15048/57344 train_time:8816637ms step_avg:585.90ms +step:15049/57344 train_time:8816654ms step_avg:585.86ms +step:15050/57344 train_time:8816904ms step_avg:585.84ms +step:15051/57344 train_time:8817455ms step_avg:585.84ms +grad accum step:3763/14336 +step:15052/57344 train_time:8818769ms step_avg:585.89ms +step:15053/57344 train_time:8818785ms step_avg:585.85ms +step:15054/57344 train_time:8819043ms step_avg:585.83ms +step:15055/57344 train_time:8819613ms step_avg:585.83ms +grad accum step:3764/14336 +step:15056/57344 train_time:8820891ms step_avg:585.87ms +step:15057/57344 train_time:8820908ms step_avg:585.83ms +step:15058/57344 train_time:8821158ms step_avg:585.81ms +step:15059/57344 train_time:8821714ms step_avg:585.81ms +grad accum step:3765/14336 +step:15060/57344 train_time:8823045ms step_avg:585.86ms +step:15061/57344 train_time:8823062ms step_avg:585.82ms +step:15062/57344 train_time:8823310ms step_avg:585.80ms +step:15063/57344 train_time:8823849ms step_avg:585.80ms +grad accum step:3766/14336 +step:15064/57344 train_time:8825151ms step_avg:585.84ms +step:15065/57344 train_time:8825167ms step_avg:585.81ms +step:15066/57344 train_time:8825417ms step_avg:585.78ms +step:15067/57344 train_time:8825962ms step_avg:585.78ms +grad accum step:3767/14336 +step:15068/57344 train_time:8827253ms step_avg:585.83ms +step:15069/57344 train_time:8827270ms step_avg:585.79ms +step:15070/57344 train_time:8827517ms step_avg:585.77ms +step:15071/57344 train_time:8828058ms step_avg:585.76ms +grad accum step:3768/14336 +step:15072/57344 train_time:8829338ms step_avg:585.81ms +step:15073/57344 train_time:8829355ms step_avg:585.77ms +step:15074/57344 train_time:8829602ms step_avg:585.75ms +step:15075/57344 train_time:8830148ms step_avg:585.75ms +grad accum step:3769/14336 +step:15076/57344 train_time:8831435ms step_avg:585.79ms +step:15077/57344 train_time:8831452ms step_avg:585.76ms +step:15078/57344 train_time:8831703ms step_avg:585.73ms +step:15079/57344 train_time:8832256ms step_avg:585.73ms +grad accum step:3770/14336 +step:15080/57344 train_time:8833539ms step_avg:585.78ms +step:15081/57344 train_time:8833556ms step_avg:585.74ms +step:15082/57344 train_time:8833806ms step_avg:585.72ms +step:15083/57344 train_time:8834352ms step_avg:585.72ms +grad accum step:3771/14336 +step:15084/57344 train_time:8835640ms step_avg:585.76ms +step:15085/57344 train_time:8835657ms step_avg:585.72ms +step:15086/57344 train_time:8835903ms step_avg:585.70ms +step:15087/57344 train_time:8836449ms step_avg:585.70ms +grad accum step:3772/14336 +step:15088/57344 train_time:8837731ms step_avg:585.75ms +step:15089/57344 train_time:8837747ms step_avg:585.71ms +step:15090/57344 train_time:8837996ms step_avg:585.69ms +step:15091/57344 train_time:8838540ms step_avg:585.68ms +grad accum step:3773/14336 +step:15092/57344 train_time:8839842ms step_avg:585.73ms +step:15093/57344 train_time:8839859ms step_avg:585.69ms +step:15094/57344 train_time:8840107ms step_avg:585.67ms +step:15095/57344 train_time:8840655ms step_avg:585.67ms +grad accum step:3774/14336 +step:15096/57344 train_time:8841951ms step_avg:585.71ms +step:15097/57344 train_time:8841968ms step_avg:585.68ms +step:15098/57344 train_time:8842217ms step_avg:585.65ms +step:15099/57344 train_time:8842760ms step_avg:585.65ms +grad accum step:3775/14336 +step:15100/57344 train_time:8844035ms step_avg:585.70ms +step:15101/57344 train_time:8844052ms step_avg:585.66ms +step:15102/57344 train_time:8844295ms step_avg:585.64ms +step:15103/57344 train_time:8844826ms step_avg:585.63ms +grad accum step:3776/14336 +step:15104/57344 train_time:8846119ms step_avg:585.68ms +step:15104/57344 val_loss:6.793823 train_time:8846120ms step_avg:585.68ms +step:15105/57344 train_time:8846132ms step_avg:585.64ms +step:15106/57344 train_time:8846355ms step_avg:585.62ms +step:15107/57344 train_time:8846897ms step_avg:585.62ms +grad accum step:3777/14336 +step:15108/57344 train_time:8848194ms step_avg:585.66ms +step:15109/57344 train_time:8848211ms step_avg:585.63ms +step:15110/57344 train_time:8848458ms step_avg:585.60ms +step:15111/57344 train_time:8849002ms step_avg:585.60ms +grad accum step:3778/14336 +step:15112/57344 train_time:8850326ms step_avg:585.65ms +step:15113/57344 train_time:8850342ms step_avg:585.61ms +step:15114/57344 train_time:8850590ms step_avg:585.59ms +step:15115/57344 train_time:8851129ms step_avg:585.59ms +grad accum step:3779/14336 +step:15116/57344 train_time:8852419ms step_avg:585.63ms +step:15117/57344 train_time:8852436ms step_avg:585.59ms +step:15118/57344 train_time:8852676ms step_avg:585.57ms +step:15119/57344 train_time:8853209ms step_avg:585.57ms +grad accum step:3780/14336 +step:15120/57344 train_time:8854489ms step_avg:585.61ms +step:15121/57344 train_time:8854506ms step_avg:585.58ms +step:15122/57344 train_time:8854749ms step_avg:585.55ms +step:15123/57344 train_time:8855287ms step_avg:585.55ms +grad accum step:3781/14336 +step:15124/57344 train_time:8856588ms step_avg:585.60ms +step:15125/57344 train_time:8856605ms step_avg:585.56ms +step:15126/57344 train_time:8856853ms step_avg:585.54ms +step:15127/57344 train_time:8857406ms step_avg:585.54ms +grad accum step:3782/14336 +step:15128/57344 train_time:8858707ms step_avg:585.58ms +step:15129/57344 train_time:8858724ms step_avg:585.55ms +step:15130/57344 train_time:8858985ms step_avg:585.52ms +step:15131/57344 train_time:8859578ms step_avg:585.52ms +grad accum step:3783/14336 +step:15132/57344 train_time:8860935ms step_avg:585.58ms +step:15133/57344 train_time:8860952ms step_avg:585.54ms +step:15134/57344 train_time:8861196ms step_avg:585.52ms +step:15135/57344 train_time:8861740ms step_avg:585.51ms +grad accum step:3784/14336 +step:15136/57344 train_time:8863031ms step_avg:585.56ms +step:15137/57344 train_time:8863047ms step_avg:585.52ms +step:15138/57344 train_time:8863296ms step_avg:585.50ms +step:15139/57344 train_time:8863846ms step_avg:585.50ms +grad accum step:3785/14336 +step:15140/57344 train_time:8865141ms step_avg:585.54ms +step:15141/57344 train_time:8865159ms step_avg:585.51ms +step:15142/57344 train_time:8865409ms step_avg:585.48ms +step:15143/57344 train_time:8865960ms step_avg:585.48ms +grad accum step:3786/14336 +step:15144/57344 train_time:8867244ms step_avg:585.53ms +step:15145/57344 train_time:8867261ms step_avg:585.49ms +step:15146/57344 train_time:8867513ms step_avg:585.47ms +step:15147/57344 train_time:8868071ms step_avg:585.47ms +grad accum step:3787/14336 +step:15148/57344 train_time:8869367ms step_avg:585.51ms +step:15149/57344 train_time:8869384ms step_avg:585.48ms +step:15150/57344 train_time:8869630ms step_avg:585.45ms +step:15151/57344 train_time:8870175ms step_avg:585.45ms +grad accum step:3788/14336 +step:15152/57344 train_time:8871477ms step_avg:585.50ms +step:15153/57344 train_time:8871494ms step_avg:585.46ms +step:15154/57344 train_time:8871746ms step_avg:585.44ms +step:15155/57344 train_time:8872303ms step_avg:585.44ms +grad accum step:3789/14336 +step:15156/57344 train_time:8873611ms step_avg:585.49ms +step:15157/57344 train_time:8873628ms step_avg:585.45ms +step:15158/57344 train_time:8873872ms step_avg:585.42ms +step:15159/57344 train_time:8874420ms step_avg:585.42ms +grad accum step:3790/14336 +step:15160/57344 train_time:8875697ms step_avg:585.47ms +step:15161/57344 train_time:8875714ms step_avg:585.43ms +step:15162/57344 train_time:8875962ms step_avg:585.41ms +step:15163/57344 train_time:8876509ms step_avg:585.41ms +grad accum step:3791/14336 +step:15164/57344 train_time:8877821ms step_avg:585.45ms +step:15165/57344 train_time:8877838ms step_avg:585.42ms +step:15166/57344 train_time:8878087ms step_avg:585.39ms +step:15167/57344 train_time:8878634ms step_avg:585.39ms +grad accum step:3792/14336 +step:15168/57344 train_time:8879980ms step_avg:585.44ms +step:15168/57344 val_loss:6.816493 train_time:8879981ms step_avg:585.44ms +step:15169/57344 train_time:8879993ms step_avg:585.40ms +step:15170/57344 train_time:8880217ms step_avg:585.38ms +step:15171/57344 train_time:8880764ms step_avg:585.38ms +grad accum step:3793/14336 +step:15172/57344 train_time:8882053ms step_avg:585.42ms +step:15173/57344 train_time:8882070ms step_avg:585.39ms +step:15174/57344 train_time:8882317ms step_avg:585.36ms +step:15175/57344 train_time:8882864ms step_avg:585.36ms +grad accum step:3794/14336 +step:15176/57344 train_time:8884148ms step_avg:585.41ms +step:15177/57344 train_time:8884165ms step_avg:585.37ms +step:15178/57344 train_time:8884410ms step_avg:585.35ms +step:15179/57344 train_time:8884951ms step_avg:585.34ms +grad accum step:3795/14336 +step:15180/57344 train_time:8886233ms step_avg:585.39ms +step:15181/57344 train_time:8886249ms step_avg:585.35ms +step:15182/57344 train_time:8886497ms step_avg:585.33ms +step:15183/57344 train_time:8887041ms step_avg:585.33ms +grad accum step:3796/14336 +step:15184/57344 train_time:8888331ms step_avg:585.37ms +step:15185/57344 train_time:8888349ms step_avg:585.34ms +step:15186/57344 train_time:8888597ms step_avg:585.32ms +step:15187/57344 train_time:8889155ms step_avg:585.31ms +grad accum step:3797/14336 +step:15188/57344 train_time:8890462ms step_avg:585.36ms +step:15189/57344 train_time:8890479ms step_avg:585.32ms +step:15190/57344 train_time:8890729ms step_avg:585.30ms +step:15191/57344 train_time:8891284ms step_avg:585.30ms +grad accum step:3798/14336 +step:15192/57344 train_time:8892572ms step_avg:585.35ms +step:15193/57344 train_time:8892589ms step_avg:585.31ms +step:15194/57344 train_time:8892838ms step_avg:585.29ms +step:15195/57344 train_time:8893381ms step_avg:585.28ms +grad accum step:3799/14336 +step:15196/57344 train_time:8894685ms step_avg:585.33ms +step:15197/57344 train_time:8894702ms step_avg:585.29ms +step:15198/57344 train_time:8894945ms step_avg:585.27ms +step:15199/57344 train_time:8895487ms step_avg:585.27ms +grad accum step:3800/14336 +step:15200/57344 train_time:8896792ms step_avg:585.32ms +step:15201/57344 train_time:8896809ms step_avg:585.28ms +step:15202/57344 train_time:8897055ms step_avg:585.26ms +step:15203/57344 train_time:8897607ms step_avg:585.25ms +grad accum step:3801/14336 +step:15204/57344 train_time:8898897ms step_avg:585.30ms +step:15205/57344 train_time:8898915ms step_avg:585.26ms +step:15206/57344 train_time:8899160ms step_avg:585.24ms +step:15207/57344 train_time:8899702ms step_avg:585.24ms +grad accum step:3802/14336 +step:15208/57344 train_time:8900994ms step_avg:585.28ms +step:15209/57344 train_time:8901011ms step_avg:585.25ms +step:15210/57344 train_time:8901256ms step_avg:585.22ms +step:15211/57344 train_time:8901800ms step_avg:585.22ms +grad accum step:3803/14336 +step:15212/57344 train_time:8903082ms step_avg:585.27ms +step:15213/57344 train_time:8903099ms step_avg:585.23ms +step:15214/57344 train_time:8903345ms step_avg:585.21ms +step:15215/57344 train_time:8903894ms step_avg:585.21ms +grad accum step:3804/14336 +step:15216/57344 train_time:8905201ms step_avg:585.25ms +step:15217/57344 train_time:8905218ms step_avg:585.22ms +step:15218/57344 train_time:8905461ms step_avg:585.19ms +step:15219/57344 train_time:8905994ms step_avg:585.19ms +grad accum step:3805/14336 +step:15220/57344 train_time:8907276ms step_avg:585.23ms +step:15221/57344 train_time:8907293ms step_avg:585.20ms +step:15222/57344 train_time:8907538ms step_avg:585.18ms +step:15223/57344 train_time:8908085ms step_avg:585.17ms +grad accum step:3806/14336 +step:15224/57344 train_time:8909364ms step_avg:585.22ms +step:15225/57344 train_time:8909381ms step_avg:585.18ms +step:15226/57344 train_time:8909628ms step_avg:585.16ms +step:15227/57344 train_time:8910167ms step_avg:585.16ms +grad accum step:3807/14336 +step:15228/57344 train_time:8911447ms step_avg:585.20ms +step:15229/57344 train_time:8911463ms step_avg:585.16ms +step:15230/57344 train_time:8911708ms step_avg:585.14ms +step:15231/57344 train_time:8912248ms step_avg:585.14ms +grad accum step:3808/14336 +step:15232/57344 train_time:8913529ms step_avg:585.18ms +step:15232/57344 val_loss:6.796839 train_time:8913529ms step_avg:585.18ms +step:15233/57344 train_time:8913541ms step_avg:585.15ms +step:15234/57344 train_time:8913767ms step_avg:585.12ms +step:15235/57344 train_time:8914320ms step_avg:585.12ms +grad accum step:3809/14336 +step:15236/57344 train_time:8915619ms step_avg:585.17ms +step:15237/57344 train_time:8915636ms step_avg:585.13ms +step:15238/57344 train_time:8915890ms step_avg:585.11ms +step:15239/57344 train_time:8916450ms step_avg:585.11ms +grad accum step:3810/14336 +step:15240/57344 train_time:8917719ms step_avg:585.15ms +step:15241/57344 train_time:8917736ms step_avg:585.11ms +step:15242/57344 train_time:8917983ms step_avg:585.09ms +step:15243/57344 train_time:8918533ms step_avg:585.09ms +grad accum step:3811/14336 +step:15244/57344 train_time:8919826ms step_avg:585.14ms +step:15245/57344 train_time:8919843ms step_avg:585.10ms +step:15246/57344 train_time:8920093ms step_avg:585.08ms +step:15247/57344 train_time:8920646ms step_avg:585.08ms +grad accum step:3812/14336 +step:15248/57344 train_time:8921935ms step_avg:585.12ms +step:15249/57344 train_time:8921952ms step_avg:585.08ms +step:15250/57344 train_time:8922197ms step_avg:585.06ms +step:15251/57344 train_time:8922745ms step_avg:585.06ms +grad accum step:3813/14336 +step:15252/57344 train_time:8924045ms step_avg:585.11ms +step:15253/57344 train_time:8924062ms step_avg:585.07ms +step:15254/57344 train_time:8924311ms step_avg:585.05ms +step:15255/57344 train_time:8924855ms step_avg:585.04ms +grad accum step:3814/14336 +step:15256/57344 train_time:8926135ms step_avg:585.09ms +step:15257/57344 train_time:8926152ms step_avg:585.05ms +step:15258/57344 train_time:8926396ms step_avg:585.03ms +step:15259/57344 train_time:8926939ms step_avg:585.03ms +grad accum step:3815/14336 +step:15260/57344 train_time:8928224ms step_avg:585.07ms +step:15261/57344 train_time:8928241ms step_avg:585.04ms +step:15262/57344 train_time:8928487ms step_avg:585.01ms +step:15263/57344 train_time:8929031ms step_avg:585.01ms +grad accum step:3816/14336 +step:15264/57344 train_time:8930313ms step_avg:585.06ms +step:15265/57344 train_time:8930330ms step_avg:585.02ms +step:15266/57344 train_time:8930579ms step_avg:585.00ms +step:15267/57344 train_time:8931124ms step_avg:585.00ms +grad accum step:3817/14336 +step:15268/57344 train_time:8932425ms step_avg:585.04ms +step:15269/57344 train_time:8932442ms step_avg:585.01ms +step:15270/57344 train_time:8932686ms step_avg:584.98ms +step:15271/57344 train_time:8933233ms step_avg:584.98ms +grad accum step:3818/14336 +step:15272/57344 train_time:8934512ms step_avg:585.03ms +step:15273/57344 train_time:8934529ms step_avg:584.99ms +step:15274/57344 train_time:8934773ms step_avg:584.97ms +step:15275/57344 train_time:8935321ms step_avg:584.96ms +grad accum step:3819/14336 +step:15276/57344 train_time:8936625ms step_avg:585.01ms +step:15277/57344 train_time:8936642ms step_avg:584.97ms +step:15278/57344 train_time:8936890ms step_avg:584.95ms +step:15279/57344 train_time:8937431ms step_avg:584.95ms +grad accum step:3820/14336 +step:15280/57344 train_time:8938706ms step_avg:584.99ms +step:15281/57344 train_time:8938722ms step_avg:584.96ms +step:15282/57344 train_time:8938972ms step_avg:584.93ms +step:15283/57344 train_time:8939522ms step_avg:584.93ms +grad accum step:3821/14336 +step:15284/57344 train_time:8940849ms step_avg:584.98ms +step:15285/57344 train_time:8940866ms step_avg:584.94ms +step:15286/57344 train_time:8941111ms step_avg:584.92ms +step:15287/57344 train_time:8941658ms step_avg:584.92ms +grad accum step:3822/14336 +step:15288/57344 train_time:8942951ms step_avg:584.97ms +step:15289/57344 train_time:8942967ms step_avg:584.93ms +step:15290/57344 train_time:8943214ms step_avg:584.91ms +step:15291/57344 train_time:8943759ms step_avg:584.90ms +grad accum step:3823/14336 +step:15292/57344 train_time:8945075ms step_avg:584.95ms +step:15293/57344 train_time:8945091ms step_avg:584.91ms +step:15294/57344 train_time:8945338ms step_avg:584.89ms +step:15295/57344 train_time:8945883ms step_avg:584.89ms +grad accum step:3824/14336 +step:15296/57344 train_time:8947192ms step_avg:584.94ms +step:15296/57344 val_loss:6.805776 train_time:8947193ms step_avg:584.94ms +step:15297/57344 train_time:8947204ms step_avg:584.90ms +step:15298/57344 train_time:8947430ms step_avg:584.88ms +step:15299/57344 train_time:8947975ms step_avg:584.87ms +grad accum step:3825/14336 +step:15300/57344 train_time:8949253ms step_avg:584.92ms +step:15301/57344 train_time:8949271ms step_avg:584.88ms +step:15302/57344 train_time:8949519ms step_avg:584.86ms +step:15303/57344 train_time:8950070ms step_avg:584.86ms +grad accum step:3826/14336 +step:15304/57344 train_time:8951375ms step_avg:584.90ms +step:15305/57344 train_time:8951392ms step_avg:584.87ms +step:15306/57344 train_time:8951636ms step_avg:584.84ms +step:15307/57344 train_time:8952177ms step_avg:584.84ms +grad accum step:3827/14336 +step:15308/57344 train_time:8953450ms step_avg:584.89ms +step:15309/57344 train_time:8953467ms step_avg:584.85ms +step:15310/57344 train_time:8953713ms step_avg:584.83ms +step:15311/57344 train_time:8954257ms step_avg:584.83ms +grad accum step:3828/14336 +step:15312/57344 train_time:8955554ms step_avg:584.87ms +step:15313/57344 train_time:8955572ms step_avg:584.83ms +step:15314/57344 train_time:8955814ms step_avg:584.81ms +step:15315/57344 train_time:8956345ms step_avg:584.81ms +grad accum step:3829/14336 +step:15316/57344 train_time:8957630ms step_avg:584.85ms +step:15317/57344 train_time:8957647ms step_avg:584.82ms +step:15318/57344 train_time:8957893ms step_avg:584.80ms +step:15319/57344 train_time:8958441ms step_avg:584.79ms +grad accum step:3830/14336 +step:15320/57344 train_time:8959718ms step_avg:584.84ms +step:15321/57344 train_time:8959736ms step_avg:584.80ms +step:15322/57344 train_time:8959982ms step_avg:584.78ms +step:15323/57344 train_time:8960525ms step_avg:584.78ms +grad accum step:3831/14336 +step:15324/57344 train_time:8961805ms step_avg:584.82ms +step:15325/57344 train_time:8961822ms step_avg:584.78ms +step:15326/57344 train_time:8962067ms step_avg:584.76ms +step:15327/57344 train_time:8962606ms step_avg:584.76ms +grad accum step:3832/14336 +step:15328/57344 train_time:8963887ms step_avg:584.80ms +step:15329/57344 train_time:8963904ms step_avg:584.77ms +step:15330/57344 train_time:8964148ms step_avg:584.75ms +step:15331/57344 train_time:8964684ms step_avg:584.74ms +grad accum step:3833/14336 +step:15332/57344 train_time:8966086ms step_avg:584.80ms +step:15333/57344 train_time:8966103ms step_avg:584.76ms +step:15334/57344 train_time:8966352ms step_avg:584.74ms +step:15335/57344 train_time:8966902ms step_avg:584.73ms +grad accum step:3834/14336 +step:15336/57344 train_time:8968189ms step_avg:584.78ms +step:15337/57344 train_time:8968206ms step_avg:584.74ms +step:15338/57344 train_time:8968453ms step_avg:584.72ms +step:15339/57344 train_time:8969000ms step_avg:584.72ms +grad accum step:3835/14336 +step:15340/57344 train_time:8970291ms step_avg:584.76ms +step:15341/57344 train_time:8970308ms step_avg:584.73ms +step:15342/57344 train_time:8970553ms step_avg:584.71ms +step:15343/57344 train_time:8971102ms step_avg:584.70ms +grad accum step:3836/14336 +step:15344/57344 train_time:8972376ms step_avg:584.75ms +step:15345/57344 train_time:8972393ms step_avg:584.71ms +step:15346/57344 train_time:8972642ms step_avg:584.69ms +step:15347/57344 train_time:8973187ms step_avg:584.69ms +grad accum step:3837/14336 +step:15348/57344 train_time:8974465ms step_avg:584.73ms +step:15349/57344 train_time:8974482ms step_avg:584.69ms +step:15350/57344 train_time:8974727ms step_avg:584.67ms +step:15351/57344 train_time:8975273ms step_avg:584.67ms +grad accum step:3838/14336 +step:15352/57344 train_time:8976572ms step_avg:584.72ms +step:15353/57344 train_time:8976589ms step_avg:584.68ms +step:15354/57344 train_time:8976835ms step_avg:584.66ms +step:15355/57344 train_time:8977379ms step_avg:584.66ms +grad accum step:3839/14336 +step:15356/57344 train_time:8978694ms step_avg:584.70ms +step:15357/57344 train_time:8978710ms step_avg:584.67ms +step:15358/57344 train_time:8978955ms step_avg:584.64ms +step:15359/57344 train_time:8979495ms step_avg:584.64ms +grad accum step:3840/14336 +step:15360/57344 train_time:8980797ms step_avg:584.69ms +step:15360/57344 val_loss:6.809866 train_time:8980797ms step_avg:584.69ms +step:15361/57344 train_time:8980809ms step_avg:584.65ms +step:15362/57344 train_time:8981030ms step_avg:584.63ms +step:15363/57344 train_time:8981573ms step_avg:584.62ms +grad accum step:3841/14336 +step:15364/57344 train_time:8982855ms step_avg:584.67ms +step:15365/57344 train_time:8982872ms step_avg:584.63ms +step:15366/57344 train_time:8983117ms step_avg:584.61ms +step:15367/57344 train_time:8983668ms step_avg:584.61ms +grad accum step:3842/14336 +step:15368/57344 train_time:8984987ms step_avg:584.66ms +step:15369/57344 train_time:8985005ms step_avg:584.62ms +step:15370/57344 train_time:8985248ms step_avg:584.60ms +step:15371/57344 train_time:8985795ms step_avg:584.59ms +grad accum step:3843/14336 +step:15372/57344 train_time:8987082ms step_avg:584.64ms +step:15373/57344 train_time:8987098ms step_avg:584.60ms +step:15374/57344 train_time:8987346ms step_avg:584.58ms +step:15375/57344 train_time:8987886ms step_avg:584.58ms +grad accum step:3844/14336 +step:15376/57344 train_time:8989172ms step_avg:584.62ms +step:15377/57344 train_time:8989188ms step_avg:584.59ms +step:15378/57344 train_time:8989449ms step_avg:584.57ms +step:15379/57344 train_time:8990029ms step_avg:584.57ms +grad accum step:3845/14336 +step:15380/57344 train_time:8991375ms step_avg:584.61ms +step:15381/57344 train_time:8991392ms step_avg:584.58ms +step:15382/57344 train_time:8991639ms step_avg:584.56ms +step:15383/57344 train_time:8992190ms step_avg:584.55ms +grad accum step:3846/14336 +step:15384/57344 train_time:8993505ms step_avg:584.60ms +step:15385/57344 train_time:8993522ms step_avg:584.56ms +step:15386/57344 train_time:8993769ms step_avg:584.54ms +step:15387/57344 train_time:8994314ms step_avg:584.54ms +grad accum step:3847/14336 +step:15388/57344 train_time:8995590ms step_avg:584.58ms +step:15389/57344 train_time:8995607ms step_avg:584.55ms +step:15390/57344 train_time:8995853ms step_avg:584.53ms +step:15391/57344 train_time:8996399ms step_avg:584.52ms +grad accum step:3848/14336 +step:15392/57344 train_time:8997675ms step_avg:584.57ms +step:15393/57344 train_time:8997692ms step_avg:584.53ms +step:15394/57344 train_time:8997937ms step_avg:584.51ms +step:15395/57344 train_time:8998477ms step_avg:584.51ms +grad accum step:3849/14336 +step:15396/57344 train_time:8999762ms step_avg:584.55ms +step:15397/57344 train_time:8999778ms step_avg:584.52ms +step:15398/57344 train_time:9000022ms step_avg:584.49ms +step:15399/57344 train_time:9000567ms step_avg:584.49ms +grad accum step:3850/14336 +step:15400/57344 train_time:9001847ms step_avg:584.54ms +step:15401/57344 train_time:9001864ms step_avg:584.50ms +step:15402/57344 train_time:9002108ms step_avg:584.48ms +step:15403/57344 train_time:9002651ms step_avg:584.47ms +grad accum step:3851/14336 +step:15404/57344 train_time:9003950ms step_avg:584.52ms +step:15405/57344 train_time:9003967ms step_avg:584.48ms +step:15406/57344 train_time:9004217ms step_avg:584.46ms +step:15407/57344 train_time:9004759ms step_avg:584.46ms +grad accum step:3852/14336 +step:15408/57344 train_time:9006059ms step_avg:584.51ms +step:15409/57344 train_time:9006096ms step_avg:584.47ms +step:15410/57344 train_time:9006315ms step_avg:584.45ms +step:15411/57344 train_time:9006857ms step_avg:584.44ms +grad accum step:3853/14336 +step:15412/57344 train_time:9008133ms step_avg:584.49ms +step:15413/57344 train_time:9008150ms step_avg:584.45ms +step:15414/57344 train_time:9008398ms step_avg:584.43ms +step:15415/57344 train_time:9008945ms step_avg:584.43ms +grad accum step:3854/14336 +step:15416/57344 train_time:9010272ms step_avg:584.48ms +step:15417/57344 train_time:9010288ms step_avg:584.44ms +step:15418/57344 train_time:9010541ms step_avg:584.42ms +step:15419/57344 train_time:9011105ms step_avg:584.42ms +grad accum step:3855/14336 +step:15420/57344 train_time:9012429ms step_avg:584.46ms +step:15421/57344 train_time:9012446ms step_avg:584.43ms +step:15422/57344 train_time:9012693ms step_avg:584.40ms +step:15423/57344 train_time:9013237ms step_avg:584.40ms +grad accum step:3856/14336 +step:15424/57344 train_time:9014549ms step_avg:584.45ms +step:15424/57344 val_loss:6.811370 train_time:9014549ms step_avg:584.45ms +step:15425/57344 train_time:9014561ms step_avg:584.41ms +step:15426/57344 train_time:9014793ms step_avg:584.39ms +step:15427/57344 train_time:9015363ms step_avg:584.39ms +grad accum step:3857/14336 +step:15428/57344 train_time:9016685ms step_avg:584.44ms +step:15429/57344 train_time:9016702ms step_avg:584.40ms +step:15430/57344 train_time:9016951ms step_avg:584.38ms +step:15431/57344 train_time:9017506ms step_avg:584.38ms +grad accum step:3858/14336 +step:15432/57344 train_time:9018809ms step_avg:584.42ms +step:15433/57344 train_time:9018826ms step_avg:584.39ms +step:15434/57344 train_time:9019069ms step_avg:584.36ms +step:15435/57344 train_time:9019615ms step_avg:584.36ms +grad accum step:3859/14336 +step:15436/57344 train_time:9020901ms step_avg:584.41ms +step:15437/57344 train_time:9020918ms step_avg:584.37ms +step:15438/57344 train_time:9021167ms step_avg:584.35ms +step:15439/57344 train_time:9021713ms step_avg:584.35ms +grad accum step:3860/14336 +step:15440/57344 train_time:9022985ms step_avg:584.39ms +step:15441/57344 train_time:9023002ms step_avg:584.35ms +step:15442/57344 train_time:9023250ms step_avg:584.33ms +step:15443/57344 train_time:9023800ms step_avg:584.33ms +grad accum step:3861/14336 +step:15444/57344 train_time:9025091ms step_avg:584.38ms +step:15445/57344 train_time:9025108ms step_avg:584.34ms +step:15446/57344 train_time:9025355ms step_avg:584.32ms +step:15447/57344 train_time:9025900ms step_avg:584.31ms +grad accum step:3862/14336 +step:15448/57344 train_time:9027178ms step_avg:584.36ms +step:15449/57344 train_time:9027195ms step_avg:584.32ms +step:15450/57344 train_time:9027447ms step_avg:584.30ms +step:15451/57344 train_time:9027994ms step_avg:584.30ms +grad accum step:3863/14336 +step:15452/57344 train_time:9029260ms step_avg:584.34ms +step:15453/57344 train_time:9029278ms step_avg:584.31ms +step:15454/57344 train_time:9029521ms step_avg:584.28ms +step:15455/57344 train_time:9030062ms step_avg:584.28ms +grad accum step:3864/14336 +step:15456/57344 train_time:9031366ms step_avg:584.33ms +step:15457/57344 train_time:9031383ms step_avg:584.29ms +step:15458/57344 train_time:9031632ms step_avg:584.27ms +step:15459/57344 train_time:9032177ms step_avg:584.27ms +grad accum step:3865/14336 +step:15460/57344 train_time:9033457ms step_avg:584.31ms +step:15461/57344 train_time:9033474ms step_avg:584.27ms +step:15462/57344 train_time:9033722ms step_avg:584.25ms +step:15463/57344 train_time:9034266ms step_avg:584.25ms +grad accum step:3866/14336 +step:15464/57344 train_time:9035540ms step_avg:584.30ms +step:15465/57344 train_time:9035556ms step_avg:584.26ms +step:15466/57344 train_time:9035804ms step_avg:584.24ms +step:15467/57344 train_time:9036356ms step_avg:584.23ms +grad accum step:3867/14336 +step:15468/57344 train_time:9037642ms step_avg:584.28ms +step:15469/57344 train_time:9037658ms step_avg:584.24ms +step:15470/57344 train_time:9037906ms step_avg:584.22ms +step:15471/57344 train_time:9038455ms step_avg:584.22ms +grad accum step:3868/14336 +step:15472/57344 train_time:9039787ms step_avg:584.27ms +step:15473/57344 train_time:9039803ms step_avg:584.23ms +step:15474/57344 train_time:9040049ms step_avg:584.21ms +step:15475/57344 train_time:9040591ms step_avg:584.21ms +grad accum step:3869/14336 +step:15476/57344 train_time:9041868ms step_avg:584.25ms +step:15477/57344 train_time:9041883ms step_avg:584.21ms +step:15478/57344 train_time:9042183ms step_avg:584.20ms +step:15479/57344 train_time:9042683ms step_avg:584.19ms +grad accum step:3870/14336 +step:15480/57344 train_time:9044012ms step_avg:584.24ms +step:15481/57344 train_time:9044023ms step_avg:584.20ms +step:15482/57344 train_time:9044249ms step_avg:584.18ms +step:15483/57344 train_time:9044798ms step_avg:584.18ms +grad accum step:3871/14336 +step:15484/57344 train_time:9046098ms step_avg:584.22ms +step:15485/57344 train_time:9046115ms step_avg:584.19ms +step:15486/57344 train_time:9046359ms step_avg:584.16ms +step:15487/57344 train_time:9046900ms step_avg:584.16ms +grad accum step:3872/14336 +step:15488/57344 train_time:9048181ms step_avg:584.21ms +step:15488/57344 val_loss:6.804217 train_time:9048182ms step_avg:584.21ms +step:15489/57344 train_time:9048895ms step_avg:584.21ms +step:15490/57344 train_time:9049013ms step_avg:584.18ms +step:15491/57344 train_time:9049477ms step_avg:584.18ms +grad accum step:3873/14336 +step:15492/57344 train_time:9050963ms step_avg:584.23ms +step:15493/57344 train_time:9050974ms step_avg:584.20ms +step:15494/57344 train_time:9051195ms step_avg:584.17ms +step:15495/57344 train_time:9051746ms step_avg:584.17ms +grad accum step:3874/14336 +step:15496/57344 train_time:9053062ms step_avg:584.22ms +step:15497/57344 train_time:9053079ms step_avg:584.18ms +step:15498/57344 train_time:9053321ms step_avg:584.16ms +step:15499/57344 train_time:9053859ms step_avg:584.16ms +grad accum step:3875/14336 +step:15500/57344 train_time:9055141ms step_avg:584.20ms +step:15501/57344 train_time:9055159ms step_avg:584.17ms +step:15502/57344 train_time:9055412ms step_avg:584.14ms +step:15503/57344 train_time:9055975ms step_avg:584.14ms +grad accum step:3876/14336 +step:15504/57344 train_time:9057268ms step_avg:584.19ms +step:15505/57344 train_time:9057285ms step_avg:584.15ms +step:15506/57344 train_time:9057528ms step_avg:584.13ms +step:15507/57344 train_time:9058068ms step_avg:584.13ms +grad accum step:3877/14336 +step:15508/57344 train_time:9059348ms step_avg:584.17ms +step:15509/57344 train_time:9059365ms step_avg:584.14ms +step:15510/57344 train_time:9059613ms step_avg:584.11ms +step:15511/57344 train_time:9060172ms step_avg:584.11ms +grad accum step:3878/14336 +step:15512/57344 train_time:9061502ms step_avg:584.16ms +step:15513/57344 train_time:9061518ms step_avg:584.12ms +step:15514/57344 train_time:9061762ms step_avg:584.10ms +step:15515/57344 train_time:9062297ms step_avg:584.10ms +grad accum step:3879/14336 +step:15516/57344 train_time:9063623ms step_avg:584.15ms +step:15517/57344 train_time:9063640ms step_avg:584.11ms +step:15518/57344 train_time:9063888ms step_avg:584.09ms +step:15519/57344 train_time:9064438ms step_avg:584.09ms +grad accum step:3880/14336 +step:15520/57344 train_time:9065721ms step_avg:584.13ms +step:15521/57344 train_time:9065738ms step_avg:584.09ms +step:15522/57344 train_time:9065983ms step_avg:584.07ms +step:15523/57344 train_time:9066529ms step_avg:584.07ms +grad accum step:3881/14336 +step:15524/57344 train_time:9067804ms step_avg:584.12ms +step:15525/57344 train_time:9067820ms step_avg:584.08ms +step:15526/57344 train_time:9068063ms step_avg:584.06ms +step:15527/57344 train_time:9068605ms step_avg:584.05ms +grad accum step:3882/14336 +step:15528/57344 train_time:9069909ms step_avg:584.10ms +step:15529/57344 train_time:9069926ms step_avg:584.06ms +step:15530/57344 train_time:9070172ms step_avg:584.04ms +step:15531/57344 train_time:9070715ms step_avg:584.04ms +grad accum step:3883/14336 +step:15532/57344 train_time:9071984ms step_avg:584.08ms +step:15533/57344 train_time:9072001ms step_avg:584.05ms +step:15534/57344 train_time:9072242ms step_avg:584.02ms +step:15535/57344 train_time:9072772ms step_avg:584.02ms +grad accum step:3884/14336 +step:15536/57344 train_time:9074055ms step_avg:584.07ms +step:15537/57344 train_time:9074072ms step_avg:584.03ms +step:15538/57344 train_time:9074318ms step_avg:584.01ms +step:15539/57344 train_time:9074859ms step_avg:584.01ms +grad accum step:3885/14336 +step:15540/57344 train_time:9076133ms step_avg:584.05ms +step:15541/57344 train_time:9076150ms step_avg:584.01ms +step:15542/57344 train_time:9076396ms step_avg:583.99ms +step:15543/57344 train_time:9076943ms step_avg:583.99ms +grad accum step:3886/14336 +step:15544/57344 train_time:9078237ms step_avg:584.03ms +step:15545/57344 train_time:9078254ms step_avg:584.00ms +step:15546/57344 train_time:9082135ms step_avg:584.21ms +step:15547/57344 train_time:9082444ms step_avg:584.19ms +grad accum step:3887/14336 +step:15548/57344 train_time:9083735ms step_avg:584.24ms +step:15549/57344 train_time:9086119ms step_avg:584.35ms +step:15550/57344 train_time:9086426ms step_avg:584.34ms +step:15551/57344 train_time:9086963ms step_avg:584.33ms +grad accum step:3888/14336 +step:15552/57344 train_time:9088252ms step_avg:584.38ms +step:15552/57344 val_loss:6.798154 train_time:9088253ms step_avg:584.38ms +step:15553/57344 train_time:9088264ms step_avg:584.34ms +step:15554/57344 train_time:9088485ms step_avg:584.32ms +step:15555/57344 train_time:9089014ms step_avg:584.31ms +grad accum step:3889/14336 +step:15556/57344 train_time:9090294ms step_avg:584.36ms +step:15557/57344 train_time:9090311ms step_avg:584.32ms +step:15558/57344 train_time:9090554ms step_avg:584.30ms +step:15559/57344 train_time:9091099ms step_avg:584.30ms +grad accum step:3890/14336 +step:15560/57344 train_time:9092375ms step_avg:584.34ms +step:15561/57344 train_time:9092392ms step_avg:584.31ms +step:15562/57344 train_time:9092637ms step_avg:584.28ms +step:15563/57344 train_time:9093176ms step_avg:584.28ms +grad accum step:3891/14336 +step:15564/57344 train_time:9094459ms step_avg:584.33ms +step:15565/57344 train_time:9094476ms step_avg:584.29ms +step:15566/57344 train_time:9094720ms step_avg:584.27ms +step:15567/57344 train_time:9095257ms step_avg:584.27ms +grad accum step:3892/14336 +step:15568/57344 train_time:9096541ms step_avg:584.31ms +step:15569/57344 train_time:9096558ms step_avg:584.27ms +step:15570/57344 train_time:9096803ms step_avg:584.25ms +step:15571/57344 train_time:9097344ms step_avg:584.25ms +grad accum step:3893/14336 +step:15572/57344 train_time:9098612ms step_avg:584.29ms +step:15573/57344 train_time:9098629ms step_avg:584.26ms +step:15574/57344 train_time:9098873ms step_avg:584.23ms +step:15575/57344 train_time:9099417ms step_avg:584.23ms +grad accum step:3894/14336 +step:15576/57344 train_time:9100698ms step_avg:584.28ms +step:15577/57344 train_time:9100715ms step_avg:584.24ms +step:15578/57344 train_time:9100959ms step_avg:584.22ms +step:15579/57344 train_time:9101501ms step_avg:584.22ms +grad accum step:3895/14336 +step:15580/57344 train_time:9102783ms step_avg:584.26ms +step:15581/57344 train_time:9102800ms step_avg:584.22ms +step:15582/57344 train_time:9103056ms step_avg:584.20ms +step:15583/57344 train_time:9103622ms step_avg:584.20ms +grad accum step:3896/14336 +step:15584/57344 train_time:9104896ms step_avg:584.25ms +step:15585/57344 train_time:9104913ms step_avg:584.21ms +step:15586/57344 train_time:9105158ms step_avg:584.19ms +step:15587/57344 train_time:9105701ms step_avg:584.19ms +grad accum step:3897/14336 +step:15588/57344 train_time:9106985ms step_avg:584.23ms +step:15589/57344 train_time:9107002ms step_avg:584.19ms +step:15590/57344 train_time:9107247ms step_avg:584.17ms +step:15591/57344 train_time:9107795ms step_avg:584.17ms +grad accum step:3898/14336 +step:15592/57344 train_time:9109071ms step_avg:584.21ms +step:15593/57344 train_time:9109088ms step_avg:584.18ms +step:15594/57344 train_time:9109335ms step_avg:584.16ms +step:15595/57344 train_time:9109882ms step_avg:584.15ms +grad accum step:3899/14336 +step:15596/57344 train_time:9111153ms step_avg:584.20ms +step:15597/57344 train_time:9111170ms step_avg:584.16ms +step:15598/57344 train_time:9111411ms step_avg:584.14ms +step:15599/57344 train_time:9111944ms step_avg:584.14ms +grad accum step:3900/14336 +step:15600/57344 train_time:9113233ms step_avg:584.18ms +step:15601/57344 train_time:9113250ms step_avg:584.15ms +step:15602/57344 train_time:9113496ms step_avg:584.12ms +step:15603/57344 train_time:9114036ms step_avg:584.12ms +grad accum step:3901/14336 +step:15604/57344 train_time:9115327ms step_avg:584.17ms +step:15605/57344 train_time:9115343ms step_avg:584.13ms +step:15606/57344 train_time:9115587ms step_avg:584.11ms +step:15607/57344 train_time:9116132ms step_avg:584.11ms +grad accum step:3902/14336 +step:15608/57344 train_time:9117415ms step_avg:584.15ms +step:15609/57344 train_time:9117432ms step_avg:584.11ms +step:15610/57344 train_time:9117678ms step_avg:584.09ms +step:15611/57344 train_time:9118220ms step_avg:584.09ms +grad accum step:3903/14336 +step:15612/57344 train_time:9119513ms step_avg:584.13ms +step:15613/57344 train_time:9119530ms step_avg:584.10ms +step:15614/57344 train_time:9119773ms step_avg:584.08ms +step:15615/57344 train_time:9120316ms step_avg:584.07ms +grad accum step:3904/14336 +step:15616/57344 train_time:9121597ms step_avg:584.12ms +step:15616/57344 val_loss:6.791600 train_time:9121598ms step_avg:584.12ms +step:15617/57344 train_time:9121609ms step_avg:584.08ms +step:15618/57344 train_time:9121835ms step_avg:584.06ms +step:15619/57344 train_time:9122380ms step_avg:584.06ms +grad accum step:3905/14336 +step:15620/57344 train_time:9123657ms step_avg:584.10ms +step:15621/57344 train_time:9123674ms step_avg:584.06ms +step:15622/57344 train_time:9123917ms step_avg:584.04ms +step:15623/57344 train_time:9124460ms step_avg:584.04ms +grad accum step:3906/14336 +step:15624/57344 train_time:9125742ms step_avg:584.08ms +step:15625/57344 train_time:9125759ms step_avg:584.05ms +step:15626/57344 train_time:9126008ms step_avg:584.03ms +step:15627/57344 train_time:9126559ms step_avg:584.02ms +grad accum step:3907/14336 +step:15628/57344 train_time:9127852ms step_avg:584.07ms +step:15629/57344 train_time:9127869ms step_avg:584.03ms +step:15630/57344 train_time:9128114ms step_avg:584.01ms +step:15631/57344 train_time:9128661ms step_avg:584.01ms +grad accum step:3908/14336 +step:15632/57344 train_time:9129974ms step_avg:584.06ms +step:15633/57344 train_time:9129991ms step_avg:584.02ms +step:15634/57344 train_time:9130237ms step_avg:584.00ms +step:15635/57344 train_time:9130773ms step_avg:584.00ms +grad accum step:3909/14336 +step:15636/57344 train_time:9132121ms step_avg:584.04ms +step:15637/57344 train_time:9132138ms step_avg:584.01ms +step:15638/57344 train_time:9132393ms step_avg:583.99ms +step:15639/57344 train_time:9132959ms step_avg:583.99ms +grad accum step:3910/14336 +step:15640/57344 train_time:9134277ms step_avg:584.03ms +step:15641/57344 train_time:9134294ms step_avg:584.00ms +step:15642/57344 train_time:9134540ms step_avg:583.98ms +step:15643/57344 train_time:9135092ms step_avg:583.97ms +grad accum step:3911/14336 +step:15644/57344 train_time:9136392ms step_avg:584.02ms +step:15645/57344 train_time:9136409ms step_avg:583.98ms +step:15646/57344 train_time:9136662ms step_avg:583.96ms +step:15647/57344 train_time:9137223ms step_avg:583.96ms +grad accum step:3912/14336 +step:15648/57344 train_time:9138520ms step_avg:584.01ms +step:15649/57344 train_time:9138537ms step_avg:583.97ms +step:15650/57344 train_time:9138786ms step_avg:583.95ms +step:15651/57344 train_time:9139339ms step_avg:583.95ms +grad accum step:3913/14336 +step:15652/57344 train_time:9140626ms step_avg:583.99ms +step:15653/57344 train_time:9140643ms step_avg:583.95ms +step:15654/57344 train_time:9140890ms step_avg:583.93ms +step:15655/57344 train_time:9141433ms step_avg:583.93ms +grad accum step:3914/14336 +step:15656/57344 train_time:9142733ms step_avg:583.98ms +step:15657/57344 train_time:9142750ms step_avg:583.94ms +step:15658/57344 train_time:9142996ms step_avg:583.92ms +step:15659/57344 train_time:9143538ms step_avg:583.92ms +grad accum step:3915/14336 +step:15660/57344 train_time:9144833ms step_avg:583.96ms +step:15661/57344 train_time:9144849ms step_avg:583.93ms +step:15662/57344 train_time:9145098ms step_avg:583.90ms +step:15663/57344 train_time:9145643ms step_avg:583.90ms +grad accum step:3916/14336 +step:15664/57344 train_time:9146917ms step_avg:583.95ms +step:15665/57344 train_time:9146934ms step_avg:583.91ms +step:15666/57344 train_time:9147175ms step_avg:583.89ms +step:15667/57344 train_time:9147710ms step_avg:583.88ms +grad accum step:3917/14336 +step:15668/57344 train_time:9148978ms step_avg:583.93ms +step:15669/57344 train_time:9148995ms step_avg:583.89ms +step:15670/57344 train_time:9149240ms step_avg:583.87ms +step:15671/57344 train_time:9149784ms step_avg:583.87ms +grad accum step:3918/14336 +step:15672/57344 train_time:9151065ms step_avg:583.91ms +step:15673/57344 train_time:9151082ms step_avg:583.88ms +step:15674/57344 train_time:9151327ms step_avg:583.85ms +step:15675/57344 train_time:9151867ms step_avg:583.85ms +grad accum step:3919/14336 +step:15676/57344 train_time:9153162ms step_avg:583.90ms +step:15677/57344 train_time:9153179ms step_avg:583.86ms +step:15678/57344 train_time:9153425ms step_avg:583.84ms +step:15679/57344 train_time:9153972ms step_avg:583.84ms +grad accum step:3920/14336 +step:15680/57344 train_time:9155247ms step_avg:583.88ms +step:15680/57344 val_loss:6.777744 train_time:9155248ms step_avg:583.88ms +step:15681/57344 train_time:9155259ms step_avg:583.84ms +step:15682/57344 train_time:9155482ms step_avg:583.82ms +step:15683/57344 train_time:9156030ms step_avg:583.82ms +grad accum step:3921/14336 +step:15684/57344 train_time:9157332ms step_avg:583.86ms +step:15685/57344 train_time:9157348ms step_avg:583.83ms +step:15686/57344 train_time:9157594ms step_avg:583.81ms +step:15687/57344 train_time:9158136ms step_avg:583.80ms +grad accum step:3922/14336 +step:15688/57344 train_time:9159423ms step_avg:583.85ms +step:15689/57344 train_time:9159439ms step_avg:583.81ms +step:15690/57344 train_time:9159692ms step_avg:583.79ms +step:15691/57344 train_time:9160254ms step_avg:583.79ms +grad accum step:3923/14336 +step:15692/57344 train_time:9161554ms step_avg:583.84ms +step:15693/57344 train_time:9161570ms step_avg:583.80ms +step:15694/57344 train_time:9161821ms step_avg:583.78ms +step:15695/57344 train_time:9162366ms step_avg:583.78ms +grad accum step:3924/14336 +step:15696/57344 train_time:9163657ms step_avg:583.82ms +step:15697/57344 train_time:9163674ms step_avg:583.79ms +step:15698/57344 train_time:9163925ms step_avg:583.76ms +step:15699/57344 train_time:9164474ms step_avg:583.76ms +grad accum step:3925/14336 +step:15700/57344 train_time:9165747ms step_avg:583.81ms +step:15701/57344 train_time:9165765ms step_avg:583.77ms +step:15702/57344 train_time:9166013ms step_avg:583.75ms +step:15703/57344 train_time:9166560ms step_avg:583.75ms +grad accum step:3926/14336 +step:15704/57344 train_time:9167843ms step_avg:583.79ms +step:15705/57344 train_time:9167860ms step_avg:583.75ms +step:15706/57344 train_time:9168103ms step_avg:583.73ms +step:15707/57344 train_time:9168647ms step_avg:583.73ms +grad accum step:3927/14336 +step:15708/57344 train_time:9169944ms step_avg:583.78ms +step:15709/57344 train_time:9169961ms step_avg:583.74ms +step:15710/57344 train_time:9170204ms step_avg:583.72ms +step:15711/57344 train_time:9170750ms step_avg:583.72ms +grad accum step:3928/14336 +step:15712/57344 train_time:9172074ms step_avg:583.76ms +step:15713/57344 train_time:9172091ms step_avg:583.73ms +step:15714/57344 train_time:9172335ms step_avg:583.70ms +step:15715/57344 train_time:9172872ms step_avg:583.70ms +grad accum step:3929/14336 +step:15716/57344 train_time:9174175ms step_avg:583.75ms +step:15717/57344 train_time:9174191ms step_avg:583.71ms +step:15718/57344 train_time:9174438ms step_avg:583.69ms +step:15719/57344 train_time:9174979ms step_avg:583.69ms +grad accum step:3930/14336 +step:15720/57344 train_time:9176259ms step_avg:583.73ms +step:15721/57344 train_time:9176276ms step_avg:583.70ms +step:15722/57344 train_time:9176531ms step_avg:583.67ms +step:15723/57344 train_time:9177095ms step_avg:583.67ms +grad accum step:3931/14336 +step:15724/57344 train_time:9178401ms step_avg:583.72ms +step:15725/57344 train_time:9178418ms step_avg:583.68ms +step:15726/57344 train_time:9178664ms step_avg:583.66ms +step:15727/57344 train_time:9179206ms step_avg:583.66ms +grad accum step:3932/14336 +step:15728/57344 train_time:9180507ms step_avg:583.70ms +step:15729/57344 train_time:9180524ms step_avg:583.67ms +step:15730/57344 train_time:9180772ms step_avg:583.65ms +step:15731/57344 train_time:9181314ms step_avg:583.64ms +grad accum step:3933/14336 +step:15732/57344 train_time:9182584ms step_avg:583.69ms +step:15733/57344 train_time:9182601ms step_avg:583.65ms +step:15734/57344 train_time:9182850ms step_avg:583.63ms +step:15735/57344 train_time:9183393ms step_avg:583.63ms +grad accum step:3934/14336 +step:15736/57344 train_time:9184692ms step_avg:583.67ms +step:15737/57344 train_time:9184709ms step_avg:583.64ms +step:15738/57344 train_time:9184957ms step_avg:583.62ms +step:15739/57344 train_time:9185504ms step_avg:583.61ms +grad accum step:3935/14336 +step:15740/57344 train_time:9186794ms step_avg:583.66ms +step:15741/57344 train_time:9186811ms step_avg:583.62ms +step:15742/57344 train_time:9187057ms step_avg:583.60ms +step:15743/57344 train_time:9187599ms step_avg:583.60ms +grad accum step:3936/14336 +step:15744/57344 train_time:9188906ms step_avg:583.64ms +step:15744/57344 val_loss:6.765359 train_time:9188907ms step_avg:583.65ms +step:15745/57344 train_time:9189024ms step_avg:583.62ms +step:15746/57344 train_time:9189214ms step_avg:583.59ms +step:15747/57344 train_time:9189735ms step_avg:583.59ms +grad accum step:3937/14336 +step:15748/57344 train_time:9191021ms step_avg:583.63ms +step:15749/57344 train_time:9191033ms step_avg:583.59ms +step:15750/57344 train_time:9191277ms step_avg:583.57ms +step:15751/57344 train_time:9191826ms step_avg:583.57ms +grad accum step:3938/14336 +step:15752/57344 train_time:9193124ms step_avg:583.62ms +step:15753/57344 train_time:9193141ms step_avg:583.58ms +step:15754/57344 train_time:9193391ms step_avg:583.56ms +step:15755/57344 train_time:9193937ms step_avg:583.56ms +grad accum step:3939/14336 +step:15756/57344 train_time:9195241ms step_avg:583.60ms +step:15757/57344 train_time:9195258ms step_avg:583.57ms +step:15758/57344 train_time:9195503ms step_avg:583.55ms +step:15759/57344 train_time:9196046ms step_avg:583.54ms +grad accum step:3940/14336 +step:15760/57344 train_time:9197366ms step_avg:583.59ms +step:15761/57344 train_time:9197382ms step_avg:583.55ms +step:15762/57344 train_time:9197631ms step_avg:583.53ms +step:15763/57344 train_time:9198178ms step_avg:583.53ms +grad accum step:3941/14336 +step:15764/57344 train_time:9199525ms step_avg:583.58ms +step:15765/57344 train_time:9199541ms step_avg:583.54ms +step:15766/57344 train_time:9199784ms step_avg:583.52ms +step:15767/57344 train_time:9200324ms step_avg:583.52ms +grad accum step:3942/14336 +step:15768/57344 train_time:9201662ms step_avg:583.57ms +step:15769/57344 train_time:9201679ms step_avg:583.53ms +step:15770/57344 train_time:9201932ms step_avg:583.51ms +step:15771/57344 train_time:9202486ms step_avg:583.51ms +grad accum step:3943/14336 +step:15772/57344 train_time:9203812ms step_avg:583.55ms +step:15773/57344 train_time:9203829ms step_avg:583.52ms +step:15774/57344 train_time:9204077ms step_avg:583.50ms +step:15775/57344 train_time:9204633ms step_avg:583.49ms +grad accum step:3944/14336 +step:15776/57344 train_time:9205926ms step_avg:583.54ms +step:15777/57344 train_time:9205943ms step_avg:583.50ms +step:15778/57344 train_time:9206191ms step_avg:583.48ms +step:15779/57344 train_time:9206733ms step_avg:583.48ms +grad accum step:3945/14336 +step:15780/57344 train_time:9208027ms step_avg:583.53ms +step:15781/57344 train_time:9208044ms step_avg:583.49ms +step:15782/57344 train_time:9208291ms step_avg:583.47ms +step:15783/57344 train_time:9208835ms step_avg:583.47ms +grad accum step:3946/14336 +step:15784/57344 train_time:9210148ms step_avg:583.51ms +step:15785/57344 train_time:9210165ms step_avg:583.48ms +step:15786/57344 train_time:9210411ms step_avg:583.45ms +step:15787/57344 train_time:9210953ms step_avg:583.45ms +grad accum step:3947/14336 +step:15788/57344 train_time:9212257ms step_avg:583.50ms +step:15789/57344 train_time:9212274ms step_avg:583.46ms +step:15790/57344 train_time:9212522ms step_avg:583.44ms +step:15791/57344 train_time:9213068ms step_avg:583.44ms +grad accum step:3948/14336 +step:15792/57344 train_time:9214360ms step_avg:583.48ms +step:15793/57344 train_time:9214376ms step_avg:583.45ms +step:15794/57344 train_time:9214624ms step_avg:583.43ms +step:15795/57344 train_time:9215172ms step_avg:583.42ms +grad accum step:3949/14336 +step:15796/57344 train_time:9216463ms step_avg:583.47ms +step:15797/57344 train_time:9216479ms step_avg:583.43ms +step:15798/57344 train_time:9216723ms step_avg:583.41ms +step:15799/57344 train_time:9217264ms step_avg:583.41ms +grad accum step:3950/14336 +step:15800/57344 train_time:9218594ms step_avg:583.46ms +step:15801/57344 train_time:9218611ms step_avg:583.42ms +step:15802/57344 train_time:9218856ms step_avg:583.40ms +step:15803/57344 train_time:9219393ms step_avg:583.40ms +grad accum step:3951/14336 +step:15804/57344 train_time:9220667ms step_avg:583.44ms +step:15805/57344 train_time:9220684ms step_avg:583.40ms +step:15806/57344 train_time:9220928ms step_avg:583.38ms +step:15807/57344 train_time:9221473ms step_avg:583.38ms +grad accum step:3952/14336 +step:15808/57344 train_time:9222756ms step_avg:583.42ms +step:15808/57344 val_loss:6.745915 train_time:9222757ms step_avg:583.42ms +step:15809/57344 train_time:9222769ms step_avg:583.39ms +step:15810/57344 train_time:9222993ms step_avg:583.36ms +step:15811/57344 train_time:9223541ms step_avg:583.36ms +grad accum step:3953/14336 +step:15812/57344 train_time:9224831ms step_avg:583.41ms +step:15813/57344 train_time:9224848ms step_avg:583.37ms +step:15814/57344 train_time:9225096ms step_avg:583.35ms +step:15815/57344 train_time:9225638ms step_avg:583.35ms +grad accum step:3954/14336 +step:15816/57344 train_time:9226939ms step_avg:583.39ms +step:15817/57344 train_time:9226956ms step_avg:583.36ms +step:15818/57344 train_time:9227206ms step_avg:583.34ms +step:15819/57344 train_time:9227749ms step_avg:583.33ms +grad accum step:3955/14336 +step:15820/57344 train_time:9229023ms step_avg:583.38ms +step:15821/57344 train_time:9229039ms step_avg:583.34ms +step:15822/57344 train_time:9229284ms step_avg:583.32ms +step:15823/57344 train_time:9229829ms step_avg:583.32ms +grad accum step:3956/14336 +step:15824/57344 train_time:9231122ms step_avg:583.36ms +step:15825/57344 train_time:9231139ms step_avg:583.33ms +step:15826/57344 train_time:9231387ms step_avg:583.31ms +step:15827/57344 train_time:9231932ms step_avg:583.30ms +grad accum step:3957/14336 +step:15828/57344 train_time:9233211ms step_avg:583.35ms +step:15829/57344 train_time:9233227ms step_avg:583.31ms +step:15830/57344 train_time:9233476ms step_avg:583.29ms +step:15831/57344 train_time:9234022ms step_avg:583.29ms +grad accum step:3958/14336 +step:15832/57344 train_time:9235359ms step_avg:583.33ms +step:15833/57344 train_time:9235375ms step_avg:583.30ms +step:15834/57344 train_time:9235618ms step_avg:583.28ms +step:15835/57344 train_time:9236157ms step_avg:583.27ms +grad accum step:3959/14336 +step:15836/57344 train_time:9237437ms step_avg:583.32ms +step:15837/57344 train_time:9237454ms step_avg:583.28ms +step:15838/57344 train_time:9237701ms step_avg:583.26ms +step:15839/57344 train_time:9238246ms step_avg:583.26ms +grad accum step:3960/14336 +step:15840/57344 train_time:9239529ms step_avg:583.30ms +step:15841/57344 train_time:9239546ms step_avg:583.27ms +step:15842/57344 train_time:9239793ms step_avg:583.25ms +step:15843/57344 train_time:9240331ms step_avg:583.24ms +grad accum step:3961/14336 +step:15844/57344 train_time:9241649ms step_avg:583.29ms +step:15845/57344 train_time:9241666ms step_avg:583.25ms +step:15846/57344 train_time:9241920ms step_avg:583.23ms +step:15847/57344 train_time:9242488ms step_avg:583.23ms +grad accum step:3962/14336 +step:15848/57344 train_time:9243810ms step_avg:583.28ms +step:15849/57344 train_time:9243827ms step_avg:583.24ms +step:15850/57344 train_time:9244072ms step_avg:583.22ms +step:15851/57344 train_time:9244617ms step_avg:583.22ms +grad accum step:3963/14336 +step:15852/57344 train_time:9245886ms step_avg:583.26ms +step:15853/57344 train_time:9245903ms step_avg:583.23ms +step:15854/57344 train_time:9246152ms step_avg:583.21ms +step:15855/57344 train_time:9246700ms step_avg:583.20ms +grad accum step:3964/14336 +step:15856/57344 train_time:9247976ms step_avg:583.25ms +step:15857/57344 train_time:9247992ms step_avg:583.21ms +step:15858/57344 train_time:9248241ms step_avg:583.19ms +step:15859/57344 train_time:9248783ms step_avg:583.19ms +grad accum step:3965/14336 +step:15860/57344 train_time:9250060ms step_avg:583.23ms +step:15861/57344 train_time:9250077ms step_avg:583.20ms +step:15862/57344 train_time:9250324ms step_avg:583.18ms +step:15863/57344 train_time:9250869ms step_avg:583.17ms +grad accum step:3966/14336 +step:15864/57344 train_time:9252151ms step_avg:583.22ms +step:15865/57344 train_time:9252168ms step_avg:583.18ms +step:15866/57344 train_time:9252414ms step_avg:583.16ms +step:15867/57344 train_time:9252962ms step_avg:583.16ms +grad accum step:3967/14336 +step:15868/57344 train_time:9254282ms step_avg:583.20ms +step:15869/57344 train_time:9254299ms step_avg:583.17ms +step:15870/57344 train_time:9254544ms step_avg:583.15ms +step:15871/57344 train_time:9255091ms step_avg:583.14ms +grad accum step:3968/14336 +step:15872/57344 train_time:9256375ms step_avg:583.19ms +step:15872/57344 val_loss:6.722579 train_time:9256376ms step_avg:583.19ms +step:15873/57344 train_time:9256388ms step_avg:583.15ms +step:15874/57344 train_time:9256608ms step_avg:583.13ms +step:15875/57344 train_time:9257151ms step_avg:583.13ms +grad accum step:3969/14336 +step:15876/57344 train_time:9258435ms step_avg:583.17ms +step:15877/57344 train_time:9258452ms step_avg:583.14ms +step:15878/57344 train_time:9258703ms step_avg:583.12ms +step:15879/57344 train_time:9259257ms step_avg:583.11ms +grad accum step:3970/14336 +step:15880/57344 train_time:9260533ms step_avg:583.16ms +step:15881/57344 train_time:9260550ms step_avg:583.12ms +step:15882/57344 train_time:9260793ms step_avg:583.10ms +step:15883/57344 train_time:9261339ms step_avg:583.10ms +grad accum step:3971/14336 +step:15884/57344 train_time:9262624ms step_avg:583.14ms +step:15885/57344 train_time:9262641ms step_avg:583.11ms +step:15886/57344 train_time:9262889ms step_avg:583.09ms +step:15887/57344 train_time:9263432ms step_avg:583.08ms +grad accum step:3972/14336 +step:15888/57344 train_time:9264720ms step_avg:583.13ms +step:15889/57344 train_time:9264737ms step_avg:583.09ms +step:15890/57344 train_time:9264980ms step_avg:583.07ms +step:15891/57344 train_time:9265523ms step_avg:583.07ms +grad accum step:3973/14336 +step:15892/57344 train_time:9266831ms step_avg:583.11ms +step:15893/57344 train_time:9266847ms step_avg:583.08ms +step:15894/57344 train_time:9267095ms step_avg:583.06ms +step:15895/57344 train_time:9267635ms step_avg:583.05ms +grad accum step:3974/14336 +step:15896/57344 train_time:9268927ms step_avg:583.10ms +step:15897/57344 train_time:9268945ms step_avg:583.06ms +step:15898/57344 train_time:9269197ms step_avg:583.04ms +step:15899/57344 train_time:9269748ms step_avg:583.04ms +grad accum step:3975/14336 +step:15900/57344 train_time:9271053ms step_avg:583.09ms +step:15901/57344 train_time:9271070ms step_avg:583.05ms +step:15902/57344 train_time:9271312ms step_avg:583.03ms +step:15903/57344 train_time:9271854ms step_avg:583.03ms +grad accum step:3976/14336 +step:15904/57344 train_time:9273134ms step_avg:583.07ms +step:15905/57344 train_time:9273151ms step_avg:583.03ms +step:15906/57344 train_time:9273397ms step_avg:583.01ms +step:15907/57344 train_time:9273943ms step_avg:583.01ms +grad accum step:3977/14336 +step:15908/57344 train_time:9275260ms step_avg:583.06ms +step:15909/57344 train_time:9275277ms step_avg:583.02ms +step:15910/57344 train_time:9275524ms step_avg:583.00ms +step:15911/57344 train_time:9276069ms step_avg:583.00ms +grad accum step:3978/14336 +step:15912/57344 train_time:9277344ms step_avg:583.04ms +step:15913/57344 train_time:9277361ms step_avg:583.01ms +step:15914/57344 train_time:9277605ms step_avg:582.98ms +step:15915/57344 train_time:9278152ms step_avg:582.98ms +grad accum step:3979/14336 +step:15916/57344 train_time:9279445ms step_avg:583.03ms +step:15917/57344 train_time:9279462ms step_avg:582.99ms +step:15918/57344 train_time:9279708ms step_avg:582.97ms +step:15919/57344 train_time:9280250ms step_avg:582.97ms +grad accum step:3980/14336 +step:15920/57344 train_time:9281529ms step_avg:583.01ms +step:15921/57344 train_time:9281546ms step_avg:582.98ms +step:15922/57344 train_time:9281800ms step_avg:582.95ms +step:15923/57344 train_time:9282361ms step_avg:582.95ms +grad accum step:3981/14336 +step:15924/57344 train_time:9283646ms step_avg:583.00ms +step:15925/57344 train_time:9283663ms step_avg:582.96ms +step:15926/57344 train_time:9283912ms step_avg:582.94ms +step:15927/57344 train_time:9284468ms step_avg:582.94ms +grad accum step:3982/14336 +step:15928/57344 train_time:9285782ms step_avg:582.98ms +step:15929/57344 train_time:9285798ms step_avg:582.95ms +step:15930/57344 train_time:9286046ms step_avg:582.93ms +step:15931/57344 train_time:9286592ms step_avg:582.93ms +grad accum step:3983/14336 +step:15932/57344 train_time:9287884ms step_avg:582.97ms +step:15933/57344 train_time:9287901ms step_avg:582.93ms +step:15934/57344 train_time:9288163ms step_avg:582.91ms +step:15935/57344 train_time:9288741ms step_avg:582.91ms +grad accum step:3984/14336 +step:15936/57344 train_time:9290042ms step_avg:582.96ms +step:15936/57344 val_loss:6.718524 train_time:9290043ms step_avg:582.96ms +step:15937/57344 train_time:9290054ms step_avg:582.92ms +step:15938/57344 train_time:9290276ms step_avg:582.90ms +step:15939/57344 train_time:9290826ms step_avg:582.90ms +grad accum step:3985/14336 +step:15940/57344 train_time:9292138ms step_avg:582.94ms +step:15941/57344 train_time:9292156ms step_avg:582.91ms +step:15942/57344 train_time:9292403ms step_avg:582.89ms +step:15943/57344 train_time:9292947ms step_avg:582.89ms +grad accum step:3986/14336 +step:15944/57344 train_time:9294246ms step_avg:582.93ms +step:15945/57344 train_time:9294263ms step_avg:582.90ms +step:15946/57344 train_time:9294507ms step_avg:582.87ms +step:15947/57344 train_time:9295055ms step_avg:582.87ms +grad accum step:3987/14336 +step:15948/57344 train_time:9296350ms step_avg:582.92ms +step:15949/57344 train_time:9296367ms step_avg:582.88ms +step:15950/57344 train_time:9296615ms step_avg:582.86ms +step:15951/57344 train_time:9297155ms step_avg:582.86ms +grad accum step:3988/14336 +step:15952/57344 train_time:9298456ms step_avg:582.90ms +step:15953/57344 train_time:9298473ms step_avg:582.87ms +step:15954/57344 train_time:9298717ms step_avg:582.85ms +step:15955/57344 train_time:9299255ms step_avg:582.84ms +grad accum step:3989/14336 +step:15956/57344 train_time:9300557ms step_avg:582.89ms +step:15957/57344 train_time:9300573ms step_avg:582.85ms +step:15958/57344 train_time:9300823ms step_avg:582.83ms +step:15959/57344 train_time:9301364ms step_avg:582.83ms +grad accum step:3990/14336 +step:15960/57344 train_time:9302642ms step_avg:582.87ms +step:15961/57344 train_time:9302657ms step_avg:582.84ms +step:15962/57344 train_time:9302909ms step_avg:582.82ms +step:15963/57344 train_time:9303463ms step_avg:582.81ms +grad accum step:3991/14336 +step:15964/57344 train_time:9304796ms step_avg:582.86ms +step:15965/57344 train_time:9304813ms step_avg:582.83ms +step:15966/57344 train_time:9305063ms step_avg:582.80ms +step:15967/57344 train_time:9305611ms step_avg:582.80ms +grad accum step:3992/14336 +step:15968/57344 train_time:9306883ms step_avg:582.85ms +step:15969/57344 train_time:9306901ms step_avg:582.81ms +step:15970/57344 train_time:9307148ms step_avg:582.79ms +step:15971/57344 train_time:9307696ms step_avg:582.79ms +grad accum step:3993/14336 +step:15972/57344 train_time:9308989ms step_avg:582.83ms +step:15973/57344 train_time:9309006ms step_avg:582.80ms +step:15974/57344 train_time:9309253ms step_avg:582.78ms +step:15975/57344 train_time:9309797ms step_avg:582.77ms +grad accum step:3994/14336 +step:15976/57344 train_time:9311071ms step_avg:582.82ms +step:15977/57344 train_time:9311089ms step_avg:582.78ms +step:15978/57344 train_time:9311335ms step_avg:582.76ms +step:15979/57344 train_time:9311878ms step_avg:582.76ms +grad accum step:3995/14336 +step:15980/57344 train_time:9313161ms step_avg:582.80ms +step:15981/57344 train_time:9313178ms step_avg:582.77ms +step:15982/57344 train_time:9313423ms step_avg:582.74ms +step:15983/57344 train_time:9313963ms step_avg:582.74ms +grad accum step:3996/14336 +step:15984/57344 train_time:9315286ms step_avg:582.79ms +step:15985/57344 train_time:9315303ms step_avg:582.75ms +step:15986/57344 train_time:9315558ms step_avg:582.73ms +step:15987/57344 train_time:9316126ms step_avg:582.73ms +grad accum step:3997/14336 +step:15988/57344 train_time:9317405ms step_avg:582.77ms +step:15989/57344 train_time:9317422ms step_avg:582.74ms +step:15990/57344 train_time:9317666ms step_avg:582.72ms +step:15991/57344 train_time:9318209ms step_avg:582.72ms +grad accum step:3998/14336 +step:15992/57344 train_time:9319486ms step_avg:582.76ms +step:15993/57344 train_time:9319503ms step_avg:582.72ms +step:15994/57344 train_time:9319746ms step_avg:582.70ms +step:15995/57344 train_time:9320288ms step_avg:582.70ms +grad accum step:3999/14336 +step:15996/57344 train_time:9321613ms step_avg:582.75ms +step:15997/57344 train_time:9321630ms step_avg:582.71ms +step:15998/57344 train_time:9321880ms step_avg:582.69ms +step:15999/57344 train_time:9322428ms step_avg:582.69ms +grad accum step:4000/14336 +step:16000/57344 train_time:9323719ms step_avg:582.73ms +step:16000/57344 val_loss:6.762109 train_time:9323720ms step_avg:582.73ms +step:16001/57344 train_time:9323732ms step_avg:582.70ms +step:16002/57344 train_time:9323953ms step_avg:582.67ms +step:16003/57344 train_time:9324498ms step_avg:582.67ms +grad accum step:4001/14336 +step:16004/57344 train_time:9325778ms step_avg:582.72ms +step:16005/57344 train_time:9325795ms step_avg:582.68ms +step:16006/57344 train_time:9326045ms step_avg:582.66ms +step:16007/57344 train_time:9326588ms step_avg:582.66ms +grad accum step:4002/14336 +step:16008/57344 train_time:9327872ms step_avg:582.70ms +step:16009/57344 train_time:9327889ms step_avg:582.67ms +step:16010/57344 train_time:9328138ms step_avg:582.64ms +step:16011/57344 train_time:9328689ms step_avg:582.64ms +grad accum step:4003/14336 +step:16012/57344 train_time:9329981ms step_avg:582.69ms +step:16013/57344 train_time:9329998ms step_avg:582.65ms +step:16014/57344 train_time:9330244ms step_avg:582.63ms +step:16015/57344 train_time:9330788ms step_avg:582.63ms +grad accum step:4004/14336 +step:16016/57344 train_time:9332065ms step_avg:582.67ms +step:16017/57344 train_time:9332082ms step_avg:582.64ms +step:16018/57344 train_time:9332328ms step_avg:582.62ms +step:16019/57344 train_time:9332878ms step_avg:582.61ms +grad accum step:4005/14336 +step:16020/57344 train_time:9334172ms step_avg:582.66ms +step:16021/57344 train_time:9334189ms step_avg:582.62ms +step:16022/57344 train_time:9334434ms step_avg:582.60ms +step:16023/57344 train_time:9334977ms step_avg:582.60ms +grad accum step:4006/14336 +step:16024/57344 train_time:9336268ms step_avg:582.64ms +step:16025/57344 train_time:9336285ms step_avg:582.61ms +step:16026/57344 train_time:9336531ms step_avg:582.59ms +step:16027/57344 train_time:9337077ms step_avg:582.58ms +grad accum step:4007/14336 +step:16028/57344 train_time:9338370ms step_avg:582.63ms +step:16029/57344 train_time:9338387ms step_avg:582.59ms +step:16030/57344 train_time:9338631ms step_avg:582.57ms +step:16031/57344 train_time:9339177ms step_avg:582.57ms +grad accum step:4008/14336 +step:16032/57344 train_time:9340467ms step_avg:582.61ms +step:16033/57344 train_time:9340485ms step_avg:582.58ms +step:16034/57344 train_time:9340729ms step_avg:582.56ms +step:16035/57344 train_time:9341266ms step_avg:582.55ms +grad accum step:4009/14336 +step:16036/57344 train_time:9342556ms step_avg:582.60ms +step:16037/57344 train_time:9342573ms step_avg:582.56ms +step:16038/57344 train_time:9342819ms step_avg:582.54ms +step:16039/57344 train_time:9343362ms step_avg:582.54ms +grad accum step:4010/14336 +step:16040/57344 train_time:9344644ms step_avg:582.58ms +step:16041/57344 train_time:9344661ms step_avg:582.55ms +step:16042/57344 train_time:9344909ms step_avg:582.53ms +step:16043/57344 train_time:9345454ms step_avg:582.53ms +grad accum step:4011/14336 +step:16044/57344 train_time:9347569ms step_avg:582.62ms +step:16045/57344 train_time:9347582ms step_avg:582.59ms +step:16046/57344 train_time:9347800ms step_avg:582.56ms +step:16047/57344 train_time:9348351ms step_avg:582.56ms +grad accum step:4012/14336 +step:16048/57344 train_time:9349642ms step_avg:582.60ms +step:16049/57344 train_time:9349659ms step_avg:582.57ms +step:16050/57344 train_time:9349904ms step_avg:582.55ms +step:16051/57344 train_time:9350450ms step_avg:582.55ms +grad accum step:4013/14336 +step:16052/57344 train_time:9351746ms step_avg:582.59ms +step:16053/57344 train_time:9351763ms step_avg:582.56ms +step:16054/57344 train_time:9352006ms step_avg:582.53ms +step:16055/57344 train_time:9352546ms step_avg:582.53ms +grad accum step:4014/14336 +step:16056/57344 train_time:9353830ms step_avg:582.58ms +step:16057/57344 train_time:9353847ms step_avg:582.54ms +step:16058/57344 train_time:9354095ms step_avg:582.52ms +step:16059/57344 train_time:9354642ms step_avg:582.52ms +grad accum step:4015/14336 +step:16060/57344 train_time:9355945ms step_avg:582.56ms +step:16061/57344 train_time:9355961ms step_avg:582.53ms +step:16062/57344 train_time:9356209ms step_avg:582.51ms +step:16063/57344 train_time:9356749ms step_avg:582.50ms +grad accum step:4016/14336 +step:16064/57344 train_time:9358024ms step_avg:582.55ms +step:16064/57344 val_loss:6.754011 train_time:9358025ms step_avg:582.55ms +step:16065/57344 train_time:9358037ms step_avg:582.51ms +step:16066/57344 train_time:9358261ms step_avg:582.49ms +step:16067/57344 train_time:9358799ms step_avg:582.49ms +grad accum step:4017/14336 +step:16068/57344 train_time:9360093ms step_avg:582.53ms +step:16069/57344 train_time:9360110ms step_avg:582.49ms +step:16070/57344 train_time:9360356ms step_avg:582.47ms +step:16071/57344 train_time:9360900ms step_avg:582.47ms +grad accum step:4018/14336 +step:16072/57344 train_time:9362207ms step_avg:582.52ms +step:16073/57344 train_time:9362224ms step_avg:582.48ms +step:16074/57344 train_time:9362471ms step_avg:582.46ms +step:16075/57344 train_time:9363010ms step_avg:582.46ms +grad accum step:4019/14336 +step:16076/57344 train_time:9364300ms step_avg:582.50ms +step:16077/57344 train_time:9364317ms step_avg:582.47ms +step:16078/57344 train_time:9364566ms step_avg:582.45ms +step:16079/57344 train_time:9365114ms step_avg:582.44ms +grad accum step:4020/14336 +step:16080/57344 train_time:9366403ms step_avg:582.49ms +step:16081/57344 train_time:9366420ms step_avg:582.45ms +step:16082/57344 train_time:9366665ms step_avg:582.43ms +step:16083/57344 train_time:9367208ms step_avg:582.43ms +grad accum step:4021/14336 +step:16084/57344 train_time:9368509ms step_avg:582.47ms +step:16085/57344 train_time:9368526ms step_avg:582.44ms +step:16086/57344 train_time:9368770ms step_avg:582.42ms +step:16087/57344 train_time:9369317ms step_avg:582.42ms +grad accum step:4022/14336 +step:16088/57344 train_time:9370629ms step_avg:582.46ms +step:16089/57344 train_time:9370646ms step_avg:582.43ms +step:16090/57344 train_time:9370891ms step_avg:582.40ms +step:16091/57344 train_time:9371434ms step_avg:582.40ms +grad accum step:4023/14336 +step:16092/57344 train_time:9372708ms step_avg:582.45ms +step:16093/57344 train_time:9372725ms step_avg:582.41ms +step:16094/57344 train_time:9372971ms step_avg:582.39ms +step:16095/57344 train_time:9373523ms step_avg:582.39ms +grad accum step:4024/14336 +step:16096/57344 train_time:9374800ms step_avg:582.43ms +step:16097/57344 train_time:9374817ms step_avg:582.40ms +step:16098/57344 train_time:9375062ms step_avg:582.37ms +step:16099/57344 train_time:9375604ms step_avg:582.37ms +grad accum step:4025/14336 +step:16100/57344 train_time:9376911ms step_avg:582.42ms +step:16101/57344 train_time:9376928ms step_avg:582.38ms +step:16102/57344 train_time:9377177ms step_avg:582.36ms +step:16103/57344 train_time:9377722ms step_avg:582.36ms +grad accum step:4026/14336 +step:16104/57344 train_time:9379006ms step_avg:582.40ms +step:16105/57344 train_time:9379023ms step_avg:582.37ms +step:16106/57344 train_time:9379266ms step_avg:582.35ms +step:16107/57344 train_time:9379806ms step_avg:582.34ms +grad accum step:4027/14336 +step:16108/57344 train_time:9381145ms step_avg:582.39ms +step:16109/57344 train_time:9381161ms step_avg:582.36ms +step:16110/57344 train_time:9381416ms step_avg:582.33ms +step:16111/57344 train_time:9381980ms step_avg:582.33ms +grad accum step:4028/14336 +step:16112/57344 train_time:9383256ms step_avg:582.38ms +step:16113/57344 train_time:9383273ms step_avg:582.34ms +step:16114/57344 train_time:9383518ms step_avg:582.32ms +step:16115/57344 train_time:9384069ms step_avg:582.32ms +grad accum step:4029/14336 +step:16116/57344 train_time:9385376ms step_avg:582.36ms +step:16117/57344 train_time:9385393ms step_avg:582.33ms +step:16118/57344 train_time:9385638ms step_avg:582.31ms +step:16119/57344 train_time:9386174ms step_avg:582.30ms +grad accum step:4030/14336 +step:16120/57344 train_time:9387454ms step_avg:582.35ms +step:16121/57344 train_time:9387471ms step_avg:582.31ms +step:16122/57344 train_time:9387717ms step_avg:582.29ms +step:16123/57344 train_time:9388262ms step_avg:582.29ms +grad accum step:4031/14336 +step:16124/57344 train_time:9389541ms step_avg:582.33ms +step:16125/57344 train_time:9389557ms step_avg:582.30ms +step:16126/57344 train_time:9389803ms step_avg:582.28ms +step:16127/57344 train_time:9390344ms step_avg:582.27ms +grad accum step:4032/14336 +step:16128/57344 train_time:9391627ms step_avg:582.32ms +step:16128/57344 val_loss:6.763496 train_time:9391628ms step_avg:582.32ms +step:16129/57344 train_time:9391864ms step_avg:582.30ms +step:16130/57344 train_time:9391949ms step_avg:582.27ms +step:16131/57344 train_time:9392503ms step_avg:582.26ms +grad accum step:4033/14336 +step:16132/57344 train_time:9393946ms step_avg:582.32ms +step:16133/57344 train_time:9393957ms step_avg:582.28ms +step:16134/57344 train_time:9394176ms step_avg:582.26ms +step:16135/57344 train_time:9394725ms step_avg:582.26ms +grad accum step:4034/14336 +step:16136/57344 train_time:9395991ms step_avg:582.30ms +step:16137/57344 train_time:9396008ms step_avg:582.26ms +step:16138/57344 train_time:9396253ms step_avg:582.24ms +step:16139/57344 train_time:9396789ms step_avg:582.24ms +grad accum step:4035/14336 +step:16140/57344 train_time:9398086ms step_avg:582.29ms +step:16141/57344 train_time:9398103ms step_avg:582.25ms +step:16142/57344 train_time:9398348ms step_avg:582.23ms +step:16143/57344 train_time:9398891ms step_avg:582.23ms +grad accum step:4036/14336 +step:16144/57344 train_time:9400181ms step_avg:582.27ms +step:16145/57344 train_time:9400198ms step_avg:582.24ms +step:16146/57344 train_time:9400445ms step_avg:582.22ms +step:16147/57344 train_time:9400993ms step_avg:582.21ms +grad accum step:4037/14336 +step:16148/57344 train_time:9402266ms step_avg:582.26ms +step:16149/57344 train_time:9402283ms step_avg:582.22ms +step:16150/57344 train_time:9402537ms step_avg:582.20ms +step:16151/57344 train_time:9403095ms step_avg:582.20ms +grad accum step:4038/14336 +step:16152/57344 train_time:9404414ms step_avg:582.24ms +step:16153/57344 train_time:9404431ms step_avg:582.21ms +step:16154/57344 train_time:9404688ms step_avg:582.19ms +step:16155/57344 train_time:9405264ms step_avg:582.19ms +grad accum step:4039/14336 +step:16156/57344 train_time:9406558ms step_avg:582.23ms +step:16157/57344 train_time:9406575ms step_avg:582.20ms +step:16158/57344 train_time:9406819ms step_avg:582.18ms +step:16159/57344 train_time:9407360ms step_avg:582.17ms +grad accum step:4040/14336 +step:16160/57344 train_time:9408642ms step_avg:582.22ms +step:16161/57344 train_time:9408659ms step_avg:582.18ms +step:16162/57344 train_time:9408902ms step_avg:582.16ms +step:16163/57344 train_time:9409447ms step_avg:582.16ms +grad accum step:4041/14336 +step:16164/57344 train_time:9410728ms step_avg:582.20ms +step:16165/57344 train_time:9410745ms step_avg:582.17ms +step:16166/57344 train_time:9410997ms step_avg:582.15ms +step:16167/57344 train_time:9411555ms step_avg:582.15ms +grad accum step:4042/14336 +step:16168/57344 train_time:9412855ms step_avg:582.19ms +step:16169/57344 train_time:9412872ms step_avg:582.16ms +step:16170/57344 train_time:9413117ms step_avg:582.13ms +step:16171/57344 train_time:9413663ms step_avg:582.13ms +grad accum step:4043/14336 +step:16172/57344 train_time:9414962ms step_avg:582.18ms +step:16173/57344 train_time:9414978ms step_avg:582.14ms +step:16174/57344 train_time:9415224ms step_avg:582.12ms +step:16175/57344 train_time:9415771ms step_avg:582.12ms +grad accum step:4044/14336 +step:16176/57344 train_time:9417053ms step_avg:582.16ms +step:16177/57344 train_time:9417070ms step_avg:582.13ms +step:16178/57344 train_time:9417314ms step_avg:582.11ms +step:16179/57344 train_time:9417854ms step_avg:582.10ms +grad accum step:4045/14336 +step:16180/57344 train_time:9419137ms step_avg:582.15ms +step:16181/57344 train_time:9419154ms step_avg:582.11ms +step:16182/57344 train_time:9419398ms step_avg:582.09ms +step:16183/57344 train_time:9419942ms step_avg:582.09ms +grad accum step:4046/14336 +step:16184/57344 train_time:9421228ms step_avg:582.13ms +step:16185/57344 train_time:9421245ms step_avg:582.10ms +step:16186/57344 train_time:9421489ms step_avg:582.08ms +step:16187/57344 train_time:9422024ms step_avg:582.07ms +grad accum step:4047/14336 +step:16188/57344 train_time:9423309ms step_avg:582.12ms +step:16189/57344 train_time:9423326ms step_avg:582.08ms +step:16190/57344 train_time:9423571ms step_avg:582.06ms +step:16191/57344 train_time:9424113ms step_avg:582.06ms +grad accum step:4048/14336 +step:16192/57344 train_time:9425433ms step_avg:582.10ms +step:16192/57344 val_loss:6.770032 train_time:9425434ms step_avg:582.10ms +step:16193/57344 train_time:9425446ms step_avg:582.07ms +step:16194/57344 train_time:9425676ms step_avg:582.05ms +step:16195/57344 train_time:9426235ms step_avg:582.05ms +grad accum step:4049/14336 +step:16196/57344 train_time:9427533ms step_avg:582.09ms +step:16197/57344 train_time:9427550ms step_avg:582.06ms +step:16198/57344 train_time:9427800ms step_avg:582.03ms +step:16199/57344 train_time:9428352ms step_avg:582.03ms +grad accum step:4050/14336 +step:16200/57344 train_time:9429655ms step_avg:582.08ms +step:16201/57344 train_time:9429671ms step_avg:582.04ms +step:16202/57344 train_time:9429916ms step_avg:582.02ms +step:16203/57344 train_time:9430459ms step_avg:582.02ms +grad accum step:4051/14336 +step:16204/57344 train_time:9431776ms step_avg:582.06ms +step:16205/57344 train_time:9431792ms step_avg:582.03ms +step:16206/57344 train_time:9432042ms step_avg:582.01ms +step:16207/57344 train_time:9432595ms step_avg:582.01ms +grad accum step:4052/14336 +step:16208/57344 train_time:9433928ms step_avg:582.05ms +step:16209/57344 train_time:9433945ms step_avg:582.02ms +step:16210/57344 train_time:9434193ms step_avg:582.00ms +step:16211/57344 train_time:9434733ms step_avg:582.00ms +grad accum step:4053/14336 +step:16212/57344 train_time:9436005ms step_avg:582.04ms +step:16213/57344 train_time:9436022ms step_avg:582.00ms +step:16214/57344 train_time:9436268ms step_avg:581.98ms +step:16215/57344 train_time:9436810ms step_avg:581.98ms +grad accum step:4054/14336 +step:16216/57344 train_time:9438092ms step_avg:582.02ms +step:16217/57344 train_time:9438108ms step_avg:581.99ms +step:16218/57344 train_time:9438355ms step_avg:581.97ms +step:16219/57344 train_time:9438899ms step_avg:581.97ms +grad accum step:4055/14336 +step:16220/57344 train_time:9440171ms step_avg:582.01ms +step:16221/57344 train_time:9440189ms step_avg:581.97ms +step:16222/57344 train_time:9440439ms step_avg:581.95ms +step:16223/57344 train_time:9440985ms step_avg:581.95ms +grad accum step:4056/14336 +step:16224/57344 train_time:9442281ms step_avg:581.99ms +step:16225/57344 train_time:9442298ms step_avg:581.96ms +step:16226/57344 train_time:9442545ms step_avg:581.94ms +step:16227/57344 train_time:9443089ms step_avg:581.94ms +grad accum step:4057/14336 +step:16228/57344 train_time:9444369ms step_avg:581.98ms +step:16229/57344 train_time:9444386ms step_avg:581.95ms +step:16230/57344 train_time:9444631ms step_avg:581.92ms +step:16231/57344 train_time:9445174ms step_avg:581.92ms +grad accum step:4058/14336 +step:16232/57344 train_time:9446452ms step_avg:581.96ms +step:16233/57344 train_time:9446468ms step_avg:581.93ms +step:16234/57344 train_time:9446716ms step_avg:581.91ms +step:16235/57344 train_time:9447259ms step_avg:581.91ms +grad accum step:4059/14336 +step:16236/57344 train_time:9448586ms step_avg:581.95ms +step:16237/57344 train_time:9448603ms step_avg:581.92ms +step:16238/57344 train_time:9448865ms step_avg:581.90ms +step:16239/57344 train_time:9449448ms step_avg:581.90ms +grad accum step:4060/14336 +step:16240/57344 train_time:9450747ms step_avg:581.94ms +step:16241/57344 train_time:9450763ms step_avg:581.91ms +step:16242/57344 train_time:9451014ms step_avg:581.89ms +step:16243/57344 train_time:9451567ms step_avg:581.89ms +grad accum step:4061/14336 +step:16244/57344 train_time:9452901ms step_avg:581.93ms +step:16245/57344 train_time:9452918ms step_avg:581.90ms +step:16246/57344 train_time:9453160ms step_avg:581.88ms +step:16247/57344 train_time:9453699ms step_avg:581.87ms +grad accum step:4062/14336 +step:16248/57344 train_time:9454982ms step_avg:581.92ms +step:16249/57344 train_time:9454999ms step_avg:581.88ms +step:16250/57344 train_time:9455242ms step_avg:581.86ms +step:16251/57344 train_time:9455783ms step_avg:581.86ms +grad accum step:4063/14336 +step:16252/57344 train_time:9457077ms step_avg:581.90ms +step:16253/57344 train_time:9457094ms step_avg:581.87ms +step:16254/57344 train_time:9457340ms step_avg:581.85ms +step:16255/57344 train_time:9457890ms step_avg:581.84ms +grad accum step:4064/14336 +step:16256/57344 train_time:9459225ms step_avg:581.89ms +step:16256/57344 val_loss:6.806127 train_time:9459225ms step_avg:581.89ms +step:16257/57344 train_time:9459237ms step_avg:581.86ms +step:16258/57344 train_time:9459465ms step_avg:581.83ms +step:16259/57344 train_time:9460016ms step_avg:581.83ms +grad accum step:4065/14336 +step:16260/57344 train_time:9461292ms step_avg:581.88ms +step:16261/57344 train_time:9461309ms step_avg:581.84ms +step:16262/57344 train_time:9461555ms step_avg:581.82ms +step:16263/57344 train_time:9462092ms step_avg:581.82ms +grad accum step:4066/14336 +step:16264/57344 train_time:9463397ms step_avg:581.86ms +step:16265/57344 train_time:9463414ms step_avg:581.83ms +step:16266/57344 train_time:9463666ms step_avg:581.81ms +step:16267/57344 train_time:9464229ms step_avg:581.81ms +grad accum step:4067/14336 +step:16268/57344 train_time:9465515ms step_avg:581.85ms +step:16269/57344 train_time:9465532ms step_avg:581.81ms +step:16270/57344 train_time:9465777ms step_avg:581.79ms +step:16271/57344 train_time:9466320ms step_avg:581.79ms +grad accum step:4068/14336 +step:16272/57344 train_time:9467601ms step_avg:581.83ms +step:16273/57344 train_time:9467618ms step_avg:581.80ms +step:16274/57344 train_time:9467861ms step_avg:581.78ms +step:16275/57344 train_time:9468405ms step_avg:581.78ms +grad accum step:4069/14336 +step:16276/57344 train_time:9469685ms step_avg:581.82ms +step:16277/57344 train_time:9469702ms step_avg:581.78ms +step:16278/57344 train_time:9469947ms step_avg:581.76ms +step:16279/57344 train_time:9470490ms step_avg:581.76ms +grad accum step:4070/14336 +step:16280/57344 train_time:9471772ms step_avg:581.80ms +step:16281/57344 train_time:9471789ms step_avg:581.77ms +step:16282/57344 train_time:9472034ms step_avg:581.75ms +step:16283/57344 train_time:9472576ms step_avg:581.75ms +grad accum step:4071/14336 +step:16284/57344 train_time:9473856ms step_avg:581.79ms +step:16285/57344 train_time:9473873ms step_avg:581.75ms +step:16286/57344 train_time:9474125ms step_avg:581.73ms +step:16287/57344 train_time:9474688ms step_avg:581.73ms +grad accum step:4072/14336 +step:16288/57344 train_time:9476005ms step_avg:581.78ms +step:16289/57344 train_time:9476022ms step_avg:581.74ms +step:16290/57344 train_time:9476279ms step_avg:581.72ms +step:16291/57344 train_time:9476854ms step_avg:581.72ms +grad accum step:4073/14336 +step:16292/57344 train_time:9478171ms step_avg:581.77ms +step:16293/57344 train_time:9478188ms step_avg:581.73ms +step:16294/57344 train_time:9478430ms step_avg:581.71ms +step:16295/57344 train_time:9478975ms step_avg:581.71ms +grad accum step:4074/14336 +step:16296/57344 train_time:9480256ms step_avg:581.75ms +step:16297/57344 train_time:9480274ms step_avg:581.72ms +step:16298/57344 train_time:9480524ms step_avg:581.70ms +step:16299/57344 train_time:9481075ms step_avg:581.70ms +grad accum step:4075/14336 +step:16300/57344 train_time:9482353ms step_avg:581.74ms +step:16301/57344 train_time:9482370ms step_avg:581.70ms +step:16302/57344 train_time:9482617ms step_avg:581.68ms +step:16303/57344 train_time:9483154ms step_avg:581.68ms +grad accum step:4076/14336 +step:16304/57344 train_time:9484456ms step_avg:581.73ms +step:16305/57344 train_time:9484473ms step_avg:581.69ms +step:16306/57344 train_time:9484721ms step_avg:581.67ms +step:16307/57344 train_time:9485262ms step_avg:581.67ms +grad accum step:4077/14336 +step:16308/57344 train_time:9486541ms step_avg:581.71ms +step:16309/57344 train_time:9486558ms step_avg:581.68ms +step:16310/57344 train_time:9486804ms step_avg:581.66ms +step:16311/57344 train_time:9487351ms step_avg:581.65ms +grad accum step:4078/14336 +step:16312/57344 train_time:9488629ms step_avg:581.70ms +step:16313/57344 train_time:9488646ms step_avg:581.66ms +step:16314/57344 train_time:9488890ms step_avg:581.64ms +step:16315/57344 train_time:9489434ms step_avg:581.64ms +grad accum step:4079/14336 +step:16316/57344 train_time:9490716ms step_avg:581.68ms +step:16317/57344 train_time:9490734ms step_avg:581.65ms +step:16318/57344 train_time:9490981ms step_avg:581.63ms +step:16319/57344 train_time:9491526ms step_avg:581.62ms +grad accum step:4080/14336 +step:16320/57344 train_time:9492807ms step_avg:581.67ms +step:16320/57344 val_loss:6.793598 train_time:9492808ms step_avg:581.67ms +step:16321/57344 train_time:9492820ms step_avg:581.63ms +step:16322/57344 train_time:9493045ms step_avg:581.61ms +step:16323/57344 train_time:9493588ms step_avg:581.61ms +grad accum step:4081/14336 +step:16324/57344 train_time:9494883ms step_avg:581.65ms +step:16325/57344 train_time:9494900ms step_avg:581.62ms +step:16326/57344 train_time:9495143ms step_avg:581.60ms +step:16327/57344 train_time:9495683ms step_avg:581.59ms +grad accum step:4082/14336 +step:16328/57344 train_time:9497051ms step_avg:581.64ms +step:16329/57344 train_time:9497067ms step_avg:581.61ms +step:16330/57344 train_time:9497315ms step_avg:581.59ms +step:16331/57344 train_time:9497852ms step_avg:581.58ms +grad accum step:4083/14336 +step:16332/57344 train_time:9499128ms step_avg:581.63ms +step:16333/57344 train_time:9499145ms step_avg:581.59ms +step:16334/57344 train_time:9499390ms step_avg:581.57ms +step:16335/57344 train_time:9499931ms step_avg:581.57ms +grad accum step:4084/14336 +step:16336/57344 train_time:9501217ms step_avg:581.61ms +step:16337/57344 train_time:9501234ms step_avg:581.58ms +step:16338/57344 train_time:9501478ms step_avg:581.56ms +step:16339/57344 train_time:9502022ms step_avg:581.55ms +grad accum step:4085/14336 +step:16340/57344 train_time:9503305ms step_avg:581.60ms +step:16341/57344 train_time:9503322ms step_avg:581.56ms +step:16342/57344 train_time:9503571ms step_avg:581.54ms +step:16343/57344 train_time:9504116ms step_avg:581.54ms +grad accum step:4086/14336 +step:16344/57344 train_time:9505407ms step_avg:581.58ms +step:16345/57344 train_time:9505425ms step_avg:581.55ms +step:16346/57344 train_time:9505678ms step_avg:581.53ms +step:16347/57344 train_time:9506238ms step_avg:581.53ms +grad accum step:4087/14336 +step:16348/57344 train_time:9507511ms step_avg:581.57ms +step:16349/57344 train_time:9507528ms step_avg:581.54ms +step:16350/57344 train_time:9507777ms step_avg:581.52ms +step:16351/57344 train_time:9508322ms step_avg:581.51ms +grad accum step:4088/14336 +step:16352/57344 train_time:9509599ms step_avg:581.56ms +step:16353/57344 train_time:9509616ms step_avg:581.52ms +step:16354/57344 train_time:9509865ms step_avg:581.50ms +step:16355/57344 train_time:9510414ms step_avg:581.50ms +grad accum step:4089/14336 +step:16356/57344 train_time:9511698ms step_avg:581.54ms +step:16357/57344 train_time:9511716ms step_avg:581.51ms +step:16358/57344 train_time:9511964ms step_avg:581.49ms +step:16359/57344 train_time:9512513ms step_avg:581.48ms +grad accum step:4090/14336 +step:16360/57344 train_time:9513813ms step_avg:581.53ms +step:16361/57344 train_time:9513830ms step_avg:581.49ms +step:16362/57344 train_time:9514076ms step_avg:581.47ms +step:16363/57344 train_time:9514620ms step_avg:581.47ms +grad accum step:4091/14336 +step:16364/57344 train_time:9515897ms step_avg:581.51ms +step:16365/57344 train_time:9515914ms step_avg:581.48ms +step:16366/57344 train_time:9516157ms step_avg:581.46ms +step:16367/57344 train_time:9516706ms step_avg:581.46ms +grad accum step:4092/14336 +step:16368/57344 train_time:9518024ms step_avg:581.50ms +step:16369/57344 train_time:9518041ms step_avg:581.47ms +step:16370/57344 train_time:9518286ms step_avg:581.45ms +step:16371/57344 train_time:9518832ms step_avg:581.44ms +grad accum step:4093/14336 +step:16372/57344 train_time:9520115ms step_avg:581.49ms +step:16373/57344 train_time:9520132ms step_avg:581.45ms +step:16374/57344 train_time:9520380ms step_avg:581.43ms +step:16375/57344 train_time:9520925ms step_avg:581.43ms +grad accum step:4094/14336 +step:16376/57344 train_time:9522246ms step_avg:581.48ms +step:16377/57344 train_time:9522263ms step_avg:581.44ms +step:16378/57344 train_time:9522507ms step_avg:581.42ms +step:16379/57344 train_time:9523050ms step_avg:581.42ms +grad accum step:4095/14336 +step:16380/57344 train_time:9524332ms step_avg:581.46ms +step:16381/57344 train_time:9524350ms step_avg:581.43ms +step:16382/57344 train_time:9524598ms step_avg:581.41ms +step:16383/57344 train_time:9525140ms step_avg:581.40ms +grad accum step:4096/14336 +step:16384/57344 train_time:9526466ms step_avg:581.45ms +step:16384/57344 val_loss:6.812854 train_time:9526467ms step_avg:581.45ms +step:16385/57344 train_time:9526478ms step_avg:581.41ms +step:16386/57344 train_time:9526704ms step_avg:581.39ms +step:16387/57344 train_time:9527256ms step_avg:581.39ms +grad accum step:4097/14336 +step:16388/57344 train_time:9528532ms step_avg:581.43ms +step:16389/57344 train_time:9528549ms step_avg:581.40ms +step:16390/57344 train_time:9528793ms step_avg:581.38ms +step:16391/57344 train_time:9529340ms step_avg:581.38ms +grad accum step:4098/14336 +step:16392/57344 train_time:9530632ms step_avg:581.42ms +step:16393/57344 train_time:9530649ms step_avg:581.39ms +step:16394/57344 train_time:9530894ms step_avg:581.36ms +step:16395/57344 train_time:9531425ms step_avg:581.36ms +grad accum step:4099/14336 +step:16396/57344 train_time:9532707ms step_avg:581.40ms +step:16397/57344 train_time:9532724ms step_avg:581.37ms +step:16398/57344 train_time:9532972ms step_avg:581.35ms +step:16399/57344 train_time:9533512ms step_avg:581.35ms +grad accum step:4100/14336 +step:16400/57344 train_time:9534792ms step_avg:581.39ms +step:16401/57344 train_time:9534809ms step_avg:581.36ms +step:16402/57344 train_time:9535056ms step_avg:581.33ms +step:16403/57344 train_time:9535606ms step_avg:581.33ms +grad accum step:4101/14336 +step:16404/57344 train_time:9536897ms step_avg:581.38ms +step:16405/57344 train_time:9536914ms step_avg:581.34ms +step:16406/57344 train_time:9537163ms step_avg:581.32ms +step:16407/57344 train_time:9537709ms step_avg:581.32ms +grad accum step:4102/14336 +step:16408/57344 train_time:9539000ms step_avg:581.36ms +step:16409/57344 train_time:9539018ms step_avg:581.33ms +step:16410/57344 train_time:9539264ms step_avg:581.31ms +step:16411/57344 train_time:9539816ms step_avg:581.31ms +grad accum step:4103/14336 +step:16412/57344 train_time:9541102ms step_avg:581.35ms +step:16413/57344 train_time:9541119ms step_avg:581.31ms +step:16414/57344 train_time:9541368ms step_avg:581.29ms +step:16415/57344 train_time:9541923ms step_avg:581.29ms +grad accum step:4104/14336 +step:16416/57344 train_time:9543214ms step_avg:581.34ms +step:16417/57344 train_time:9543231ms step_avg:581.30ms +step:16418/57344 train_time:9543476ms step_avg:581.28ms +step:16419/57344 train_time:9544021ms step_avg:581.28ms +grad accum step:4105/14336 +step:16420/57344 train_time:9545297ms step_avg:581.32ms +step:16421/57344 train_time:9545314ms step_avg:581.29ms +step:16422/57344 train_time:9545560ms step_avg:581.27ms +step:16423/57344 train_time:9546113ms step_avg:581.26ms +grad accum step:4106/14336 +step:16424/57344 train_time:9547410ms step_avg:581.31ms +step:16425/57344 train_time:9547427ms step_avg:581.27ms +step:16426/57344 train_time:9547675ms step_avg:581.25ms +step:16427/57344 train_time:9548225ms step_avg:581.25ms +grad accum step:4107/14336 +step:16428/57344 train_time:9549520ms step_avg:581.30ms +step:16429/57344 train_time:9549537ms step_avg:581.26ms +step:16430/57344 train_time:9549784ms step_avg:581.24ms +step:16431/57344 train_time:9550330ms step_avg:581.24ms +grad accum step:4108/14336 +step:16432/57344 train_time:9551610ms step_avg:581.28ms +step:16433/57344 train_time:9551627ms step_avg:581.25ms +step:16434/57344 train_time:9551873ms step_avg:581.23ms +step:16435/57344 train_time:9552414ms step_avg:581.22ms +grad accum step:4109/14336 +step:16436/57344 train_time:9553702ms step_avg:581.27ms +step:16437/57344 train_time:9553719ms step_avg:581.23ms +step:16438/57344 train_time:9553963ms step_avg:581.21ms +step:16439/57344 train_time:9554512ms step_avg:581.21ms +grad accum step:4110/14336 +step:16440/57344 train_time:9555826ms step_avg:581.25ms +step:16441/57344 train_time:9555843ms step_avg:581.22ms +step:16442/57344 train_time:9556089ms step_avg:581.20ms +step:16443/57344 train_time:9556637ms step_avg:581.20ms +grad accum step:4111/14336 +step:16444/57344 train_time:9557937ms step_avg:581.24ms +step:16445/57344 train_time:9557955ms step_avg:581.21ms +step:16446/57344 train_time:9558203ms step_avg:581.19ms +step:16447/57344 train_time:9558753ms step_avg:581.19ms +grad accum step:4112/14336 +step:16448/57344 train_time:9561268ms step_avg:581.30ms +step:16448/57344 val_loss:6.807650 train_time:9561269ms step_avg:581.30ms +step:16449/57344 train_time:9561281ms step_avg:581.27ms +step:16450/57344 train_time:9561591ms step_avg:581.25ms +step:16451/57344 train_time:9562123ms step_avg:581.25ms +grad accum step:4113/14336 +step:16452/57344 train_time:9563413ms step_avg:581.29ms +step:16453/57344 train_time:9563430ms step_avg:581.26ms +step:16454/57344 train_time:9563672ms step_avg:581.24ms +step:16455/57344 train_time:9564212ms step_avg:581.23ms +grad accum step:4114/14336 +step:16456/57344 train_time:9565497ms step_avg:581.28ms +step:16457/57344 train_time:9565514ms step_avg:581.24ms +step:16458/57344 train_time:9565760ms step_avg:581.22ms +step:16459/57344 train_time:9566310ms step_avg:581.22ms +grad accum step:4115/14336 +step:16460/57344 train_time:9567582ms step_avg:581.26ms +step:16461/57344 train_time:9567598ms step_avg:581.23ms +step:16462/57344 train_time:9567845ms step_avg:581.21ms +step:16463/57344 train_time:9568392ms step_avg:581.21ms +grad accum step:4116/14336 +step:16464/57344 train_time:9569660ms step_avg:581.25ms +step:16465/57344 train_time:9569677ms step_avg:581.21ms +step:16466/57344 train_time:9569920ms step_avg:581.19ms +step:16467/57344 train_time:9570457ms step_avg:581.19ms +grad accum step:4117/14336 +step:16468/57344 train_time:9571747ms step_avg:581.23ms +step:16469/57344 train_time:9571763ms step_avg:581.20ms +step:16470/57344 train_time:9572014ms step_avg:581.18ms +step:16471/57344 train_time:9572571ms step_avg:581.18ms +grad accum step:4118/14336 +step:16472/57344 train_time:9573866ms step_avg:581.22ms +step:16473/57344 train_time:9573882ms step_avg:581.19ms +step:16474/57344 train_time:9574127ms step_avg:581.17ms +step:16475/57344 train_time:9574673ms step_avg:581.16ms +grad accum step:4119/14336 +step:16476/57344 train_time:9575953ms step_avg:581.21ms +step:16477/57344 train_time:9575970ms step_avg:581.17ms +step:16478/57344 train_time:9576217ms step_avg:581.15ms +step:16479/57344 train_time:9576759ms step_avg:581.15ms +grad accum step:4120/14336 +step:16480/57344 train_time:9578033ms step_avg:581.19ms +step:16481/57344 train_time:9578050ms step_avg:581.16ms +step:16482/57344 train_time:9578293ms step_avg:581.14ms +step:16483/57344 train_time:9578827ms step_avg:581.13ms +grad accum step:4121/14336 +step:16484/57344 train_time:9580128ms step_avg:581.18ms +step:16485/57344 train_time:9580145ms step_avg:581.14ms +step:16486/57344 train_time:9580388ms step_avg:581.12ms +step:16487/57344 train_time:9580924ms step_avg:581.12ms +grad accum step:4122/14336 +step:16488/57344 train_time:9582203ms step_avg:581.16ms +step:16489/57344 train_time:9582219ms step_avg:581.13ms +step:16490/57344 train_time:9582470ms step_avg:581.11ms +step:16491/57344 train_time:9583028ms step_avg:581.11ms +grad accum step:4123/14336 +step:16492/57344 train_time:9584305ms step_avg:581.15ms +step:16493/57344 train_time:9584322ms step_avg:581.11ms +step:16494/57344 train_time:9584564ms step_avg:581.09ms +step:16495/57344 train_time:9585109ms step_avg:581.09ms +grad accum step:4124/14336 +step:16496/57344 train_time:9586386ms step_avg:581.13ms +step:16497/57344 train_time:9586403ms step_avg:581.10ms +step:16498/57344 train_time:9586646ms step_avg:581.08ms +step:16499/57344 train_time:9587187ms step_avg:581.08ms +grad accum step:4125/14336 +step:16500/57344 train_time:9588465ms step_avg:581.12ms +step:16501/57344 train_time:9588483ms step_avg:581.08ms +step:16502/57344 train_time:9588726ms step_avg:581.06ms +step:16503/57344 train_time:9589255ms step_avg:581.06ms +grad accum step:4126/14336 +step:16504/57344 train_time:9590536ms step_avg:581.10ms +step:16505/57344 train_time:9590553ms step_avg:581.07ms +step:16506/57344 train_time:9590798ms step_avg:581.05ms +step:16507/57344 train_time:9591343ms step_avg:581.05ms +grad accum step:4127/14336 +step:16508/57344 train_time:9593636ms step_avg:581.15ms +step:16509/57344 train_time:9593649ms step_avg:581.12ms +step:16510/57344 train_time:9593868ms step_avg:581.09ms +step:16511/57344 train_time:9594418ms step_avg:581.09ms +grad accum step:4128/14336 +step:16512/57344 train_time:9595723ms step_avg:581.14ms +step:16512/57344 val_loss:6.819201 train_time:9595723ms step_avg:581.14ms +step:16513/57344 train_time:9595735ms step_avg:581.10ms +step:16514/57344 train_time:9595959ms step_avg:581.08ms +step:16515/57344 train_time:9596494ms step_avg:581.08ms +grad accum step:4129/14336 +step:16516/57344 train_time:9597771ms step_avg:581.12ms +step:16517/57344 train_time:9597787ms step_avg:581.09ms +step:16518/57344 train_time:9598034ms step_avg:581.07ms +step:16519/57344 train_time:9598577ms step_avg:581.06ms +grad accum step:4130/14336 +step:16520/57344 train_time:9599877ms step_avg:581.11ms +step:16521/57344 train_time:9599894ms step_avg:581.07ms +step:16522/57344 train_time:9600142ms step_avg:581.05ms +step:16523/57344 train_time:9600692ms step_avg:581.05ms +grad accum step:4131/14336 +step:16524/57344 train_time:9601983ms step_avg:581.09ms +step:16525/57344 train_time:9602000ms step_avg:581.06ms +step:16526/57344 train_time:9602245ms step_avg:581.04ms +step:16527/57344 train_time:9602785ms step_avg:581.04ms +grad accum step:4132/14336 +step:16528/57344 train_time:9604072ms step_avg:581.08ms +step:16529/57344 train_time:9604089ms step_avg:581.04ms +step:16530/57344 train_time:9604333ms step_avg:581.02ms +step:16531/57344 train_time:9604880ms step_avg:581.02ms +grad accum step:4133/14336 +step:16532/57344 train_time:9606217ms step_avg:581.07ms +step:16533/57344 train_time:9606234ms step_avg:581.03ms +step:16534/57344 train_time:9606479ms step_avg:581.01ms +step:16535/57344 train_time:9607018ms step_avg:581.01ms +grad accum step:4134/14336 +step:16536/57344 train_time:9608337ms step_avg:581.06ms +step:16537/57344 train_time:9608353ms step_avg:581.02ms +step:16538/57344 train_time:9608608ms step_avg:581.00ms +step:16539/57344 train_time:9609173ms step_avg:581.00ms +grad accum step:4135/14336 +step:16540/57344 train_time:9610476ms step_avg:581.04ms +step:16541/57344 train_time:9610493ms step_avg:581.01ms +step:16542/57344 train_time:9610739ms step_avg:580.99ms +step:16543/57344 train_time:9611285ms step_avg:580.99ms +grad accum step:4136/14336 +step:16544/57344 train_time:9612565ms step_avg:581.03ms +step:16545/57344 train_time:9612582ms step_avg:581.00ms +step:16546/57344 train_time:9612826ms step_avg:580.98ms +step:16547/57344 train_time:9613375ms step_avg:580.97ms +grad accum step:4137/14336 +step:16548/57344 train_time:9614664ms step_avg:581.02ms +step:16549/57344 train_time:9614681ms step_avg:580.98ms +step:16550/57344 train_time:9614932ms step_avg:580.96ms +step:16551/57344 train_time:9615484ms step_avg:580.96ms +grad accum step:4138/14336 +step:16552/57344 train_time:9616783ms step_avg:581.00ms +step:16553/57344 train_time:9616801ms step_avg:580.97ms +step:16554/57344 train_time:9617044ms step_avg:580.95ms +step:16555/57344 train_time:9617581ms step_avg:580.95ms +grad accum step:4139/14336 +step:16556/57344 train_time:9618889ms step_avg:580.99ms +step:16557/57344 train_time:9618906ms step_avg:580.96ms +step:16558/57344 train_time:9619150ms step_avg:580.94ms +step:16559/57344 train_time:9619683ms step_avg:580.93ms +grad accum step:4140/14336 +step:16560/57344 train_time:9620965ms step_avg:580.98ms +step:16561/57344 train_time:9620982ms step_avg:580.94ms +step:16562/57344 train_time:9621227ms step_avg:580.92ms +step:16563/57344 train_time:9621766ms step_avg:580.92ms +grad accum step:4141/14336 +step:16564/57344 train_time:9623063ms step_avg:580.96ms +step:16565/57344 train_time:9623080ms step_avg:580.93ms +step:16566/57344 train_time:9623326ms step_avg:580.91ms +step:16567/57344 train_time:9623871ms step_avg:580.91ms +grad accum step:4142/14336 +step:16568/57344 train_time:9625173ms step_avg:580.95ms +step:16569/57344 train_time:9625190ms step_avg:580.92ms +step:16570/57344 train_time:9625436ms step_avg:580.90ms +step:16571/57344 train_time:9625976ms step_avg:580.89ms +grad accum step:4143/14336 +step:16572/57344 train_time:9627251ms step_avg:580.93ms +step:16573/57344 train_time:9627268ms step_avg:580.90ms +step:16574/57344 train_time:9627515ms step_avg:580.88ms +step:16575/57344 train_time:9628058ms step_avg:580.88ms +grad accum step:4144/14336 +step:16576/57344 train_time:9629340ms step_avg:580.92ms +step:16576/57344 val_loss:6.821900 train_time:9629341ms step_avg:580.92ms +step:16577/57344 train_time:9629353ms step_avg:580.89ms +step:16578/57344 train_time:9629574ms step_avg:580.86ms +step:16579/57344 train_time:9630103ms step_avg:580.86ms +grad accum step:4145/14336 +step:16580/57344 train_time:9631371ms step_avg:580.90ms +step:16581/57344 train_time:9631388ms step_avg:580.87ms +step:16582/57344 train_time:9631636ms step_avg:580.85ms +step:16583/57344 train_time:9632180ms step_avg:580.85ms +grad accum step:4146/14336 +step:16584/57344 train_time:9633480ms step_avg:580.89ms +step:16585/57344 train_time:9633497ms step_avg:580.86ms +step:16586/57344 train_time:9633745ms step_avg:580.84ms +step:16587/57344 train_time:9634296ms step_avg:580.83ms +grad accum step:4147/14336 +step:16588/57344 train_time:9635596ms step_avg:580.88ms +step:16589/57344 train_time:9635613ms step_avg:580.84ms +step:16590/57344 train_time:9635862ms step_avg:580.82ms +step:16591/57344 train_time:9636414ms step_avg:580.82ms +grad accum step:4148/14336 +step:16592/57344 train_time:9637742ms step_avg:580.87ms +step:16593/57344 train_time:9637759ms step_avg:580.83ms +step:16594/57344 train_time:9638009ms step_avg:580.81ms +step:16595/57344 train_time:9638555ms step_avg:580.81ms +grad accum step:4149/14336 +step:16596/57344 train_time:9639856ms step_avg:580.85ms +step:16597/57344 train_time:9639873ms step_avg:580.82ms +step:16598/57344 train_time:9640117ms step_avg:580.80ms +step:16599/57344 train_time:9640658ms step_avg:580.80ms +grad accum step:4150/14336 +step:16600/57344 train_time:9641973ms step_avg:580.84ms +step:16601/57344 train_time:9641990ms step_avg:580.81ms +step:16602/57344 train_time:9642236ms step_avg:580.79ms +step:16603/57344 train_time:9642775ms step_avg:580.79ms +grad accum step:4151/14336 +step:16604/57344 train_time:9644071ms step_avg:580.83ms +step:16605/57344 train_time:9644088ms step_avg:580.79ms +step:16606/57344 train_time:9644335ms step_avg:580.77ms +step:16607/57344 train_time:9644878ms step_avg:580.77ms +grad accum step:4152/14336 +step:16608/57344 train_time:9646175ms step_avg:580.81ms +step:16609/57344 train_time:9646192ms step_avg:580.78ms +step:16610/57344 train_time:9646438ms step_avg:580.76ms +step:16611/57344 train_time:9646984ms step_avg:580.76ms +grad accum step:4153/14336 +step:16612/57344 train_time:9648277ms step_avg:580.80ms +step:16613/57344 train_time:9648294ms step_avg:580.77ms +step:16614/57344 train_time:9648538ms step_avg:580.75ms +step:16615/57344 train_time:9649086ms step_avg:580.75ms +grad accum step:4154/14336 +step:16616/57344 train_time:9650361ms step_avg:580.79ms +step:16617/57344 train_time:9650379ms step_avg:580.75ms +step:16618/57344 train_time:9650624ms step_avg:580.73ms +step:16619/57344 train_time:9651170ms step_avg:580.73ms +grad accum step:4155/14336 +step:16620/57344 train_time:9652468ms step_avg:580.77ms +step:16621/57344 train_time:9652485ms step_avg:580.74ms +step:16622/57344 train_time:9652732ms step_avg:580.72ms +step:16623/57344 train_time:9653284ms step_avg:580.72ms +grad accum step:4156/14336 +step:16624/57344 train_time:9654592ms step_avg:580.76ms +step:16625/57344 train_time:9654609ms step_avg:580.73ms +step:16626/57344 train_time:9654853ms step_avg:580.71ms +step:16627/57344 train_time:9655395ms step_avg:580.71ms +grad accum step:4157/14336 +step:16628/57344 train_time:9656670ms step_avg:580.75ms +step:16629/57344 train_time:9656686ms step_avg:580.71ms +step:16630/57344 train_time:9656939ms step_avg:580.69ms +step:16631/57344 train_time:9657493ms step_avg:580.69ms +grad accum step:4158/14336 +step:16632/57344 train_time:9658785ms step_avg:580.74ms +step:16633/57344 train_time:9658802ms step_avg:580.70ms +step:16634/57344 train_time:9659051ms step_avg:580.68ms +step:16635/57344 train_time:9659602ms step_avg:580.68ms +grad accum step:4159/14336 +step:16636/57344 train_time:9660887ms step_avg:580.72ms +step:16637/57344 train_time:9660904ms step_avg:580.69ms +step:16638/57344 train_time:9661150ms step_avg:580.67ms +step:16639/57344 train_time:9661697ms step_avg:580.67ms +grad accum step:4160/14336 +step:16640/57344 train_time:9663013ms step_avg:580.71ms +step:16640/57344 val_loss:6.835638 train_time:9663013ms step_avg:580.71ms +step:16641/57344 train_time:9663025ms step_avg:580.68ms +step:16642/57344 train_time:9663254ms step_avg:580.65ms +step:16643/57344 train_time:9663823ms step_avg:580.65ms +grad accum step:4161/14336 +step:16644/57344 train_time:9665130ms step_avg:580.70ms +step:16645/57344 train_time:9665147ms step_avg:580.66ms +step:16646/57344 train_time:9665398ms step_avg:580.64ms +step:16647/57344 train_time:9665955ms step_avg:580.64ms +grad accum step:4162/14336 +step:16648/57344 train_time:9667239ms step_avg:580.68ms +step:16649/57344 train_time:9667256ms step_avg:580.65ms +step:16650/57344 train_time:9667499ms step_avg:580.63ms +step:16651/57344 train_time:9668041ms step_avg:580.63ms +grad accum step:4163/14336 +step:16652/57344 train_time:9669324ms step_avg:580.67ms +step:16653/57344 train_time:9669341ms step_avg:580.64ms +step:16654/57344 train_time:9669588ms step_avg:580.62ms +step:16655/57344 train_time:9670131ms step_avg:580.61ms +grad accum step:4164/14336 +step:16656/57344 train_time:9671417ms step_avg:580.66ms +step:16657/57344 train_time:9671434ms step_avg:580.62ms +step:16658/57344 train_time:9671689ms step_avg:580.60ms +step:16659/57344 train_time:9672255ms step_avg:580.60ms +grad accum step:4165/14336 +step:16660/57344 train_time:9673579ms step_avg:580.65ms +step:16661/57344 train_time:9673596ms step_avg:580.61ms +step:16662/57344 train_time:9673845ms step_avg:580.59ms +step:16663/57344 train_time:9674389ms step_avg:580.59ms +grad accum step:4166/14336 +step:16664/57344 train_time:9675674ms step_avg:580.63ms +step:16665/57344 train_time:9675692ms step_avg:580.60ms +step:16666/57344 train_time:9675945ms step_avg:580.58ms +step:16667/57344 train_time:9676505ms step_avg:580.58ms +grad accum step:4167/14336 +step:16668/57344 train_time:9677795ms step_avg:580.62ms +step:16669/57344 train_time:9677812ms step_avg:580.59ms +step:16670/57344 train_time:9678058ms step_avg:580.57ms +step:16671/57344 train_time:9678608ms step_avg:580.57ms +grad accum step:4168/14336 +step:16672/57344 train_time:9679925ms step_avg:580.61ms +step:16673/57344 train_time:9679942ms step_avg:580.58ms +step:16674/57344 train_time:9680187ms step_avg:580.56ms +step:16675/57344 train_time:9680731ms step_avg:580.55ms +grad accum step:4169/14336 +step:16676/57344 train_time:9682050ms step_avg:580.60ms +step:16677/57344 train_time:9682067ms step_avg:580.56ms +step:16678/57344 train_time:9682315ms step_avg:580.54ms +step:16679/57344 train_time:9682856ms step_avg:580.54ms +grad accum step:4170/14336 +step:16680/57344 train_time:9684128ms step_avg:580.58ms +step:16681/57344 train_time:9684145ms step_avg:580.55ms +step:16682/57344 train_time:9684390ms step_avg:580.53ms +step:16683/57344 train_time:9684932ms step_avg:580.53ms +grad accum step:4171/14336 +step:16684/57344 train_time:9686210ms step_avg:580.57ms +step:16685/57344 train_time:9686227ms step_avg:580.54ms +step:16686/57344 train_time:9686475ms step_avg:580.52ms +step:16687/57344 train_time:9687020ms step_avg:580.51ms +grad accum step:4172/14336 +step:16688/57344 train_time:9688296ms step_avg:580.55ms +step:16689/57344 train_time:9688313ms step_avg:580.52ms +step:16690/57344 train_time:9688563ms step_avg:580.50ms +step:16691/57344 train_time:9689106ms step_avg:580.50ms +grad accum step:4173/14336 +step:16692/57344 train_time:9690383ms step_avg:580.54ms +step:16693/57344 train_time:9690400ms step_avg:580.51ms +step:16694/57344 train_time:9690647ms step_avg:580.49ms +step:16695/57344 train_time:9691197ms step_avg:580.48ms +grad accum step:4174/14336 +step:16696/57344 train_time:9692492ms step_avg:580.53ms +step:16697/57344 train_time:9692509ms step_avg:580.49ms +step:16698/57344 train_time:9692757ms step_avg:580.47ms +step:16699/57344 train_time:9693301ms step_avg:580.47ms +grad accum step:4175/14336 +step:16700/57344 train_time:9696524ms step_avg:580.63ms +step:16701/57344 train_time:9696536ms step_avg:580.60ms +step:16702/57344 train_time:9696815ms step_avg:580.58ms +step:16703/57344 train_time:9697370ms step_avg:580.58ms +grad accum step:4176/14336 +step:16704/57344 train_time:9698661ms step_avg:580.62ms +step:16704/57344 val_loss:6.838716 train_time:9698661ms step_avg:580.62ms +step:16705/57344 train_time:9698673ms step_avg:580.59ms +step:16706/57344 train_time:9698898ms step_avg:580.56ms +step:16707/57344 train_time:9699444ms step_avg:580.56ms +grad accum step:4177/14336 +step:16708/57344 train_time:9700750ms step_avg:580.61ms +step:16709/57344 train_time:9700766ms step_avg:580.57ms +step:16710/57344 train_time:9701011ms step_avg:580.55ms +step:16711/57344 train_time:9701554ms step_avg:580.55ms +grad accum step:4178/14336 +step:16712/57344 train_time:9702867ms step_avg:580.59ms +step:16713/57344 train_time:9702884ms step_avg:580.56ms +step:16714/57344 train_time:9703134ms step_avg:580.54ms +step:16715/57344 train_time:9703678ms step_avg:580.54ms +grad accum step:4179/14336 +step:16716/57344 train_time:9704950ms step_avg:580.58ms +step:16717/57344 train_time:9704967ms step_avg:580.54ms +step:16718/57344 train_time:9705218ms step_avg:580.53ms +step:16719/57344 train_time:9705764ms step_avg:580.52ms +grad accum step:4180/14336 +step:16720/57344 train_time:9707061ms step_avg:580.57ms +step:16721/57344 train_time:9707078ms step_avg:580.53ms +step:16722/57344 train_time:9707325ms step_avg:580.51ms +step:16723/57344 train_time:9707864ms step_avg:580.51ms +grad accum step:4181/14336 +step:16724/57344 train_time:9709180ms step_avg:580.55ms +step:16725/57344 train_time:9709197ms step_avg:580.52ms +step:16726/57344 train_time:9709442ms step_avg:580.50ms +step:16727/57344 train_time:9709984ms step_avg:580.50ms +grad accum step:4182/14336 +step:16728/57344 train_time:9711262ms step_avg:580.54ms +step:16729/57344 train_time:9711279ms step_avg:580.51ms +step:16730/57344 train_time:9711528ms step_avg:580.49ms +step:16731/57344 train_time:9712072ms step_avg:580.48ms +grad accum step:4183/14336 +step:16732/57344 train_time:9713386ms step_avg:580.53ms +step:16733/57344 train_time:9713403ms step_avg:580.49ms +step:16734/57344 train_time:9713652ms step_avg:580.47ms +step:16735/57344 train_time:9714202ms step_avg:580.47ms +grad accum step:4184/14336 +step:16736/57344 train_time:9715510ms step_avg:580.52ms +step:16737/57344 train_time:9715527ms step_avg:580.48ms +step:16738/57344 train_time:9715778ms step_avg:580.46ms +step:16739/57344 train_time:9716331ms step_avg:580.46ms +grad accum step:4185/14336 +step:16740/57344 train_time:9717677ms step_avg:580.51ms +step:16741/57344 train_time:9717694ms step_avg:580.47ms +step:16742/57344 train_time:9717952ms step_avg:580.45ms +step:16743/57344 train_time:9718518ms step_avg:580.45ms +grad accum step:4186/14336 +step:16744/57344 train_time:9719806ms step_avg:580.49ms +step:16745/57344 train_time:9719823ms step_avg:580.46ms +step:16746/57344 train_time:9720075ms step_avg:580.44ms +step:16747/57344 train_time:9720623ms step_avg:580.44ms +grad accum step:4187/14336 +step:16748/57344 train_time:9721917ms step_avg:580.48ms +step:16749/57344 train_time:9721934ms step_avg:580.45ms +step:16750/57344 train_time:9722178ms step_avg:580.43ms +step:16751/57344 train_time:9722724ms step_avg:580.43ms +grad accum step:4188/14336 +step:16752/57344 train_time:9724002ms step_avg:580.47ms +step:16753/57344 train_time:9724019ms step_avg:580.43ms +step:16754/57344 train_time:9724263ms step_avg:580.41ms +step:16755/57344 train_time:9724810ms step_avg:580.41ms +grad accum step:4189/14336 +step:16756/57344 train_time:9726091ms step_avg:580.45ms +step:16757/57344 train_time:9726108ms step_avg:580.42ms +step:16758/57344 train_time:9726353ms step_avg:580.40ms +step:16759/57344 train_time:9726893ms step_avg:580.40ms +grad accum step:4190/14336 +step:16760/57344 train_time:9728202ms step_avg:580.44ms +step:16761/57344 train_time:9728219ms step_avg:580.41ms +step:16762/57344 train_time:9728468ms step_avg:580.39ms +step:16763/57344 train_time:9729021ms step_avg:580.39ms +grad accum step:4191/14336 +step:16764/57344 train_time:9730299ms step_avg:580.43ms +step:16765/57344 train_time:9730316ms step_avg:580.39ms +step:16766/57344 train_time:9730559ms step_avg:580.37ms +step:16767/57344 train_time:9731105ms step_avg:580.37ms +grad accum step:4192/14336 +step:16768/57344 train_time:9732408ms step_avg:580.42ms +step:16768/57344 val_loss:6.850279 train_time:9732409ms step_avg:580.42ms +step:16769/57344 train_time:9732420ms step_avg:580.38ms +step:16770/57344 train_time:9732655ms step_avg:580.36ms +step:16771/57344 train_time:9733227ms step_avg:580.36ms +grad accum step:4193/14336 +step:16772/57344 train_time:9734525ms step_avg:580.40ms +step:16773/57344 train_time:9734542ms step_avg:580.37ms +step:16774/57344 train_time:9734787ms step_avg:580.35ms +step:16775/57344 train_time:9735328ms step_avg:580.35ms +grad accum step:4194/14336 +step:16776/57344 train_time:9736624ms step_avg:580.39ms +step:16777/57344 train_time:9736641ms step_avg:580.36ms +step:16778/57344 train_time:9736886ms step_avg:580.34ms +step:16779/57344 train_time:9737428ms step_avg:580.33ms +grad accum step:4195/14336 +step:16780/57344 train_time:9738710ms step_avg:580.38ms +step:16781/57344 train_time:9738727ms step_avg:580.34ms +step:16782/57344 train_time:9738973ms step_avg:580.32ms +step:16783/57344 train_time:9739513ms step_avg:580.32ms +grad accum step:4196/14336 +step:16784/57344 train_time:9740791ms step_avg:580.36ms +step:16785/57344 train_time:9740808ms step_avg:580.33ms +step:16786/57344 train_time:9741055ms step_avg:580.31ms +step:16787/57344 train_time:9741604ms step_avg:580.31ms +grad accum step:4197/14336 +step:16788/57344 train_time:9742904ms step_avg:580.35ms +step:16789/57344 train_time:9742921ms step_avg:580.32ms +step:16790/57344 train_time:9743168ms step_avg:580.30ms +step:16791/57344 train_time:9743706ms step_avg:580.29ms +grad accum step:4198/14336 +step:16792/57344 train_time:9745024ms step_avg:580.34ms +step:16793/57344 train_time:9745041ms step_avg:580.30ms +step:16794/57344 train_time:9745292ms step_avg:580.28ms +step:16795/57344 train_time:9745847ms step_avg:580.28ms +grad accum step:4199/14336 +step:16796/57344 train_time:9747170ms step_avg:580.33ms +step:16797/57344 train_time:9747187ms step_avg:580.29ms +step:16798/57344 train_time:9747435ms step_avg:580.27ms +step:16799/57344 train_time:9747980ms step_avg:580.27ms +grad accum step:4200/14336 +step:16800/57344 train_time:9749280ms step_avg:580.31ms +step:16801/57344 train_time:9749297ms step_avg:580.28ms +step:16802/57344 train_time:9749541ms step_avg:580.26ms +step:16803/57344 train_time:9750078ms step_avg:580.26ms +grad accum step:4201/14336 +step:16804/57344 train_time:9751377ms step_avg:580.30ms +step:16805/57344 train_time:9751393ms step_avg:580.27ms +step:16806/57344 train_time:9751639ms step_avg:580.25ms +step:16807/57344 train_time:9752185ms step_avg:580.25ms +grad accum step:4202/14336 +step:16808/57344 train_time:9753464ms step_avg:580.29ms +step:16809/57344 train_time:9753481ms step_avg:580.25ms +step:16810/57344 train_time:9753728ms step_avg:580.23ms +step:16811/57344 train_time:9754267ms step_avg:580.23ms +grad accum step:4203/14336 +step:16812/57344 train_time:9755552ms step_avg:580.27ms +step:16813/57344 train_time:9755569ms step_avg:580.24ms +step:16814/57344 train_time:9755816ms step_avg:580.22ms +step:16815/57344 train_time:9756356ms step_avg:580.22ms +grad accum step:4204/14336 +step:16816/57344 train_time:9757634ms step_avg:580.26ms +step:16817/57344 train_time:9757651ms step_avg:580.23ms +step:16818/57344 train_time:9757895ms step_avg:580.21ms +step:16819/57344 train_time:9758440ms step_avg:580.20ms +grad accum step:4205/14336 +step:16820/57344 train_time:9759719ms step_avg:580.24ms +step:16821/57344 train_time:9759737ms step_avg:580.21ms +step:16822/57344 train_time:9759986ms step_avg:580.19ms +step:16823/57344 train_time:9760529ms step_avg:580.19ms +grad accum step:4206/14336 +step:16824/57344 train_time:9761824ms step_avg:580.23ms +step:16825/57344 train_time:9761841ms step_avg:580.20ms +step:16826/57344 train_time:9762089ms step_avg:580.18ms +step:16827/57344 train_time:9762637ms step_avg:580.18ms +grad accum step:4207/14336 +step:16828/57344 train_time:9763952ms step_avg:580.22ms +step:16829/57344 train_time:9763969ms step_avg:580.19ms +step:16830/57344 train_time:9764220ms step_avg:580.17ms +step:16831/57344 train_time:9764772ms step_avg:580.17ms +grad accum step:4208/14336 +step:16832/57344 train_time:9766062ms step_avg:580.21ms +step:16832/57344 val_loss:6.851334 train_time:9766062ms step_avg:580.21ms +step:16833/57344 train_time:9766074ms step_avg:580.17ms +step:16834/57344 train_time:9766305ms step_avg:580.15ms +step:16835/57344 train_time:9766859ms step_avg:580.15ms +grad accum step:4209/14336 +step:16836/57344 train_time:9768176ms step_avg:580.20ms +step:16837/57344 train_time:9768193ms step_avg:580.16ms +step:16838/57344 train_time:9768439ms step_avg:580.14ms +step:16839/57344 train_time:9768987ms step_avg:580.14ms +grad accum step:4210/14336 +step:16840/57344 train_time:9770286ms step_avg:580.18ms +step:16841/57344 train_time:9770303ms step_avg:580.15ms +step:16842/57344 train_time:9770548ms step_avg:580.13ms +step:16843/57344 train_time:9771093ms step_avg:580.13ms +grad accum step:4211/14336 +step:16844/57344 train_time:9772371ms step_avg:580.17ms +step:16845/57344 train_time:9772388ms step_avg:580.14ms +step:16846/57344 train_time:9772632ms step_avg:580.12ms +step:16847/57344 train_time:9773178ms step_avg:580.11ms +grad accum step:4212/14336 +step:16848/57344 train_time:9774474ms step_avg:580.16ms +step:16849/57344 train_time:9774491ms step_avg:580.12ms +step:16850/57344 train_time:9774740ms step_avg:580.10ms +step:16851/57344 train_time:9775283ms step_avg:580.10ms +grad accum step:4213/14336 +step:16852/57344 train_time:9776559ms step_avg:580.14ms +step:16853/57344 train_time:9776576ms step_avg:580.11ms +step:16854/57344 train_time:9776822ms step_avg:580.09ms +step:16855/57344 train_time:9777374ms step_avg:580.09ms +grad accum step:4214/14336 +step:16856/57344 train_time:9778669ms step_avg:580.13ms +step:16857/57344 train_time:9778686ms step_avg:580.10ms +step:16858/57344 train_time:9778934ms step_avg:580.08ms +step:16859/57344 train_time:9779473ms step_avg:580.07ms +grad accum step:4215/14336 +step:16860/57344 train_time:9780750ms step_avg:580.12ms +step:16861/57344 train_time:9780767ms step_avg:580.08ms +step:16862/57344 train_time:9781013ms step_avg:580.06ms +step:16863/57344 train_time:9781570ms step_avg:580.06ms +grad accum step:4216/14336 +step:16864/57344 train_time:9782877ms step_avg:580.10ms +step:16865/57344 train_time:9782894ms step_avg:580.07ms +step:16866/57344 train_time:9783137ms step_avg:580.05ms +step:16867/57344 train_time:9783675ms step_avg:580.05ms +grad accum step:4217/14336 +step:16868/57344 train_time:9784965ms step_avg:580.09ms +step:16869/57344 train_time:9784982ms step_avg:580.06ms +step:16870/57344 train_time:9785229ms step_avg:580.04ms +step:16871/57344 train_time:9785773ms step_avg:580.04ms +grad accum step:4218/14336 +step:16872/57344 train_time:9787048ms step_avg:580.08ms +step:16873/57344 train_time:9787065ms step_avg:580.04ms +step:16874/57344 train_time:9787311ms step_avg:580.02ms +step:16875/57344 train_time:9787855ms step_avg:580.02ms +grad accum step:4219/14336 +step:16876/57344 train_time:9789205ms step_avg:580.07ms +step:16877/57344 train_time:9789222ms step_avg:580.03ms +step:16878/57344 train_time:9789470ms step_avg:580.01ms +step:16879/57344 train_time:9790009ms step_avg:580.01ms +grad accum step:4220/14336 +step:16880/57344 train_time:9791302ms step_avg:580.05ms +step:16881/57344 train_time:9791319ms step_avg:580.02ms +step:16882/57344 train_time:9791565ms step_avg:580.00ms +step:16883/57344 train_time:9792110ms step_avg:580.00ms +grad accum step:4221/14336 +step:16884/57344 train_time:9793427ms step_avg:580.04ms +step:16885/57344 train_time:9793444ms step_avg:580.01ms +step:16886/57344 train_time:9793691ms step_avg:579.99ms +step:16887/57344 train_time:9794241ms step_avg:579.99ms +grad accum step:4222/14336 +step:16888/57344 train_time:9795578ms step_avg:580.03ms +step:16889/57344 train_time:9795595ms step_avg:580.00ms +step:16890/57344 train_time:9795839ms step_avg:579.98ms +step:16891/57344 train_time:9796381ms step_avg:579.98ms +grad accum step:4223/14336 +step:16892/57344 train_time:9797682ms step_avg:580.02ms +step:16893/57344 train_time:9797699ms step_avg:579.99ms +step:16894/57344 train_time:9797946ms step_avg:579.97ms +step:16895/57344 train_time:9798504ms step_avg:579.96ms +grad accum step:4224/14336 +step:16896/57344 train_time:9799817ms step_avg:580.01ms +step:16896/57344 val_loss:6.866141 train_time:9799817ms step_avg:580.01ms +step:16897/57344 train_time:9799829ms step_avg:579.97ms +step:16898/57344 train_time:9800052ms step_avg:579.95ms +step:16899/57344 train_time:9800597ms step_avg:579.95ms +grad accum step:4225/14336 +step:16900/57344 train_time:9801870ms step_avg:579.99ms +step:16901/57344 train_time:9801887ms step_avg:579.96ms +step:16902/57344 train_time:9802130ms step_avg:579.94ms +step:16903/57344 train_time:9802677ms step_avg:579.94ms +grad accum step:4226/14336 +step:16904/57344 train_time:9803977ms step_avg:579.98ms +step:16905/57344 train_time:9803994ms step_avg:579.95ms +step:16906/57344 train_time:9804241ms step_avg:579.93ms +step:16907/57344 train_time:9804783ms step_avg:579.92ms +grad accum step:4227/14336 +step:16908/57344 train_time:9806061ms step_avg:579.97ms +step:16909/57344 train_time:9806078ms step_avg:579.93ms +step:16910/57344 train_time:9806325ms step_avg:579.91ms +step:16911/57344 train_time:9806870ms step_avg:579.91ms +grad accum step:4228/14336 +step:16912/57344 train_time:9808160ms step_avg:579.95ms +step:16913/57344 train_time:9808178ms step_avg:579.92ms +step:16914/57344 train_time:9808429ms step_avg:579.90ms +step:16915/57344 train_time:9808997ms step_avg:579.90ms +grad accum step:4229/14336 +step:16916/57344 train_time:9810356ms step_avg:579.95ms +step:16917/57344 train_time:9810373ms step_avg:579.91ms +step:16918/57344 train_time:9810618ms step_avg:579.89ms +step:16919/57344 train_time:9811161ms step_avg:579.89ms +grad accum step:4230/14336 +step:16920/57344 train_time:9812462ms step_avg:579.93ms +step:16921/57344 train_time:9812479ms step_avg:579.90ms +step:16922/57344 train_time:9812725ms step_avg:579.88ms +step:16923/57344 train_time:9813268ms step_avg:579.88ms +grad accum step:4231/14336 +step:16924/57344 train_time:9814542ms step_avg:579.92ms +step:16925/57344 train_time:9814559ms step_avg:579.89ms +step:16926/57344 train_time:9814805ms step_avg:579.87ms +step:16927/57344 train_time:9815355ms step_avg:579.86ms +grad accum step:4232/14336 +step:16928/57344 train_time:9816633ms step_avg:579.91ms +step:16929/57344 train_time:9816650ms step_avg:579.87ms +step:16930/57344 train_time:9816895ms step_avg:579.85ms +step:16931/57344 train_time:9817435ms step_avg:579.85ms +grad accum step:4233/14336 +step:16932/57344 train_time:9818720ms step_avg:579.89ms +step:16933/57344 train_time:9818737ms step_avg:579.86ms +step:16934/57344 train_time:9818983ms step_avg:579.84ms +step:16935/57344 train_time:9819524ms step_avg:579.84ms +grad accum step:4234/14336 +step:16936/57344 train_time:9820804ms step_avg:579.88ms +step:16937/57344 train_time:9820821ms step_avg:579.84ms +step:16938/57344 train_time:9821066ms step_avg:579.82ms +step:16939/57344 train_time:9821612ms step_avg:579.82ms +grad accum step:4235/14336 +step:16940/57344 train_time:9822933ms step_avg:579.87ms +step:16941/57344 train_time:9822950ms step_avg:579.83ms +step:16942/57344 train_time:9823194ms step_avg:579.81ms +step:16943/57344 train_time:9823732ms step_avg:579.81ms +grad accum step:4236/14336 +step:16944/57344 train_time:9825018ms step_avg:579.85ms +step:16945/57344 train_time:9825035ms step_avg:579.82ms +step:16946/57344 train_time:9825281ms step_avg:579.80ms +step:16947/57344 train_time:9825825ms step_avg:579.80ms +grad accum step:4237/14336 +step:16948/57344 train_time:9827120ms step_avg:579.84ms +step:16949/57344 train_time:9827137ms step_avg:579.81ms +step:16950/57344 train_time:9827386ms step_avg:579.79ms +step:16951/57344 train_time:9827937ms step_avg:579.79ms +grad accum step:4238/14336 +step:16952/57344 train_time:9829219ms step_avg:579.83ms +step:16953/57344 train_time:9829236ms step_avg:579.79ms +step:16954/57344 train_time:9829484ms step_avg:579.77ms +step:16955/57344 train_time:9830032ms step_avg:579.77ms +grad accum step:4239/14336 +step:16956/57344 train_time:9831324ms step_avg:579.81ms +step:16957/57344 train_time:9831342ms step_avg:579.78ms +step:16958/57344 train_time:9831589ms step_avg:579.76ms +step:16959/57344 train_time:9832134ms step_avg:579.76ms +grad accum step:4240/14336 +step:16960/57344 train_time:9833416ms step_avg:579.80ms +step:16960/57344 val_loss:6.858786 train_time:9833416ms step_avg:579.80ms +step:16961/57344 train_time:9833428ms step_avg:579.77ms +step:16962/57344 train_time:9833656ms step_avg:579.75ms +step:16963/57344 train_time:9834211ms step_avg:579.74ms +grad accum step:4241/14336 +step:16964/57344 train_time:9835486ms step_avg:579.79ms +step:16965/57344 train_time:9835503ms step_avg:579.75ms +step:16966/57344 train_time:9835749ms step_avg:579.73ms +step:16967/57344 train_time:9836294ms step_avg:579.73ms +grad accum step:4242/14336 +step:16968/57344 train_time:9837572ms step_avg:579.77ms +step:16969/57344 train_time:9837589ms step_avg:579.74ms +step:16970/57344 train_time:9837835ms step_avg:579.72ms +step:16971/57344 train_time:9838381ms step_avg:579.72ms +grad accum step:4243/14336 +step:16972/57344 train_time:9839654ms step_avg:579.76ms +step:16973/57344 train_time:9839671ms step_avg:579.72ms +step:16974/57344 train_time:9839918ms step_avg:579.71ms +step:16975/57344 train_time:9840461ms step_avg:579.70ms +grad accum step:4244/14336 +step:16976/57344 train_time:9841747ms step_avg:579.74ms +step:16977/57344 train_time:9841763ms step_avg:579.71ms +step:16978/57344 train_time:9842013ms step_avg:579.69ms +step:16979/57344 train_time:9842562ms step_avg:579.69ms +grad accum step:4245/14336 +step:16980/57344 train_time:9843831ms step_avg:579.73ms +step:16981/57344 train_time:9843848ms step_avg:579.70ms +step:16982/57344 train_time:9844091ms step_avg:579.68ms +step:16983/57344 train_time:9844633ms step_avg:579.68ms +grad accum step:4246/14336 +step:16984/57344 train_time:9845917ms step_avg:579.72ms +step:16985/57344 train_time:9845934ms step_avg:579.68ms +step:16986/57344 train_time:9846179ms step_avg:579.66ms +step:16987/57344 train_time:9846722ms step_avg:579.66ms +grad accum step:4247/14336 +step:16988/57344 train_time:9848003ms step_avg:579.70ms +step:16989/57344 train_time:9848020ms step_avg:579.67ms +step:16990/57344 train_time:9848267ms step_avg:579.65ms +step:16991/57344 train_time:9848814ms step_avg:579.65ms +grad accum step:4248/14336 +step:16992/57344 train_time:9850098ms step_avg:579.69ms +step:16993/57344 train_time:9850114ms step_avg:579.66ms +step:16994/57344 train_time:9850364ms step_avg:579.64ms +step:16995/57344 train_time:9850914ms step_avg:579.64ms +grad accum step:4249/14336 +step:16996/57344 train_time:9852196ms step_avg:579.68ms +step:16997/57344 train_time:9852213ms step_avg:579.64ms +step:16998/57344 train_time:9852461ms step_avg:579.62ms +step:16999/57344 train_time:9853004ms step_avg:579.62ms +grad accum step:4250/14336 +step:17000/57344 train_time:9854287ms step_avg:579.66ms +step:17001/57344 train_time:9854304ms step_avg:579.63ms +step:17002/57344 train_time:9854550ms step_avg:579.61ms +step:17003/57344 train_time:9855089ms step_avg:579.61ms +grad accum step:4251/14336 +step:17004/57344 train_time:9856371ms step_avg:579.65ms +step:17005/57344 train_time:9856388ms step_avg:579.62ms +step:17006/57344 train_time:9856632ms step_avg:579.60ms +step:17007/57344 train_time:9857170ms step_avg:579.59ms +grad accum step:4252/14336 +step:17008/57344 train_time:9858456ms step_avg:579.64ms +step:17009/57344 train_time:9858473ms step_avg:579.60ms +step:17010/57344 train_time:9858723ms step_avg:579.58ms +step:17011/57344 train_time:9859268ms step_avg:579.58ms +grad accum step:4253/14336 +step:17012/57344 train_time:9860563ms step_avg:579.62ms +step:17013/57344 train_time:9860580ms step_avg:579.59ms +step:17014/57344 train_time:9860834ms step_avg:579.57ms +step:17015/57344 train_time:9861392ms step_avg:579.57ms +grad accum step:4254/14336 +step:17016/57344 train_time:9862676ms step_avg:579.61ms +step:17017/57344 train_time:9862694ms step_avg:579.58ms +step:17018/57344 train_time:9862942ms step_avg:579.56ms +step:17019/57344 train_time:9863488ms step_avg:579.56ms +grad accum step:4255/14336 +step:17020/57344 train_time:9892661ms step_avg:581.24ms +step:17021/57344 train_time:9892673ms step_avg:581.20ms +step:17022/57344 train_time:9892968ms step_avg:581.19ms +step:17023/57344 train_time:9893509ms step_avg:581.18ms +grad accum step:4256/14336 +step:17024/57344 train_time:9894784ms step_avg:581.23ms +step:17024/57344 val_loss:6.862143 train_time:9894785ms step_avg:581.23ms +step:17025/57344 train_time:9894797ms step_avg:581.19ms +step:17026/57344 train_time:9895022ms step_avg:581.17ms +step:17027/57344 train_time:9895566ms step_avg:581.17ms +grad accum step:4257/14336 +step:17028/57344 train_time:9896840ms step_avg:581.21ms +step:17029/57344 train_time:9896857ms step_avg:581.18ms +step:17030/57344 train_time:9897102ms step_avg:581.16ms +step:17031/57344 train_time:9897651ms step_avg:581.16ms +grad accum step:4258/14336 +step:17032/57344 train_time:9898936ms step_avg:581.20ms +step:17033/57344 train_time:9898953ms step_avg:581.16ms +step:17034/57344 train_time:9899193ms step_avg:581.14ms +step:17035/57344 train_time:9899734ms step_avg:581.14ms +grad accum step:4259/14336 +step:17036/57344 train_time:9901015ms step_avg:581.18ms +step:17037/57344 train_time:9901032ms step_avg:581.15ms +step:17038/57344 train_time:9901278ms step_avg:581.13ms +step:17039/57344 train_time:9935213ms step_avg:583.09ms +grad accum step:4260/14336 +step:17040/57344 train_time:9947370ms step_avg:583.77ms +step:17041/57344 train_time:9947386ms step_avg:583.73ms +step:17042/57344 train_time:9947628ms step_avg:583.71ms +step:17043/57344 train_time:9948162ms step_avg:583.71ms +grad accum step:4261/14336 +step:17044/57344 train_time:9949434ms step_avg:583.75ms +step:17045/57344 train_time:9949450ms step_avg:583.72ms +step:17046/57344 train_time:9949691ms step_avg:583.70ms +step:17047/57344 train_time:9950219ms step_avg:583.69ms +grad accum step:4262/14336 +step:17048/57344 train_time:9951481ms step_avg:583.73ms +step:17049/57344 train_time:9951498ms step_avg:583.70ms +step:17050/57344 train_time:9951738ms step_avg:583.68ms +step:17051/57344 train_time:9952272ms step_avg:583.68ms +grad accum step:4263/14336 +step:17052/57344 train_time:9953548ms step_avg:583.72ms +step:17053/57344 train_time:9953564ms step_avg:583.68ms +step:17054/57344 train_time:9953805ms step_avg:583.66ms +step:17055/57344 train_time:9954333ms step_avg:583.66ms +grad accum step:4264/14336 +step:17056/57344 train_time:9955619ms step_avg:583.70ms +step:17057/57344 train_time:9955636ms step_avg:583.67ms +step:17058/57344 train_time:9955880ms step_avg:583.65ms +step:17059/57344 train_time:9956424ms step_avg:583.65ms +grad accum step:4265/14336 +step:17060/57344 train_time:9957706ms step_avg:583.69ms +step:17061/57344 train_time:9957723ms step_avg:583.65ms +step:17062/57344 train_time:9957965ms step_avg:583.63ms +step:17063/57344 train_time:9958492ms step_avg:583.63ms +grad accum step:4266/14336 +step:17064/57344 train_time:9959777ms step_avg:583.67ms +step:17065/57344 train_time:9959794ms step_avg:583.64ms +step:17066/57344 train_time:9960038ms step_avg:583.62ms +step:17067/57344 train_time:9960589ms step_avg:583.62ms +grad accum step:4267/14336 +step:17068/57344 train_time:9961867ms step_avg:583.66ms +step:17069/57344 train_time:9961884ms step_avg:583.62ms +step:17070/57344 train_time:9962129ms step_avg:583.60ms +step:17071/57344 train_time:9962677ms step_avg:583.60ms +grad accum step:4268/14336 +step:17072/57344 train_time:9963954ms step_avg:583.64ms +step:17073/57344 train_time:9963971ms step_avg:583.61ms +step:17074/57344 train_time:9964216ms step_avg:583.59ms +step:17075/57344 train_time:9964765ms step_avg:583.59ms +grad accum step:4269/14336 +step:17076/57344 train_time:9966019ms step_avg:583.63ms +step:17077/57344 train_time:9966036ms step_avg:583.59ms +step:17078/57344 train_time:9966276ms step_avg:583.57ms +step:17079/57344 train_time:9966803ms step_avg:583.57ms +grad accum step:4270/14336 +step:17080/57344 train_time:9984632ms step_avg:584.58ms +step:17081/57344 train_time:9984644ms step_avg:584.55ms +step:17082/57344 train_time:9984901ms step_avg:584.53ms +step:17083/57344 train_time:9985444ms step_avg:584.53ms +grad accum step:4271/14336 +step:17084/57344 train_time:9986720ms step_avg:584.57ms +step:17085/57344 train_time:9986737ms step_avg:584.53ms +step:17086/57344 train_time:9986982ms step_avg:584.51ms +step:17087/57344 train_time:9987520ms step_avg:584.51ms +grad accum step:4272/14336 +step:17088/57344 train_time:9988793ms step_avg:584.55ms +step:17088/57344 val_loss:6.874677 train_time:9988794ms step_avg:584.55ms +step:17089/57344 train_time:9989088ms step_avg:584.53ms +step:17090/57344 train_time:9989100ms step_avg:584.50ms +step:17091/57344 train_time:9989614ms step_avg:584.50ms +grad accum step:4273/14336 +step:17092/57344 train_time:9990891ms step_avg:584.54ms +step:17093/57344 train_time:9990908ms step_avg:584.50ms +step:17094/57344 train_time:9991153ms step_avg:584.48ms +step:17095/57344 train_time:9991700ms step_avg:584.48ms +grad accum step:4274/14336 +step:17096/57344 train_time:9992990ms step_avg:584.52ms +step:17097/57344 train_time:9993007ms step_avg:584.49ms +step:17098/57344 train_time:9993250ms step_avg:584.47ms +step:17099/57344 train_time:9993793ms step_avg:584.47ms +grad accum step:4275/14336 +step:17100/57344 train_time:9995073ms step_avg:584.51ms +step:17101/57344 train_time:9995089ms step_avg:584.47ms +step:17102/57344 train_time:9995334ms step_avg:584.45ms +step:17103/57344 train_time:9995877ms step_avg:584.45ms +grad accum step:4276/14336 +step:17104/57344 train_time:9997177ms step_avg:584.49ms +step:17105/57344 train_time:9997194ms step_avg:584.46ms +step:17106/57344 train_time:9997437ms step_avg:584.44ms +step:17107/57344 train_time:9997974ms step_avg:584.44ms +grad accum step:4277/14336 +step:17108/57344 train_time:9999281ms step_avg:584.48ms +step:17109/57344 train_time:9999298ms step_avg:584.45ms +step:17110/57344 train_time:9999544ms step_avg:584.43ms +step:17111/57344 train_time:10000094ms step_avg:584.42ms +grad accum step:4278/14336 +step:17112/57344 train_time:10001426ms step_avg:584.47ms +step:17113/57344 train_time:10001442ms step_avg:584.44ms +step:17114/57344 train_time:10001687ms step_avg:584.42ms +step:17115/57344 train_time:10002226ms step_avg:584.41ms +grad accum step:4279/14336 +step:17116/57344 train_time:10003524ms step_avg:584.45ms +step:17117/57344 train_time:10003542ms step_avg:584.42ms +step:17118/57344 train_time:10003788ms step_avg:584.40ms +step:17119/57344 train_time:10004333ms step_avg:584.40ms +grad accum step:4280/14336 +step:17120/57344 train_time:10005609ms step_avg:584.44ms +step:17121/57344 train_time:10005626ms step_avg:584.41ms +step:17122/57344 train_time:10005871ms step_avg:584.39ms +step:17123/57344 train_time:10006413ms step_avg:584.38ms +grad accum step:4281/14336 +step:17124/57344 train_time:10007694ms step_avg:584.42ms +step:17125/57344 train_time:10007711ms step_avg:584.39ms +step:17126/57344 train_time:10007959ms step_avg:584.37ms +step:17127/57344 train_time:10008502ms step_avg:584.37ms +grad accum step:4282/14336 +step:17128/57344 train_time:10009778ms step_avg:584.41ms +step:17129/57344 train_time:10009795ms step_avg:584.38ms +step:17130/57344 train_time:10010037ms step_avg:584.36ms +step:17131/57344 train_time:10010579ms step_avg:584.35ms +grad accum step:4283/14336 +step:17132/57344 train_time:10011858ms step_avg:584.40ms +step:17133/57344 train_time:10011875ms step_avg:584.36ms +step:17134/57344 train_time:10012131ms step_avg:584.34ms +step:17135/57344 train_time:10012690ms step_avg:584.34ms +grad accum step:4284/14336 +step:17136/57344 train_time:10013965ms step_avg:584.38ms +step:17137/57344 train_time:10013982ms step_avg:584.35ms +step:17138/57344 train_time:10014236ms step_avg:584.33ms +step:17139/57344 train_time:10014792ms step_avg:584.33ms +grad accum step:4285/14336 +step:17140/57344 train_time:10016089ms step_avg:584.37ms +step:17141/57344 train_time:10016106ms step_avg:584.34ms +step:17142/57344 train_time:10016353ms step_avg:584.32ms +step:17143/57344 train_time:10016891ms step_avg:584.31ms +grad accum step:4286/14336 +step:17144/57344 train_time:10018193ms step_avg:584.36ms +step:17145/57344 train_time:10018210ms step_avg:584.32ms +step:17146/57344 train_time:10018458ms step_avg:584.30ms +step:17147/57344 train_time:10019002ms step_avg:584.30ms +grad accum step:4287/14336 +step:17148/57344 train_time:10020300ms step_avg:584.34ms +step:17149/57344 train_time:10020317ms step_avg:584.31ms +step:17150/57344 train_time:10020563ms step_avg:584.29ms +step:17151/57344 train_time:10021105ms step_avg:584.29ms +grad accum step:4288/14336 +step:17152/57344 train_time:10022390ms step_avg:584.33ms +step:17152/57344 val_loss:6.874232 train_time:10022391ms step_avg:584.33ms +step:17153/57344 train_time:10022403ms step_avg:584.29ms +step:17154/57344 train_time:10022631ms step_avg:584.27ms +step:17155/57344 train_time:10023193ms step_avg:584.27ms +grad accum step:4289/14336 +step:17156/57344 train_time:10024519ms step_avg:584.32ms +step:17157/57344 train_time:10024536ms step_avg:584.28ms +step:17158/57344 train_time:10024788ms step_avg:584.26ms +step:17159/57344 train_time:10025343ms step_avg:584.26ms +grad accum step:4290/14336 +step:17160/57344 train_time:10026622ms step_avg:584.30ms +step:17161/57344 train_time:10026639ms step_avg:584.27ms +step:17162/57344 train_time:10026888ms step_avg:584.25ms +step:17163/57344 train_time:10027436ms step_avg:584.25ms +grad accum step:4291/14336 +step:17164/57344 train_time:10028709ms step_avg:584.29ms +step:17165/57344 train_time:10028726ms step_avg:584.25ms +step:17166/57344 train_time:10028972ms step_avg:584.23ms +step:17167/57344 train_time:10029513ms step_avg:584.23ms +grad accum step:4292/14336 +step:17168/57344 train_time:10030794ms step_avg:584.27ms +step:17169/57344 train_time:10030811ms step_avg:584.24ms +step:17170/57344 train_time:10031055ms step_avg:584.22ms +step:17171/57344 train_time:10031602ms step_avg:584.22ms +grad accum step:4293/14336 +step:17172/57344 train_time:10032888ms step_avg:584.26ms +step:17173/57344 train_time:10032904ms step_avg:584.23ms +step:17174/57344 train_time:10033151ms step_avg:584.21ms +step:17175/57344 train_time:10033696ms step_avg:584.20ms +grad accum step:4294/14336 +step:17176/57344 train_time:10034991ms step_avg:584.24ms +step:17177/57344 train_time:10035008ms step_avg:584.21ms +step:17178/57344 train_time:10035254ms step_avg:584.19ms +step:17179/57344 train_time:10035796ms step_avg:584.19ms +grad accum step:4295/14336 +step:17180/57344 train_time:10037113ms step_avg:584.23ms +step:17181/57344 train_time:10037130ms step_avg:584.20ms +step:17182/57344 train_time:10037376ms step_avg:584.18ms +step:17183/57344 train_time:10037918ms step_avg:584.18ms +grad accum step:4296/14336 +step:17184/57344 train_time:10039196ms step_avg:584.22ms +step:17185/57344 train_time:10039213ms step_avg:584.18ms +step:17186/57344 train_time:10039460ms step_avg:584.17ms +step:17187/57344 train_time:10040005ms step_avg:584.16ms +grad accum step:4297/14336 +step:17188/57344 train_time:10041302ms step_avg:584.20ms +step:17189/57344 train_time:10041319ms step_avg:584.17ms +step:17190/57344 train_time:10041571ms step_avg:584.15ms +step:17191/57344 train_time:10042130ms step_avg:584.15ms +grad accum step:4298/14336 +step:17192/57344 train_time:10043402ms step_avg:584.19ms +step:17193/57344 train_time:10043419ms step_avg:584.16ms +step:17194/57344 train_time:10043663ms step_avg:584.14ms +step:17195/57344 train_time:10044208ms step_avg:584.14ms +grad accum step:4299/14336 +step:17196/57344 train_time:10045509ms step_avg:584.18ms +step:17197/57344 train_time:10045526ms step_avg:584.14ms +step:17198/57344 train_time:10045772ms step_avg:584.12ms +step:17199/57344 train_time:10046313ms step_avg:584.12ms +grad accum step:4300/14336 +step:17200/57344 train_time:10047614ms step_avg:584.16ms +step:17201/57344 train_time:10047631ms step_avg:584.13ms +step:17202/57344 train_time:10047877ms step_avg:584.11ms +step:17203/57344 train_time:10048420ms step_avg:584.11ms +grad accum step:4301/14336 +step:17204/57344 train_time:10049692ms step_avg:584.15ms +step:17205/57344 train_time:10049709ms step_avg:584.12ms +step:17206/57344 train_time:10049953ms step_avg:584.10ms +step:17207/57344 train_time:10050491ms step_avg:584.09ms +grad accum step:4302/14336 +step:17208/57344 train_time:10051758ms step_avg:584.13ms +step:17209/57344 train_time:10051775ms step_avg:584.10ms +step:17210/57344 train_time:10052020ms step_avg:584.08ms +step:17211/57344 train_time:10052563ms step_avg:584.08ms +grad accum step:4303/14336 +step:17212/57344 train_time:10053846ms step_avg:584.12ms +step:17213/57344 train_time:10053863ms step_avg:584.09ms +step:17214/57344 train_time:10054107ms step_avg:584.07ms +step:17215/57344 train_time:10054652ms step_avg:584.06ms +grad accum step:4304/14336 +step:17216/57344 train_time:10055947ms step_avg:584.10ms +step:17216/57344 val_loss:6.878744 train_time:10055947ms step_avg:584.10ms +step:17217/57344 train_time:10056003ms step_avg:584.07ms +step:17218/57344 train_time:10056180ms step_avg:584.05ms +step:17219/57344 train_time:10056710ms step_avg:584.05ms +grad accum step:4305/14336 +step:17220/57344 train_time:10058020ms step_avg:584.09ms +step:17221/57344 train_time:10058037ms step_avg:584.06ms +step:17222/57344 train_time:10058301ms step_avg:584.04ms +step:17223/57344 train_time:10058883ms step_avg:584.04ms +grad accum step:4306/14336 +step:17224/57344 train_time:10060178ms step_avg:584.08ms +step:17225/57344 train_time:10060195ms step_avg:584.05ms +step:17226/57344 train_time:10060439ms step_avg:584.03ms +step:17227/57344 train_time:10060980ms step_avg:584.02ms +grad accum step:4307/14336 +step:17228/57344 train_time:10062261ms step_avg:584.06ms +step:17229/57344 train_time:10062278ms step_avg:584.03ms +step:17230/57344 train_time:10062531ms step_avg:584.01ms +step:17231/57344 train_time:10063087ms step_avg:584.01ms +grad accum step:4308/14336 +step:17232/57344 train_time:10064369ms step_avg:584.05ms +step:17233/57344 train_time:10064386ms step_avg:584.02ms +step:17234/57344 train_time:10064638ms step_avg:584.00ms +step:17235/57344 train_time:10065196ms step_avg:584.00ms +grad accum step:4309/14336 +step:17236/57344 train_time:10066469ms step_avg:584.04ms +step:17237/57344 train_time:10066486ms step_avg:584.00ms +step:17238/57344 train_time:10066739ms step_avg:583.99ms +step:17239/57344 train_time:10067293ms step_avg:583.98ms +grad accum step:4310/14336 +step:17240/57344 train_time:10068573ms step_avg:584.02ms +step:17241/57344 train_time:10068590ms step_avg:583.99ms +step:17242/57344 train_time:10068839ms step_avg:583.97ms +step:17243/57344 train_time:10069381ms step_avg:583.97ms +grad accum step:4311/14336 +step:17244/57344 train_time:10070658ms step_avg:584.01ms +step:17245/57344 train_time:10070674ms step_avg:583.98ms +step:17246/57344 train_time:10070920ms step_avg:583.96ms +step:17247/57344 train_time:10071466ms step_avg:583.95ms +grad accum step:4312/14336 +step:17248/57344 train_time:10072743ms step_avg:583.99ms +step:17249/57344 train_time:10072760ms step_avg:583.96ms +step:17250/57344 train_time:10073005ms step_avg:583.94ms +step:17251/57344 train_time:10073551ms step_avg:583.94ms +grad accum step:4313/14336 +step:17252/57344 train_time:10074844ms step_avg:583.98ms +step:17253/57344 train_time:10074861ms step_avg:583.95ms +step:17254/57344 train_time:10075110ms step_avg:583.93ms +step:17255/57344 train_time:10075655ms step_avg:583.93ms +grad accum step:4314/14336 +step:17256/57344 train_time:10076924ms step_avg:583.97ms +step:17257/57344 train_time:10076941ms step_avg:583.93ms +step:17258/57344 train_time:10077185ms step_avg:583.91ms +step:17259/57344 train_time:10077725ms step_avg:583.91ms +grad accum step:4315/14336 +step:17260/57344 train_time:10079032ms step_avg:583.95ms +step:17261/57344 train_time:10079049ms step_avg:583.92ms +step:17262/57344 train_time:10079300ms step_avg:583.90ms +step:17263/57344 train_time:10079848ms step_avg:583.90ms +grad accum step:4316/14336 +step:17264/57344 train_time:10081130ms step_avg:583.94ms +step:17265/57344 train_time:10081147ms step_avg:583.91ms +step:17266/57344 train_time:10081393ms step_avg:583.89ms +step:17267/57344 train_time:10081927ms step_avg:583.88ms +grad accum step:4317/14336 +step:17268/57344 train_time:10083209ms step_avg:583.92ms +step:17269/57344 train_time:10083225ms step_avg:583.89ms +step:17270/57344 train_time:10083476ms step_avg:583.87ms +step:17271/57344 train_time:10084032ms step_avg:583.87ms +grad accum step:4318/14336 +step:17272/57344 train_time:10085332ms step_avg:583.91ms +step:17273/57344 train_time:10085349ms step_avg:583.88ms +step:17274/57344 train_time:10085599ms step_avg:583.86ms +step:17275/57344 train_time:10086146ms step_avg:583.86ms +grad accum step:4319/14336 +step:17276/57344 train_time:10087437ms step_avg:583.90ms +step:17277/57344 train_time:10087453ms step_avg:583.87ms +step:17278/57344 train_time:10087700ms step_avg:583.85ms +step:17279/57344 train_time:10088240ms step_avg:583.84ms +grad accum step:4320/14336 +step:17280/57344 train_time:10089520ms step_avg:583.88ms +step:17280/57344 val_loss:6.877972 train_time:10089520ms step_avg:583.88ms +step:17281/57344 train_time:10089532ms step_avg:583.85ms +step:17282/57344 train_time:10089753ms step_avg:583.83ms +step:17283/57344 train_time:10090294ms step_avg:583.83ms +grad accum step:4321/14336 +step:17284/57344 train_time:10091582ms step_avg:583.87ms +step:17285/57344 train_time:10091599ms step_avg:583.84ms +step:17286/57344 train_time:10091845ms step_avg:583.82ms +step:17287/57344 train_time:10092390ms step_avg:583.81ms +grad accum step:4322/14336 +step:17288/57344 train_time:10093665ms step_avg:583.85ms +step:17289/57344 train_time:10093682ms step_avg:583.82ms +step:17290/57344 train_time:10093931ms step_avg:583.80ms +step:17291/57344 train_time:10094475ms step_avg:583.80ms +grad accum step:4323/14336 +step:17292/57344 train_time:10095760ms step_avg:583.84ms +step:17293/57344 train_time:10095777ms step_avg:583.81ms +step:17294/57344 train_time:10096027ms step_avg:583.79ms +step:17295/57344 train_time:10096584ms step_avg:583.79ms +grad accum step:4324/14336 +step:17296/57344 train_time:10097891ms step_avg:583.83ms +step:17297/57344 train_time:10097908ms step_avg:583.80ms +step:17298/57344 train_time:10098152ms step_avg:583.78ms +step:17299/57344 train_time:10098693ms step_avg:583.77ms +grad accum step:4325/14336 +step:17300/57344 train_time:10099992ms step_avg:583.81ms +step:17301/57344 train_time:10100009ms step_avg:583.78ms +step:17302/57344 train_time:10100254ms step_avg:583.76ms +step:17303/57344 train_time:10100805ms step_avg:583.76ms +grad accum step:4326/14336 +step:17304/57344 train_time:10102103ms step_avg:583.80ms +step:17305/57344 train_time:10102120ms step_avg:583.77ms +step:17306/57344 train_time:10102368ms step_avg:583.75ms +step:17307/57344 train_time:10102929ms step_avg:583.75ms +grad accum step:4327/14336 +step:17308/57344 train_time:10104282ms step_avg:583.79ms +step:17309/57344 train_time:10104299ms step_avg:583.76ms +step:17310/57344 train_time:10104549ms step_avg:583.74ms +step:17311/57344 train_time:10105097ms step_avg:583.74ms +grad accum step:4328/14336 +step:17312/57344 train_time:10106389ms step_avg:583.78ms +step:17313/57344 train_time:10106406ms step_avg:583.75ms +step:17314/57344 train_time:10106648ms step_avg:583.73ms +step:17315/57344 train_time:10107192ms step_avg:583.72ms +grad accum step:4329/14336 +step:17316/57344 train_time:10108492ms step_avg:583.77ms +step:17317/57344 train_time:10108509ms step_avg:583.73ms +step:17318/57344 train_time:10108754ms step_avg:583.71ms +step:17319/57344 train_time:10109301ms step_avg:583.71ms +grad accum step:4330/14336 +step:17320/57344 train_time:10110594ms step_avg:583.75ms +step:17321/57344 train_time:10110611ms step_avg:583.72ms +step:17322/57344 train_time:10110861ms step_avg:583.70ms +step:17323/57344 train_time:10111410ms step_avg:583.70ms +grad accum step:4331/14336 +step:17324/57344 train_time:10112696ms step_avg:583.74ms +step:17325/57344 train_time:10112714ms step_avg:583.71ms +step:17326/57344 train_time:10112960ms step_avg:583.69ms +step:17327/57344 train_time:10113501ms step_avg:583.68ms +grad accum step:4332/14336 +step:17328/57344 train_time:10114808ms step_avg:583.73ms +step:17329/57344 train_time:10114825ms step_avg:583.69ms +step:17330/57344 train_time:10115071ms step_avg:583.67ms +step:17331/57344 train_time:10115612ms step_avg:583.67ms +grad accum step:4333/14336 +step:17332/57344 train_time:10116888ms step_avg:583.71ms +step:17333/57344 train_time:10116905ms step_avg:583.68ms +step:17334/57344 train_time:10117152ms step_avg:583.66ms +step:17335/57344 train_time:10117690ms step_avg:583.66ms +grad accum step:4334/14336 +step:17336/57344 train_time:10118951ms step_avg:583.70ms +step:17337/57344 train_time:10118968ms step_avg:583.66ms +step:17338/57344 train_time:10119211ms step_avg:583.64ms +step:17339/57344 train_time:10119757ms step_avg:583.64ms +grad accum step:4335/14336 +step:17340/57344 train_time:10121092ms step_avg:583.68ms +step:17341/57344 train_time:10121104ms step_avg:583.65ms +step:17342/57344 train_time:10121321ms step_avg:583.63ms +step:17343/57344 train_time:10121864ms step_avg:583.63ms +grad accum step:4336/14336 +step:17344/57344 train_time:10123165ms step_avg:583.67ms +step:17344/57344 val_loss:6.898927 train_time:10123166ms step_avg:583.67ms +step:17345/57344 train_time:10123178ms step_avg:583.64ms +step:17346/57344 train_time:10123398ms step_avg:583.62ms +step:17347/57344 train_time:10123935ms step_avg:583.61ms +grad accum step:4337/14336 +step:17348/57344 train_time:10125217ms step_avg:583.65ms +step:17349/57344 train_time:10125234ms step_avg:583.62ms +step:17350/57344 train_time:10125483ms step_avg:583.60ms +step:17351/57344 train_time:10126031ms step_avg:583.60ms +grad accum step:4338/14336 +step:17352/57344 train_time:10127332ms step_avg:583.64ms +step:17353/57344 train_time:10127349ms step_avg:583.61ms +step:17354/57344 train_time:10127609ms step_avg:583.59ms +step:17355/57344 train_time:10128186ms step_avg:583.59ms +grad accum step:4339/14336 +step:17356/57344 train_time:10129482ms step_avg:583.63ms +step:17357/57344 train_time:10129499ms step_avg:583.60ms +step:17358/57344 train_time:10129742ms step_avg:583.58ms +step:17359/57344 train_time:10130284ms step_avg:583.58ms +grad accum step:4340/14336 +step:17360/57344 train_time:10131570ms step_avg:583.62ms +step:17361/57344 train_time:10131586ms step_avg:583.58ms +step:17362/57344 train_time:10131837ms step_avg:583.56ms +step:17363/57344 train_time:10132390ms step_avg:583.56ms +grad accum step:4341/14336 +step:17364/57344 train_time:10133674ms step_avg:583.60ms +step:17365/57344 train_time:10133691ms step_avg:583.57ms +step:17366/57344 train_time:10133943ms step_avg:583.55ms +step:17367/57344 train_time:10134499ms step_avg:583.55ms +grad accum step:4342/14336 +step:17368/57344 train_time:10135789ms step_avg:583.59ms +step:17369/57344 train_time:10135806ms step_avg:583.56ms +step:17370/57344 train_time:10136051ms step_avg:583.54ms +step:17371/57344 train_time:10136597ms step_avg:583.54ms +grad accum step:4343/14336 +step:17372/57344 train_time:10137897ms step_avg:583.58ms +step:17373/57344 train_time:10137914ms step_avg:583.54ms +step:17374/57344 train_time:10138162ms step_avg:583.52ms +step:17375/57344 train_time:10138703ms step_avg:583.52ms +grad accum step:4344/14336 +step:17376/57344 train_time:10139979ms step_avg:583.56ms +step:17377/57344 train_time:10139996ms step_avg:583.53ms +step:17378/57344 train_time:10140242ms step_avg:583.51ms +step:17379/57344 train_time:10140786ms step_avg:583.51ms +grad accum step:4345/14336 +step:17380/57344 train_time:10142084ms step_avg:583.55ms +step:17381/57344 train_time:10142101ms step_avg:583.52ms +step:17382/57344 train_time:10142353ms step_avg:583.50ms +step:17383/57344 train_time:10142911ms step_avg:583.50ms +grad accum step:4346/14336 +step:17384/57344 train_time:10144191ms step_avg:583.54ms +step:17385/57344 train_time:10144207ms step_avg:583.50ms +step:17386/57344 train_time:10144454ms step_avg:583.48ms +step:17387/57344 train_time:10144999ms step_avg:583.48ms +grad accum step:4347/14336 +step:17388/57344 train_time:10154135ms step_avg:583.97ms +step:17389/57344 train_time:10154152ms step_avg:583.94ms +step:17390/57344 train_time:10154393ms step_avg:583.92ms +step:17391/57344 train_time:10154923ms step_avg:583.92ms +grad accum step:4348/14336 +step:17392/57344 train_time:10156198ms step_avg:583.96ms +step:17393/57344 train_time:10156215ms step_avg:583.93ms +step:17394/57344 train_time:10156459ms step_avg:583.91ms +step:17395/57344 train_time:10157004ms step_avg:583.90ms +grad accum step:4349/14336 +step:17396/57344 train_time:10158288ms step_avg:583.94ms +step:17397/57344 train_time:10158305ms step_avg:583.91ms +step:17398/57344 train_time:10158549ms step_avg:583.89ms +step:17399/57344 train_time:10159094ms step_avg:583.89ms +grad accum step:4350/14336 +step:17400/57344 train_time:10160374ms step_avg:583.93ms +step:17401/57344 train_time:10160391ms step_avg:583.90ms +step:17402/57344 train_time:10160643ms step_avg:583.88ms +step:17403/57344 train_time:10161199ms step_avg:583.88ms +grad accum step:4351/14336 +step:17404/57344 train_time:10162497ms step_avg:583.92ms +step:17405/57344 train_time:10162514ms step_avg:583.88ms +step:17406/57344 train_time:10162759ms step_avg:583.87ms +step:17407/57344 train_time:10163298ms step_avg:583.86ms +grad accum step:4352/14336 +step:17408/57344 train_time:10164602ms step_avg:583.90ms +step:17408/57344 val_loss:6.921997 train_time:10164603ms step_avg:583.90ms +step:17409/57344 train_time:10164614ms step_avg:583.87ms +step:17410/57344 train_time:10164844ms step_avg:583.85ms +step:17411/57344 train_time:10165397ms step_avg:583.85ms +grad accum step:4353/14336 +step:17412/57344 train_time:10166674ms step_avg:583.89ms +step:17413/57344 train_time:10166691ms step_avg:583.86ms +step:17414/57344 train_time:10166938ms step_avg:583.84ms +step:17415/57344 train_time:10167479ms step_avg:583.83ms +grad accum step:4354/14336 +step:17416/57344 train_time:10168792ms step_avg:583.88ms +step:17417/57344 train_time:10168809ms step_avg:583.84ms +step:17418/57344 train_time:10169055ms step_avg:583.82ms +step:17419/57344 train_time:10169601ms step_avg:583.82ms +grad accum step:4355/14336 +step:17420/57344 train_time:10170943ms step_avg:583.87ms +step:17421/57344 train_time:10170960ms step_avg:583.83ms +step:17422/57344 train_time:10171204ms step_avg:583.81ms +step:17423/57344 train_time:10171754ms step_avg:583.81ms +grad accum step:4356/14336 +step:17424/57344 train_time:10173061ms step_avg:583.85ms +step:17425/57344 train_time:10173078ms step_avg:583.82ms +step:17426/57344 train_time:10173327ms step_avg:583.80ms +step:17427/57344 train_time:10173869ms step_avg:583.80ms +grad accum step:4357/14336 +step:17428/57344 train_time:10175145ms step_avg:583.84ms +step:17429/57344 train_time:10175162ms step_avg:583.81ms +step:17430/57344 train_time:10175405ms step_avg:583.79ms +step:17431/57344 train_time:10175950ms step_avg:583.78ms +grad accum step:4358/14336 +step:17432/57344 train_time:10177229ms step_avg:583.82ms +step:17433/57344 train_time:10177246ms step_avg:583.79ms +step:17434/57344 train_time:10177494ms step_avg:583.77ms +step:17435/57344 train_time:10178036ms step_avg:583.77ms +grad accum step:4359/14336 +step:17436/57344 train_time:10179312ms step_avg:583.81ms +step:17437/57344 train_time:10179329ms step_avg:583.78ms +step:17438/57344 train_time:10179576ms step_avg:583.76ms +step:17439/57344 train_time:10180122ms step_avg:583.76ms +grad accum step:4360/14336 +step:17440/57344 train_time:10181401ms step_avg:583.80ms +step:17441/57344 train_time:10181417ms step_avg:583.76ms +step:17442/57344 train_time:10181662ms step_avg:583.74ms +step:17443/57344 train_time:10182206ms step_avg:583.74ms +grad accum step:4361/14336 +step:17444/57344 train_time:10183486ms step_avg:583.78ms +step:17445/57344 train_time:10183503ms step_avg:583.75ms +step:17446/57344 train_time:10183750ms step_avg:583.73ms +step:17447/57344 train_time:10184292ms step_avg:583.73ms +grad accum step:4362/14336 +step:17448/57344 train_time:10185597ms step_avg:583.77ms +step:17449/57344 train_time:10185614ms step_avg:583.74ms +step:17450/57344 train_time:10185867ms step_avg:583.72ms +step:17451/57344 train_time:10186425ms step_avg:583.72ms +grad accum step:4363/14336 +step:17452/57344 train_time:10187718ms step_avg:583.76ms +step:17453/57344 train_time:10187735ms step_avg:583.72ms +step:17454/57344 train_time:10187983ms step_avg:583.70ms +step:17455/57344 train_time:10188526ms step_avg:583.70ms +grad accum step:4364/14336 +step:17456/57344 train_time:10189798ms step_avg:583.74ms +step:17457/57344 train_time:10189815ms step_avg:583.71ms +step:17458/57344 train_time:10190058ms step_avg:583.69ms +step:17459/57344 train_time:10190595ms step_avg:583.69ms +grad accum step:4365/14336 +step:17460/57344 train_time:10191870ms step_avg:583.73ms +step:17461/57344 train_time:10191887ms step_avg:583.69ms +step:17462/57344 train_time:10192135ms step_avg:583.68ms +step:17463/57344 train_time:10192678ms step_avg:583.67ms +grad accum step:4366/14336 +step:17464/57344 train_time:10193963ms step_avg:583.71ms +step:17465/57344 train_time:10193980ms step_avg:583.68ms +step:17466/57344 train_time:10194226ms step_avg:583.66ms +step:17467/57344 train_time:10194765ms step_avg:583.66ms +grad accum step:4367/14336 +step:17468/57344 train_time:10196043ms step_avg:583.70ms +step:17469/57344 train_time:10196061ms step_avg:583.67ms +step:17470/57344 train_time:10196306ms step_avg:583.65ms +step:17471/57344 train_time:10196850ms step_avg:583.64ms +grad accum step:4368/14336 +step:17472/57344 train_time:10198132ms step_avg:583.68ms +step:17472/57344 val_loss:6.906083 train_time:10198133ms step_avg:583.68ms +step:17473/57344 train_time:10198145ms step_avg:583.65ms +step:17474/57344 train_time:10198371ms step_avg:583.63ms +step:17475/57344 train_time:10198924ms step_avg:583.63ms +grad accum step:4369/14336 +step:17476/57344 train_time:10200211ms step_avg:583.67ms +step:17477/57344 train_time:10200228ms step_avg:583.64ms +step:17478/57344 train_time:10200477ms step_avg:583.62ms +step:17479/57344 train_time:10201020ms step_avg:583.62ms +grad accum step:4370/14336 +step:17480/57344 train_time:10202295ms step_avg:583.66ms +step:17481/57344 train_time:10202312ms step_avg:583.62ms +step:17482/57344 train_time:10202563ms step_avg:583.60ms +step:17483/57344 train_time:10203108ms step_avg:583.60ms +grad accum step:4371/14336 +step:17484/57344 train_time:10204402ms step_avg:583.64ms +step:17485/57344 train_time:10204419ms step_avg:583.61ms +step:17486/57344 train_time:10204664ms step_avg:583.59ms +step:17487/57344 train_time:10205206ms step_avg:583.59ms +grad accum step:4372/14336 +step:17488/57344 train_time:10206485ms step_avg:583.63ms +step:17489/57344 train_time:10206502ms step_avg:583.60ms +step:17490/57344 train_time:10206751ms step_avg:583.58ms +step:17491/57344 train_time:10207298ms step_avg:583.57ms +grad accum step:4373/14336 +step:17492/57344 train_time:10208589ms step_avg:583.61ms +step:17493/57344 train_time:10208607ms step_avg:583.58ms +step:17494/57344 train_time:10208861ms step_avg:583.56ms +step:17495/57344 train_time:10209418ms step_avg:583.56ms +grad accum step:4374/14336 +step:17496/57344 train_time:10210721ms step_avg:583.60ms +step:17497/57344 train_time:10210738ms step_avg:583.57ms +step:17498/57344 train_time:10210989ms step_avg:583.55ms +step:17499/57344 train_time:10211536ms step_avg:583.55ms +grad accum step:4375/14336 +step:17500/57344 train_time:10212806ms step_avg:583.59ms +step:17501/57344 train_time:10212823ms step_avg:583.56ms +step:17502/57344 train_time:10213073ms step_avg:583.54ms +step:17503/57344 train_time:10213619ms step_avg:583.54ms +grad accum step:4376/14336 +step:17504/57344 train_time:10214911ms step_avg:583.58ms +step:17505/57344 train_time:10214928ms step_avg:583.54ms +step:17506/57344 train_time:10215177ms step_avg:583.52ms +step:17507/57344 train_time:10215720ms step_avg:583.52ms +grad accum step:4377/14336 +step:17508/57344 train_time:10217029ms step_avg:583.56ms +step:17509/57344 train_time:10217046ms step_avg:583.53ms +step:17510/57344 train_time:10217298ms step_avg:583.51ms +step:17511/57344 train_time:10217854ms step_avg:583.51ms +grad accum step:4378/14336 +step:17512/57344 train_time:10219123ms step_avg:583.55ms +step:17513/57344 train_time:10219140ms step_avg:583.52ms +step:17514/57344 train_time:10219389ms step_avg:583.50ms +step:17515/57344 train_time:10219948ms step_avg:583.50ms +grad accum step:4379/14336 +step:17516/57344 train_time:10221249ms step_avg:583.54ms +step:17517/57344 train_time:10221266ms step_avg:583.51ms +step:17518/57344 train_time:10221511ms step_avg:583.49ms +step:17519/57344 train_time:10222054ms step_avg:583.48ms +grad accum step:4380/14336 +step:17520/57344 train_time:10223359ms step_avg:583.53ms +step:17521/57344 train_time:10223376ms step_avg:583.49ms +step:17522/57344 train_time:10223622ms step_avg:583.47ms +step:17523/57344 train_time:10224162ms step_avg:583.47ms +grad accum step:4381/14336 +step:17524/57344 train_time:10225454ms step_avg:583.51ms +step:17525/57344 train_time:10225471ms step_avg:583.48ms +step:17526/57344 train_time:10225717ms step_avg:583.46ms +step:17527/57344 train_time:10226264ms step_avg:583.46ms +grad accum step:4382/14336 +step:17528/57344 train_time:10227598ms step_avg:583.50ms +step:17529/57344 train_time:10227615ms step_avg:583.47ms +step:17530/57344 train_time:10227865ms step_avg:583.45ms +step:17531/57344 train_time:10228413ms step_avg:583.45ms +grad accum step:4383/14336 +step:17532/57344 train_time:10229735ms step_avg:583.49ms +step:17533/57344 train_time:10229752ms step_avg:583.46ms +step:17534/57344 train_time:10230003ms step_avg:583.44ms +step:17535/57344 train_time:10230553ms step_avg:583.44ms +grad accum step:4384/14336 +step:17536/57344 train_time:10231841ms step_avg:583.48ms +step:17536/57344 val_loss:6.916491 train_time:10231842ms step_avg:583.48ms +step:17537/57344 train_time:10231854ms step_avg:583.44ms +step:17538/57344 train_time:10232073ms step_avg:583.42ms +step:17539/57344 train_time:10232603ms step_avg:583.42ms +grad accum step:4385/14336 +step:17540/57344 train_time:10233883ms step_avg:583.46ms +step:17541/57344 train_time:10233900ms step_avg:583.43ms +step:17542/57344 train_time:10234147ms step_avg:583.41ms +step:17543/57344 train_time:10234693ms step_avg:583.41ms +grad accum step:4386/14336 +step:17544/57344 train_time:10235991ms step_avg:583.45ms +step:17545/57344 train_time:10236007ms step_avg:583.41ms +step:17546/57344 train_time:10236251ms step_avg:583.40ms +step:17547/57344 train_time:10236793ms step_avg:583.39ms +grad accum step:4387/14336 +step:17548/57344 train_time:10238073ms step_avg:583.43ms +step:17549/57344 train_time:10238091ms step_avg:583.40ms +step:17550/57344 train_time:10238335ms step_avg:583.38ms +step:17551/57344 train_time:10238880ms step_avg:583.38ms +grad accum step:4388/14336 +step:17552/57344 train_time:10240184ms step_avg:583.42ms +step:17553/57344 train_time:10240201ms step_avg:583.39ms +step:17554/57344 train_time:10240452ms step_avg:583.37ms +step:17555/57344 train_time:10240998ms step_avg:583.37ms +grad accum step:4389/14336 +step:17556/57344 train_time:10242288ms step_avg:583.41ms +step:17557/57344 train_time:10242305ms step_avg:583.37ms +step:17558/57344 train_time:10242554ms step_avg:583.36ms +step:17559/57344 train_time:10243102ms step_avg:583.35ms +grad accum step:4390/14336 +step:17560/57344 train_time:10244376ms step_avg:583.39ms +step:17561/57344 train_time:10244393ms step_avg:583.36ms +step:17562/57344 train_time:10244640ms step_avg:583.34ms +step:17563/57344 train_time:10245186ms step_avg:583.34ms +grad accum step:4391/14336 +step:17564/57344 train_time:10246465ms step_avg:583.38ms +step:17565/57344 train_time:10246482ms step_avg:583.35ms +step:17566/57344 train_time:10246730ms step_avg:583.33ms +step:17567/57344 train_time:10247278ms step_avg:583.33ms +grad accum step:4392/14336 +step:17568/57344 train_time:10248589ms step_avg:583.37ms +step:17569/57344 train_time:10248606ms step_avg:583.33ms +step:17570/57344 train_time:10248850ms step_avg:583.32ms +step:17571/57344 train_time:10249395ms step_avg:583.31ms +grad accum step:4393/14336 +step:17572/57344 train_time:10250672ms step_avg:583.35ms +step:17573/57344 train_time:10250689ms step_avg:583.32ms +step:17574/57344 train_time:10250939ms step_avg:583.30ms +step:17575/57344 train_time:10251488ms step_avg:583.30ms +grad accum step:4394/14336 +step:17576/57344 train_time:10252757ms step_avg:583.34ms +step:17577/57344 train_time:10252775ms step_avg:583.31ms +step:17578/57344 train_time:10253025ms step_avg:583.29ms +step:17579/57344 train_time:10253578ms step_avg:583.29ms +grad accum step:4395/14336 +step:17580/57344 train_time:10254870ms step_avg:583.33ms +step:17581/57344 train_time:10254887ms step_avg:583.29ms +step:17582/57344 train_time:10255132ms step_avg:583.27ms +step:17583/57344 train_time:10255677ms step_avg:583.27ms +grad accum step:4396/14336 +step:17584/57344 train_time:10256959ms step_avg:583.31ms +step:17585/57344 train_time:10256976ms step_avg:583.28ms +step:17586/57344 train_time:10257219ms step_avg:583.26ms +step:17587/57344 train_time:10257769ms step_avg:583.26ms +grad accum step:4397/14336 +step:17588/57344 train_time:10259070ms step_avg:583.30ms +step:17589/57344 train_time:10259087ms step_avg:583.27ms +step:17590/57344 train_time:10259332ms step_avg:583.25ms +step:17591/57344 train_time:10259877ms step_avg:583.25ms +grad accum step:4398/14336 +step:17592/57344 train_time:10261196ms step_avg:583.29ms +step:17593/57344 train_time:10261213ms step_avg:583.26ms +step:17594/57344 train_time:10261459ms step_avg:583.24ms +step:17595/57344 train_time:10262007ms step_avg:583.23ms +grad accum step:4399/14336 +step:17596/57344 train_time:10263289ms step_avg:583.27ms +step:17597/57344 train_time:10263307ms step_avg:583.24ms +step:17598/57344 train_time:10263554ms step_avg:583.22ms +step:17599/57344 train_time:10264103ms step_avg:583.22ms +grad accum step:4400/14336 +step:17600/57344 train_time:10265418ms step_avg:583.26ms +step:17600/57344 val_loss:6.908957 train_time:10265419ms step_avg:583.26ms +step:17601/57344 train_time:10265431ms step_avg:583.23ms +step:17602/57344 train_time:10265656ms step_avg:583.21ms +step:17603/57344 train_time:10266206ms step_avg:583.21ms +grad accum step:4401/14336 +step:17604/57344 train_time:10267482ms step_avg:583.25ms +step:17605/57344 train_time:10267499ms step_avg:583.21ms +step:17606/57344 train_time:10267744ms step_avg:583.20ms +step:17607/57344 train_time:10268287ms step_avg:583.19ms +grad accum step:4402/14336 +step:17608/57344 train_time:10269568ms step_avg:583.23ms +step:17609/57344 train_time:10269585ms step_avg:583.20ms +step:17610/57344 train_time:10269835ms step_avg:583.18ms +step:17611/57344 train_time:10270386ms step_avg:583.18ms +grad accum step:4403/14336 +step:17612/57344 train_time:10271694ms step_avg:583.22ms +step:17613/57344 train_time:10271711ms step_avg:583.19ms +step:17614/57344 train_time:10271960ms step_avg:583.17ms +step:17615/57344 train_time:10272513ms step_avg:583.17ms +grad accum step:4404/14336 +step:17616/57344 train_time:10273846ms step_avg:583.21ms +step:17617/57344 train_time:10273863ms step_avg:583.18ms +step:17618/57344 train_time:10274107ms step_avg:583.16ms +step:17619/57344 train_time:10274643ms step_avg:583.16ms +grad accum step:4405/14336 +step:17620/57344 train_time:10275947ms step_avg:583.20ms +step:17621/57344 train_time:10275964ms step_avg:583.17ms +step:17622/57344 train_time:10276212ms step_avg:583.15ms +step:17623/57344 train_time:10276761ms step_avg:583.14ms +grad accum step:4406/14336 +step:17624/57344 train_time:10278057ms step_avg:583.19ms +step:17625/57344 train_time:10278074ms step_avg:583.15ms +step:17626/57344 train_time:10278320ms step_avg:583.13ms +step:17627/57344 train_time:10278864ms step_avg:583.13ms +grad accum step:4407/14336 +step:17628/57344 train_time:10280149ms step_avg:583.17ms +step:17629/57344 train_time:10280166ms step_avg:583.14ms +step:17630/57344 train_time:10280411ms step_avg:583.12ms +step:17631/57344 train_time:10280956ms step_avg:583.12ms +grad accum step:4408/14336 +step:17632/57344 train_time:10282295ms step_avg:583.16ms +step:17633/57344 train_time:10282312ms step_avg:583.13ms +step:17634/57344 train_time:10282558ms step_avg:583.11ms +step:17635/57344 train_time:10283102ms step_avg:583.11ms +grad accum step:4409/14336 +step:17636/57344 train_time:10284392ms step_avg:583.15ms +step:17637/57344 train_time:10284409ms step_avg:583.12ms +step:17638/57344 train_time:10284654ms step_avg:583.10ms +step:17639/57344 train_time:10285189ms step_avg:583.09ms +grad accum step:4410/14336 +step:17640/57344 train_time:10286523ms step_avg:583.14ms +step:17641/57344 train_time:10286539ms step_avg:583.10ms +step:17642/57344 train_time:10286786ms step_avg:583.09ms +step:17643/57344 train_time:10287339ms step_avg:583.08ms +grad accum step:4411/14336 +step:17644/57344 train_time:10288667ms step_avg:583.13ms +step:17645/57344 train_time:10288684ms step_avg:583.09ms +step:17646/57344 train_time:10288931ms step_avg:583.07ms +step:17647/57344 train_time:10289468ms step_avg:583.07ms +grad accum step:4412/14336 +step:17648/57344 train_time:10290760ms step_avg:583.11ms +step:17649/57344 train_time:10290777ms step_avg:583.08ms +step:17650/57344 train_time:10291022ms step_avg:583.06ms +step:17651/57344 train_time:10291559ms step_avg:583.06ms +grad accum step:4413/14336 +step:17652/57344 train_time:10292835ms step_avg:583.10ms +step:17653/57344 train_time:10292852ms step_avg:583.07ms +step:17654/57344 train_time:10293099ms step_avg:583.05ms +step:17655/57344 train_time:10293645ms step_avg:583.04ms +grad accum step:4414/14336 +step:17656/57344 train_time:10294947ms step_avg:583.08ms +step:17657/57344 train_time:10294964ms step_avg:583.05ms +step:17658/57344 train_time:10295209ms step_avg:583.03ms +step:17659/57344 train_time:10295752ms step_avg:583.03ms +grad accum step:4415/14336 +step:17660/57344 train_time:10297033ms step_avg:583.07ms +step:17661/57344 train_time:10297050ms step_avg:583.04ms +step:17662/57344 train_time:10297301ms step_avg:583.02ms +step:17663/57344 train_time:10297847ms step_avg:583.02ms +grad accum step:4416/14336 +step:17664/57344 train_time:10299127ms step_avg:583.06ms +step:17664/57344 val_loss:6.902090 train_time:10299128ms step_avg:583.06ms +step:17665/57344 train_time:10299139ms step_avg:583.03ms +step:17666/57344 train_time:10299366ms step_avg:583.00ms +step:17667/57344 train_time:10299916ms step_avg:583.00ms +grad accum step:4417/14336 +step:17668/57344 train_time:10301218ms step_avg:583.04ms +step:17669/57344 train_time:10301235ms step_avg:583.01ms +step:17670/57344 train_time:10301484ms step_avg:582.99ms +step:17671/57344 train_time:10302034ms step_avg:582.99ms +grad accum step:4418/14336 +step:17672/57344 train_time:10303356ms step_avg:583.03ms +step:17673/57344 train_time:10303372ms step_avg:583.00ms +step:17674/57344 train_time:10303621ms step_avg:582.98ms +step:17675/57344 train_time:10304169ms step_avg:582.98ms +grad accum step:4419/14336 +step:17676/57344 train_time:10305478ms step_avg:583.02ms +step:17677/57344 train_time:10305494ms step_avg:582.99ms +step:17678/57344 train_time:10305740ms step_avg:582.97ms +step:17679/57344 train_time:10306276ms step_avg:582.97ms +grad accum step:4420/14336 +step:17680/57344 train_time:10307577ms step_avg:583.01ms +step:17681/57344 train_time:10307594ms step_avg:582.98ms +step:17682/57344 train_time:10307844ms step_avg:582.96ms +step:17683/57344 train_time:10308395ms step_avg:582.96ms +grad accum step:4421/14336 +step:17684/57344 train_time:10309698ms step_avg:583.00ms +step:17685/57344 train_time:10309715ms step_avg:582.96ms +step:17686/57344 train_time:10309969ms step_avg:582.95ms +step:17687/57344 train_time:10310529ms step_avg:582.94ms +grad accum step:4422/14336 +step:17688/57344 train_time:10311804ms step_avg:582.98ms +step:17689/57344 train_time:10311822ms step_avg:582.95ms +step:17690/57344 train_time:10312066ms step_avg:582.93ms +step:17691/57344 train_time:10312611ms step_avg:582.93ms +grad accum step:4423/14336 +step:17692/57344 train_time:10313897ms step_avg:582.97ms +step:17693/57344 train_time:10313912ms step_avg:582.94ms +step:17694/57344 train_time:10314156ms step_avg:582.92ms +step:17695/57344 train_time:10314692ms step_avg:582.92ms +grad accum step:4424/14336 +step:17696/57344 train_time:10315987ms step_avg:582.96ms +step:17697/57344 train_time:10316005ms step_avg:582.92ms +step:17698/57344 train_time:10316252ms step_avg:582.90ms +step:17699/57344 train_time:10316800ms step_avg:582.90ms +grad accum step:4425/14336 +step:17700/57344 train_time:10318106ms step_avg:582.94ms +step:17701/57344 train_time:10318122ms step_avg:582.91ms +step:17702/57344 train_time:10318368ms step_avg:582.89ms +step:17703/57344 train_time:10318904ms step_avg:582.89ms +grad accum step:4426/14336 +step:17704/57344 train_time:10329376ms step_avg:583.45ms +step:17705/57344 train_time:10329963ms step_avg:583.45ms +step:17706/57344 train_time:10330107ms step_avg:583.42ms +step:17707/57344 train_time:10330644ms step_avg:583.42ms +grad accum step:4427/14336 +step:17708/57344 train_time:10331936ms step_avg:583.46ms +step:17709/57344 train_time:10331953ms step_avg:583.43ms +step:17710/57344 train_time:10332194ms step_avg:583.41ms +step:17711/57344 train_time:10332733ms step_avg:583.41ms +grad accum step:4428/14336 +step:17712/57344 train_time:10334037ms step_avg:583.45ms +step:17713/57344 train_time:10334054ms step_avg:583.42ms +step:17714/57344 train_time:10334297ms step_avg:583.40ms +step:17715/57344 train_time:10334837ms step_avg:583.39ms +grad accum step:4429/14336 +step:17716/57344 train_time:10336117ms step_avg:583.43ms +step:17717/57344 train_time:10336133ms step_avg:583.40ms +step:17718/57344 train_time:10336379ms step_avg:583.38ms +step:17719/57344 train_time:10336922ms step_avg:583.38ms +grad accum step:4430/14336 +step:17720/57344 train_time:10338203ms step_avg:583.42ms +step:17721/57344 train_time:10338220ms step_avg:583.39ms +step:17722/57344 train_time:10338462ms step_avg:583.37ms +step:17723/57344 train_time:10339008ms step_avg:583.37ms +grad accum step:4431/14336 +step:17724/57344 train_time:10340308ms step_avg:583.41ms +step:17725/57344 train_time:10340324ms step_avg:583.38ms +step:17726/57344 train_time:10340574ms step_avg:583.36ms +step:17727/57344 train_time:10341124ms step_avg:583.35ms +grad accum step:4432/14336 +step:17728/57344 train_time:10342414ms step_avg:583.39ms +step:17728/57344 val_loss:6.908919 train_time:10342415ms step_avg:583.39ms +step:17729/57344 train_time:10342427ms step_avg:583.36ms +step:17730/57344 train_time:10342649ms step_avg:583.34ms +step:17731/57344 train_time:10343190ms step_avg:583.34ms +grad accum step:4433/14336 +step:17732/57344 train_time:10344470ms step_avg:583.38ms +step:17733/57344 train_time:10344487ms step_avg:583.35ms +step:17734/57344 train_time:10344732ms step_avg:583.33ms +step:17735/57344 train_time:10345282ms step_avg:583.33ms +grad accum step:4434/14336 +step:17736/57344 train_time:10346586ms step_avg:583.37ms +step:17737/57344 train_time:10346603ms step_avg:583.33ms +step:17738/57344 train_time:10346847ms step_avg:583.32ms +step:17739/57344 train_time:10347383ms step_avg:583.31ms +grad accum step:4435/14336 +step:17740/57344 train_time:10348673ms step_avg:583.35ms +step:17741/57344 train_time:10348689ms step_avg:583.32ms +step:17742/57344 train_time:10348932ms step_avg:583.30ms +step:17743/57344 train_time:10349472ms step_avg:583.30ms +grad accum step:4436/14336 +step:17744/57344 train_time:10350766ms step_avg:583.34ms +step:17745/57344 train_time:10350783ms step_avg:583.31ms +step:17746/57344 train_time:10351029ms step_avg:583.29ms +step:17747/57344 train_time:10351569ms step_avg:583.29ms +grad accum step:4437/14336 +step:17748/57344 train_time:10352845ms step_avg:583.32ms +step:17749/57344 train_time:10352862ms step_avg:583.29ms +step:17750/57344 train_time:10353115ms step_avg:583.27ms +step:17751/57344 train_time:10353669ms step_avg:583.27ms +grad accum step:4438/14336 +step:17752/57344 train_time:10354974ms step_avg:583.31ms +step:17753/57344 train_time:10354991ms step_avg:583.28ms +step:17754/57344 train_time:10355239ms step_avg:583.26ms +step:17755/57344 train_time:10355783ms step_avg:583.26ms +grad accum step:4439/14336 +step:17756/57344 train_time:10357056ms step_avg:583.30ms +step:17757/57344 train_time:10357073ms step_avg:583.27ms +step:17758/57344 train_time:10357318ms step_avg:583.25ms +step:17759/57344 train_time:10357863ms step_avg:583.25ms +grad accum step:4440/14336 +step:17760/57344 train_time:10359166ms step_avg:583.29ms +step:17761/57344 train_time:10359183ms step_avg:583.25ms +step:17762/57344 train_time:10359427ms step_avg:583.24ms +step:17763/57344 train_time:10359971ms step_avg:583.23ms +grad accum step:4441/14336 +step:17764/57344 train_time:10361243ms step_avg:583.27ms +step:17765/57344 train_time:10361260ms step_avg:583.24ms +step:17766/57344 train_time:10361509ms step_avg:583.22ms +step:17767/57344 train_time:10362052ms step_avg:583.22ms +grad accum step:4442/14336 +step:17768/57344 train_time:10363327ms step_avg:583.26ms +step:17769/57344 train_time:10363344ms step_avg:583.23ms +step:17770/57344 train_time:10363587ms step_avg:583.21ms +step:17771/57344 train_time:10364129ms step_avg:583.20ms +grad accum step:4443/14336 +step:17772/57344 train_time:10365421ms step_avg:583.24ms +step:17773/57344 train_time:10365438ms step_avg:583.21ms +step:17774/57344 train_time:10365685ms step_avg:583.19ms +step:17775/57344 train_time:10366225ms step_avg:583.19ms +grad accum step:4444/14336 +step:17776/57344 train_time:10367501ms step_avg:583.23ms +step:17777/57344 train_time:10367518ms step_avg:583.20ms +step:17778/57344 train_time:10367765ms step_avg:583.18ms +step:17779/57344 train_time:10368307ms step_avg:583.18ms +grad accum step:4445/14336 +step:17780/57344 train_time:10369626ms step_avg:583.22ms +step:17781/57344 train_time:10369643ms step_avg:583.19ms +step:17782/57344 train_time:10369891ms step_avg:583.17ms +step:17783/57344 train_time:10370432ms step_avg:583.17ms +grad accum step:4446/14336 +step:17784/57344 train_time:10371745ms step_avg:583.21ms +step:17785/57344 train_time:10371762ms step_avg:583.17ms +step:17786/57344 train_time:10372007ms step_avg:583.16ms +step:17787/57344 train_time:10372549ms step_avg:583.15ms +grad accum step:4447/14336 +step:17788/57344 train_time:10373849ms step_avg:583.19ms +step:17789/57344 train_time:10373866ms step_avg:583.16ms +step:17790/57344 train_time:10374122ms step_avg:583.14ms +step:17791/57344 train_time:10395189ms step_avg:584.29ms +grad accum step:4448/14336 +step:17792/57344 train_time:10396279ms step_avg:584.32ms +step:17792/57344 val_loss:6.925640 train_time:10396280ms step_avg:584.32ms +step:17793/57344 train_time:10396292ms step_avg:584.29ms +step:17794/57344 train_time:10396516ms step_avg:584.27ms +step:17795/57344 train_time:10397053ms step_avg:584.27ms +grad accum step:4449/14336 +step:17796/57344 train_time:10398343ms step_avg:584.31ms +step:17797/57344 train_time:10398359ms step_avg:584.28ms +step:17798/57344 train_time:10398602ms step_avg:584.26ms +step:17799/57344 train_time:10399133ms step_avg:584.25ms +grad accum step:4450/14336 +step:17800/57344 train_time:10400411ms step_avg:584.29ms +step:17801/57344 train_time:10400428ms step_avg:584.26ms +step:17802/57344 train_time:10400671ms step_avg:584.24ms +step:17803/57344 train_time:10401218ms step_avg:584.24ms +grad accum step:4451/14336 +step:17804/57344 train_time:10410290ms step_avg:584.72ms +step:17805/57344 train_time:10410307ms step_avg:584.68ms +step:17806/57344 train_time:10410551ms step_avg:584.67ms +step:17807/57344 train_time:10411093ms step_avg:584.66ms +grad accum step:4452/14336 +step:17808/57344 train_time:10412378ms step_avg:584.70ms +step:17809/57344 train_time:10412395ms step_avg:584.67ms +step:17810/57344 train_time:10412647ms step_avg:584.65ms +step:17811/57344 train_time:10413198ms step_avg:584.65ms +grad accum step:4453/14336 +step:17812/57344 train_time:10414484ms step_avg:584.69ms +step:17813/57344 train_time:10414501ms step_avg:584.66ms +step:17814/57344 train_time:10414742ms step_avg:584.64ms +step:17815/57344 train_time:10415287ms step_avg:584.64ms +grad accum step:4454/14336 +step:17816/57344 train_time:10416562ms step_avg:584.67ms +step:17817/57344 train_time:10416579ms step_avg:584.64ms +step:17818/57344 train_time:10416821ms step_avg:584.62ms +step:17819/57344 train_time:10417358ms step_avg:584.62ms +grad accum step:4455/14336 +step:17820/57344 train_time:10418649ms step_avg:584.66ms +step:17821/57344 train_time:10418666ms step_avg:584.63ms +step:17822/57344 train_time:10418916ms step_avg:584.61ms +step:17823/57344 train_time:10419468ms step_avg:584.61ms +grad accum step:4456/14336 +step:17824/57344 train_time:10420752ms step_avg:584.65ms +step:17825/57344 train_time:10420769ms step_avg:584.62ms +step:17826/57344 train_time:10421011ms step_avg:584.60ms +step:17827/57344 train_time:10421546ms step_avg:584.59ms +grad accum step:4457/14336 +step:17828/57344 train_time:10422864ms step_avg:584.63ms +step:17829/57344 train_time:10422881ms step_avg:584.60ms +step:17830/57344 train_time:10423127ms step_avg:584.58ms +step:17831/57344 train_time:10423673ms step_avg:584.58ms +grad accum step:4458/14336 +step:17832/57344 train_time:10424970ms step_avg:584.62ms +step:17833/57344 train_time:10424987ms step_avg:584.59ms +step:17834/57344 train_time:10425239ms step_avg:584.57ms +step:17835/57344 train_time:10425793ms step_avg:584.57ms +grad accum step:4459/14336 +step:17836/57344 train_time:10427077ms step_avg:584.61ms +step:17837/57344 train_time:10427094ms step_avg:584.58ms +step:17838/57344 train_time:10427338ms step_avg:584.56ms +step:17839/57344 train_time:10427885ms step_avg:584.56ms +grad accum step:4460/14336 +step:17840/57344 train_time:10429177ms step_avg:584.60ms +step:17841/57344 train_time:10429194ms step_avg:584.56ms +step:17842/57344 train_time:10429440ms step_avg:584.54ms +step:17843/57344 train_time:10429991ms step_avg:584.54ms +grad accum step:4461/14336 +step:17844/57344 train_time:10431272ms step_avg:584.58ms +step:17845/57344 train_time:10431289ms step_avg:584.55ms +step:17846/57344 train_time:10431533ms step_avg:584.53ms +step:17847/57344 train_time:10432076ms step_avg:584.53ms +grad accum step:4462/14336 +step:17848/57344 train_time:10452913ms step_avg:585.66ms +step:17849/57344 train_time:10452929ms step_avg:585.63ms +step:17850/57344 train_time:10453171ms step_avg:585.61ms +step:17851/57344 train_time:10453708ms step_avg:585.61ms +grad accum step:4463/14336 +step:17852/57344 train_time:10455010ms step_avg:585.65ms +step:17853/57344 train_time:10455026ms step_avg:585.62ms +step:17854/57344 train_time:10455269ms step_avg:585.60ms +step:17855/57344 train_time:10455810ms step_avg:585.60ms +grad accum step:4464/14336 +step:17856/57344 train_time:10457087ms step_avg:585.63ms +step:17856/57344 val_loss:6.928489 train_time:10457087ms step_avg:585.63ms +step:17857/57344 train_time:10457099ms step_avg:585.60ms +step:17858/57344 train_time:10457322ms step_avg:585.58ms +step:17859/57344 train_time:10457865ms step_avg:585.58ms +grad accum step:4465/14336 +step:17860/57344 train_time:10459137ms step_avg:585.62ms +step:17861/57344 train_time:10459154ms step_avg:585.59ms +step:17862/57344 train_time:10459403ms step_avg:585.57ms +step:17863/57344 train_time:10459951ms step_avg:585.57ms +grad accum step:4466/14336 +step:17864/57344 train_time:10461230ms step_avg:585.60ms +step:17865/57344 train_time:10461247ms step_avg:585.57ms +step:17866/57344 train_time:10461492ms step_avg:585.55ms +step:17867/57344 train_time:10462036ms step_avg:585.55ms +grad accum step:4467/14336 +step:17868/57344 train_time:10463318ms step_avg:585.59ms +step:17869/57344 train_time:10463335ms step_avg:585.56ms +step:17870/57344 train_time:10463588ms step_avg:585.54ms +step:17871/57344 train_time:10464144ms step_avg:585.54ms +grad accum step:4468/14336 +step:17872/57344 train_time:10465423ms step_avg:585.58ms +step:17873/57344 train_time:10465439ms step_avg:585.54ms +step:17874/57344 train_time:10465683ms step_avg:585.53ms +step:17875/57344 train_time:10466227ms step_avg:585.52ms +grad accum step:4469/14336 +step:17876/57344 train_time:10467515ms step_avg:585.56ms +step:17877/57344 train_time:10467532ms step_avg:585.53ms +step:17878/57344 train_time:10467775ms step_avg:585.51ms +step:17879/57344 train_time:10468317ms step_avg:585.51ms +grad accum step:4470/14336 +step:17880/57344 train_time:10469597ms step_avg:585.55ms +step:17881/57344 train_time:10469614ms step_avg:585.52ms +step:17882/57344 train_time:10469864ms step_avg:585.50ms +step:17883/57344 train_time:10470408ms step_avg:585.50ms +grad accum step:4471/14336 +step:17884/57344 train_time:10471694ms step_avg:585.53ms +step:17885/57344 train_time:10471711ms step_avg:585.50ms +step:17886/57344 train_time:10471963ms step_avg:585.48ms +step:17887/57344 train_time:10472522ms step_avg:585.48ms +grad accum step:4472/14336 +step:17888/57344 train_time:10473821ms step_avg:585.52ms +step:17889/57344 train_time:10473837ms step_avg:585.49ms +step:17890/57344 train_time:10474086ms step_avg:585.47ms +step:17891/57344 train_time:10474640ms step_avg:585.47ms +grad accum step:4473/14336 +step:17892/57344 train_time:10475956ms step_avg:585.51ms +step:17893/57344 train_time:10475973ms step_avg:585.48ms +step:17894/57344 train_time:10476220ms step_avg:585.46ms +step:17895/57344 train_time:10476761ms step_avg:585.46ms +grad accum step:4474/14336 +step:17896/57344 train_time:10478034ms step_avg:585.50ms +step:17897/57344 train_time:10478051ms step_avg:585.46ms +step:17898/57344 train_time:10478295ms step_avg:585.45ms +step:17899/57344 train_time:10478832ms step_avg:585.44ms +grad accum step:4475/14336 +step:17900/57344 train_time:10480106ms step_avg:585.48ms +step:17901/57344 train_time:10480123ms step_avg:585.45ms +step:17902/57344 train_time:10480369ms step_avg:585.43ms +step:17903/57344 train_time:10480912ms step_avg:585.43ms +grad accum step:4476/14336 +step:17904/57344 train_time:10482192ms step_avg:585.47ms +step:17905/57344 train_time:10482209ms step_avg:585.43ms +step:17906/57344 train_time:10482454ms step_avg:585.42ms +step:17907/57344 train_time:10483002ms step_avg:585.41ms +grad accum step:4477/14336 +step:17908/57344 train_time:10484277ms step_avg:585.45ms +step:17909/57344 train_time:10484294ms step_avg:585.42ms +step:17910/57344 train_time:10484540ms step_avg:585.40ms +step:17911/57344 train_time:10485090ms step_avg:585.40ms +grad accum step:4478/14336 +step:17912/57344 train_time:10486401ms step_avg:585.44ms +step:17913/57344 train_time:10486418ms step_avg:585.41ms +step:17914/57344 train_time:10486661ms step_avg:585.39ms +step:17915/57344 train_time:10487207ms step_avg:585.39ms +grad accum step:4479/14336 +step:17916/57344 train_time:10488503ms step_avg:585.43ms +step:17917/57344 train_time:10488520ms step_avg:585.39ms +step:17918/57344 train_time:10488767ms step_avg:585.38ms +step:17919/57344 train_time:10489315ms step_avg:585.37ms +grad accum step:4480/14336 +step:17920/57344 train_time:10490606ms step_avg:585.41ms +step:17920/57344 val_loss:6.934070 train_time:10490607ms step_avg:585.41ms +step:17921/57344 train_time:10490618ms step_avg:585.38ms +step:17922/57344 train_time:10490841ms step_avg:585.36ms +step:17923/57344 train_time:10491653ms step_avg:585.37ms +grad accum step:4481/14336 +step:17924/57344 train_time:10492742ms step_avg:585.40ms +step:17925/57344 train_time:10492753ms step_avg:585.37ms +step:17926/57344 train_time:10492978ms step_avg:585.35ms +step:17927/57344 train_time:10493513ms step_avg:585.35ms +grad accum step:4482/14336 +step:17928/57344 train_time:10494793ms step_avg:585.39ms +step:17929/57344 train_time:10494810ms step_avg:585.35ms +step:17930/57344 train_time:10495057ms step_avg:585.34ms +step:17931/57344 train_time:10495603ms step_avg:585.33ms +grad accum step:4483/14336 +step:17932/57344 train_time:10496899ms step_avg:585.37ms +step:17933/57344 train_time:10496916ms step_avg:585.34ms +step:17934/57344 train_time:10497167ms step_avg:585.32ms +step:17935/57344 train_time:10497720ms step_avg:585.32ms +grad accum step:4484/14336 +step:17936/57344 train_time:10499009ms step_avg:585.36ms +step:17937/57344 train_time:10499026ms step_avg:585.33ms +step:17938/57344 train_time:10499277ms step_avg:585.31ms +step:17939/57344 train_time:10499824ms step_avg:585.31ms +grad accum step:4485/14336 +step:17940/57344 train_time:10501129ms step_avg:585.35ms +step:17941/57344 train_time:10501146ms step_avg:585.32ms +step:17942/57344 train_time:10501395ms step_avg:585.30ms +step:17943/57344 train_time:10501934ms step_avg:585.29ms +grad accum step:4486/14336 +step:17944/57344 train_time:10503210ms step_avg:585.33ms +step:17945/57344 train_time:10503228ms step_avg:585.30ms +step:17946/57344 train_time:10503471ms step_avg:585.28ms +step:17947/57344 train_time:10504013ms step_avg:585.28ms +grad accum step:4487/14336 +step:17948/57344 train_time:10505301ms step_avg:585.32ms +step:17949/57344 train_time:10505317ms step_avg:585.29ms +step:17950/57344 train_time:10505564ms step_avg:585.27ms +step:17951/57344 train_time:10506109ms step_avg:585.27ms +grad accum step:4488/14336 +step:17952/57344 train_time:10507389ms step_avg:585.30ms +step:17953/57344 train_time:10507406ms step_avg:585.27ms +step:17954/57344 train_time:10507654ms step_avg:585.25ms +step:17955/57344 train_time:10508197ms step_avg:585.25ms +grad accum step:4489/14336 +step:17956/57344 train_time:10509477ms step_avg:585.29ms +step:17957/57344 train_time:10509494ms step_avg:585.26ms +step:17958/57344 train_time:10509741ms step_avg:585.24ms +step:17959/57344 train_time:10510290ms step_avg:585.24ms +grad accum step:4490/14336 +step:17960/57344 train_time:10511603ms step_avg:585.28ms +step:17961/57344 train_time:10511620ms step_avg:585.25ms +step:17962/57344 train_time:10511864ms step_avg:585.23ms +step:17963/57344 train_time:10512407ms step_avg:585.23ms +grad accum step:4491/14336 +step:17964/57344 train_time:10513735ms step_avg:585.27ms +step:17965/57344 train_time:10513752ms step_avg:585.24ms +step:17966/57344 train_time:10513998ms step_avg:585.22ms +step:17967/57344 train_time:10514535ms step_avg:585.21ms +grad accum step:4492/14336 +step:17968/57344 train_time:10515832ms step_avg:585.25ms +step:17969/57344 train_time:10515849ms step_avg:585.22ms +step:17970/57344 train_time:10516096ms step_avg:585.20ms +step:17971/57344 train_time:10516636ms step_avg:585.20ms +grad accum step:4493/14336 +step:17972/57344 train_time:10517945ms step_avg:585.24ms +step:17973/57344 train_time:10517962ms step_avg:585.21ms +step:17974/57344 train_time:10518214ms step_avg:585.19ms +step:17975/57344 train_time:10518768ms step_avg:585.19ms +grad accum step:4494/14336 +step:17976/57344 train_time:10520058ms step_avg:585.23ms +step:17977/57344 train_time:10520075ms step_avg:585.20ms +step:17978/57344 train_time:10520322ms step_avg:585.18ms +step:17979/57344 train_time:10520864ms step_avg:585.18ms +grad accum step:4495/14336 +step:17980/57344 train_time:10522160ms step_avg:585.21ms +step:17981/57344 train_time:10522177ms step_avg:585.18ms +step:17982/57344 train_time:10522425ms step_avg:585.16ms +step:17983/57344 train_time:10522971ms step_avg:585.16ms +grad accum step:4496/14336 +step:17984/57344 train_time:10524264ms step_avg:585.20ms +step:17984/57344 val_loss:6.948156 train_time:10524265ms step_avg:585.20ms +step:17985/57344 train_time:10524277ms step_avg:585.17ms +step:17986/57344 train_time:10524504ms step_avg:585.15ms +step:17987/57344 train_time:10525051ms step_avg:585.15ms +grad accum step:4497/14336 +step:17988/57344 train_time:10526329ms step_avg:585.19ms +step:17989/57344 train_time:10526346ms step_avg:585.15ms +step:17990/57344 train_time:10526602ms step_avg:585.14ms +step:17991/57344 train_time:10527168ms step_avg:585.14ms +grad accum step:4498/14336 +step:17992/57344 train_time:10528477ms step_avg:585.18ms +step:17993/57344 train_time:10528495ms step_avg:585.14ms +step:17994/57344 train_time:10528742ms step_avg:585.13ms +step:17995/57344 train_time:10529292ms step_avg:585.12ms +grad accum step:4499/14336 +step:17996/57344 train_time:10530612ms step_avg:585.16ms +step:17997/57344 train_time:10530628ms step_avg:585.13ms +step:17998/57344 train_time:10530871ms step_avg:585.11ms +step:17999/57344 train_time:10531416ms step_avg:585.11ms +grad accum step:4500/14336 +step:18000/57344 train_time:10532722ms step_avg:585.15ms +step:18001/57344 train_time:10532739ms step_avg:585.12ms +step:18002/57344 train_time:10532986ms step_avg:585.10ms +step:18003/57344 train_time:10533531ms step_avg:585.10ms +grad accum step:4501/14336 +step:18004/57344 train_time:10534838ms step_avg:585.14ms +step:18005/57344 train_time:10534855ms step_avg:585.11ms +step:18006/57344 train_time:10535105ms step_avg:585.09ms +step:18007/57344 train_time:10535660ms step_avg:585.09ms +grad accum step:4502/14336 +step:18008/57344 train_time:10536940ms step_avg:585.13ms +step:18009/57344 train_time:10536957ms step_avg:585.09ms +step:18010/57344 train_time:10537204ms step_avg:585.08ms +step:18011/57344 train_time:10537754ms step_avg:585.07ms +grad accum step:4503/14336 +step:18012/57344 train_time:10539068ms step_avg:585.11ms +step:18013/57344 train_time:10539085ms step_avg:585.08ms +step:18014/57344 train_time:10539334ms step_avg:585.06ms +step:18015/57344 train_time:10539878ms step_avg:585.06ms +grad accum step:4504/14336 +step:18016/57344 train_time:10541159ms step_avg:585.10ms +step:18017/57344 train_time:10541176ms step_avg:585.07ms +step:18018/57344 train_time:10541428ms step_avg:585.05ms +step:18019/57344 train_time:10541983ms step_avg:585.05ms +grad accum step:4505/14336 +step:18020/57344 train_time:10543267ms step_avg:585.09ms +step:18021/57344 train_time:10543284ms step_avg:585.06ms +step:18022/57344 train_time:10543529ms step_avg:585.04ms +step:18023/57344 train_time:10544072ms step_avg:585.03ms +grad accum step:4506/14336 +step:18024/57344 train_time:10545396ms step_avg:585.08ms +step:18025/57344 train_time:10545413ms step_avg:585.04ms +step:18026/57344 train_time:10545658ms step_avg:585.02ms +step:18027/57344 train_time:10546193ms step_avg:585.02ms +grad accum step:4507/14336 +step:18028/57344 train_time:10547466ms step_avg:585.06ms +step:18029/57344 train_time:10547484ms step_avg:585.03ms +step:18030/57344 train_time:10547727ms step_avg:585.01ms +step:18031/57344 train_time:10548261ms step_avg:585.01ms +grad accum step:4508/14336 +step:18032/57344 train_time:10549580ms step_avg:585.05ms +step:18033/57344 train_time:10549597ms step_avg:585.02ms +step:18034/57344 train_time:10549844ms step_avg:585.00ms +step:18035/57344 train_time:10550390ms step_avg:585.00ms +grad accum step:4509/14336 +step:18036/57344 train_time:10551693ms step_avg:585.04ms +step:18037/57344 train_time:10551710ms step_avg:585.00ms +step:18038/57344 train_time:10551956ms step_avg:584.98ms +step:18039/57344 train_time:10552504ms step_avg:584.98ms +grad accum step:4510/14336 +step:18040/57344 train_time:10553806ms step_avg:585.02ms +step:18041/57344 train_time:10553823ms step_avg:584.99ms +step:18042/57344 train_time:10554068ms step_avg:584.97ms +step:18043/57344 train_time:10554609ms step_avg:584.97ms +grad accum step:4511/14336 +step:18044/57344 train_time:10555885ms step_avg:585.01ms +step:18045/57344 train_time:10555902ms step_avg:584.98ms +step:18046/57344 train_time:10556154ms step_avg:584.96ms +step:18047/57344 train_time:10556709ms step_avg:584.96ms +grad accum step:4512/14336 +step:18048/57344 train_time:10558009ms step_avg:585.00ms +step:18048/57344 val_loss:6.952826 train_time:10558010ms step_avg:585.00ms +step:18049/57344 train_time:10558021ms step_avg:584.96ms +step:18050/57344 train_time:10558243ms step_avg:584.94ms +step:18051/57344 train_time:10558781ms step_avg:584.94ms +grad accum step:4513/14336 +step:18052/57344 train_time:10560083ms step_avg:584.98ms +step:18053/57344 train_time:10560101ms step_avg:584.95ms +step:18054/57344 train_time:10560344ms step_avg:584.93ms +step:18055/57344 train_time:10560889ms step_avg:584.93ms +grad accum step:4514/14336 +step:18056/57344 train_time:10562175ms step_avg:584.97ms +step:18057/57344 train_time:10562193ms step_avg:584.94ms +step:18058/57344 train_time:10562438ms step_avg:584.92ms +step:18059/57344 train_time:10563000ms step_avg:584.92ms +grad accum step:4515/14336 +step:18060/57344 train_time:10564337ms step_avg:584.96ms +step:18061/57344 train_time:10564354ms step_avg:584.93ms +step:18062/57344 train_time:10564603ms step_avg:584.91ms +step:18063/57344 train_time:10565154ms step_avg:584.91ms +grad accum step:4516/14336 +step:18064/57344 train_time:10566437ms step_avg:584.94ms +step:18065/57344 train_time:10566454ms step_avg:584.91ms +step:18066/57344 train_time:10566699ms step_avg:584.89ms +step:18067/57344 train_time:10567243ms step_avg:584.89ms +grad accum step:4517/14336 +step:18068/57344 train_time:10568524ms step_avg:584.93ms +step:18069/57344 train_time:10568541ms step_avg:584.90ms +step:18070/57344 train_time:10568785ms step_avg:584.88ms +step:18071/57344 train_time:10569328ms step_avg:584.88ms +grad accum step:4518/14336 +step:18072/57344 train_time:10570613ms step_avg:584.92ms +step:18073/57344 train_time:10570630ms step_avg:584.89ms +step:18074/57344 train_time:10570872ms step_avg:584.87ms +step:18075/57344 train_time:10571408ms step_avg:584.86ms +grad accum step:4519/14336 +step:18076/57344 train_time:10572694ms step_avg:584.90ms +step:18077/57344 train_time:10572711ms step_avg:584.87ms +step:18078/57344 train_time:10572955ms step_avg:584.85ms +step:18079/57344 train_time:10573500ms step_avg:584.85ms +grad accum step:4520/14336 +step:18080/57344 train_time:10574795ms step_avg:584.89ms +step:18081/57344 train_time:10574812ms step_avg:584.86ms +step:18082/57344 train_time:10575059ms step_avg:584.84ms +step:18083/57344 train_time:10575610ms step_avg:584.84ms +grad accum step:4521/14336 +step:18084/57344 train_time:10576944ms step_avg:584.88ms +step:18085/57344 train_time:10576961ms step_avg:584.85ms +step:18086/57344 train_time:10577206ms step_avg:584.83ms +step:18087/57344 train_time:10577752ms step_avg:584.83ms +grad accum step:4522/14336 +step:18088/57344 train_time:10579037ms step_avg:584.86ms +step:18089/57344 train_time:10579054ms step_avg:584.83ms +step:18090/57344 train_time:10579302ms step_avg:584.81ms +step:18091/57344 train_time:10579844ms step_avg:584.81ms +grad accum step:4523/14336 +step:18092/57344 train_time:10581131ms step_avg:584.85ms +step:18093/57344 train_time:10581148ms step_avg:584.82ms +step:18094/57344 train_time:10581392ms step_avg:584.80ms +step:18095/57344 train_time:10581927ms step_avg:584.80ms +grad accum step:4524/14336 +step:18096/57344 train_time:10583213ms step_avg:584.84ms +step:18097/57344 train_time:10583230ms step_avg:584.81ms +step:18098/57344 train_time:10583479ms step_avg:584.79ms +step:18099/57344 train_time:10584028ms step_avg:584.79ms +grad accum step:4525/14336 +step:18100/57344 train_time:10585317ms step_avg:584.82ms +step:18101/57344 train_time:10585334ms step_avg:584.79ms +step:18102/57344 train_time:10585590ms step_avg:584.77ms +step:18103/57344 train_time:10586154ms step_avg:584.77ms +grad accum step:4526/14336 +step:18104/57344 train_time:10587454ms step_avg:584.81ms +step:18105/57344 train_time:10587471ms step_avg:584.78ms +step:18106/57344 train_time:10587720ms step_avg:584.76ms +step:18107/57344 train_time:10588267ms step_avg:584.76ms +grad accum step:4527/14336 +step:18108/57344 train_time:10589541ms step_avg:584.80ms +step:18109/57344 train_time:10589558ms step_avg:584.77ms +step:18110/57344 train_time:10589810ms step_avg:584.75ms +step:18111/57344 train_time:10590368ms step_avg:584.75ms +grad accum step:4528/14336 +step:18112/57344 train_time:10591668ms step_avg:584.79ms +step:18112/57344 val_loss:6.956297 train_time:10591668ms step_avg:584.79ms +step:18113/57344 train_time:10591680ms step_avg:584.76ms +step:18114/57344 train_time:10591904ms step_avg:584.74ms +step:18115/57344 train_time:10592447ms step_avg:584.73ms +grad accum step:4529/14336 +step:18116/57344 train_time:10593768ms step_avg:584.77ms +step:18117/57344 train_time:10593785ms step_avg:584.74ms +step:18118/57344 train_time:10594033ms step_avg:584.72ms +step:18119/57344 train_time:10594576ms step_avg:584.72ms +grad accum step:4530/14336 +step:18120/57344 train_time:10595874ms step_avg:584.76ms +step:18121/57344 train_time:10595891ms step_avg:584.73ms +step:18122/57344 train_time:10596135ms step_avg:584.71ms +step:18123/57344 train_time:10596680ms step_avg:584.71ms +grad accum step:4531/14336 +step:18124/57344 train_time:10598012ms step_avg:584.75ms +step:18125/57344 train_time:10598028ms step_avg:584.72ms +step:18126/57344 train_time:10598290ms step_avg:584.70ms +step:18127/57344 train_time:10598870ms step_avg:584.70ms +grad accum step:4532/14336 +step:18128/57344 train_time:10600170ms step_avg:584.74ms +step:18129/57344 train_time:10600187ms step_avg:584.71ms +step:18130/57344 train_time:10600430ms step_avg:584.69ms +step:18131/57344 train_time:10600971ms step_avg:584.69ms +grad accum step:4533/14336 +step:18132/57344 train_time:10602253ms step_avg:584.73ms +step:18133/57344 train_time:10602270ms step_avg:584.69ms +step:18134/57344 train_time:10602515ms step_avg:584.68ms +step:18135/57344 train_time:10603055ms step_avg:584.67ms +grad accum step:4534/14336 +step:18136/57344 train_time:10604331ms step_avg:584.71ms +step:18137/57344 train_time:10604348ms step_avg:584.68ms +step:18138/57344 train_time:10604592ms step_avg:584.66ms +step:18139/57344 train_time:10605132ms step_avg:584.66ms +grad accum step:4535/14336 +step:18140/57344 train_time:10606416ms step_avg:584.70ms +step:18141/57344 train_time:10606433ms step_avg:584.67ms +step:18142/57344 train_time:10606681ms step_avg:584.65ms +step:18143/57344 train_time:10607225ms step_avg:584.65ms +grad accum step:4536/14336 +step:18144/57344 train_time:10608511ms step_avg:584.68ms +step:18145/57344 train_time:10608528ms step_avg:584.65ms +step:18146/57344 train_time:10608775ms step_avg:584.63ms +step:18147/57344 train_time:10609322ms step_avg:584.63ms +grad accum step:4537/14336 +step:18148/57344 train_time:10610604ms step_avg:584.67ms +step:18149/57344 train_time:10610621ms step_avg:584.64ms +step:18150/57344 train_time:10610866ms step_avg:584.62ms +step:18151/57344 train_time:10611413ms step_avg:584.62ms +grad accum step:4538/14336 +step:18152/57344 train_time:10614712ms step_avg:584.77ms +step:18153/57344 train_time:10617494ms step_avg:584.89ms +step:18154/57344 train_time:10617648ms step_avg:584.87ms +step:18155/57344 train_time:10618196ms step_avg:584.86ms +grad accum step:4539/14336 +step:18156/57344 train_time:10619470ms step_avg:584.90ms +step:18157/57344 train_time:10619487ms step_avg:584.87ms +step:18158/57344 train_time:10619729ms step_avg:584.85ms +step:18159/57344 train_time:10620273ms step_avg:584.85ms +grad accum step:4540/14336 +step:18160/57344 train_time:10621555ms step_avg:584.89ms +step:18161/57344 train_time:10621572ms step_avg:584.86ms +step:18162/57344 train_time:10621814ms step_avg:584.84ms +step:18163/57344 train_time:10684074ms step_avg:588.23ms +grad accum step:4541/14336 +step:18164/57344 train_time:10710077ms step_avg:589.63ms +step:18165/57344 train_time:10710089ms step_avg:589.60ms +step:18166/57344 train_time:10710374ms step_avg:589.58ms +step:18167/57344 train_time:10710924ms step_avg:589.58ms +grad accum step:4542/14336 +step:18168/57344 train_time:10712197ms step_avg:589.62ms +step:18169/57344 train_time:10712214ms step_avg:589.59ms +step:18170/57344 train_time:10712449ms step_avg:589.57ms +step:18171/57344 train_time:10712978ms step_avg:589.56ms +grad accum step:4543/14336 +step:18172/57344 train_time:10714236ms step_avg:589.60ms +step:18173/57344 train_time:10714252ms step_avg:589.57ms +step:18174/57344 train_time:10714496ms step_avg:589.55ms +step:18175/57344 train_time:10715028ms step_avg:589.55ms +grad accum step:4544/14336 +step:18176/57344 train_time:10716292ms step_avg:589.58ms +step:18176/57344 val_loss:6.964755 train_time:10716293ms step_avg:589.58ms +step:18177/57344 train_time:10716305ms step_avg:589.55ms +step:18178/57344 train_time:10716530ms step_avg:589.53ms +step:18179/57344 train_time:10717071ms step_avg:589.53ms +grad accum step:4545/14336 +step:18180/57344 train_time:10718341ms step_avg:589.57ms +step:18181/57344 train_time:10718358ms step_avg:589.54ms +step:18182/57344 train_time:10718607ms step_avg:589.52ms +step:18183/57344 train_time:10719149ms step_avg:589.51ms +grad accum step:4546/14336 +step:18184/57344 train_time:10720430ms step_avg:589.55ms +step:18185/57344 train_time:10720447ms step_avg:589.52ms +step:18186/57344 train_time:10720701ms step_avg:589.50ms +step:18187/57344 train_time:10721259ms step_avg:589.50ms +grad accum step:4547/14336 +step:18188/57344 train_time:10722549ms step_avg:589.54ms +step:18189/57344 train_time:10722565ms step_avg:589.51ms +step:18190/57344 train_time:10722810ms step_avg:589.49ms +step:18191/57344 train_time:10723358ms step_avg:589.49ms +grad accum step:4548/14336 +step:18192/57344 train_time:10724665ms step_avg:589.53ms +step:18193/57344 train_time:10724682ms step_avg:589.49ms +step:18194/57344 train_time:10724923ms step_avg:589.48ms +step:18195/57344 train_time:10725459ms step_avg:589.47ms +grad accum step:4549/14336 +step:18196/57344 train_time:10726731ms step_avg:589.51ms +step:18197/57344 train_time:10726748ms step_avg:589.48ms +step:18198/57344 train_time:10727004ms step_avg:589.46ms +step:18199/57344 train_time:10727565ms step_avg:589.46ms +grad accum step:4550/14336 +step:18200/57344 train_time:10728855ms step_avg:589.50ms +step:18201/57344 train_time:10728872ms step_avg:589.47ms +step:18202/57344 train_time:10729114ms step_avg:589.45ms +step:18203/57344 train_time:10729646ms step_avg:589.44ms +grad accum step:4551/14336 +step:18204/57344 train_time:10730920ms step_avg:589.48ms +step:18205/57344 train_time:10730937ms step_avg:589.45ms +step:18206/57344 train_time:10731183ms step_avg:589.43ms +step:18207/57344 train_time:10731728ms step_avg:589.43ms +grad accum step:4552/14336 +step:18208/57344 train_time:10733051ms step_avg:589.47ms +step:18209/57344 train_time:10733068ms step_avg:589.44ms +step:18210/57344 train_time:10733313ms step_avg:589.42ms +step:18211/57344 train_time:10733855ms step_avg:589.42ms +grad accum step:4553/14336 +step:18212/57344 train_time:10735135ms step_avg:589.45ms +step:18213/57344 train_time:10735152ms step_avg:589.42ms +step:18214/57344 train_time:10735397ms step_avg:589.40ms +step:18215/57344 train_time:10735939ms step_avg:589.40ms +grad accum step:4554/14336 +step:18216/57344 train_time:10737218ms step_avg:589.44ms +step:18217/57344 train_time:10737236ms step_avg:589.41ms +step:18218/57344 train_time:10737482ms step_avg:589.39ms +step:18219/57344 train_time:10738027ms step_avg:589.39ms +grad accum step:4555/14336 +step:18220/57344 train_time:10739307ms step_avg:589.42ms +step:18221/57344 train_time:10739324ms step_avg:589.39ms +step:18222/57344 train_time:10739571ms step_avg:589.37ms +step:18223/57344 train_time:10740116ms step_avg:589.37ms +grad accum step:4556/14336 +step:18224/57344 train_time:10741423ms step_avg:589.41ms +step:18225/57344 train_time:10741440ms step_avg:589.38ms +step:18226/57344 train_time:10741685ms step_avg:589.36ms +step:18227/57344 train_time:10742223ms step_avg:589.36ms +grad accum step:4557/14336 +step:18228/57344 train_time:10743497ms step_avg:589.40ms +step:18229/57344 train_time:10743514ms step_avg:589.36ms +step:18230/57344 train_time:10743758ms step_avg:589.34ms +step:18231/57344 train_time:10744307ms step_avg:589.34ms +grad accum step:4558/14336 +step:18232/57344 train_time:10745578ms step_avg:589.38ms +step:18233/57344 train_time:10745595ms step_avg:589.35ms +step:18234/57344 train_time:10745847ms step_avg:589.33ms +step:18235/57344 train_time:10746397ms step_avg:589.33ms +grad accum step:4559/14336 +step:18236/57344 train_time:10747690ms step_avg:589.37ms +step:18237/57344 train_time:10747707ms step_avg:589.34ms +step:18238/57344 train_time:10747957ms step_avg:589.32ms +step:18239/57344 train_time:10748507ms step_avg:589.31ms +grad accum step:4560/14336 +step:18240/57344 train_time:10749797ms step_avg:589.35ms +step:18240/57344 val_loss:6.981714 train_time:10749798ms step_avg:589.35ms +step:18241/57344 train_time:10749810ms step_avg:589.32ms +step:18242/57344 train_time:10750036ms step_avg:589.30ms +step:18243/57344 train_time:10750585ms step_avg:589.30ms +grad accum step:4561/14336 +step:18244/57344 train_time:10751862ms step_avg:589.34ms +step:18245/57344 train_time:10751879ms step_avg:589.31ms +step:18246/57344 train_time:10752125ms step_avg:589.29ms +step:18247/57344 train_time:10752665ms step_avg:589.28ms +grad accum step:4562/14336 +step:18248/57344 train_time:10753940ms step_avg:589.32ms +step:18249/57344 train_time:10753957ms step_avg:589.29ms +step:18250/57344 train_time:10754202ms step_avg:589.27ms +step:18251/57344 train_time:10754747ms step_avg:589.27ms +grad accum step:4563/14336 +step:18252/57344 train_time:10756027ms step_avg:589.31ms +step:18253/57344 train_time:10756044ms step_avg:589.28ms +step:18254/57344 train_time:10756290ms step_avg:589.26ms +step:18255/57344 train_time:10756830ms step_avg:589.25ms +grad accum step:4564/14336 +step:18256/57344 train_time:10758117ms step_avg:589.29ms +step:18257/57344 train_time:10758134ms step_avg:589.26ms +step:18258/57344 train_time:10758380ms step_avg:589.24ms +step:18259/57344 train_time:10758913ms step_avg:589.24ms +grad accum step:4565/14336 +step:18260/57344 train_time:10760214ms step_avg:589.28ms +step:18261/57344 train_time:10760231ms step_avg:589.25ms +step:18262/57344 train_time:10760484ms step_avg:589.23ms +step:18263/57344 train_time:10761043ms step_avg:589.23ms +grad accum step:4566/14336 +step:18264/57344 train_time:10762320ms step_avg:589.26ms +step:18265/57344 train_time:10762337ms step_avg:589.23ms +step:18266/57344 train_time:10762588ms step_avg:589.21ms +step:18267/57344 train_time:10763141ms step_avg:589.21ms +grad accum step:4567/14336 +step:18268/57344 train_time:10764430ms step_avg:589.25ms +step:18269/57344 train_time:10764447ms step_avg:589.22ms +step:18270/57344 train_time:10764694ms step_avg:589.20ms +step:18271/57344 train_time:10765234ms step_avg:589.20ms +grad accum step:4568/14336 +step:18272/57344 train_time:10766513ms step_avg:589.24ms +step:18273/57344 train_time:10766529ms step_avg:589.20ms +step:18274/57344 train_time:10766773ms step_avg:589.19ms +step:18275/57344 train_time:10767318ms step_avg:589.18ms +grad accum step:4569/14336 +step:18276/57344 train_time:10768624ms step_avg:589.22ms +step:18277/57344 train_time:10768641ms step_avg:589.19ms +step:18278/57344 train_time:10768886ms step_avg:589.17ms +step:18279/57344 train_time:10769429ms step_avg:589.17ms +grad accum step:4570/14336 +step:18280/57344 train_time:10770740ms step_avg:589.21ms +step:18281/57344 train_time:10770757ms step_avg:589.18ms +step:18282/57344 train_time:10771006ms step_avg:589.16ms +step:18283/57344 train_time:10771553ms step_avg:589.16ms +grad accum step:4571/14336 +step:18284/57344 train_time:10772827ms step_avg:589.19ms +step:18285/57344 train_time:10772845ms step_avg:589.16ms +step:18286/57344 train_time:10773089ms step_avg:589.14ms +step:18287/57344 train_time:10773635ms step_avg:589.14ms +grad accum step:4572/14336 +step:18288/57344 train_time:10774916ms step_avg:589.18ms +step:18289/57344 train_time:10774933ms step_avg:589.15ms +step:18290/57344 train_time:10775178ms step_avg:589.13ms +step:18291/57344 train_time:10775727ms step_avg:589.13ms +grad accum step:4573/14336 +step:18292/57344 train_time:10777035ms step_avg:589.17ms +step:18293/57344 train_time:10777052ms step_avg:589.14ms +step:18294/57344 train_time:10777298ms step_avg:589.12ms +step:18295/57344 train_time:10777831ms step_avg:589.11ms +grad accum step:4574/14336 +step:18296/57344 train_time:10779115ms step_avg:589.15ms +step:18297/57344 train_time:10779132ms step_avg:589.12ms +step:18298/57344 train_time:10779377ms step_avg:589.10ms +step:18299/57344 train_time:10779922ms step_avg:589.10ms +grad accum step:4575/14336 +step:18300/57344 train_time:10781198ms step_avg:589.14ms +step:18301/57344 train_time:10781215ms step_avg:589.11ms +step:18302/57344 train_time:10781459ms step_avg:589.09ms +step:18303/57344 train_time:10781996ms step_avg:589.08ms +grad accum step:4576/14336 +step:18304/57344 train_time:10783270ms step_avg:589.12ms +step:18304/57344 val_loss:6.982418 train_time:10783271ms step_avg:589.12ms +step:18305/57344 train_time:10783283ms step_avg:589.09ms +step:18306/57344 train_time:10783510ms step_avg:589.07ms +step:18307/57344 train_time:10784053ms step_avg:589.07ms +grad accum step:4577/14336 +step:18308/57344 train_time:10785352ms step_avg:589.11ms +step:18309/57344 train_time:10785369ms step_avg:589.07ms +step:18310/57344 train_time:10785616ms step_avg:589.06ms +step:18311/57344 train_time:10786153ms step_avg:589.05ms +grad accum step:4578/14336 +step:18312/57344 train_time:10787428ms step_avg:589.09ms +step:18313/57344 train_time:10787445ms step_avg:589.06ms +step:18314/57344 train_time:10787695ms step_avg:589.04ms +step:18315/57344 train_time:10788241ms step_avg:589.04ms +grad accum step:4579/14336 +step:18316/57344 train_time:10789553ms step_avg:589.08ms +step:18317/57344 train_time:10789570ms step_avg:589.05ms +step:18318/57344 train_time:10789818ms step_avg:589.03ms +step:18319/57344 train_time:10790360ms step_avg:589.03ms +grad accum step:4580/14336 +step:18320/57344 train_time:10791637ms step_avg:589.06ms +step:18321/57344 train_time:10791654ms step_avg:589.03ms +step:18322/57344 train_time:10791902ms step_avg:589.01ms +step:18323/57344 train_time:10792445ms step_avg:589.01ms +grad accum step:4581/14336 +step:18324/57344 train_time:10793737ms step_avg:589.05ms +step:18325/57344 train_time:10793754ms step_avg:589.02ms +step:18326/57344 train_time:10794008ms step_avg:589.00ms +step:18327/57344 train_time:10794573ms step_avg:589.00ms +grad accum step:4582/14336 +step:18328/57344 train_time:10795866ms step_avg:589.04ms +step:18329/57344 train_time:10795883ms step_avg:589.01ms +step:18330/57344 train_time:10796136ms step_avg:588.99ms +step:18331/57344 train_time:10796695ms step_avg:588.99ms +grad accum step:4583/14336 +step:18332/57344 train_time:10797984ms step_avg:589.02ms +step:18333/57344 train_time:10798001ms step_avg:588.99ms +step:18334/57344 train_time:10798252ms step_avg:588.97ms +step:18335/57344 train_time:10798805ms step_avg:588.97ms +grad accum step:4584/14336 +step:18336/57344 train_time:10800082ms step_avg:589.01ms +step:18337/57344 train_time:10800098ms step_avg:588.98ms +step:18338/57344 train_time:10800349ms step_avg:588.96ms +step:18339/57344 train_time:10800900ms step_avg:588.96ms +grad accum step:4585/14336 +step:18340/57344 train_time:10802214ms step_avg:589.00ms +step:18341/57344 train_time:10802231ms step_avg:588.97ms +step:18342/57344 train_time:10802479ms step_avg:588.95ms +step:18343/57344 train_time:10803026ms step_avg:588.95ms +grad accum step:4586/14336 +step:18344/57344 train_time:10804334ms step_avg:588.98ms +step:18345/57344 train_time:10804351ms step_avg:588.95ms +step:18346/57344 train_time:10804599ms step_avg:588.93ms +step:18347/57344 train_time:10805148ms step_avg:588.93ms +grad accum step:4587/14336 +step:18348/57344 train_time:10806439ms step_avg:588.97ms +step:18349/57344 train_time:10806456ms step_avg:588.94ms +step:18350/57344 train_time:10806710ms step_avg:588.92ms +step:18351/57344 train_time:10807282ms step_avg:588.92ms +grad accum step:4588/14336 +step:18352/57344 train_time:10808682ms step_avg:588.96ms +step:18353/57344 train_time:10808699ms step_avg:588.93ms +step:18354/57344 train_time:10808943ms step_avg:588.91ms +step:18355/57344 train_time:10809491ms step_avg:588.91ms +grad accum step:4589/14336 +step:18356/57344 train_time:10810798ms step_avg:588.95ms +step:18357/57344 train_time:10810815ms step_avg:588.92ms +step:18358/57344 train_time:10811062ms step_avg:588.90ms +step:18359/57344 train_time:10811612ms step_avg:588.90ms +grad accum step:4590/14336 +step:18360/57344 train_time:10812908ms step_avg:588.94ms +step:18361/57344 train_time:10812925ms step_avg:588.91ms +step:18362/57344 train_time:10813173ms step_avg:588.89ms +step:18363/57344 train_time:10813717ms step_avg:588.89ms +grad accum step:4591/14336 +step:18364/57344 train_time:10815027ms step_avg:588.93ms +step:18365/57344 train_time:10815044ms step_avg:588.89ms +step:18366/57344 train_time:10815289ms step_avg:588.88ms +step:18367/57344 train_time:10815827ms step_avg:588.87ms +grad accum step:4592/14336 +step:18368/57344 train_time:10817123ms step_avg:588.91ms +step:18368/57344 val_loss:7.002724 train_time:10817123ms step_avg:588.91ms +step:18369/57344 train_time:10817135ms step_avg:588.88ms +step:18370/57344 train_time:10817359ms step_avg:588.86ms +step:18371/57344 train_time:10817914ms step_avg:588.86ms +grad accum step:4593/14336 +step:18372/57344 train_time:10819225ms step_avg:588.90ms +step:18373/57344 train_time:10819243ms step_avg:588.87ms +step:18374/57344 train_time:10819486ms step_avg:588.85ms +step:18375/57344 train_time:10820021ms step_avg:588.84ms +grad accum step:4594/14336 +step:18376/57344 train_time:10821289ms step_avg:588.88ms +step:18377/57344 train_time:10821306ms step_avg:588.85ms +step:18378/57344 train_time:10821556ms step_avg:588.83ms +step:18379/57344 train_time:10822108ms step_avg:588.83ms +grad accum step:4595/14336 +step:18380/57344 train_time:10823394ms step_avg:588.87ms +step:18381/57344 train_time:10823411ms step_avg:588.84ms +step:18382/57344 train_time:10823663ms step_avg:588.82ms +step:18383/57344 train_time:10824220ms step_avg:588.82ms +grad accum step:4596/14336 +step:18384/57344 train_time:10825547ms step_avg:588.86ms +step:18385/57344 train_time:10825564ms step_avg:588.83ms +step:18386/57344 train_time:10825811ms step_avg:588.81ms +step:18387/57344 train_time:10826351ms step_avg:588.80ms +grad accum step:4597/14336 +step:18388/57344 train_time:10827659ms step_avg:588.84ms +step:18389/57344 train_time:10827676ms step_avg:588.81ms +step:18390/57344 train_time:10827921ms step_avg:588.79ms +step:18391/57344 train_time:10828460ms step_avg:588.79ms +grad accum step:4598/14336 +step:18392/57344 train_time:10829751ms step_avg:588.83ms +step:18393/57344 train_time:10829769ms step_avg:588.80ms +step:18394/57344 train_time:10830013ms step_avg:588.78ms +step:18395/57344 train_time:10830557ms step_avg:588.78ms +grad accum step:4599/14336 +step:18396/57344 train_time:10831842ms step_avg:588.82ms +step:18397/57344 train_time:10831859ms step_avg:588.78ms +step:18398/57344 train_time:10832104ms step_avg:588.77ms +step:18399/57344 train_time:10832650ms step_avg:588.76ms +grad accum step:4600/14336 +step:18400/57344 train_time:10833928ms step_avg:588.80ms +step:18401/57344 train_time:10833945ms step_avg:588.77ms +step:18402/57344 train_time:10834191ms step_avg:588.75ms +step:18403/57344 train_time:10834738ms step_avg:588.75ms +grad accum step:4601/14336 +step:18404/57344 train_time:10836036ms step_avg:588.79ms +step:18405/57344 train_time:10836052ms step_avg:588.76ms +step:18406/57344 train_time:10836302ms step_avg:588.74ms +step:18407/57344 train_time:10836847ms step_avg:588.74ms +grad accum step:4602/14336 +step:18408/57344 train_time:10845424ms step_avg:589.17ms +step:18409/57344 train_time:10845436ms step_avg:589.14ms +step:18410/57344 train_time:10845715ms step_avg:589.12ms +step:18411/57344 train_time:10846248ms step_avg:589.12ms +grad accum step:4603/14336 +step:18412/57344 train_time:10847553ms step_avg:589.16ms +step:18413/57344 train_time:10847569ms step_avg:589.13ms +step:18414/57344 train_time:10847827ms step_avg:589.11ms +step:18415/57344 train_time:10848399ms step_avg:589.11ms +grad accum step:4604/14336 +step:18416/57344 train_time:10849698ms step_avg:589.15ms +step:18417/57344 train_time:10849716ms step_avg:589.11ms +step:18418/57344 train_time:10849959ms step_avg:589.10ms +step:18419/57344 train_time:10850492ms step_avg:589.09ms +grad accum step:4605/14336 +step:18420/57344 train_time:10851776ms step_avg:589.13ms +step:18421/57344 train_time:10851793ms step_avg:589.10ms +step:18422/57344 train_time:10852045ms step_avg:589.08ms +step:18423/57344 train_time:10852600ms step_avg:589.08ms +grad accum step:4606/14336 +step:18424/57344 train_time:10853906ms step_avg:589.12ms +step:18425/57344 train_time:10853923ms step_avg:589.09ms +step:18426/57344 train_time:10854174ms step_avg:589.07ms +step:18427/57344 train_time:10854728ms step_avg:589.07ms +grad accum step:4607/14336 +step:18428/57344 train_time:10856017ms step_avg:589.10ms +step:18429/57344 train_time:10856034ms step_avg:589.07ms +step:18430/57344 train_time:10856283ms step_avg:589.05ms +step:18431/57344 train_time:10856828ms step_avg:589.05ms +grad accum step:4608/14336 +step:18432/57344 train_time:10858125ms step_avg:589.09ms +step:18432/57344 val_loss:7.004488 train_time:10858125ms step_avg:589.09ms +step:18433/57344 train_time:10858137ms step_avg:589.06ms +step:18434/57344 train_time:10862780ms step_avg:589.28ms +step:18435/57344 train_time:10863110ms step_avg:589.27ms +grad accum step:4609/14336 +step:18436/57344 train_time:10864387ms step_avg:589.30ms +step:18437/57344 train_time:10864403ms step_avg:589.27ms +step:18438/57344 train_time:10864650ms step_avg:589.25ms +step:18439/57344 train_time:10865200ms step_avg:589.25ms +grad accum step:4610/14336 +step:18440/57344 train_time:10866469ms step_avg:589.29ms +step:18441/57344 train_time:10866486ms step_avg:589.26ms +step:18442/57344 train_time:10866727ms step_avg:589.24ms +step:18443/57344 train_time:10867265ms step_avg:589.24ms +grad accum step:4611/14336 +step:18444/57344 train_time:10868539ms step_avg:589.27ms +step:18445/57344 train_time:10868556ms step_avg:589.24ms +step:18446/57344 train_time:10868803ms step_avg:589.22ms +step:18447/57344 train_time:10869346ms step_avg:589.22ms +grad accum step:4612/14336 +step:18448/57344 train_time:10870612ms step_avg:589.26ms +step:18449/57344 train_time:10870629ms step_avg:589.23ms +step:18450/57344 train_time:10870873ms step_avg:589.21ms +step:18451/57344 train_time:10871419ms step_avg:589.20ms +grad accum step:4613/14336 +step:18452/57344 train_time:10872717ms step_avg:589.24ms +step:18453/57344 train_time:10872734ms step_avg:589.21ms +step:18454/57344 train_time:10872979ms step_avg:589.19ms +step:18455/57344 train_time:10873509ms step_avg:589.19ms +grad accum step:4614/14336 +step:18456/57344 train_time:10874786ms step_avg:589.23ms +step:18457/57344 train_time:10874803ms step_avg:589.20ms +step:18458/57344 train_time:10875045ms step_avg:589.18ms +step:18459/57344 train_time:10875580ms step_avg:589.17ms +grad accum step:4615/14336 +step:18460/57344 train_time:10876869ms step_avg:589.21ms +step:18461/57344 train_time:10876886ms step_avg:589.18ms +step:18462/57344 train_time:10877132ms step_avg:589.16ms +step:18463/57344 train_time:10877676ms step_avg:589.16ms +grad accum step:4616/14336 +step:18464/57344 train_time:10878954ms step_avg:589.20ms +step:18465/57344 train_time:10878971ms step_avg:589.17ms +step:18466/57344 train_time:10879217ms step_avg:589.15ms +step:18467/57344 train_time:10879759ms step_avg:589.15ms +grad accum step:4617/14336 +step:18468/57344 train_time:10881043ms step_avg:589.18ms +step:18469/57344 train_time:10881060ms step_avg:589.15ms +step:18470/57344 train_time:10881306ms step_avg:589.13ms +step:18471/57344 train_time:10881844ms step_avg:589.13ms +grad accum step:4618/14336 +step:18472/57344 train_time:10883696ms step_avg:589.20ms +step:18473/57344 train_time:10883709ms step_avg:589.17ms +step:18474/57344 train_time:10883923ms step_avg:589.15ms +step:18475/57344 train_time:10884455ms step_avg:589.15ms +grad accum step:4619/14336 +step:18476/57344 train_time:10885731ms step_avg:589.18ms +step:18477/57344 train_time:10885748ms step_avg:589.15ms +step:18478/57344 train_time:10885997ms step_avg:589.13ms +step:18479/57344 train_time:10886548ms step_avg:589.13ms +grad accum step:4620/14336 +step:18480/57344 train_time:10887839ms step_avg:589.17ms +step:18481/57344 train_time:10887856ms step_avg:589.14ms +step:18482/57344 train_time:10888098ms step_avg:589.12ms +step:18483/57344 train_time:10888641ms step_avg:589.12ms +grad accum step:4621/14336 +step:18484/57344 train_time:10889916ms step_avg:589.15ms +step:18485/57344 train_time:10889933ms step_avg:589.12ms +step:18486/57344 train_time:10890178ms step_avg:589.10ms +step:18487/57344 train_time:10890718ms step_avg:589.10ms +grad accum step:4622/14336 +step:18488/57344 train_time:10891983ms step_avg:589.14ms +step:18489/57344 train_time:10892000ms step_avg:589.11ms +step:18490/57344 train_time:10892249ms step_avg:589.09ms +step:18491/57344 train_time:10892804ms step_avg:589.09ms +grad accum step:4623/14336 +step:18492/57344 train_time:10894090ms step_avg:589.12ms +step:18493/57344 train_time:10894106ms step_avg:589.09ms +step:18494/57344 train_time:10894352ms step_avg:589.07ms +step:18495/57344 train_time:10894895ms step_avg:589.07ms +grad accum step:4624/14336 +step:18496/57344 train_time:10896190ms step_avg:589.11ms +step:18496/57344 val_loss:7.008335 train_time:10896191ms step_avg:589.11ms +step:18497/57344 train_time:10896203ms step_avg:589.08ms +step:18498/57344 train_time:10896429ms step_avg:589.06ms +step:18499/57344 train_time:10896975ms step_avg:589.06ms +grad accum step:4625/14336 +step:18500/57344 train_time:10898275ms step_avg:589.10ms +step:18501/57344 train_time:10898292ms step_avg:589.07ms +step:18502/57344 train_time:10898536ms step_avg:589.05ms +step:18503/57344 train_time:10899076ms step_avg:589.04ms +grad accum step:4626/14336 +step:18504/57344 train_time:10900360ms step_avg:589.08ms +step:18505/57344 train_time:10900377ms step_avg:589.05ms +step:18506/57344 train_time:10900619ms step_avg:589.03ms +step:18507/57344 train_time:10901158ms step_avg:589.03ms +grad accum step:4627/14336 +step:18508/57344 train_time:10902473ms step_avg:589.07ms +step:18509/57344 train_time:10902490ms step_avg:589.04ms +step:18510/57344 train_time:10902740ms step_avg:589.02ms +step:18511/57344 train_time:10903281ms step_avg:589.02ms +grad accum step:4628/14336 +step:18512/57344 train_time:10904555ms step_avg:589.05ms +step:18513/57344 train_time:10904572ms step_avg:589.02ms +step:18514/57344 train_time:10904816ms step_avg:589.00ms +step:18515/57344 train_time:10905360ms step_avg:589.00ms +grad accum step:4629/14336 +step:18516/57344 train_time:10906662ms step_avg:589.04ms +step:18517/57344 train_time:10906679ms step_avg:589.01ms +step:18518/57344 train_time:10906922ms step_avg:588.99ms +step:18519/57344 train_time:10907466ms step_avg:588.99ms +grad accum step:4630/14336 +step:18520/57344 train_time:10908747ms step_avg:589.03ms +step:18521/57344 train_time:10908764ms step_avg:588.99ms +step:18522/57344 train_time:10909007ms step_avg:588.98ms +step:18523/57344 train_time:10909550ms step_avg:588.97ms +grad accum step:4631/14336 +step:18524/57344 train_time:10910868ms step_avg:589.01ms +step:18525/57344 train_time:10910885ms step_avg:588.98ms +step:18526/57344 train_time:10911134ms step_avg:588.96ms +step:18527/57344 train_time:10911678ms step_avg:588.96ms +grad accum step:4632/14336 +step:18528/57344 train_time:10912953ms step_avg:589.00ms +step:18529/57344 train_time:10912970ms step_avg:588.97ms +step:18530/57344 train_time:10913213ms step_avg:588.95ms +step:18531/57344 train_time:10913759ms step_avg:588.95ms +grad accum step:4633/14336 +step:18532/57344 train_time:10915075ms step_avg:588.99ms +step:18533/57344 train_time:10915092ms step_avg:588.95ms +step:18534/57344 train_time:10915339ms step_avg:588.94ms +step:18535/57344 train_time:10915885ms step_avg:588.93ms +grad accum step:4634/14336 +step:18536/57344 train_time:10917156ms step_avg:588.97ms +step:18537/57344 train_time:10917173ms step_avg:588.94ms +step:18538/57344 train_time:10917425ms step_avg:588.92ms +step:18539/57344 train_time:10917973ms step_avg:588.92ms +grad accum step:4635/14336 +step:18540/57344 train_time:10919260ms step_avg:588.96ms +step:18541/57344 train_time:10919277ms step_avg:588.93ms +step:18542/57344 train_time:10919520ms step_avg:588.91ms +step:18543/57344 train_time:10920063ms step_avg:588.90ms +grad accum step:4636/14336 +step:18544/57344 train_time:10921355ms step_avg:588.94ms +step:18545/57344 train_time:10921372ms step_avg:588.91ms +step:18546/57344 train_time:10921615ms step_avg:588.89ms +step:18547/57344 train_time:10922147ms step_avg:588.89ms +grad accum step:4637/14336 +step:18548/57344 train_time:10923468ms step_avg:588.93ms +step:18549/57344 train_time:10923485ms step_avg:588.90ms +step:18550/57344 train_time:10923736ms step_avg:588.88ms +step:18551/57344 train_time:10924287ms step_avg:588.88ms +grad accum step:4638/14336 +step:18552/57344 train_time:10925568ms step_avg:588.92ms +step:18553/57344 train_time:10925585ms step_avg:588.89ms +step:18554/57344 train_time:10925839ms step_avg:588.87ms +step:18555/57344 train_time:10926395ms step_avg:588.87ms +grad accum step:4639/14336 +step:18556/57344 train_time:10927674ms step_avg:588.90ms +step:18557/57344 train_time:10927690ms step_avg:588.87ms +step:18558/57344 train_time:10927941ms step_avg:588.85ms +step:18559/57344 train_time:10928488ms step_avg:588.85ms +grad accum step:4640/14336 +step:18560/57344 train_time:10929773ms step_avg:588.89ms +step:18560/57344 val_loss:7.001398 train_time:10929774ms step_avg:588.89ms +step:18561/57344 train_time:10929786ms step_avg:588.86ms +step:18562/57344 train_time:10930008ms step_avg:588.84ms +step:18563/57344 train_time:10930551ms step_avg:588.84ms +grad accum step:4641/14336 +step:18564/57344 train_time:10931831ms step_avg:588.87ms +step:18565/57344 train_time:10931848ms step_avg:588.84ms +step:18566/57344 train_time:10932093ms step_avg:588.82ms +step:18567/57344 train_time:10932637ms step_avg:588.82ms +grad accum step:4642/14336 +step:18568/57344 train_time:10933920ms step_avg:588.86ms +step:18569/57344 train_time:10933937ms step_avg:588.83ms +step:18570/57344 train_time:10934181ms step_avg:588.81ms +step:18571/57344 train_time:10934723ms step_avg:588.81ms +grad accum step:4643/14336 +step:18572/57344 train_time:10936042ms step_avg:588.85ms +step:18573/57344 train_time:10936059ms step_avg:588.81ms +step:18574/57344 train_time:10936302ms step_avg:588.80ms +step:18575/57344 train_time:10936844ms step_avg:588.79ms +grad accum step:4644/14336 +step:18576/57344 train_time:10938120ms step_avg:588.83ms +step:18577/57344 train_time:10938137ms step_avg:588.80ms +step:18578/57344 train_time:10938381ms step_avg:588.78ms +step:18579/57344 train_time:10938936ms step_avg:588.78ms +grad accum step:4645/14336 +step:18580/57344 train_time:10940266ms step_avg:588.82ms +step:18581/57344 train_time:10940283ms step_avg:588.79ms +step:18582/57344 train_time:10940528ms step_avg:588.77ms +step:18583/57344 train_time:10941075ms step_avg:588.77ms +grad accum step:4646/14336 +step:18584/57344 train_time:10942347ms step_avg:588.80ms +step:18585/57344 train_time:10942364ms step_avg:588.77ms +step:18586/57344 train_time:10942612ms step_avg:588.76ms +step:18587/57344 train_time:10943161ms step_avg:588.75ms +grad accum step:4647/14336 +step:18588/57344 train_time:10944452ms step_avg:588.79ms +step:18589/57344 train_time:10944468ms step_avg:588.76ms +step:18590/57344 train_time:10944720ms step_avg:588.74ms +step:18591/57344 train_time:10945270ms step_avg:588.74ms +grad accum step:4648/14336 +step:18592/57344 train_time:10946579ms step_avg:588.78ms +step:18593/57344 train_time:10946596ms step_avg:588.75ms +step:18594/57344 train_time:10946842ms step_avg:588.73ms +step:18595/57344 train_time:10947396ms step_avg:588.73ms +grad accum step:4649/14336 +step:18596/57344 train_time:10948718ms step_avg:588.77ms +step:18597/57344 train_time:10948735ms step_avg:588.74ms +step:18598/57344 train_time:10948981ms step_avg:588.72ms +step:18599/57344 train_time:10949527ms step_avg:588.72ms +grad accum step:4650/14336 +step:18600/57344 train_time:10950798ms step_avg:588.75ms +step:18601/57344 train_time:10951091ms step_avg:588.74ms +step:18602/57344 train_time:10951305ms step_avg:588.72ms +step:18603/57344 train_time:10951847ms step_avg:588.71ms +grad accum step:4651/14336 +step:18604/57344 train_time:10953114ms step_avg:588.75ms +step:18605/57344 train_time:10953131ms step_avg:588.72ms +step:18606/57344 train_time:10953375ms step_avg:588.70ms +step:18607/57344 train_time:10953917ms step_avg:588.70ms +grad accum step:4652/14336 +step:18608/57344 train_time:10955193ms step_avg:588.74ms +step:18609/57344 train_time:10955210ms step_avg:588.70ms +step:18610/57344 train_time:10955456ms step_avg:588.69ms +step:18611/57344 train_time:10956000ms step_avg:588.68ms +grad accum step:4653/14336 +step:18612/57344 train_time:10957276ms step_avg:588.72ms +step:18613/57344 train_time:10957294ms step_avg:588.69ms +step:18614/57344 train_time:10957542ms step_avg:588.67ms +step:18615/57344 train_time:10958093ms step_avg:588.67ms +grad accum step:4654/14336 +step:18616/57344 train_time:10959405ms step_avg:588.71ms +step:18617/57344 train_time:10959421ms step_avg:588.68ms +step:18618/57344 train_time:10959668ms step_avg:588.66ms +step:18619/57344 train_time:10960210ms step_avg:588.66ms +grad accum step:4655/14336 +step:18620/57344 train_time:10963250ms step_avg:588.79ms +step:18621/57344 train_time:10963262ms step_avg:588.76ms +step:18622/57344 train_time:10963534ms step_avg:588.74ms +step:18623/57344 train_time:10964105ms step_avg:588.74ms +grad accum step:4656/14336 +step:18624/57344 train_time:10965386ms step_avg:588.78ms +step:18624/57344 val_loss:6.994998 train_time:10965386ms step_avg:588.78ms +step:18625/57344 train_time:10965398ms step_avg:588.75ms +step:18626/57344 train_time:10965620ms step_avg:588.73ms +step:18627/57344 train_time:10966161ms step_avg:588.72ms +grad accum step:4657/14336 +step:18628/57344 train_time:10967464ms step_avg:588.76ms +step:18629/57344 train_time:10967481ms step_avg:588.73ms +step:18630/57344 train_time:10967724ms step_avg:588.71ms +step:18631/57344 train_time:10968268ms step_avg:588.71ms +grad accum step:4658/14336 +step:18632/57344 train_time:10969545ms step_avg:588.75ms +step:18633/57344 train_time:10969562ms step_avg:588.72ms +step:18634/57344 train_time:10969807ms step_avg:588.70ms +step:18635/57344 train_time:10970352ms step_avg:588.70ms +grad accum step:4659/14336 +step:18636/57344 train_time:10971631ms step_avg:588.73ms +step:18637/57344 train_time:10971648ms step_avg:588.70ms +step:18638/57344 train_time:10971893ms step_avg:588.68ms +step:18639/57344 train_time:10972431ms step_avg:588.68ms +grad accum step:4660/14336 +step:18640/57344 train_time:10973707ms step_avg:588.72ms +step:18641/57344 train_time:10973724ms step_avg:588.69ms +step:18642/57344 train_time:10973970ms step_avg:588.67ms +step:18643/57344 train_time:10974524ms step_avg:588.67ms +grad accum step:4661/14336 +step:18644/57344 train_time:10975831ms step_avg:588.71ms +step:18645/57344 train_time:10975848ms step_avg:588.68ms +step:18646/57344 train_time:10976097ms step_avg:588.66ms +step:18647/57344 train_time:10976645ms step_avg:588.65ms +grad accum step:4662/14336 +step:18648/57344 train_time:10977943ms step_avg:588.69ms +step:18649/57344 train_time:10977960ms step_avg:588.66ms +step:18650/57344 train_time:10978207ms step_avg:588.64ms +step:18651/57344 train_time:10978744ms step_avg:588.64ms +grad accum step:4663/14336 +step:18652/57344 train_time:10980023ms step_avg:588.68ms +step:18653/57344 train_time:10980040ms step_avg:588.65ms +step:18654/57344 train_time:10980286ms step_avg:588.63ms +step:18655/57344 train_time:10980828ms step_avg:588.63ms +grad accum step:4664/14336 +step:18656/57344 train_time:10982163ms step_avg:588.67ms +step:18657/57344 train_time:10982180ms step_avg:588.64ms +step:18658/57344 train_time:10982421ms step_avg:588.62ms +step:18659/57344 train_time:10982956ms step_avg:588.61ms +grad accum step:4665/14336 +step:18660/57344 train_time:10984248ms step_avg:588.65ms +step:18661/57344 train_time:10984265ms step_avg:588.62ms +step:18662/57344 train_time:10984510ms step_avg:588.60ms +step:18663/57344 train_time:10985056ms step_avg:588.60ms +grad accum step:4666/14336 +step:18664/57344 train_time:10986330ms step_avg:588.64ms +step:18665/57344 train_time:10986347ms step_avg:588.61ms +step:18666/57344 train_time:10986595ms step_avg:588.59ms +step:18667/57344 train_time:10987137ms step_avg:588.59ms +grad accum step:4667/14336 +step:18668/57344 train_time:10988413ms step_avg:588.62ms +step:18669/57344 train_time:10988430ms step_avg:588.59ms +step:18670/57344 train_time:10988675ms step_avg:588.57ms +step:18671/57344 train_time:10989224ms step_avg:588.57ms +grad accum step:4668/14336 +step:18672/57344 train_time:10990518ms step_avg:588.61ms +step:18673/57344 train_time:10990535ms step_avg:588.58ms +step:18674/57344 train_time:10990781ms step_avg:588.56ms +step:18675/57344 train_time:10991327ms step_avg:588.56ms +grad accum step:4669/14336 +step:18676/57344 train_time:10993090ms step_avg:588.62ms +step:18677/57344 train_time:10993107ms step_avg:588.59ms +step:18678/57344 train_time:10993353ms step_avg:588.57ms +step:18679/57344 train_time:10993893ms step_avg:588.57ms +grad accum step:4670/14336 +step:18680/57344 train_time:10995194ms step_avg:588.61ms +step:18681/57344 train_time:10995211ms step_avg:588.58ms +step:18682/57344 train_time:10995455ms step_avg:588.56ms +step:18683/57344 train_time:10996004ms step_avg:588.56ms +grad accum step:4671/14336 +step:18684/57344 train_time:10997308ms step_avg:588.59ms +step:18685/57344 train_time:10997325ms step_avg:588.56ms +step:18686/57344 train_time:10997576ms step_avg:588.55ms +step:18687/57344 train_time:10998132ms step_avg:588.54ms +grad accum step:4672/14336 +step:18688/57344 train_time:10999403ms step_avg:588.58ms +step:18688/57344 val_loss:6.993669 train_time:10999404ms step_avg:588.58ms +step:18689/57344 train_time:10999416ms step_avg:588.55ms +step:18690/57344 train_time:10999642ms step_avg:588.53ms +step:18691/57344 train_time:11000185ms step_avg:588.53ms +grad accum step:4673/14336 +step:18692/57344 train_time:11001461ms step_avg:588.57ms +step:18693/57344 train_time:11001478ms step_avg:588.53ms +step:18694/57344 train_time:11001724ms step_avg:588.52ms +step:18695/57344 train_time:11002268ms step_avg:588.51ms +grad accum step:4674/14336 +step:18696/57344 train_time:11003539ms step_avg:588.55ms +step:18697/57344 train_time:11003556ms step_avg:588.52ms +step:18698/57344 train_time:11003803ms step_avg:588.50ms +step:18699/57344 train_time:11004354ms step_avg:588.50ms +grad accum step:4675/14336 +step:18700/57344 train_time:11005652ms step_avg:588.54ms +step:18701/57344 train_time:11005669ms step_avg:588.51ms +step:18702/57344 train_time:11005910ms step_avg:588.49ms +step:18703/57344 train_time:11006441ms step_avg:588.49ms +grad accum step:4676/14336 +step:18704/57344 train_time:11007708ms step_avg:588.52ms +step:18705/57344 train_time:11007725ms step_avg:588.49ms +step:18706/57344 train_time:11007967ms step_avg:588.47ms +step:18707/57344 train_time:11008509ms step_avg:588.47ms +grad accum step:4677/14336 +step:18708/57344 train_time:11009788ms step_avg:588.51ms +step:18709/57344 train_time:11009805ms step_avg:588.48ms +step:18710/57344 train_time:11010053ms step_avg:588.46ms +step:18711/57344 train_time:11010601ms step_avg:588.46ms +grad accum step:4678/14336 +step:18712/57344 train_time:11011879ms step_avg:588.49ms +step:18713/57344 train_time:11011896ms step_avg:588.46ms +step:18714/57344 train_time:11012141ms step_avg:588.44ms +step:18715/57344 train_time:11012682ms step_avg:588.44ms +grad accum step:4679/14336 +step:18716/57344 train_time:11013978ms step_avg:588.48ms +step:18717/57344 train_time:11013995ms step_avg:588.45ms +step:18718/57344 train_time:11014247ms step_avg:588.43ms +step:18719/57344 train_time:11014807ms step_avg:588.43ms +grad accum step:4680/14336 +step:18720/57344 train_time:11016081ms step_avg:588.47ms +step:18721/57344 train_time:11016098ms step_avg:588.44ms +step:18722/57344 train_time:11016341ms step_avg:588.42ms +step:18723/57344 train_time:11016880ms step_avg:588.41ms +grad accum step:4681/14336 +step:18724/57344 train_time:11018178ms step_avg:588.45ms +step:18725/57344 train_time:11018195ms step_avg:588.42ms +step:18726/57344 train_time:11018444ms step_avg:588.40ms +step:18727/57344 train_time:11018998ms step_avg:588.40ms +grad accum step:4682/14336 +step:18728/57344 train_time:11033194ms step_avg:589.13ms +step:18729/57344 train_time:11033206ms step_avg:589.10ms +step:18730/57344 train_time:11033461ms step_avg:589.08ms +step:18731/57344 train_time:11034009ms step_avg:589.08ms +grad accum step:4683/14336 +step:18732/57344 train_time:11035292ms step_avg:589.11ms +step:18733/57344 train_time:11035309ms step_avg:589.08ms +step:18734/57344 train_time:11035555ms step_avg:589.07ms +step:18735/57344 train_time:11036105ms step_avg:589.06ms +grad accum step:4684/14336 +step:18736/57344 train_time:11037381ms step_avg:589.10ms +step:18737/57344 train_time:11037398ms step_avg:589.07ms +step:18738/57344 train_time:11037643ms step_avg:589.05ms +step:18739/57344 train_time:11038188ms step_avg:589.05ms +grad accum step:4685/14336 +step:18740/57344 train_time:11039474ms step_avg:589.09ms +step:18741/57344 train_time:11039490ms step_avg:589.06ms +step:18742/57344 train_time:11039733ms step_avg:589.04ms +step:18743/57344 train_time:11040270ms step_avg:589.03ms +grad accum step:4686/14336 +step:18744/57344 train_time:11041546ms step_avg:589.07ms +step:18745/57344 train_time:11041563ms step_avg:589.04ms +step:18746/57344 train_time:11041806ms step_avg:589.02ms +step:18747/57344 train_time:11042343ms step_avg:589.02ms +grad accum step:4687/14336 +step:18748/57344 train_time:11043625ms step_avg:589.06ms +step:18749/57344 train_time:11043642ms step_avg:589.03ms +step:18750/57344 train_time:11043890ms step_avg:589.01ms +step:18751/57344 train_time:11044439ms step_avg:589.01ms +grad accum step:4688/14336 +step:18752/57344 train_time:11045758ms step_avg:589.04ms +step:18752/57344 val_loss:6.993299 train_time:11045759ms step_avg:589.04ms +step:18753/57344 train_time:11045771ms step_avg:589.01ms +step:18754/57344 train_time:11046009ms step_avg:588.99ms +step:18755/57344 train_time:11046581ms step_avg:588.99ms +grad accum step:4689/14336 +step:18756/57344 train_time:11047838ms step_avg:589.03ms +step:18757/57344 train_time:11047855ms step_avg:589.00ms +step:18758/57344 train_time:11048111ms step_avg:588.98ms +step:18759/57344 train_time:11048692ms step_avg:588.98ms +grad accum step:4690/14336 +step:18760/57344 train_time:11050028ms step_avg:589.02ms +step:18761/57344 train_time:11050046ms step_avg:588.99ms +step:18762/57344 train_time:11050287ms step_avg:588.97ms +step:18763/57344 train_time:11050820ms step_avg:588.97ms +grad accum step:4691/14336 +step:18764/57344 train_time:11052092ms step_avg:589.01ms +step:18765/57344 train_time:11052109ms step_avg:588.97ms +step:18766/57344 train_time:11052355ms step_avg:588.96ms +step:18767/57344 train_time:11052901ms step_avg:588.95ms +grad accum step:4692/14336 +step:18768/57344 train_time:11054198ms step_avg:588.99ms +step:18769/57344 train_time:11054215ms step_avg:588.96ms +step:18770/57344 train_time:11054462ms step_avg:588.94ms +step:18771/57344 train_time:11055009ms step_avg:588.94ms +grad accum step:4693/14336 +step:18772/57344 train_time:11056316ms step_avg:588.98ms +step:18773/57344 train_time:11056333ms step_avg:588.95ms +step:18774/57344 train_time:11056576ms step_avg:588.93ms +step:18775/57344 train_time:11057112ms step_avg:588.93ms +grad accum step:4694/14336 +step:18776/57344 train_time:11058385ms step_avg:588.96ms +step:18777/57344 train_time:11058402ms step_avg:588.93ms +step:18778/57344 train_time:11058653ms step_avg:588.92ms +step:18779/57344 train_time:11059212ms step_avg:588.91ms +grad accum step:4695/14336 +step:18780/57344 train_time:11060509ms step_avg:588.95ms +step:18781/57344 train_time:11060526ms step_avg:588.92ms +step:18782/57344 train_time:11060776ms step_avg:588.90ms +step:18783/57344 train_time:11061328ms step_avg:588.90ms +grad accum step:4696/14336 +step:18784/57344 train_time:11062620ms step_avg:588.94ms +step:18785/57344 train_time:11062637ms step_avg:588.91ms +step:18786/57344 train_time:11062882ms step_avg:588.89ms +step:18787/57344 train_time:11063424ms step_avg:588.89ms +grad accum step:4697/14336 +step:18788/57344 train_time:11064704ms step_avg:588.92ms +step:18789/57344 train_time:11064722ms step_avg:588.89ms +step:18790/57344 train_time:11064969ms step_avg:588.88ms +step:18791/57344 train_time:11065514ms step_avg:588.87ms +grad accum step:4698/14336 +step:18792/57344 train_time:11066794ms step_avg:588.91ms +step:18793/57344 train_time:11066811ms step_avg:588.88ms +step:18794/57344 train_time:11067058ms step_avg:588.86ms +step:18795/57344 train_time:11067601ms step_avg:588.86ms +grad accum step:4699/14336 +step:18796/57344 train_time:11068875ms step_avg:588.90ms +step:18797/57344 train_time:11068892ms step_avg:588.86ms +step:18798/57344 train_time:11069139ms step_avg:588.85ms +step:18799/57344 train_time:11069680ms step_avg:588.84ms +grad accum step:4700/14336 +step:18800/57344 train_time:11070958ms step_avg:588.88ms +step:18801/57344 train_time:11070975ms step_avg:588.85ms +step:18802/57344 train_time:11071222ms step_avg:588.83ms +step:18803/57344 train_time:11071768ms step_avg:588.83ms +grad accum step:4701/14336 +step:18804/57344 train_time:11073088ms step_avg:588.87ms +step:18805/57344 train_time:11073105ms step_avg:588.84ms +step:18806/57344 train_time:11073348ms step_avg:588.82ms +step:18807/57344 train_time:11073894ms step_avg:588.82ms +grad accum step:4702/14336 +step:18808/57344 train_time:11075173ms step_avg:588.85ms +step:18809/57344 train_time:11075190ms step_avg:588.82ms +step:18810/57344 train_time:11075433ms step_avg:588.81ms +step:18811/57344 train_time:11075981ms step_avg:588.80ms +grad accum step:4703/14336 +step:18812/57344 train_time:11077261ms step_avg:588.84ms +step:18813/57344 train_time:11077278ms step_avg:588.81ms +step:18814/57344 train_time:11077532ms step_avg:588.79ms +step:18815/57344 train_time:11078092ms step_avg:588.79ms +grad accum step:4704/14336 +step:18816/57344 train_time:11079409ms step_avg:588.83ms +step:18816/57344 val_loss:6.991408 train_time:11079410ms step_avg:588.83ms +step:18817/57344 train_time:11079421ms step_avg:588.80ms +step:18818/57344 train_time:11079645ms step_avg:588.78ms +step:18819/57344 train_time:11080187ms step_avg:588.78ms +grad accum step:4705/14336 +step:18820/57344 train_time:11081500ms step_avg:588.82ms +step:18821/57344 train_time:11081516ms step_avg:588.78ms +step:18822/57344 train_time:11081764ms step_avg:588.77ms +step:18823/57344 train_time:11082303ms step_avg:588.76ms +grad accum step:4706/14336 +step:18824/57344 train_time:11083619ms step_avg:588.80ms +step:18825/57344 train_time:11083636ms step_avg:588.77ms +step:18826/57344 train_time:11083883ms step_avg:588.75ms +step:18827/57344 train_time:11084426ms step_avg:588.75ms +grad accum step:4707/14336 +step:18828/57344 train_time:11085700ms step_avg:588.79ms +step:18829/57344 train_time:11085718ms step_avg:588.76ms +step:18830/57344 train_time:11085968ms step_avg:588.74ms +step:18831/57344 train_time:11086521ms step_avg:588.74ms +grad accum step:4708/14336 +step:18832/57344 train_time:11087806ms step_avg:588.77ms +step:18833/57344 train_time:11087823ms step_avg:588.74ms +step:18834/57344 train_time:11088073ms step_avg:588.73ms +step:18835/57344 train_time:11088618ms step_avg:588.72ms +grad accum step:4709/14336 +step:18836/57344 train_time:11089913ms step_avg:588.76ms +step:18837/57344 train_time:11089930ms step_avg:588.73ms +step:18838/57344 train_time:11090175ms step_avg:588.71ms +step:18839/57344 train_time:11090720ms step_avg:588.71ms +grad accum step:4710/14336 +step:18840/57344 train_time:11092006ms step_avg:588.75ms +step:18841/57344 train_time:11092023ms step_avg:588.72ms +step:18842/57344 train_time:11092270ms step_avg:588.70ms +step:18843/57344 train_time:11092809ms step_avg:588.70ms +grad accum step:4711/14336 +step:18844/57344 train_time:11094070ms step_avg:588.73ms +step:18845/57344 train_time:11094086ms step_avg:588.70ms +step:18846/57344 train_time:11094333ms step_avg:588.68ms +step:18847/57344 train_time:11094876ms step_avg:588.68ms +grad accum step:4712/14336 +step:18848/57344 train_time:11096175ms step_avg:588.72ms +step:18849/57344 train_time:11096191ms step_avg:588.69ms +step:18850/57344 train_time:11096432ms step_avg:588.67ms +step:18851/57344 train_time:11096972ms step_avg:588.67ms +grad accum step:4713/14336 +step:18852/57344 train_time:11098303ms step_avg:588.71ms +step:18853/57344 train_time:11098320ms step_avg:588.68ms +step:18854/57344 train_time:11098573ms step_avg:588.66ms +step:18855/57344 train_time:11099142ms step_avg:588.66ms +grad accum step:4714/14336 +step:18856/57344 train_time:11100452ms step_avg:588.70ms +step:18857/57344 train_time:11100469ms step_avg:588.67ms +step:18858/57344 train_time:11100717ms step_avg:588.65ms +step:18859/57344 train_time:11101255ms step_avg:588.64ms +grad accum step:4715/14336 +step:18860/57344 train_time:11102536ms step_avg:588.68ms +step:18861/57344 train_time:11102553ms step_avg:588.65ms +step:18862/57344 train_time:11102800ms step_avg:588.63ms +step:18863/57344 train_time:11103341ms step_avg:588.63ms +grad accum step:4716/14336 +step:18864/57344 train_time:11104618ms step_avg:588.67ms +step:18865/57344 train_time:11104635ms step_avg:588.64ms +step:18866/57344 train_time:11104891ms step_avg:588.62ms +step:18867/57344 train_time:11105457ms step_avg:588.62ms +grad accum step:4717/14336 +step:18868/57344 train_time:11106764ms step_avg:588.66ms +step:18869/57344 train_time:11106782ms step_avg:588.63ms +step:18870/57344 train_time:11107029ms step_avg:588.61ms +step:18871/57344 train_time:11107577ms step_avg:588.61ms +grad accum step:4718/14336 +step:18872/57344 train_time:11108853ms step_avg:588.64ms +step:18873/57344 train_time:11108870ms step_avg:588.61ms +step:18874/57344 train_time:11109112ms step_avg:588.59ms +step:18875/57344 train_time:11109648ms step_avg:588.59ms +grad accum step:4719/14336 +step:18876/57344 train_time:11110924ms step_avg:588.63ms +step:18877/57344 train_time:11110941ms step_avg:588.60ms +step:18878/57344 train_time:11111185ms step_avg:588.58ms +step:18879/57344 train_time:11111731ms step_avg:588.58ms +grad accum step:4720/14336 +step:18880/57344 train_time:11113009ms step_avg:588.61ms +step:18880/57344 val_loss:6.994536 train_time:11113010ms step_avg:588.61ms +step:18881/57344 train_time:11113022ms step_avg:588.58ms +step:18882/57344 train_time:11113245ms step_avg:588.56ms +step:18883/57344 train_time:11113786ms step_avg:588.56ms +grad accum step:4721/14336 +step:18884/57344 train_time:11115057ms step_avg:588.60ms +step:18885/57344 train_time:11115074ms step_avg:588.57ms +step:18886/57344 train_time:11115320ms step_avg:588.55ms +step:18887/57344 train_time:11115869ms step_avg:588.55ms +grad accum step:4722/14336 +step:18888/57344 train_time:11117148ms step_avg:588.58ms +step:18889/57344 train_time:11117165ms step_avg:588.55ms +step:18890/57344 train_time:11117412ms step_avg:588.53ms +step:18891/57344 train_time:11117956ms step_avg:588.53ms +grad accum step:4723/14336 +step:18892/57344 train_time:11119234ms step_avg:588.57ms +step:18893/57344 train_time:11119251ms step_avg:588.54ms +step:18894/57344 train_time:11119494ms step_avg:588.52ms +step:18895/57344 train_time:11120035ms step_avg:588.52ms +grad accum step:4724/14336 +step:18896/57344 train_time:11121332ms step_avg:588.55ms +step:18897/57344 train_time:11121348ms step_avg:588.52ms +step:18898/57344 train_time:11121599ms step_avg:588.51ms +step:18899/57344 train_time:11122160ms step_avg:588.51ms +grad accum step:4725/14336 +step:18900/57344 train_time:11123510ms step_avg:588.55ms +step:18901/57344 train_time:11123527ms step_avg:588.52ms +step:18902/57344 train_time:11123772ms step_avg:588.50ms +step:18903/57344 train_time:11124318ms step_avg:588.49ms +grad accum step:4726/14336 +step:18904/57344 train_time:11125634ms step_avg:588.53ms +step:18905/57344 train_time:11125652ms step_avg:588.50ms +step:18906/57344 train_time:11125896ms step_avg:588.48ms +step:18907/57344 train_time:11126439ms step_avg:588.48ms +grad accum step:4727/14336 +step:18908/57344 train_time:11127715ms step_avg:588.52ms +step:18909/57344 train_time:11127732ms step_avg:588.49ms +step:18910/57344 train_time:11127980ms step_avg:588.47ms +step:18911/57344 train_time:11128530ms step_avg:588.47ms +grad accum step:4728/14336 +step:18912/57344 train_time:11129802ms step_avg:588.50ms +step:18913/57344 train_time:11129820ms step_avg:588.47ms +step:18914/57344 train_time:11130069ms step_avg:588.46ms +step:18915/57344 train_time:11130626ms step_avg:588.45ms +grad accum step:4729/14336 +step:18916/57344 train_time:11131928ms step_avg:588.49ms +step:18917/57344 train_time:11131945ms step_avg:588.46ms +step:18918/57344 train_time:11132193ms step_avg:588.44ms +step:18919/57344 train_time:11132737ms step_avg:588.44ms +grad accum step:4730/14336 +step:18920/57344 train_time:11134021ms step_avg:588.48ms +step:18921/57344 train_time:11134038ms step_avg:588.45ms +step:18922/57344 train_time:11134287ms step_avg:588.43ms +step:18923/57344 train_time:11134835ms step_avg:588.43ms +grad accum step:4731/14336 +step:18924/57344 train_time:11136155ms step_avg:588.47ms +step:18925/57344 train_time:11136172ms step_avg:588.44ms +step:18926/57344 train_time:11136421ms step_avg:588.42ms +step:18927/57344 train_time:11136970ms step_avg:588.42ms +grad accum step:4732/14336 +step:18928/57344 train_time:11138278ms step_avg:588.46ms +step:18929/57344 train_time:11138295ms step_avg:588.42ms +step:18930/57344 train_time:11138544ms step_avg:588.41ms +step:18931/57344 train_time:11139090ms step_avg:588.40ms +grad accum step:4733/14336 +step:18932/57344 train_time:11140371ms step_avg:588.44ms +step:18933/57344 train_time:11140388ms step_avg:588.41ms +step:18934/57344 train_time:11140634ms step_avg:588.39ms +step:18935/57344 train_time:11141173ms step_avg:588.39ms +grad accum step:4734/14336 +step:18936/57344 train_time:11142455ms step_avg:588.43ms +step:18937/57344 train_time:11142472ms step_avg:588.40ms +step:18938/57344 train_time:11142719ms step_avg:588.38ms +step:18939/57344 train_time:11143261ms step_avg:588.38ms +grad accum step:4735/14336 +step:18940/57344 train_time:11144558ms step_avg:588.41ms +step:18941/57344 train_time:11144575ms step_avg:588.38ms +step:18942/57344 train_time:11144831ms step_avg:588.37ms +step:18943/57344 train_time:11145392ms step_avg:588.36ms +grad accum step:4736/14336 +step:18944/57344 train_time:11146691ms step_avg:588.40ms +step:18944/57344 val_loss:6.994389 train_time:11146692ms step_avg:588.40ms +step:18945/57344 train_time:11146840ms step_avg:588.38ms +step:18946/57344 train_time:11146959ms step_avg:588.35ms +step:18947/57344 train_time:11147478ms step_avg:588.35ms +grad accum step:4737/14336 +step:18948/57344 train_time:11148780ms step_avg:588.39ms +step:18949/57344 train_time:11148792ms step_avg:588.36ms +step:18950/57344 train_time:11149024ms step_avg:588.34ms +step:18951/57344 train_time:11149566ms step_avg:588.34ms +grad accum step:4738/14336 +step:18952/57344 train_time:11150849ms step_avg:588.37ms +step:18953/57344 train_time:11150866ms step_avg:588.34ms +step:18954/57344 train_time:11151113ms step_avg:588.33ms +step:18955/57344 train_time:11151653ms step_avg:588.32ms +grad accum step:4739/14336 +step:18956/57344 train_time:11152955ms step_avg:588.36ms +step:18957/57344 train_time:11152973ms step_avg:588.33ms +step:18958/57344 train_time:11153224ms step_avg:588.31ms +step:18959/57344 train_time:11153785ms step_avg:588.31ms +grad accum step:4740/14336 +step:18960/57344 train_time:11155075ms step_avg:588.35ms +step:18961/57344 train_time:11155092ms step_avg:588.32ms +step:18962/57344 train_time:11155335ms step_avg:588.30ms +step:18963/57344 train_time:11155874ms step_avg:588.30ms +grad accum step:4741/14336 +step:18964/57344 train_time:11157144ms step_avg:588.33ms +step:18965/57344 train_time:11157161ms step_avg:588.30ms +step:18966/57344 train_time:11157410ms step_avg:588.28ms +step:18967/57344 train_time:11157957ms step_avg:588.28ms +grad accum step:4742/14336 +step:18968/57344 train_time:11159225ms step_avg:588.32ms +step:18969/57344 train_time:11159242ms step_avg:588.29ms +step:18970/57344 train_time:11159485ms step_avg:588.27ms +step:18971/57344 train_time:11160016ms step_avg:588.27ms +grad accum step:4743/14336 +step:18972/57344 train_time:11161348ms step_avg:588.31ms +step:18973/57344 train_time:11161364ms step_avg:588.28ms +step:18974/57344 train_time:11161607ms step_avg:588.26ms +step:18975/57344 train_time:11162143ms step_avg:588.26ms +grad accum step:4744/14336 +step:18976/57344 train_time:11163452ms step_avg:588.29ms +step:18977/57344 train_time:11163468ms step_avg:588.26ms +step:18978/57344 train_time:11163715ms step_avg:588.25ms +step:18979/57344 train_time:11164257ms step_avg:588.24ms +grad accum step:4745/14336 +step:18980/57344 train_time:11165554ms step_avg:588.28ms +step:18981/57344 train_time:11165571ms step_avg:588.25ms +step:18982/57344 train_time:11165818ms step_avg:588.23ms +step:18983/57344 train_time:11166367ms step_avg:588.23ms +grad accum step:4746/14336 +step:18984/57344 train_time:11167664ms step_avg:588.27ms +step:18985/57344 train_time:11167681ms step_avg:588.24ms +step:18986/57344 train_time:11167926ms step_avg:588.22ms +step:18987/57344 train_time:11168472ms step_avg:588.22ms +grad accum step:4747/14336 +step:18988/57344 train_time:11169760ms step_avg:588.25ms +step:18989/57344 train_time:11169777ms step_avg:588.22ms +step:18990/57344 train_time:11170023ms step_avg:588.21ms +step:18991/57344 train_time:11170563ms step_avg:588.20ms +grad accum step:4748/14336 +step:18992/57344 train_time:11171842ms step_avg:588.24ms +step:18993/57344 train_time:11171859ms step_avg:588.21ms +step:18994/57344 train_time:11172104ms step_avg:588.19ms +step:18995/57344 train_time:11172650ms step_avg:588.19ms +grad accum step:4749/14336 +step:18996/57344 train_time:11173941ms step_avg:588.23ms +step:18997/57344 train_time:11173958ms step_avg:588.20ms +step:18998/57344 train_time:11174208ms step_avg:588.18ms +step:18999/57344 train_time:11174758ms step_avg:588.18ms +grad accum step:4750/14336 +step:19000/57344 train_time:11176063ms step_avg:588.21ms +step:19001/57344 train_time:11176080ms step_avg:588.18ms +step:19002/57344 train_time:11176322ms step_avg:588.17ms +step:19003/57344 train_time:11176855ms step_avg:588.16ms +grad accum step:4751/14336 +step:19004/57344 train_time:11178129ms step_avg:588.20ms +step:19005/57344 train_time:11178146ms step_avg:588.17ms +step:19006/57344 train_time:11178397ms step_avg:588.15ms +step:19007/57344 train_time:11178945ms step_avg:588.15ms +grad accum step:4752/14336 +step:19008/57344 train_time:11180215ms step_avg:588.18ms +step:19008/57344 val_loss:6.993626 train_time:11180216ms step_avg:588.18ms +step:19009/57344 train_time:11180228ms step_avg:588.15ms +step:19010/57344 train_time:11180452ms step_avg:588.14ms +step:19011/57344 train_time:11180998ms step_avg:588.13ms +grad accum step:4753/14336 +step:19012/57344 train_time:11182296ms step_avg:588.17ms +step:19013/57344 train_time:11182313ms step_avg:588.14ms +step:19014/57344 train_time:11182567ms step_avg:588.12ms +step:19015/57344 train_time:11183130ms step_avg:588.12ms +grad accum step:4754/14336 +step:19016/57344 train_time:11184423ms step_avg:588.16ms +step:19017/57344 train_time:11184439ms step_avg:588.13ms +step:19018/57344 train_time:11184688ms step_avg:588.11ms +step:19019/57344 train_time:11185231ms step_avg:588.11ms +grad accum step:4755/14336 +step:19020/57344 train_time:11186516ms step_avg:588.14ms +step:19021/57344 train_time:11186533ms step_avg:588.11ms +step:19022/57344 train_time:11186775ms step_avg:588.10ms +step:19023/57344 train_time:11187318ms step_avg:588.09ms +grad accum step:4756/14336 +step:19024/57344 train_time:11188621ms step_avg:588.13ms +step:19025/57344 train_time:11188638ms step_avg:588.10ms +step:19026/57344 train_time:11188887ms step_avg:588.08ms +step:19027/57344 train_time:11189431ms step_avg:588.08ms +grad accum step:4757/14336 +step:19028/57344 train_time:11190719ms step_avg:588.12ms +step:19029/57344 train_time:11190736ms step_avg:588.09ms +step:19030/57344 train_time:11190985ms step_avg:588.07ms +step:19031/57344 train_time:11191529ms step_avg:588.07ms +grad accum step:4758/14336 +step:19032/57344 train_time:11192806ms step_avg:588.10ms +step:19033/57344 train_time:11192823ms step_avg:588.07ms +step:19034/57344 train_time:11193071ms step_avg:588.06ms +step:19035/57344 train_time:11193615ms step_avg:588.05ms +grad accum step:4759/14336 +step:19036/57344 train_time:11194916ms step_avg:588.09ms +step:19037/57344 train_time:11194933ms step_avg:588.06ms +step:19038/57344 train_time:11195183ms step_avg:588.04ms +step:19039/57344 train_time:11195724ms step_avg:588.04ms +grad accum step:4760/14336 +step:19040/57344 train_time:11197004ms step_avg:588.08ms +step:19041/57344 train_time:11197021ms step_avg:588.05ms +step:19042/57344 train_time:11197265ms step_avg:588.03ms +step:19043/57344 train_time:11197803ms step_avg:588.03ms +grad accum step:4761/14336 +step:19044/57344 train_time:11199075ms step_avg:588.06ms +step:19045/57344 train_time:11199092ms step_avg:588.03ms +step:19046/57344 train_time:11199337ms step_avg:588.02ms +step:19047/57344 train_time:11199883ms step_avg:588.01ms +grad accum step:4762/14336 +step:19048/57344 train_time:11201164ms step_avg:588.05ms +step:19049/57344 train_time:11201181ms step_avg:588.02ms +step:19050/57344 train_time:11201424ms step_avg:588.00ms +step:19051/57344 train_time:11201968ms step_avg:588.00ms +grad accum step:4763/14336 +step:19052/57344 train_time:11203254ms step_avg:588.04ms +step:19053/57344 train_time:11203271ms step_avg:588.01ms +step:19054/57344 train_time:11203516ms step_avg:587.99ms +step:19055/57344 train_time:11204062ms step_avg:587.99ms +grad accum step:4764/14336 +step:19056/57344 train_time:11205364ms step_avg:588.02ms +step:19057/57344 train_time:11205381ms step_avg:587.99ms +step:19058/57344 train_time:11205625ms step_avg:587.97ms +step:19059/57344 train_time:11206165ms step_avg:587.97ms +grad accum step:4765/14336 +step:19060/57344 train_time:11207494ms step_avg:588.01ms +step:19061/57344 train_time:11207511ms step_avg:587.98ms +step:19062/57344 train_time:11207761ms step_avg:587.96ms +step:19063/57344 train_time:11208307ms step_avg:587.96ms +grad accum step:4766/14336 +step:19064/57344 train_time:11209618ms step_avg:588.00ms +step:19065/57344 train_time:11209635ms step_avg:587.97ms +step:19066/57344 train_time:11209884ms step_avg:587.95ms +step:19067/57344 train_time:11210427ms step_avg:587.95ms +grad accum step:4767/14336 +step:19068/57344 train_time:11211698ms step_avg:587.98ms +step:19069/57344 train_time:11211715ms step_avg:587.96ms +step:19070/57344 train_time:11211961ms step_avg:587.94ms +step:19071/57344 train_time:11212511ms step_avg:587.94ms +grad accum step:4768/14336 +step:19072/57344 train_time:11213834ms step_avg:587.97ms +step:19072/57344 val_loss:6.999786 train_time:11213835ms step_avg:587.97ms +step:19073/57344 train_time:11213846ms step_avg:587.94ms +step:19074/57344 train_time:11214076ms step_avg:587.92ms +step:19075/57344 train_time:11214630ms step_avg:587.92ms +grad accum step:4769/14336 +step:19076/57344 train_time:11215920ms step_avg:587.96ms +step:19077/57344 train_time:11215937ms step_avg:587.93ms +step:19078/57344 train_time:11216180ms step_avg:587.91ms +step:19079/57344 train_time:11216713ms step_avg:587.91ms +grad accum step:4770/14336 +step:19080/57344 train_time:11218010ms step_avg:587.95ms +step:19081/57344 train_time:11218026ms step_avg:587.92ms +step:19082/57344 train_time:11218272ms step_avg:587.90ms +step:19083/57344 train_time:11218815ms step_avg:587.90ms +grad accum step:4771/14336 +step:19084/57344 train_time:11220112ms step_avg:587.93ms +step:19085/57344 train_time:11220129ms step_avg:587.90ms +step:19086/57344 train_time:11220373ms step_avg:587.89ms +step:19087/57344 train_time:11220916ms step_avg:587.88ms +grad accum step:4772/14336 +step:19088/57344 train_time:11222194ms step_avg:587.92ms +step:19089/57344 train_time:11222212ms step_avg:587.89ms +step:19090/57344 train_time:11222458ms step_avg:587.87ms +step:19091/57344 train_time:11223007ms step_avg:587.87ms +grad accum step:4773/14336 +step:19092/57344 train_time:11224286ms step_avg:587.91ms +step:19093/57344 train_time:11224303ms step_avg:587.88ms +step:19094/57344 train_time:11224553ms step_avg:587.86ms +step:19095/57344 train_time:11225102ms step_avg:587.86ms +grad accum step:4774/14336 +step:19096/57344 train_time:11226382ms step_avg:587.89ms +step:19097/57344 train_time:11226399ms step_avg:587.86ms +step:19098/57344 train_time:11226642ms step_avg:587.84ms +step:19099/57344 train_time:11227183ms step_avg:587.84ms +grad accum step:4775/14336 +step:19100/57344 train_time:11228483ms step_avg:587.88ms +step:19101/57344 train_time:11228500ms step_avg:587.85ms +step:19102/57344 train_time:11228744ms step_avg:587.83ms +step:19103/57344 train_time:11229275ms step_avg:587.83ms +grad accum step:4776/14336 +step:19104/57344 train_time:11230574ms step_avg:587.87ms +step:19105/57344 train_time:11230591ms step_avg:587.84ms +step:19106/57344 train_time:11230833ms step_avg:587.82ms +step:19107/57344 train_time:11231366ms step_avg:587.81ms +grad accum step:4777/14336 +step:19108/57344 train_time:11232649ms step_avg:587.85ms +step:19109/57344 train_time:11232666ms step_avg:587.82ms +step:19110/57344 train_time:11232910ms step_avg:587.80ms +step:19111/57344 train_time:11233449ms step_avg:587.80ms +grad accum step:4778/14336 +step:19112/57344 train_time:11234765ms step_avg:587.84ms +step:19113/57344 train_time:11234781ms step_avg:587.81ms +step:19114/57344 train_time:11235024ms step_avg:587.79ms +step:19115/57344 train_time:11235566ms step_avg:587.79ms +grad accum step:4779/14336 +step:19116/57344 train_time:11236844ms step_avg:587.82ms +step:19117/57344 train_time:11236993ms step_avg:587.80ms +step:19118/57344 train_time:11237209ms step_avg:587.78ms +step:19119/57344 train_time:11237757ms step_avg:587.78ms +grad accum step:4780/14336 +step:19120/57344 train_time:11239055ms step_avg:587.82ms +step:19121/57344 train_time:11239072ms step_avg:587.79ms +step:19122/57344 train_time:11239320ms step_avg:587.77ms +step:19123/57344 train_time:11239868ms step_avg:587.77ms +grad accum step:4781/14336 +step:19124/57344 train_time:11241157ms step_avg:587.80ms +step:19125/57344 train_time:11241174ms step_avg:587.77ms +step:19126/57344 train_time:11241423ms step_avg:587.76ms +step:19127/57344 train_time:11241977ms step_avg:587.75ms +grad accum step:4782/14336 +step:19128/57344 train_time:11243283ms step_avg:587.79ms +step:19129/57344 train_time:11243300ms step_avg:587.76ms +step:19130/57344 train_time:11243546ms step_avg:587.74ms +step:19131/57344 train_time:11244092ms step_avg:587.74ms +grad accum step:4783/14336 +step:19132/57344 train_time:11245424ms step_avg:587.78ms +step:19133/57344 train_time:11245441ms step_avg:587.75ms +step:19134/57344 train_time:11245690ms step_avg:587.73ms +step:19135/57344 train_time:11246237ms step_avg:587.73ms +grad accum step:4784/14336 +step:19136/57344 train_time:11247532ms step_avg:587.77ms +step:19136/57344 val_loss:6.992972 train_time:11247532ms step_avg:587.77ms +step:19137/57344 train_time:11247544ms step_avg:587.74ms +step:19138/57344 train_time:11247772ms step_avg:587.72ms +step:19139/57344 train_time:11248328ms step_avg:587.72ms +grad accum step:4785/14336 +step:19140/57344 train_time:11249620ms step_avg:587.75ms +step:19141/57344 train_time:11249637ms step_avg:587.72ms +step:19142/57344 train_time:11249887ms step_avg:587.71ms +step:19143/57344 train_time:11250447ms step_avg:587.71ms +grad accum step:4786/14336 +step:19144/57344 train_time:11251758ms step_avg:587.74ms +step:19145/57344 train_time:11251775ms step_avg:587.71ms +step:19146/57344 train_time:11252027ms step_avg:587.70ms +step:19147/57344 train_time:11252588ms step_avg:587.69ms +grad accum step:4787/14336 +step:19148/57344 train_time:11253898ms step_avg:587.73ms +step:19149/57344 train_time:11253915ms step_avg:587.70ms +step:19150/57344 train_time:11254163ms step_avg:587.68ms +step:19151/57344 train_time:11254713ms step_avg:587.68ms +grad accum step:4788/14336 +step:19152/57344 train_time:11256016ms step_avg:587.72ms +step:19153/57344 train_time:11256033ms step_avg:587.69ms +step:19154/57344 train_time:11256281ms step_avg:587.67ms +step:19155/57344 train_time:11256833ms step_avg:587.67ms +grad accum step:4789/14336 +step:19156/57344 train_time:11258156ms step_avg:587.71ms +step:19157/57344 train_time:11258173ms step_avg:587.68ms +step:19158/57344 train_time:11258418ms step_avg:587.66ms +step:19159/57344 train_time:11258962ms step_avg:587.66ms +grad accum step:4790/14336 +step:19160/57344 train_time:11260285ms step_avg:587.70ms +step:19161/57344 train_time:11260302ms step_avg:587.67ms +step:19162/57344 train_time:11260552ms step_avg:587.65ms +step:19163/57344 train_time:11261099ms step_avg:587.65ms +grad accum step:4791/14336 +step:19164/57344 train_time:11262375ms step_avg:587.68ms +step:19165/57344 train_time:11262392ms step_avg:587.65ms +step:19166/57344 train_time:11262642ms step_avg:587.64ms +step:19167/57344 train_time:11263189ms step_avg:587.63ms +grad accum step:4792/14336 +step:19168/57344 train_time:11264475ms step_avg:587.67ms +step:19169/57344 train_time:11264492ms step_avg:587.64ms +step:19170/57344 train_time:11264737ms step_avg:587.62ms +step:19171/57344 train_time:11265274ms step_avg:587.62ms +grad accum step:4793/14336 +step:19172/57344 train_time:11266574ms step_avg:587.66ms +step:19173/57344 train_time:11266591ms step_avg:587.63ms +step:19174/57344 train_time:11266838ms step_avg:587.61ms +step:19175/57344 train_time:11267379ms step_avg:587.61ms +grad accum step:4794/14336 +step:19176/57344 train_time:11268676ms step_avg:587.64ms +step:19177/57344 train_time:11268692ms step_avg:587.61ms +step:19178/57344 train_time:11268945ms step_avg:587.60ms +step:19179/57344 train_time:11269505ms step_avg:587.60ms +grad accum step:4795/14336 +step:19180/57344 train_time:11270803ms step_avg:587.63ms +step:19181/57344 train_time:11270820ms step_avg:587.60ms +step:19182/57344 train_time:11271069ms step_avg:587.59ms +step:19183/57344 train_time:11271629ms step_avg:587.58ms +grad accum step:4796/14336 +step:19184/57344 train_time:11272949ms step_avg:587.62ms +step:19185/57344 train_time:11272966ms step_avg:587.59ms +step:19186/57344 train_time:11273212ms step_avg:587.57ms +step:19187/57344 train_time:11273759ms step_avg:587.57ms +grad accum step:4797/14336 +step:19188/57344 train_time:11275067ms step_avg:587.61ms +step:19189/57344 train_time:11275084ms step_avg:587.58ms +step:19190/57344 train_time:11275334ms step_avg:587.56ms +step:19191/57344 train_time:11275892ms step_avg:587.56ms +grad accum step:4798/14336 +step:19192/57344 train_time:11277195ms step_avg:587.60ms +step:19193/57344 train_time:11277210ms step_avg:587.57ms +step:19194/57344 train_time:11277454ms step_avg:587.55ms +step:19195/57344 train_time:11277997ms step_avg:587.55ms +grad accum step:4799/14336 +step:19196/57344 train_time:11279275ms step_avg:587.58ms +step:19197/57344 train_time:11279292ms step_avg:587.55ms +step:19198/57344 train_time:11279539ms step_avg:587.54ms +step:19199/57344 train_time:11280095ms step_avg:587.54ms +grad accum step:4800/14336 +step:19200/57344 train_time:11281398ms step_avg:587.57ms +step:19200/57344 val_loss:6.989506 train_time:11281399ms step_avg:587.57ms +step:19201/57344 train_time:11281411ms step_avg:587.54ms +step:19202/57344 train_time:11281638ms step_avg:587.52ms +step:19203/57344 train_time:11282193ms step_avg:587.52ms +grad accum step:4801/14336 +step:19204/57344 train_time:11283508ms step_avg:587.56ms +step:19205/57344 train_time:11283525ms step_avg:587.53ms +step:19206/57344 train_time:11283779ms step_avg:587.51ms +step:19207/57344 train_time:11284344ms step_avg:587.51ms +grad accum step:4802/14336 +step:19208/57344 train_time:11285643ms step_avg:587.55ms +step:19209/57344 train_time:11285660ms step_avg:587.52ms +step:19210/57344 train_time:11285910ms step_avg:587.50ms +step:19211/57344 train_time:11286465ms step_avg:587.50ms +grad accum step:4803/14336 +step:19212/57344 train_time:11287747ms step_avg:587.54ms +step:19213/57344 train_time:11287764ms step_avg:587.51ms +step:19214/57344 train_time:11288009ms step_avg:587.49ms +step:19215/57344 train_time:11288559ms step_avg:587.49ms +grad accum step:4804/14336 +step:19216/57344 train_time:11289880ms step_avg:587.52ms +step:19217/57344 train_time:11289897ms step_avg:587.50ms +step:19218/57344 train_time:11290144ms step_avg:587.48ms +step:19219/57344 train_time:11290691ms step_avg:587.48ms +grad accum step:4805/14336 +step:19220/57344 train_time:11292034ms step_avg:587.51ms +step:19221/57344 train_time:11292051ms step_avg:587.49ms +step:19222/57344 train_time:11292303ms step_avg:587.47ms +step:19223/57344 train_time:11292863ms step_avg:587.47ms +grad accum step:4806/14336 +step:19224/57344 train_time:11294142ms step_avg:587.50ms +step:19225/57344 train_time:11294159ms step_avg:587.47ms +step:19226/57344 train_time:11294402ms step_avg:587.45ms +step:19227/57344 train_time:11294948ms step_avg:587.45ms +grad accum step:4807/14336 +step:19228/57344 train_time:11296284ms step_avg:587.49ms +step:19229/57344 train_time:11296301ms step_avg:587.46ms +step:19230/57344 train_time:11296546ms step_avg:587.44ms +step:19231/57344 train_time:11297087ms step_avg:587.44ms +grad accum step:4808/14336 +step:19232/57344 train_time:11298373ms step_avg:587.48ms +step:19233/57344 train_time:11298390ms step_avg:587.45ms +step:19234/57344 train_time:11298639ms step_avg:587.43ms +step:19235/57344 train_time:11299189ms step_avg:587.43ms +grad accum step:4809/14336 +step:19236/57344 train_time:11300519ms step_avg:587.47ms +step:19237/57344 train_time:11300536ms step_avg:587.44ms +step:19238/57344 train_time:11300780ms step_avg:587.42ms +step:19239/57344 train_time:11301320ms step_avg:587.42ms +grad accum step:4810/14336 +step:19240/57344 train_time:11302618ms step_avg:587.45ms +step:19241/57344 train_time:11302635ms step_avg:587.42ms +step:19242/57344 train_time:11302879ms step_avg:587.41ms +step:19243/57344 train_time:11303424ms step_avg:587.40ms +grad accum step:4811/14336 +step:19244/57344 train_time:11304744ms step_avg:587.44ms +step:19245/57344 train_time:11304760ms step_avg:587.41ms +step:19246/57344 train_time:11305009ms step_avg:587.40ms +step:19247/57344 train_time:11305556ms step_avg:587.39ms +grad accum step:4812/14336 +step:19248/57344 train_time:11306867ms step_avg:587.43ms +step:19249/57344 train_time:11306883ms step_avg:587.40ms +step:19250/57344 train_time:11307128ms step_avg:587.38ms +step:19251/57344 train_time:11307671ms step_avg:587.38ms +grad accum step:4813/14336 +step:19252/57344 train_time:11308959ms step_avg:587.42ms +step:19253/57344 train_time:11308977ms step_avg:587.39ms +step:19254/57344 train_time:11309228ms step_avg:587.37ms +step:19255/57344 train_time:11309780ms step_avg:587.37ms +grad accum step:4814/14336 +step:19256/57344 train_time:11311064ms step_avg:587.40ms +step:19257/57344 train_time:11311081ms step_avg:587.38ms +step:19258/57344 train_time:11311340ms step_avg:587.36ms +step:19259/57344 train_time:11311911ms step_avg:587.36ms +grad accum step:4815/14336 +step:19260/57344 train_time:11313233ms step_avg:587.40ms +step:19261/57344 train_time:11313250ms step_avg:587.37ms +step:19262/57344 train_time:11313499ms step_avg:587.35ms +step:19263/57344 train_time:11314049ms step_avg:587.35ms +grad accum step:4816/14336 +step:19264/57344 train_time:11315389ms step_avg:587.39ms +step:19264/57344 val_loss:7.006613 train_time:11315389ms step_avg:587.39ms +step:19265/57344 train_time:11315401ms step_avg:587.36ms +step:19266/57344 train_time:11315622ms step_avg:587.34ms +step:19267/57344 train_time:11316169ms step_avg:587.33ms +grad accum step:4817/14336 +step:19268/57344 train_time:11317471ms step_avg:587.37ms +step:19269/57344 train_time:11317488ms step_avg:587.34ms +step:19270/57344 train_time:11317740ms step_avg:587.32ms +step:19271/57344 train_time:11318299ms step_avg:587.32ms +grad accum step:4818/14336 +step:19272/57344 train_time:11319580ms step_avg:587.36ms +step:19273/57344 train_time:11319597ms step_avg:587.33ms +step:19274/57344 train_time:11319844ms step_avg:587.31ms +step:19275/57344 train_time:11320388ms step_avg:587.31ms +grad accum step:4819/14336 +step:19276/57344 train_time:11321691ms step_avg:587.35ms +step:19277/57344 train_time:11321708ms step_avg:587.32ms +step:19278/57344 train_time:11321954ms step_avg:587.30ms +step:19279/57344 train_time:11322503ms step_avg:587.30ms +grad accum step:4820/14336 +step:19280/57344 train_time:11323854ms step_avg:587.34ms +step:19281/57344 train_time:11323871ms step_avg:587.31ms +step:19282/57344 train_time:11324135ms step_avg:587.29ms +step:19283/57344 train_time:11324733ms step_avg:587.29ms +grad accum step:4821/14336 +step:19284/57344 train_time:11326053ms step_avg:587.33ms +step:19285/57344 train_time:11326070ms step_avg:587.30ms +step:19286/57344 train_time:11326316ms step_avg:587.28ms +step:19287/57344 train_time:11326855ms step_avg:587.28ms +grad accum step:4822/14336 +step:19288/57344 train_time:11328131ms step_avg:587.31ms +step:19289/57344 train_time:11328148ms step_avg:587.29ms +step:19290/57344 train_time:11328394ms step_avg:587.27ms +step:19291/57344 train_time:11328948ms step_avg:587.27ms +grad accum step:4823/14336 +step:19292/57344 train_time:11330279ms step_avg:587.30ms +step:19293/57344 train_time:11330295ms step_avg:587.27ms +step:19294/57344 train_time:11330538ms step_avg:587.26ms +step:19295/57344 train_time:11331086ms step_avg:587.26ms +grad accum step:4824/14336 +step:19296/57344 train_time:11332404ms step_avg:587.29ms +step:19297/57344 train_time:11332421ms step_avg:587.26ms +step:19298/57344 train_time:11332667ms step_avg:587.25ms +step:19299/57344 train_time:11333215ms step_avg:587.24ms +grad accum step:4825/14336 +step:19300/57344 train_time:11334572ms step_avg:587.28ms +step:19301/57344 train_time:11334588ms step_avg:587.25ms +step:19302/57344 train_time:11334836ms step_avg:587.24ms +step:19303/57344 train_time:11335377ms step_avg:587.23ms +grad accum step:4826/14336 +step:19304/57344 train_time:11336669ms step_avg:587.27ms +step:19305/57344 train_time:11336686ms step_avg:587.24ms +step:19306/57344 train_time:11336932ms step_avg:587.22ms +step:19307/57344 train_time:11337481ms step_avg:587.22ms +grad accum step:4827/14336 +step:19308/57344 train_time:11338833ms step_avg:587.26ms +step:19309/57344 train_time:11338850ms step_avg:587.23ms +step:19310/57344 train_time:11339096ms step_avg:587.21ms +step:19311/57344 train_time:11339638ms step_avg:587.21ms +grad accum step:4828/14336 +step:19312/57344 train_time:11340943ms step_avg:587.25ms +step:19313/57344 train_time:11340960ms step_avg:587.22ms +step:19314/57344 train_time:11341216ms step_avg:587.20ms +step:19315/57344 train_time:11341784ms step_avg:587.20ms +grad accum step:4829/14336 +step:19316/57344 train_time:11343063ms step_avg:587.24ms +step:19317/57344 train_time:11343080ms step_avg:587.21ms +step:19318/57344 train_time:11343332ms step_avg:587.19ms +step:19319/57344 train_time:11343889ms step_avg:587.19ms +grad accum step:4830/14336 +step:19320/57344 train_time:11345182ms step_avg:587.22ms +step:19321/57344 train_time:11345199ms step_avg:587.20ms +step:19322/57344 train_time:11345447ms step_avg:587.18ms +step:19323/57344 train_time:11345997ms step_avg:587.18ms +grad accum step:4831/14336 +step:19324/57344 train_time:11348250ms step_avg:587.26ms +step:19325/57344 train_time:11348262ms step_avg:587.23ms +step:19326/57344 train_time:11348480ms step_avg:587.21ms +step:19327/57344 train_time:11349022ms step_avg:587.21ms +grad accum step:4832/14336 +step:19328/57344 train_time:11350341ms step_avg:587.25ms +step:19328/57344 val_loss:7.000946 train_time:11350342ms step_avg:587.25ms +step:19329/57344 train_time:11350354ms step_avg:587.22ms +step:19330/57344 train_time:11350581ms step_avg:587.20ms +step:19331/57344 train_time:11351134ms step_avg:587.20ms +grad accum step:4833/14336 +step:19332/57344 train_time:11352410ms step_avg:587.23ms +step:19333/57344 train_time:11352427ms step_avg:587.20ms +step:19334/57344 train_time:11352678ms step_avg:587.19ms +step:19335/57344 train_time:11353254ms step_avg:587.19ms +grad accum step:4834/14336 +step:19336/57344 train_time:11354619ms step_avg:587.23ms +step:19337/57344 train_time:11354636ms step_avg:587.20ms +step:19338/57344 train_time:11354888ms step_avg:587.18ms +step:19339/57344 train_time:11355441ms step_avg:587.18ms +grad accum step:4835/14336 +step:19340/57344 train_time:11356727ms step_avg:587.21ms +step:19341/57344 train_time:11356744ms step_avg:587.18ms +step:19342/57344 train_time:11356995ms step_avg:587.17ms +step:19343/57344 train_time:11357552ms step_avg:587.17ms +grad accum step:4836/14336 +step:19344/57344 train_time:11358833ms step_avg:587.20ms +step:19345/57344 train_time:11358849ms step_avg:587.17ms +step:19346/57344 train_time:11359096ms step_avg:587.15ms +step:19347/57344 train_time:11359642ms step_avg:587.15ms +grad accum step:4837/14336 +step:19348/57344 train_time:11360973ms step_avg:587.19ms +step:19349/57344 train_time:11360990ms step_avg:587.16ms +step:19350/57344 train_time:11361244ms step_avg:587.14ms +step:19351/57344 train_time:11361804ms step_avg:587.14ms +grad accum step:4838/14336 +step:19352/57344 train_time:11363102ms step_avg:587.18ms +step:19353/57344 train_time:11363119ms step_avg:587.15ms +step:19354/57344 train_time:11363369ms step_avg:587.13ms +step:19355/57344 train_time:11363924ms step_avg:587.13ms +grad accum step:4839/14336 +step:19356/57344 train_time:11367173ms step_avg:587.27ms +step:19357/57344 train_time:11367696ms step_avg:587.27ms +step:19358/57344 train_time:11367754ms step_avg:587.24ms +step:19359/57344 train_time:11368293ms step_avg:587.24ms +grad accum step:4840/14336 +step:19360/57344 train_time:11369586ms step_avg:587.27ms +step:19361/57344 train_time:11369603ms step_avg:587.24ms +step:19362/57344 train_time:11369849ms step_avg:587.22ms +step:19363/57344 train_time:11370394ms step_avg:587.22ms +grad accum step:4841/14336 +step:19364/57344 train_time:11371695ms step_avg:587.26ms +step:19365/57344 train_time:11371712ms step_avg:587.23ms +step:19366/57344 train_time:11371958ms step_avg:587.21ms +step:19367/57344 train_time:11372504ms step_avg:587.21ms +grad accum step:4842/14336 +step:19368/57344 train_time:11373816ms step_avg:587.25ms +step:19369/57344 train_time:11373833ms step_avg:587.22ms +step:19370/57344 train_time:11374076ms step_avg:587.20ms +step:19371/57344 train_time:11374622ms step_avg:587.20ms +grad accum step:4843/14336 +step:19372/57344 train_time:11378367ms step_avg:587.36ms +step:19373/57344 train_time:11378384ms step_avg:587.33ms +step:19374/57344 train_time:11378649ms step_avg:587.32ms +step:19375/57344 train_time:11379245ms step_avg:587.32ms +grad accum step:4844/14336 +step:19376/57344 train_time:11380547ms step_avg:587.35ms +step:19377/57344 train_time:11380564ms step_avg:587.32ms +step:19378/57344 train_time:11380808ms step_avg:587.31ms +step:19379/57344 train_time:11381351ms step_avg:587.30ms +grad accum step:4845/14336 +step:19380/57344 train_time:11382668ms step_avg:587.34ms +step:19381/57344 train_time:11382685ms step_avg:587.31ms +step:19382/57344 train_time:11382936ms step_avg:587.29ms +step:19383/57344 train_time:11383492ms step_avg:587.29ms +grad accum step:4846/14336 +step:19384/57344 train_time:11384795ms step_avg:587.33ms +step:19385/57344 train_time:11384812ms step_avg:587.30ms +step:19386/57344 train_time:11385061ms step_avg:587.28ms +step:19387/57344 train_time:11385612ms step_avg:587.28ms +grad accum step:4847/14336 +step:19388/57344 train_time:11386894ms step_avg:587.32ms +step:19389/57344 train_time:11386911ms step_avg:587.29ms +step:19390/57344 train_time:11387161ms step_avg:587.27ms +step:19391/57344 train_time:11387717ms step_avg:587.27ms +grad accum step:4848/14336 +step:19392/57344 train_time:11388999ms step_avg:587.30ms +step:19392/57344 val_loss:6.998974 train_time:11389000ms step_avg:587.30ms +step:19393/57344 train_time:11389012ms step_avg:587.27ms +step:19394/57344 train_time:11389237ms step_avg:587.26ms +step:19395/57344 train_time:11389792ms step_avg:587.25ms +grad accum step:4849/14336 +step:19396/57344 train_time:11391097ms step_avg:587.29ms +step:19397/57344 train_time:11391114ms step_avg:587.26ms +step:19398/57344 train_time:11391355ms step_avg:587.24ms +step:19399/57344 train_time:11391888ms step_avg:587.24ms +grad accum step:4850/14336 +step:19400/57344 train_time:11393205ms step_avg:587.28ms +step:19401/57344 train_time:11393221ms step_avg:587.25ms +step:19402/57344 train_time:11393466ms step_avg:587.23ms +step:19403/57344 train_time:11394009ms step_avg:587.23ms +grad accum step:4851/14336 +step:19404/57344 train_time:11395347ms step_avg:587.27ms +step:19405/57344 train_time:11395363ms step_avg:587.24ms +step:19406/57344 train_time:11395616ms step_avg:587.22ms +step:19407/57344 train_time:11396176ms step_avg:587.22ms +grad accum step:4852/14336 +step:19408/57344 train_time:11397456ms step_avg:587.26ms +step:19409/57344 train_time:11397473ms step_avg:587.23ms +step:19410/57344 train_time:11397723ms step_avg:587.21ms +step:19411/57344 train_time:11398281ms step_avg:587.21ms +grad accum step:4853/14336 +step:19412/57344 train_time:11399578ms step_avg:587.24ms +step:19413/57344 train_time:11399595ms step_avg:587.21ms +step:19414/57344 train_time:11399840ms step_avg:587.20ms +step:19415/57344 train_time:11400391ms step_avg:587.20ms +grad accum step:4854/14336 +step:19416/57344 train_time:11401685ms step_avg:587.23ms +step:19417/57344 train_time:11401702ms step_avg:587.20ms +step:19418/57344 train_time:11401954ms step_avg:587.18ms +step:19419/57344 train_time:11402513ms step_avg:587.18ms +grad accum step:4855/14336 +step:19420/57344 train_time:11403821ms step_avg:587.22ms +step:19421/57344 train_time:11403838ms step_avg:587.19ms +step:19422/57344 train_time:11404087ms step_avg:587.17ms +step:19423/57344 train_time:11404651ms step_avg:587.17ms +grad accum step:4856/14336 +step:19424/57344 train_time:11405977ms step_avg:587.21ms +step:19425/57344 train_time:11405994ms step_avg:587.18ms +step:19426/57344 train_time:11406244ms step_avg:587.16ms +step:19427/57344 train_time:11406802ms step_avg:587.16ms +grad accum step:4857/14336 +step:19428/57344 train_time:11408098ms step_avg:587.20ms +step:19429/57344 train_time:11408115ms step_avg:587.17ms +step:19430/57344 train_time:11408361ms step_avg:587.15ms +step:19431/57344 train_time:11408906ms step_avg:587.15ms +grad accum step:4858/14336 +step:19432/57344 train_time:11410198ms step_avg:587.19ms +step:19433/57344 train_time:11410215ms step_avg:587.16ms +step:19434/57344 train_time:11410459ms step_avg:587.14ms +step:19435/57344 train_time:11411008ms step_avg:587.14ms +grad accum step:4859/14336 +step:19436/57344 train_time:11412302ms step_avg:587.17ms +step:19437/57344 train_time:11412318ms step_avg:587.14ms +step:19438/57344 train_time:11412566ms step_avg:587.13ms +step:19439/57344 train_time:11413115ms step_avg:587.12ms +grad accum step:4860/14336 +step:19440/57344 train_time:11414429ms step_avg:587.16ms +step:19441/57344 train_time:11414446ms step_avg:587.13ms +step:19442/57344 train_time:11414695ms step_avg:587.12ms +step:19443/57344 train_time:11415251ms step_avg:587.11ms +grad accum step:4861/14336 +step:19444/57344 train_time:11416570ms step_avg:587.15ms +step:19445/57344 train_time:11416586ms step_avg:587.12ms +step:19446/57344 train_time:11416833ms step_avg:587.10ms +step:19447/57344 train_time:11417377ms step_avg:587.10ms +grad accum step:4862/14336 +step:19448/57344 train_time:11418680ms step_avg:587.14ms +step:19449/57344 train_time:11418697ms step_avg:587.11ms +step:19450/57344 train_time:11418950ms step_avg:587.09ms +step:19451/57344 train_time:11419510ms step_avg:587.09ms +grad accum step:4863/14336 +step:19452/57344 train_time:11420876ms step_avg:587.13ms +step:19453/57344 train_time:11420893ms step_avg:587.10ms +step:19454/57344 train_time:11421135ms step_avg:587.08ms +step:19455/57344 train_time:11421680ms step_avg:587.08ms +grad accum step:4864/14336 +step:19456/57344 train_time:11422962ms step_avg:587.12ms +step:19456/57344 val_loss:7.010986 train_time:11422963ms step_avg:587.12ms +step:19457/57344 train_time:11422975ms step_avg:587.09ms +step:19458/57344 train_time:11423202ms step_avg:587.07ms +step:19459/57344 train_time:11423756ms step_avg:587.07ms +grad accum step:4865/14336 +step:19460/57344 train_time:11425063ms step_avg:587.10ms +step:19461/57344 train_time:11425080ms step_avg:587.08ms +step:19462/57344 train_time:11425326ms step_avg:587.06ms +step:19463/57344 train_time:11425885ms step_avg:587.06ms +grad accum step:4866/14336 +step:19464/57344 train_time:11427199ms step_avg:587.09ms +step:19465/57344 train_time:11427216ms step_avg:587.06ms +step:19466/57344 train_time:11427463ms step_avg:587.05ms +step:19467/57344 train_time:11428007ms step_avg:587.05ms +grad accum step:4867/14336 +step:19468/57344 train_time:11429290ms step_avg:587.08ms +step:19469/57344 train_time:11429307ms step_avg:587.05ms +step:19470/57344 train_time:11429553ms step_avg:587.03ms +step:19471/57344 train_time:11430104ms step_avg:587.03ms +grad accum step:4868/14336 +step:19472/57344 train_time:11431425ms step_avg:587.07ms +step:19473/57344 train_time:11431442ms step_avg:587.04ms +step:19474/57344 train_time:11431699ms step_avg:587.02ms +step:19475/57344 train_time:11432287ms step_avg:587.02ms +grad accum step:4869/14336 +step:19476/57344 train_time:11433661ms step_avg:587.06ms +step:19477/57344 train_time:11433678ms step_avg:587.03ms +step:19478/57344 train_time:11433928ms step_avg:587.02ms +step:19479/57344 train_time:11434478ms step_avg:587.02ms +grad accum step:4870/14336 +step:19480/57344 train_time:11435757ms step_avg:587.05ms +step:19481/57344 train_time:11435774ms step_avg:587.02ms +step:19482/57344 train_time:11436021ms step_avg:587.00ms +step:19483/57344 train_time:11436558ms step_avg:587.00ms +grad accum step:4871/14336 +step:19484/57344 train_time:11437833ms step_avg:587.04ms +step:19485/57344 train_time:11437850ms step_avg:587.01ms +step:19486/57344 train_time:11438106ms step_avg:586.99ms +step:19487/57344 train_time:11438666ms step_avg:586.99ms +grad accum step:4872/14336 +step:19488/57344 train_time:11439948ms step_avg:587.03ms +step:19489/57344 train_time:11439965ms step_avg:587.00ms +step:19490/57344 train_time:11440216ms step_avg:586.98ms +step:19491/57344 train_time:11440772ms step_avg:586.98ms +grad accum step:4873/14336 +step:19492/57344 train_time:11442063ms step_avg:587.01ms +step:19493/57344 train_time:11442080ms step_avg:586.98ms +step:19494/57344 train_time:11442325ms step_avg:586.97ms +step:19495/57344 train_time:11442871ms step_avg:586.96ms +grad accum step:4874/14336 +step:19496/57344 train_time:11444188ms step_avg:587.00ms +step:19497/57344 train_time:11444204ms step_avg:586.97ms +step:19498/57344 train_time:11444450ms step_avg:586.96ms +step:19499/57344 train_time:11444992ms step_avg:586.95ms +grad accum step:4875/14336 +step:19500/57344 train_time:11446306ms step_avg:586.99ms +step:19501/57344 train_time:11446323ms step_avg:586.96ms +step:19502/57344 train_time:11446574ms step_avg:586.94ms +step:19503/57344 train_time:11447133ms step_avg:586.94ms +grad accum step:4876/14336 +step:19504/57344 train_time:11448449ms step_avg:586.98ms +step:19505/57344 train_time:11448466ms step_avg:586.95ms +step:19506/57344 train_time:11448722ms step_avg:586.93ms +step:19507/57344 train_time:11449286ms step_avg:586.93ms +grad accum step:4877/14336 +step:19508/57344 train_time:11450605ms step_avg:586.97ms +step:19509/57344 train_time:11450621ms step_avg:586.94ms +step:19510/57344 train_time:11450872ms step_avg:586.92ms +step:19511/57344 train_time:11451428ms step_avg:586.92ms +grad accum step:4878/14336 +step:19512/57344 train_time:11452753ms step_avg:586.96ms +step:19513/57344 train_time:11452769ms step_avg:586.93ms +step:19514/57344 train_time:11453018ms step_avg:586.91ms +step:19515/57344 train_time:11453566ms step_avg:586.91ms +grad accum step:4879/14336 +step:19516/57344 train_time:11457026ms step_avg:587.06ms +step:19517/57344 train_time:11457043ms step_avg:587.03ms +step:19518/57344 train_time:11457298ms step_avg:587.01ms +step:19519/57344 train_time:11457868ms step_avg:587.01ms +grad accum step:4880/14336 +step:19520/57344 train_time:11459181ms step_avg:587.05ms +step:19520/57344 val_loss:7.025891 train_time:11459182ms step_avg:587.05ms +step:19521/57344 train_time:11459194ms step_avg:587.02ms +step:19522/57344 train_time:11459425ms step_avg:587.00ms +step:19523/57344 train_time:11459988ms step_avg:587.00ms +grad accum step:4881/14336 +step:19524/57344 train_time:11461316ms step_avg:587.04ms +step:19525/57344 train_time:11461333ms step_avg:587.01ms +step:19526/57344 train_time:11461579ms step_avg:586.99ms +step:19527/57344 train_time:11462128ms step_avg:586.99ms +grad accum step:4882/14336 +step:19528/57344 train_time:11463432ms step_avg:587.03ms +step:19529/57344 train_time:11463449ms step_avg:587.00ms +step:19530/57344 train_time:11463698ms step_avg:586.98ms +step:19531/57344 train_time:11464251ms step_avg:586.98ms +grad accum step:4883/14336 +step:19532/57344 train_time:11465553ms step_avg:587.01ms +step:19533/57344 train_time:11465570ms step_avg:586.98ms +step:19534/57344 train_time:11465815ms step_avg:586.97ms +step:19535/57344 train_time:11466357ms step_avg:586.96ms +grad accum step:4884/14336 +step:19536/57344 train_time:11467630ms step_avg:587.00ms +step:19537/57344 train_time:11467647ms step_avg:586.97ms +step:19538/57344 train_time:11467893ms step_avg:586.95ms +step:19539/57344 train_time:11468437ms step_avg:586.95ms +grad accum step:4885/14336 +step:19540/57344 train_time:11469719ms step_avg:586.99ms +step:19541/57344 train_time:11469735ms step_avg:586.96ms +step:19542/57344 train_time:11469982ms step_avg:586.94ms +step:19543/57344 train_time:11470524ms step_avg:586.94ms +grad accum step:4886/14336 +step:19544/57344 train_time:11471910ms step_avg:586.98ms +step:19545/57344 train_time:11471927ms step_avg:586.95ms +step:19546/57344 train_time:11472180ms step_avg:586.93ms +step:19547/57344 train_time:11472743ms step_avg:586.93ms +grad accum step:4887/14336 +step:19548/57344 train_time:11474049ms step_avg:586.97ms +step:19549/57344 train_time:11474065ms step_avg:586.94ms +step:19550/57344 train_time:11474312ms step_avg:586.92ms +step:19551/57344 train_time:11474856ms step_avg:586.92ms +grad accum step:4888/14336 +step:19552/57344 train_time:11476166ms step_avg:586.96ms +step:19553/57344 train_time:11476183ms step_avg:586.93ms +step:19554/57344 train_time:11476426ms step_avg:586.91ms +step:19555/57344 train_time:11476973ms step_avg:586.91ms +grad accum step:4889/14336 +step:19556/57344 train_time:11478256ms step_avg:586.94ms +step:19557/57344 train_time:11478273ms step_avg:586.91ms +step:19558/57344 train_time:11478522ms step_avg:586.90ms +step:19559/57344 train_time:11479065ms step_avg:586.89ms +grad accum step:4890/14336 +step:19560/57344 train_time:11480397ms step_avg:586.93ms +step:19561/57344 train_time:11480414ms step_avg:586.90ms +step:19562/57344 train_time:11480666ms step_avg:586.89ms +step:19563/57344 train_time:11481228ms step_avg:586.88ms +grad accum step:4891/14336 +step:19564/57344 train_time:11482547ms step_avg:586.92ms +step:19565/57344 train_time:11482563ms step_avg:586.89ms +step:19566/57344 train_time:11482811ms step_avg:586.88ms +step:19567/57344 train_time:11483357ms step_avg:586.87ms +grad accum step:4892/14336 +step:19568/57344 train_time:11484668ms step_avg:586.91ms +step:19569/57344 train_time:11484685ms step_avg:586.88ms +step:19570/57344 train_time:11484932ms step_avg:586.86ms +step:19571/57344 train_time:11485485ms step_avg:586.86ms +grad accum step:4893/14336 +step:19572/57344 train_time:11486811ms step_avg:586.90ms +step:19573/57344 train_time:11486828ms step_avg:586.87ms +step:19574/57344 train_time:11487072ms step_avg:586.85ms +step:19575/57344 train_time:11487617ms step_avg:586.85ms +grad accum step:4894/14336 +step:19576/57344 train_time:11488917ms step_avg:586.89ms +step:19577/57344 train_time:11488934ms step_avg:586.86ms +step:19578/57344 train_time:11489178ms step_avg:586.84ms +step:19579/57344 train_time:11489719ms step_avg:586.84ms +grad accum step:4895/14336 +step:19580/57344 train_time:11490997ms step_avg:586.87ms +step:19581/57344 train_time:11491014ms step_avg:586.85ms +step:19582/57344 train_time:11491265ms step_avg:586.83ms +step:19583/57344 train_time:11491822ms step_avg:586.83ms +grad accum step:4896/14336 +step:19584/57344 train_time:11498731ms step_avg:587.15ms +step:19584/57344 val_loss:6.998867 train_time:11498732ms step_avg:587.15ms +step:19585/57344 train_time:11498744ms step_avg:587.12ms +step:19586/57344 train_time:11498968ms step_avg:587.10ms +step:19587/57344 train_time:11499519ms step_avg:587.10ms +grad accum step:4897/14336 +step:19588/57344 train_time:11500797ms step_avg:587.13ms +step:19589/57344 train_time:11500814ms step_avg:587.11ms +step:19590/57344 train_time:11501059ms step_avg:587.09ms +step:19591/57344 train_time:11501623ms step_avg:587.09ms +grad accum step:4898/14336 +step:19592/57344 train_time:11502992ms step_avg:587.13ms +step:19593/57344 train_time:11503009ms step_avg:587.10ms +step:19594/57344 train_time:11503253ms step_avg:587.08ms +step:19595/57344 train_time:11503794ms step_avg:587.08ms +grad accum step:4899/14336 +step:19596/57344 train_time:11505136ms step_avg:587.12ms +step:19597/57344 train_time:11505153ms step_avg:587.09ms +step:19598/57344 train_time:11505397ms step_avg:587.07ms +step:19599/57344 train_time:11505940ms step_avg:587.07ms +grad accum step:4900/14336 +step:19600/57344 train_time:11507246ms step_avg:587.10ms +step:19601/57344 train_time:11507263ms step_avg:587.08ms +step:19602/57344 train_time:11507511ms step_avg:587.06ms +step:19603/57344 train_time:11508059ms step_avg:587.06ms +grad accum step:4901/14336 +step:19604/57344 train_time:11509352ms step_avg:587.09ms +step:19605/57344 train_time:11509369ms step_avg:587.06ms +step:19606/57344 train_time:11509615ms step_avg:587.05ms +step:19607/57344 train_time:11510164ms step_avg:587.04ms +grad accum step:4902/14336 +step:19608/57344 train_time:11511461ms step_avg:587.08ms +step:19609/57344 train_time:11511478ms step_avg:587.05ms +step:19610/57344 train_time:11511727ms step_avg:587.03ms +step:19611/57344 train_time:11512274ms step_avg:587.03ms +grad accum step:4903/14336 +step:19612/57344 train_time:11513551ms step_avg:587.07ms +step:19613/57344 train_time:11513568ms step_avg:587.04ms +step:19614/57344 train_time:11513816ms step_avg:587.02ms +step:19615/57344 train_time:11514372ms step_avg:587.02ms +grad accum step:4904/14336 +step:19616/57344 train_time:11515675ms step_avg:587.06ms +step:19617/57344 train_time:11515692ms step_avg:587.03ms +step:19618/57344 train_time:11515949ms step_avg:587.01ms +step:19619/57344 train_time:11516532ms step_avg:587.01ms +grad accum step:4905/14336 +step:19620/57344 train_time:11517861ms step_avg:587.05ms +step:19621/57344 train_time:11517878ms step_avg:587.02ms +step:19622/57344 train_time:11518124ms step_avg:587.00ms +step:19623/57344 train_time:11518670ms step_avg:587.00ms +grad accum step:4906/14336 +step:19624/57344 train_time:11519957ms step_avg:587.03ms +step:19625/57344 train_time:11519974ms step_avg:587.01ms +step:19626/57344 train_time:11520221ms step_avg:586.99ms +step:19627/57344 train_time:11520763ms step_avg:586.99ms +grad accum step:4907/14336 +step:19628/57344 train_time:11522054ms step_avg:587.02ms +step:19629/57344 train_time:11522071ms step_avg:586.99ms +step:19630/57344 train_time:11522320ms step_avg:586.98ms +step:19631/57344 train_time:11522877ms step_avg:586.97ms +grad accum step:4908/14336 +step:19632/57344 train_time:11524190ms step_avg:587.01ms +step:19633/57344 train_time:11524207ms step_avg:586.98ms +step:19634/57344 train_time:11524454ms step_avg:586.96ms +step:19635/57344 train_time:11525003ms step_avg:586.96ms +grad accum step:4909/14336 +step:19636/57344 train_time:11526355ms step_avg:587.00ms +step:19637/57344 train_time:11526372ms step_avg:586.97ms +step:19638/57344 train_time:11526638ms step_avg:586.96ms +step:19639/57344 train_time:11527236ms step_avg:586.96ms +grad accum step:4910/14336 +step:19640/57344 train_time:11528551ms step_avg:586.99ms +step:19641/57344 train_time:11528567ms step_avg:586.96ms +step:19642/57344 train_time:11528813ms step_avg:586.95ms +step:19643/57344 train_time:11529354ms step_avg:586.94ms +grad accum step:4911/14336 +step:19644/57344 train_time:11530630ms step_avg:586.98ms +step:19645/57344 train_time:11530648ms step_avg:586.95ms +step:19646/57344 train_time:11530895ms step_avg:586.93ms +step:19647/57344 train_time:11531445ms step_avg:586.93ms +grad accum step:4912/14336 +step:19648/57344 train_time:11532745ms step_avg:586.97ms +step:19648/57344 val_loss:7.006006 train_time:11532745ms step_avg:586.97ms +step:19649/57344 train_time:11532757ms step_avg:586.94ms +step:19650/57344 train_time:11532983ms step_avg:586.92ms +step:19651/57344 train_time:11533533ms step_avg:586.92ms +grad accum step:4913/14336 +step:19652/57344 train_time:11534809ms step_avg:586.95ms +step:19653/57344 train_time:11534826ms step_avg:586.92ms +step:19654/57344 train_time:11535077ms step_avg:586.91ms +step:19655/57344 train_time:11535633ms step_avg:586.91ms +grad accum step:4914/14336 +step:19656/57344 train_time:11536917ms step_avg:586.94ms +step:19657/57344 train_time:11536934ms step_avg:586.91ms +step:19658/57344 train_time:11537179ms step_avg:586.89ms +step:19659/57344 train_time:11537720ms step_avg:586.89ms +grad accum step:4915/14336 +step:19660/57344 train_time:11539041ms step_avg:586.93ms +step:19661/57344 train_time:11539058ms step_avg:586.90ms +step:19662/57344 train_time:11539306ms step_avg:586.88ms +step:19663/57344 train_time:11539855ms step_avg:586.88ms +grad accum step:4916/14336 +step:19664/57344 train_time:11541206ms step_avg:586.92ms +step:19665/57344 train_time:11541223ms step_avg:586.89ms +step:19666/57344 train_time:11541477ms step_avg:586.87ms +step:19667/57344 train_time:11542044ms step_avg:586.87ms +grad accum step:4917/14336 +step:19668/57344 train_time:11543343ms step_avg:586.91ms +step:19669/57344 train_time:11543360ms step_avg:586.88ms +step:19670/57344 train_time:11543608ms step_avg:586.86ms +step:19671/57344 train_time:11544162ms step_avg:586.86ms +grad accum step:4918/14336 +step:19672/57344 train_time:11545462ms step_avg:586.90ms +step:19673/57344 train_time:11545479ms step_avg:586.87ms +step:19674/57344 train_time:11545725ms step_avg:586.85ms +step:19675/57344 train_time:11546272ms step_avg:586.85ms +grad accum step:4919/14336 +step:19676/57344 train_time:11547587ms step_avg:586.89ms +step:19677/57344 train_time:11547604ms step_avg:586.86ms +step:19678/57344 train_time:11547848ms step_avg:586.84ms +step:19679/57344 train_time:11548391ms step_avg:586.84ms +grad accum step:4920/14336 +step:19680/57344 train_time:11549681ms step_avg:586.87ms +step:19681/57344 train_time:11549698ms step_avg:586.85ms +step:19682/57344 train_time:11549940ms step_avg:586.83ms +step:19683/57344 train_time:11550478ms step_avg:586.83ms +grad accum step:4921/14336 +step:19684/57344 train_time:11551800ms step_avg:586.86ms +step:19685/57344 train_time:11551817ms step_avg:586.83ms +step:19686/57344 train_time:11552066ms step_avg:586.82ms +step:19687/57344 train_time:11552633ms step_avg:586.82ms +grad accum step:4922/14336 +step:19688/57344 train_time:11554003ms step_avg:586.86ms +step:19689/57344 train_time:11554020ms step_avg:586.83ms +step:19690/57344 train_time:11554274ms step_avg:586.81ms +step:19691/57344 train_time:11554835ms step_avg:586.81ms +grad accum step:4923/14336 +step:19692/57344 train_time:11556177ms step_avg:586.85ms +step:19693/57344 train_time:11556194ms step_avg:586.82ms +step:19694/57344 train_time:11556461ms step_avg:586.80ms +step:19695/57344 train_time:11557078ms step_avg:586.80ms +grad accum step:4924/14336 +step:19696/57344 train_time:11558474ms step_avg:586.84ms +step:19697/57344 train_time:11558491ms step_avg:586.81ms +step:19698/57344 train_time:11558735ms step_avg:586.80ms +step:19699/57344 train_time:11559277ms step_avg:586.80ms +grad accum step:4925/14336 +step:19700/57344 train_time:11560557ms step_avg:586.83ms +step:19701/57344 train_time:11560575ms step_avg:586.80ms +step:19702/57344 train_time:11560822ms step_avg:586.78ms +step:19703/57344 train_time:11561369ms step_avg:586.78ms +grad accum step:4926/14336 +step:19704/57344 train_time:11562672ms step_avg:586.82ms +step:19705/57344 train_time:11562689ms step_avg:586.79ms +step:19706/57344 train_time:11562936ms step_avg:586.77ms +step:19707/57344 train_time:11563483ms step_avg:586.77ms +grad accum step:4927/14336 +step:19708/57344 train_time:11564786ms step_avg:586.81ms +step:19709/57344 train_time:11564802ms step_avg:586.78ms +step:19710/57344 train_time:11565045ms step_avg:586.76ms +step:19711/57344 train_time:11565587ms step_avg:586.76ms +grad accum step:4928/14336 +step:19712/57344 train_time:11566904ms step_avg:586.80ms +step:19712/57344 val_loss:7.039533 train_time:11566905ms step_avg:586.80ms +step:19713/57344 train_time:11566917ms step_avg:586.77ms +step:19714/57344 train_time:11567140ms step_avg:586.75ms +step:19715/57344 train_time:11567700ms step_avg:586.75ms +grad accum step:4929/14336 +step:19716/57344 train_time:11569120ms step_avg:586.79ms +step:19717/57344 train_time:11569137ms step_avg:586.76ms +step:19718/57344 train_time:11569388ms step_avg:586.74ms +step:19719/57344 train_time:11569944ms step_avg:586.74ms +grad accum step:4930/14336 +step:19720/57344 train_time:11571283ms step_avg:586.78ms +step:19721/57344 train_time:11571299ms step_avg:586.75ms +step:19722/57344 train_time:11571545ms step_avg:586.73ms +step:19723/57344 train_time:11572100ms step_avg:586.73ms +grad accum step:4931/14336 +step:19724/57344 train_time:11573397ms step_avg:586.77ms +step:19725/57344 train_time:11573414ms step_avg:586.74ms +step:19726/57344 train_time:11573663ms step_avg:586.72ms +step:19727/57344 train_time:11574217ms step_avg:586.72ms +grad accum step:4932/14336 +step:19728/57344 train_time:11575528ms step_avg:586.76ms +step:19729/57344 train_time:11575545ms step_avg:586.73ms +step:19730/57344 train_time:11575795ms step_avg:586.71ms +step:19731/57344 train_time:11576352ms step_avg:586.71ms +grad accum step:4933/14336 +step:19732/57344 train_time:11577663ms step_avg:586.75ms +step:19733/57344 train_time:11577680ms step_avg:586.72ms +step:19734/57344 train_time:11577929ms step_avg:586.70ms +step:19735/57344 train_time:11578487ms step_avg:586.70ms +grad accum step:4934/14336 +step:19736/57344 train_time:11579847ms step_avg:586.74ms +step:19737/57344 train_time:11579863ms step_avg:586.71ms +step:19738/57344 train_time:11580107ms step_avg:586.69ms +step:19739/57344 train_time:11580648ms step_avg:586.69ms +grad accum step:4935/14336 +step:19740/57344 train_time:11581957ms step_avg:586.73ms +step:19741/57344 train_time:11581974ms step_avg:586.70ms +step:19742/57344 train_time:11582228ms step_avg:586.68ms +step:19743/57344 train_time:11582799ms step_avg:586.68ms +grad accum step:4936/14336 +step:19744/57344 train_time:11584133ms step_avg:586.72ms +step:19745/57344 train_time:11584150ms step_avg:586.69ms +step:19746/57344 train_time:11584397ms step_avg:586.67ms +step:19747/57344 train_time:11584946ms step_avg:586.67ms +grad accum step:4937/14336 +step:19748/57344 train_time:11586240ms step_avg:586.70ms +step:19749/57344 train_time:11586258ms step_avg:586.68ms +step:19750/57344 train_time:11586504ms step_avg:586.66ms +step:19751/57344 train_time:11587058ms step_avg:586.66ms +grad accum step:4938/14336 +step:19752/57344 train_time:11588459ms step_avg:586.70ms +step:19753/57344 train_time:11588475ms step_avg:586.67ms +step:19754/57344 train_time:11588726ms step_avg:586.65ms +step:19755/57344 train_time:11589289ms step_avg:586.65ms +grad accum step:4939/14336 +step:19756/57344 train_time:11590629ms step_avg:586.69ms +step:19757/57344 train_time:11590646ms step_avg:586.66ms +step:19758/57344 train_time:11590892ms step_avg:586.64ms +step:19759/57344 train_time:11591442ms step_avg:586.64ms +grad accum step:4940/14336 +step:19760/57344 train_time:11592740ms step_avg:586.68ms +step:19761/57344 train_time:11592757ms step_avg:586.65ms +step:19762/57344 train_time:11593007ms step_avg:586.63ms +step:19763/57344 train_time:11593561ms step_avg:586.63ms +grad accum step:4941/14336 +step:19764/57344 train_time:11594885ms step_avg:586.67ms +step:19765/57344 train_time:11594902ms step_avg:586.64ms +step:19766/57344 train_time:11595151ms step_avg:586.62ms +step:19767/57344 train_time:11595700ms step_avg:586.62ms +grad accum step:4942/14336 +step:19768/57344 train_time:11596981ms step_avg:586.65ms +step:19769/57344 train_time:11596998ms step_avg:586.63ms +step:19770/57344 train_time:11597245ms step_avg:586.61ms +step:19771/57344 train_time:11597797ms step_avg:586.61ms +grad accum step:4943/14336 +step:19772/57344 train_time:11599117ms step_avg:586.64ms +step:19773/57344 train_time:11599134ms step_avg:586.61ms +step:19774/57344 train_time:11599377ms step_avg:586.60ms +step:19775/57344 train_time:11599914ms step_avg:586.59ms +grad accum step:4944/14336 +step:19776/57344 train_time:11601196ms step_avg:586.63ms +step:19776/57344 val_loss:7.015204 train_time:11601196ms step_avg:586.63ms +step:19777/57344 train_time:11601208ms step_avg:586.60ms +step:19778/57344 train_time:11601430ms step_avg:586.58ms +step:19779/57344 train_time:11601970ms step_avg:586.58ms +grad accum step:4945/14336 +step:19780/57344 train_time:11603255ms step_avg:586.62ms +step:19781/57344 train_time:11603272ms step_avg:586.59ms +step:19782/57344 train_time:11603525ms step_avg:586.57ms +step:19783/57344 train_time:11604091ms step_avg:586.57ms +grad accum step:4946/14336 +step:19784/57344 train_time:11605408ms step_avg:586.61ms +step:19785/57344 train_time:11605425ms step_avg:586.58ms +step:19786/57344 train_time:11605672ms step_avg:586.56ms +step:19787/57344 train_time:11606221ms step_avg:586.56ms +grad accum step:4947/14336 +step:19788/57344 train_time:11607520ms step_avg:586.59ms +step:19789/57344 train_time:11607537ms step_avg:586.57ms +step:19790/57344 train_time:11607780ms step_avg:586.55ms +step:19791/57344 train_time:11608330ms step_avg:586.55ms +grad accum step:4948/14336 +step:19792/57344 train_time:11609622ms step_avg:586.58ms +step:19793/57344 train_time:11609640ms step_avg:586.55ms +step:19794/57344 train_time:11609884ms step_avg:586.54ms +step:19795/57344 train_time:11610434ms step_avg:586.53ms +grad accum step:4949/14336 +step:19796/57344 train_time:11611783ms step_avg:586.57ms +step:19797/57344 train_time:11611800ms step_avg:586.54ms +step:19798/57344 train_time:11612047ms step_avg:586.53ms +step:19799/57344 train_time:11612606ms step_avg:586.52ms +grad accum step:4950/14336 +step:19800/57344 train_time:11613941ms step_avg:586.56ms +step:19801/57344 train_time:11613958ms step_avg:586.53ms +step:19802/57344 train_time:11614204ms step_avg:586.52ms +step:19803/57344 train_time:11614748ms step_avg:586.51ms +grad accum step:4951/14336 +step:19804/57344 train_time:11616053ms step_avg:586.55ms +step:19805/57344 train_time:11616070ms step_avg:586.52ms +step:19806/57344 train_time:11616318ms step_avg:586.51ms +step:19807/57344 train_time:11616862ms step_avg:586.50ms +grad accum step:4952/14336 +step:19808/57344 train_time:11618158ms step_avg:586.54ms +step:19809/57344 train_time:11618175ms step_avg:586.51ms +step:19810/57344 train_time:11618421ms step_avg:586.49ms +step:19811/57344 train_time:11618971ms step_avg:586.49ms +grad accum step:4953/14336 +step:19812/57344 train_time:11620254ms step_avg:586.53ms +step:19813/57344 train_time:11620271ms step_avg:586.50ms +step:19814/57344 train_time:11620523ms step_avg:586.48ms +step:19815/57344 train_time:11621082ms step_avg:586.48ms +grad accum step:4954/14336 +step:19816/57344 train_time:11622440ms step_avg:586.52ms +step:19817/57344 train_time:11622457ms step_avg:586.49ms +step:19818/57344 train_time:11622700ms step_avg:586.47ms +step:19819/57344 train_time:11623245ms step_avg:586.47ms +grad accum step:4955/14336 +step:19820/57344 train_time:11624553ms step_avg:586.51ms +step:19821/57344 train_time:11624569ms step_avg:586.48ms +step:19822/57344 train_time:11624815ms step_avg:586.46ms +step:19823/57344 train_time:11625366ms step_avg:586.46ms +grad accum step:4956/14336 +step:19824/57344 train_time:11626698ms step_avg:586.50ms +step:19825/57344 train_time:11626715ms step_avg:586.47ms +step:19826/57344 train_time:11626973ms step_avg:586.45ms +step:19827/57344 train_time:11627547ms step_avg:586.45ms +grad accum step:4957/14336 +step:19828/57344 train_time:11628854ms step_avg:586.49ms +step:19829/57344 train_time:11628871ms step_avg:586.46ms +step:19830/57344 train_time:11629115ms step_avg:586.44ms +step:19831/57344 train_time:11629656ms step_avg:586.44ms +grad accum step:4958/14336 +step:19832/57344 train_time:11630951ms step_avg:586.47ms +step:19833/57344 train_time:11630968ms step_avg:586.45ms +step:19834/57344 train_time:11631216ms step_avg:586.43ms +step:19835/57344 train_time:11631763ms step_avg:586.43ms +grad accum step:4959/14336 +step:19836/57344 train_time:11633055ms step_avg:586.46ms +step:19837/57344 train_time:11633072ms step_avg:586.43ms +step:19838/57344 train_time:11633325ms step_avg:586.42ms +step:19839/57344 train_time:11633894ms step_avg:586.42ms +grad accum step:4960/14336 +step:19840/57344 train_time:11635205ms step_avg:586.45ms +step:19840/57344 val_loss:7.008238 train_time:11635205ms step_avg:586.45ms +step:19841/57344 train_time:11635217ms step_avg:586.42ms +step:19842/57344 train_time:11635439ms step_avg:586.40ms +step:19843/57344 train_time:11635986ms step_avg:586.40ms +grad accum step:4961/14336 +step:19844/57344 train_time:11637304ms step_avg:586.44ms +step:19845/57344 train_time:11637321ms step_avg:586.41ms +step:19846/57344 train_time:11637570ms step_avg:586.39ms +step:19847/57344 train_time:11638116ms step_avg:586.39ms +grad accum step:4962/14336 +step:19848/57344 train_time:11639408ms step_avg:586.43ms +step:19849/57344 train_time:11639425ms step_avg:586.40ms +step:19850/57344 train_time:11639675ms step_avg:586.38ms +step:19851/57344 train_time:11640231ms step_avg:586.38ms +grad accum step:4963/14336 +step:19852/57344 train_time:11641556ms step_avg:586.42ms +step:19853/57344 train_time:11641573ms step_avg:586.39ms +step:19854/57344 train_time:11641823ms step_avg:586.37ms +step:19855/57344 train_time:11642380ms step_avg:586.37ms +grad accum step:4964/14336 +step:19856/57344 train_time:11643660ms step_avg:586.41ms +step:19857/57344 train_time:11643677ms step_avg:586.38ms +step:19858/57344 train_time:11643924ms step_avg:586.36ms +step:19859/57344 train_time:11644473ms step_avg:586.36ms +grad accum step:4965/14336 +step:19860/57344 train_time:11645792ms step_avg:586.39ms +step:19861/57344 train_time:11645809ms step_avg:586.37ms +step:19862/57344 train_time:11646058ms step_avg:586.35ms +step:19863/57344 train_time:11646601ms step_avg:586.35ms +grad accum step:4966/14336 +step:19864/57344 train_time:11647880ms step_avg:586.38ms +step:19865/57344 train_time:11647897ms step_avg:586.35ms +step:19866/57344 train_time:11648149ms step_avg:586.34ms +step:19867/57344 train_time:11648730ms step_avg:586.34ms +grad accum step:4967/14336 +step:19868/57344 train_time:11650068ms step_avg:586.37ms +step:19869/57344 train_time:11650085ms step_avg:586.34ms +step:19870/57344 train_time:11650332ms step_avg:586.33ms +step:19871/57344 train_time:11650881ms step_avg:586.33ms +grad accum step:4968/14336 +step:19872/57344 train_time:11652186ms step_avg:586.36ms +step:19873/57344 train_time:11652203ms step_avg:586.33ms +step:19874/57344 train_time:11652450ms step_avg:586.32ms +step:19875/57344 train_time:11652997ms step_avg:586.31ms +grad accum step:4969/14336 +step:19876/57344 train_time:11654288ms step_avg:586.35ms +step:19877/57344 train_time:11654305ms step_avg:586.32ms +step:19878/57344 train_time:11654556ms step_avg:586.30ms +step:19879/57344 train_time:11655112ms step_avg:586.30ms +grad accum step:4970/14336 +step:19880/57344 train_time:11656408ms step_avg:586.34ms +step:19881/57344 train_time:11656425ms step_avg:586.31ms +step:19882/57344 train_time:11656674ms step_avg:586.29ms +step:19883/57344 train_time:11657232ms step_avg:586.29ms +grad accum step:4971/14336 +step:19884/57344 train_time:11658526ms step_avg:586.33ms +step:19885/57344 train_time:11658543ms step_avg:586.30ms +step:19886/57344 train_time:11658785ms step_avg:586.28ms +step:19887/57344 train_time:11659328ms step_avg:586.28ms +grad accum step:4972/14336 +step:19888/57344 train_time:11660606ms step_avg:586.31ms +step:19889/57344 train_time:11660623ms step_avg:586.29ms +step:19890/57344 train_time:11660873ms step_avg:586.27ms +step:19891/57344 train_time:11661423ms step_avg:586.27ms +grad accum step:4973/14336 +step:19892/57344 train_time:11662709ms step_avg:586.30ms +step:19893/57344 train_time:11662726ms step_avg:586.27ms +step:19894/57344 train_time:11662972ms step_avg:586.26ms +step:19895/57344 train_time:11663518ms step_avg:586.25ms +grad accum step:4974/14336 +step:19896/57344 train_time:11664794ms step_avg:586.29ms +step:19897/57344 train_time:11664811ms step_avg:586.26ms +step:19898/57344 train_time:11665056ms step_avg:586.24ms +step:19899/57344 train_time:11665602ms step_avg:586.24ms +grad accum step:4975/14336 +step:19900/57344 train_time:11666920ms step_avg:586.28ms +step:19901/57344 train_time:11666936ms step_avg:586.25ms +step:19902/57344 train_time:11667180ms step_avg:586.23ms +step:19903/57344 train_time:11667723ms step_avg:586.23ms +grad accum step:4976/14336 +step:19904/57344 train_time:11669015ms step_avg:586.26ms +step:19904/57344 val_loss:7.005112 train_time:11669015ms step_avg:586.26ms +step:19905/57344 train_time:11669027ms step_avg:586.24ms +step:19906/57344 train_time:11669253ms step_avg:586.22ms +step:19907/57344 train_time:11669799ms step_avg:586.22ms +grad accum step:4977/14336 +step:19908/57344 train_time:11671117ms step_avg:586.25ms +step:19909/57344 train_time:11671133ms step_avg:586.22ms +step:19910/57344 train_time:11671384ms step_avg:586.21ms +step:19911/57344 train_time:11671941ms step_avg:586.21ms +grad accum step:4978/14336 +step:19912/57344 train_time:11673242ms step_avg:586.24ms +step:19913/57344 train_time:11673259ms step_avg:586.21ms +step:19914/57344 train_time:11673503ms step_avg:586.20ms +step:19915/57344 train_time:11674050ms step_avg:586.19ms +grad accum step:4979/14336 +step:19916/57344 train_time:11675347ms step_avg:586.23ms +step:19917/57344 train_time:11675364ms step_avg:586.20ms +step:19918/57344 train_time:11675608ms step_avg:586.18ms +step:19919/57344 train_time:11676154ms step_avg:586.18ms +grad accum step:4980/14336 +step:19920/57344 train_time:11677458ms step_avg:586.22ms +step:19921/57344 train_time:11677475ms step_avg:586.19ms +step:19922/57344 train_time:11677724ms step_avg:586.17ms +step:19923/57344 train_time:11678271ms step_avg:586.17ms +grad accum step:4981/14336 +step:19924/57344 train_time:11679583ms step_avg:586.21ms +step:19925/57344 train_time:11679600ms step_avg:586.18ms +step:19926/57344 train_time:11679846ms step_avg:586.16ms +step:19927/57344 train_time:11680401ms step_avg:586.16ms +grad accum step:4982/14336 +step:19928/57344 train_time:11681759ms step_avg:586.20ms +step:19929/57344 train_time:11681775ms step_avg:586.17ms +step:19930/57344 train_time:11682025ms step_avg:586.15ms +step:19931/57344 train_time:11682578ms step_avg:586.15ms +grad accum step:4983/14336 +step:19932/57344 train_time:11683890ms step_avg:586.19ms +step:19933/57344 train_time:11683907ms step_avg:586.16ms +step:19934/57344 train_time:11684151ms step_avg:586.14ms +step:19935/57344 train_time:11684699ms step_avg:586.14ms +grad accum step:4984/14336 +step:19936/57344 train_time:11686000ms step_avg:586.18ms +step:19937/57344 train_time:11686017ms step_avg:586.15ms +step:19938/57344 train_time:11686262ms step_avg:586.13ms +step:19939/57344 train_time:11686811ms step_avg:586.13ms +grad accum step:4985/14336 +step:19940/57344 train_time:11688148ms step_avg:586.17ms +step:19941/57344 train_time:11688165ms step_avg:586.14ms +step:19942/57344 train_time:11688408ms step_avg:586.12ms +step:19943/57344 train_time:11688959ms step_avg:586.12ms +grad accum step:4986/14336 +step:19944/57344 train_time:11690278ms step_avg:586.16ms +step:19945/57344 train_time:11690294ms step_avg:586.13ms +step:19946/57344 train_time:11690545ms step_avg:586.11ms +step:19947/57344 train_time:11691102ms step_avg:586.11ms +grad accum step:4987/14336 +step:19948/57344 train_time:11692428ms step_avg:586.15ms +step:19949/57344 train_time:11692445ms step_avg:586.12ms +step:19950/57344 train_time:11692702ms step_avg:586.10ms +step:19951/57344 train_time:11693282ms step_avg:586.10ms +grad accum step:4988/14336 +step:19952/57344 train_time:11694608ms step_avg:586.14ms +step:19953/57344 train_time:11694625ms step_avg:586.11ms +step:19954/57344 train_time:11694870ms step_avg:586.09ms +step:19955/57344 train_time:11695413ms step_avg:586.09ms +grad accum step:4989/14336 +step:19956/57344 train_time:11696687ms step_avg:586.12ms +step:19957/57344 train_time:11696704ms step_avg:586.10ms +step:19958/57344 train_time:11696953ms step_avg:586.08ms +step:19959/57344 train_time:11697516ms step_avg:586.08ms +grad accum step:4990/14336 +step:19960/57344 train_time:11698830ms step_avg:586.11ms +step:19961/57344 train_time:11698847ms step_avg:586.09ms +step:19962/57344 train_time:11699089ms step_avg:586.07ms +step:19963/57344 train_time:11699637ms step_avg:586.07ms +grad accum step:4991/14336 +step:19964/57344 train_time:11700926ms step_avg:586.10ms +step:19965/57344 train_time:11700943ms step_avg:586.07ms +step:19966/57344 train_time:11701191ms step_avg:586.06ms +step:19967/57344 train_time:11701743ms step_avg:586.05ms +grad accum step:4992/14336 +step:19968/57344 train_time:11703060ms step_avg:586.09ms +step:19968/57344 val_loss:7.013822 train_time:11703061ms step_avg:586.09ms +step:19969/57344 train_time:11703073ms step_avg:586.06ms +step:19970/57344 train_time:11703297ms step_avg:586.04ms +step:19971/57344 train_time:11703851ms step_avg:586.04ms +grad accum step:4993/14336 +step:19972/57344 train_time:11705174ms step_avg:586.08ms +step:19973/57344 train_time:11705191ms step_avg:586.05ms +step:19974/57344 train_time:11705441ms step_avg:586.03ms +step:19975/57344 train_time:11705998ms step_avg:586.03ms +grad accum step:4994/14336 +step:19976/57344 train_time:11707303ms step_avg:586.07ms +step:19977/57344 train_time:11707320ms step_avg:586.04ms +step:19978/57344 train_time:11707566ms step_avg:586.02ms +step:19979/57344 train_time:11708113ms step_avg:586.02ms +grad accum step:4995/14336 +step:19980/57344 train_time:11709424ms step_avg:586.06ms +step:19981/57344 train_time:11709441ms step_avg:586.03ms +step:19982/57344 train_time:11709692ms step_avg:586.01ms +step:19983/57344 train_time:11710248ms step_avg:586.01ms +grad accum step:4996/14336 +step:19984/57344 train_time:11711545ms step_avg:586.05ms +step:19985/57344 train_time:11711562ms step_avg:586.02ms +step:19986/57344 train_time:11711807ms step_avg:586.00ms +step:19987/57344 train_time:11712353ms step_avg:586.00ms +grad accum step:4997/14336 +step:19988/57344 train_time:11713658ms step_avg:586.03ms +step:19989/57344 train_time:11713675ms step_avg:586.01ms +step:19990/57344 train_time:11713922ms step_avg:585.99ms +step:19991/57344 train_time:11714474ms step_avg:585.99ms +grad accum step:4998/14336 +step:19992/57344 train_time:11715779ms step_avg:586.02ms +step:19993/57344 train_time:11715797ms step_avg:585.99ms +step:19994/57344 train_time:11716046ms step_avg:585.98ms +step:19995/57344 train_time:11716596ms step_avg:585.98ms +grad accum step:4999/14336 +step:19996/57344 train_time:11717889ms step_avg:586.01ms +step:19997/57344 train_time:11717906ms step_avg:585.98ms +step:19998/57344 train_time:11718160ms step_avg:585.97ms +step:19999/57344 train_time:11718727ms step_avg:585.97ms +grad accum step:5000/14336 +step:20000/57344 train_time:11720039ms step_avg:586.00ms +step:20001/57344 train_time:11720056ms step_avg:585.97ms +step:20002/57344 train_time:11720301ms step_avg:585.96ms +step:20003/57344 train_time:11720845ms step_avg:585.95ms +grad accum step:5001/14336 +step:20004/57344 train_time:11722137ms step_avg:585.99ms +step:20005/57344 train_time:11722154ms step_avg:585.96ms +step:20006/57344 train_time:11722405ms step_avg:585.94ms +step:20007/57344 train_time:11722966ms step_avg:585.94ms +grad accum step:5002/14336 +step:20008/57344 train_time:11724244ms step_avg:585.98ms +step:20009/57344 train_time:11724261ms step_avg:585.95ms +step:20010/57344 train_time:11724510ms step_avg:585.93ms +step:20011/57344 train_time:11725058ms step_avg:585.93ms +grad accum step:5003/14336 +step:20012/57344 train_time:11726389ms step_avg:585.97ms +step:20013/57344 train_time:11726406ms step_avg:585.94ms +step:20014/57344 train_time:11726651ms step_avg:585.92ms +step:20015/57344 train_time:11727206ms step_avg:585.92ms +grad accum step:5004/14336 +step:20016/57344 train_time:11728526ms step_avg:585.96ms +step:20017/57344 train_time:11728543ms step_avg:585.93ms +step:20018/57344 train_time:11728788ms step_avg:585.91ms +step:20019/57344 train_time:11729347ms step_avg:585.91ms +grad accum step:5005/14336 +step:20020/57344 train_time:11730690ms step_avg:585.95ms +step:20021/57344 train_time:11730707ms step_avg:585.92ms +step:20022/57344 train_time:11730952ms step_avg:585.90ms +step:20023/57344 train_time:11731501ms step_avg:585.90ms +grad accum step:5006/14336 +step:20024/57344 train_time:11732822ms step_avg:585.94ms +step:20025/57344 train_time:11732838ms step_avg:585.91ms +step:20026/57344 train_time:11733081ms step_avg:585.89ms +step:20027/57344 train_time:11733621ms step_avg:585.89ms +grad accum step:5007/14336 +step:20028/57344 train_time:11734919ms step_avg:585.93ms +step:20029/57344 train_time:11734936ms step_avg:585.90ms +step:20030/57344 train_time:11735185ms step_avg:585.88ms +step:20031/57344 train_time:11735733ms step_avg:585.88ms +grad accum step:5008/14336 +step:20032/57344 train_time:11737041ms step_avg:585.91ms +step:20032/57344 val_loss:7.006802 train_time:11737042ms step_avg:585.91ms +step:20033/57344 train_time:11737054ms step_avg:585.89ms +step:20034/57344 train_time:11737279ms step_avg:585.87ms +step:20035/57344 train_time:11737834ms step_avg:585.87ms +grad accum step:5009/14336 +step:20036/57344 train_time:11739160ms step_avg:585.90ms +step:20037/57344 train_time:11739177ms step_avg:585.87ms +step:20038/57344 train_time:11739443ms step_avg:585.86ms +step:20039/57344 train_time:11740037ms step_avg:585.86ms +grad accum step:5010/14336 +step:20040/57344 train_time:11741389ms step_avg:585.90ms +step:20041/57344 train_time:11741406ms step_avg:585.87ms +step:20042/57344 train_time:11741661ms step_avg:585.85ms +step:20043/57344 train_time:11742243ms step_avg:585.85ms +grad accum step:5011/14336 +step:20044/57344 train_time:11743560ms step_avg:585.89ms +step:20045/57344 train_time:11743577ms step_avg:585.86ms +step:20046/57344 train_time:11743827ms step_avg:585.84ms +step:20047/57344 train_time:11744386ms step_avg:585.84ms +grad accum step:5012/14336 +step:20048/57344 train_time:11745685ms step_avg:585.88ms +step:20049/57344 train_time:11745702ms step_avg:585.85ms +step:20050/57344 train_time:11745948ms step_avg:585.83ms +step:20051/57344 train_time:11746498ms step_avg:585.83ms +grad accum step:5013/14336 +step:20052/57344 train_time:11747796ms step_avg:585.87ms +step:20053/57344 train_time:11747813ms step_avg:585.84ms +step:20054/57344 train_time:11748060ms step_avg:585.82ms +step:20055/57344 train_time:11748603ms step_avg:585.82ms +grad accum step:5014/14336 +step:20056/57344 train_time:11749903ms step_avg:585.85ms +step:20057/57344 train_time:11749920ms step_avg:585.83ms +step:20058/57344 train_time:11750172ms step_avg:585.81ms +step:20059/57344 train_time:11750738ms step_avg:585.81ms +grad accum step:5015/14336 +step:20060/57344 train_time:11752064ms step_avg:585.85ms +step:20061/57344 train_time:11752081ms step_avg:585.82ms +step:20062/57344 train_time:11752327ms step_avg:585.80ms +step:20063/57344 train_time:11752871ms step_avg:585.80ms +grad accum step:5016/14336 +step:20064/57344 train_time:11754172ms step_avg:585.83ms +step:20065/57344 train_time:11754189ms step_avg:585.81ms +step:20066/57344 train_time:11754446ms step_avg:585.79ms +step:20067/57344 train_time:11755016ms step_avg:585.79ms +grad accum step:5017/14336 +step:20068/57344 train_time:11756292ms step_avg:585.82ms +step:20069/57344 train_time:11756309ms step_avg:585.79ms +step:20070/57344 train_time:11756554ms step_avg:585.78ms +step:20071/57344 train_time:11757104ms step_avg:585.78ms +grad accum step:5018/14336 +step:20072/57344 train_time:11758418ms step_avg:585.81ms +step:20073/57344 train_time:11758435ms step_avg:585.78ms +step:20074/57344 train_time:11758679ms step_avg:585.77ms +step:20075/57344 train_time:11759222ms step_avg:585.76ms +grad accum step:5019/14336 +step:20076/57344 train_time:11760538ms step_avg:585.80ms +step:20077/57344 train_time:11760555ms step_avg:585.77ms +step:20078/57344 train_time:11760799ms step_avg:585.76ms +step:20079/57344 train_time:11761346ms step_avg:585.75ms +grad accum step:5020/14336 +step:20080/57344 train_time:11762651ms step_avg:585.79ms +step:20081/57344 train_time:11762668ms step_avg:585.76ms +step:20082/57344 train_time:11762917ms step_avg:585.74ms +step:20083/57344 train_time:11763481ms step_avg:585.74ms +grad accum step:5021/14336 +step:20084/57344 train_time:11764814ms step_avg:585.78ms +step:20085/57344 train_time:11764831ms step_avg:585.75ms +step:20086/57344 train_time:11765074ms step_avg:585.74ms +step:20087/57344 train_time:11765620ms step_avg:585.73ms +grad accum step:5022/14336 +step:20088/57344 train_time:11766944ms step_avg:585.77ms +step:20089/57344 train_time:11766961ms step_avg:585.74ms +step:20090/57344 train_time:11767221ms step_avg:585.73ms +step:20091/57344 train_time:11767802ms step_avg:585.73ms +grad accum step:5023/14336 +step:20092/57344 train_time:11769080ms step_avg:585.76ms +step:20093/57344 train_time:11769097ms step_avg:585.73ms +step:20094/57344 train_time:11769353ms step_avg:585.71ms +step:20095/57344 train_time:11769923ms step_avg:585.71ms +grad accum step:5024/14336 +step:20096/57344 train_time:11771220ms step_avg:585.75ms +step:20096/57344 val_loss:7.017865 train_time:11771221ms step_avg:585.75ms +step:20097/57344 train_time:11771232ms step_avg:585.72ms +step:20098/57344 train_time:11771457ms step_avg:585.70ms +step:20099/57344 train_time:11771996ms step_avg:585.70ms +grad accum step:5025/14336 +step:20100/57344 train_time:11773279ms step_avg:585.74ms +step:20101/57344 train_time:11773296ms step_avg:585.71ms +step:20102/57344 train_time:11773542ms step_avg:585.69ms +step:20103/57344 train_time:11774084ms step_avg:585.69ms +grad accum step:5026/14336 +step:20104/57344 train_time:11775362ms step_avg:585.72ms +step:20105/57344 train_time:11775378ms step_avg:585.69ms +step:20106/57344 train_time:11775628ms step_avg:585.68ms +step:20107/57344 train_time:11776178ms step_avg:585.68ms +grad accum step:5027/14336 +step:20108/57344 train_time:11777471ms step_avg:585.71ms +step:20109/57344 train_time:11777488ms step_avg:585.68ms +step:20110/57344 train_time:11777735ms step_avg:585.67ms +step:20111/57344 train_time:11778275ms step_avg:585.66ms +grad accum step:5028/14336 +step:20112/57344 train_time:11779547ms step_avg:585.70ms +step:20113/57344 train_time:11779564ms step_avg:585.67ms +step:20114/57344 train_time:11779812ms step_avg:585.65ms +step:20115/57344 train_time:11780360ms step_avg:585.65ms +grad accum step:5029/14336 +step:20116/57344 train_time:11781634ms step_avg:585.68ms +step:20117/57344 train_time:11781651ms step_avg:585.66ms +step:20118/57344 train_time:11781896ms step_avg:585.64ms +step:20119/57344 train_time:11782442ms step_avg:585.64ms +grad accum step:5030/14336 +step:20120/57344 train_time:11783757ms step_avg:585.67ms +step:20121/57344 train_time:11783774ms step_avg:585.65ms +step:20122/57344 train_time:11784019ms step_avg:585.63ms +step:20123/57344 train_time:11784567ms step_avg:585.63ms +grad accum step:5031/14336 +step:20124/57344 train_time:11785888ms step_avg:585.66ms +step:20125/57344 train_time:11785905ms step_avg:585.64ms +step:20126/57344 train_time:11786158ms step_avg:585.62ms +step:20127/57344 train_time:11786710ms step_avg:585.62ms +grad accum step:5032/14336 +step:20128/57344 train_time:11787991ms step_avg:585.65ms +step:20129/57344 train_time:11788007ms step_avg:585.62ms +step:20130/57344 train_time:11788257ms step_avg:585.61ms +step:20131/57344 train_time:11788810ms step_avg:585.60ms +grad accum step:5033/14336 +step:20132/57344 train_time:11790110ms step_avg:585.64ms +step:20133/57344 train_time:11790127ms step_avg:585.61ms +step:20134/57344 train_time:11790380ms step_avg:585.60ms +step:20135/57344 train_time:11790947ms step_avg:585.59ms +grad accum step:5034/14336 +step:20136/57344 train_time:11792265ms step_avg:585.63ms +step:20137/57344 train_time:11792282ms step_avg:585.60ms +step:20138/57344 train_time:11792537ms step_avg:585.59ms +step:20139/57344 train_time:11793104ms step_avg:585.59ms +grad accum step:5035/14336 +step:20140/57344 train_time:11794377ms step_avg:585.62ms +step:20141/57344 train_time:11794395ms step_avg:585.59ms +step:20142/57344 train_time:11794643ms step_avg:585.57ms +step:20143/57344 train_time:11795187ms step_avg:585.57ms +grad accum step:5036/14336 +step:20144/57344 train_time:11796479ms step_avg:585.61ms +step:20145/57344 train_time:11796496ms step_avg:585.58ms +step:20146/57344 train_time:11796738ms step_avg:585.56ms +step:20147/57344 train_time:11797277ms step_avg:585.56ms +grad accum step:5037/14336 +step:20148/57344 train_time:11798612ms step_avg:585.60ms +step:20149/57344 train_time:11798629ms step_avg:585.57ms +step:20150/57344 train_time:11798887ms step_avg:585.55ms +step:20151/57344 train_time:11799466ms step_avg:585.55ms +grad accum step:5038/14336 +step:20152/57344 train_time:11800798ms step_avg:585.59ms +step:20153/57344 train_time:11800815ms step_avg:585.56ms +step:20154/57344 train_time:11801065ms step_avg:585.54ms +step:20155/57344 train_time:11801620ms step_avg:585.54ms +grad accum step:5039/14336 +step:20156/57344 train_time:11802920ms step_avg:585.58ms +step:20157/57344 train_time:11802937ms step_avg:585.55ms +step:20158/57344 train_time:11803185ms step_avg:585.53ms +step:20159/57344 train_time:11803733ms step_avg:585.53ms +grad accum step:5040/14336 +step:20160/57344 train_time:11805067ms step_avg:585.57ms +step:20160/57344 val_loss:7.015147 train_time:11805067ms step_avg:585.57ms +step:20161/57344 train_time:11805080ms step_avg:585.54ms +step:20162/57344 train_time:11805302ms step_avg:585.52ms +step:20163/57344 train_time:11805847ms step_avg:585.52ms +grad accum step:5041/14336 +step:20164/57344 train_time:11807121ms step_avg:585.55ms +step:20165/57344 train_time:11807139ms step_avg:585.53ms +step:20166/57344 train_time:11807383ms step_avg:585.51ms +step:20167/57344 train_time:11807930ms step_avg:585.51ms +grad accum step:5042/14336 +step:20168/57344 train_time:11809235ms step_avg:585.54ms +step:20169/57344 train_time:11809252ms step_avg:585.51ms +step:20170/57344 train_time:11809507ms step_avg:585.50ms +step:20171/57344 train_time:11810076ms step_avg:585.50ms +grad accum step:5043/14336 +step:20172/57344 train_time:11811349ms step_avg:585.53ms +step:20173/57344 train_time:11811366ms step_avg:585.50ms +step:20174/57344 train_time:11811609ms step_avg:585.49ms +step:20175/57344 train_time:11812142ms step_avg:585.48ms +grad accum step:5044/14336 +step:20176/57344 train_time:11813465ms step_avg:585.52ms +step:20177/57344 train_time:11813481ms step_avg:585.49ms +step:20178/57344 train_time:11813724ms step_avg:585.48ms +step:20179/57344 train_time:11814269ms step_avg:585.47ms +grad accum step:5045/14336 +step:20180/57344 train_time:11815592ms step_avg:585.51ms +step:20181/57344 train_time:11815609ms step_avg:585.48ms +step:20182/57344 train_time:11815858ms step_avg:585.47ms +step:20183/57344 train_time:11816412ms step_avg:585.46ms +grad accum step:5046/14336 +step:20184/57344 train_time:11817707ms step_avg:585.50ms +step:20185/57344 train_time:11817723ms step_avg:585.47ms +step:20186/57344 train_time:11817975ms step_avg:585.45ms +step:20187/57344 train_time:11818534ms step_avg:585.45ms +grad accum step:5047/14336 +step:20188/57344 train_time:11819851ms step_avg:585.49ms +step:20189/57344 train_time:11819868ms step_avg:585.46ms +step:20190/57344 train_time:11820120ms step_avg:585.44ms +step:20191/57344 train_time:11820681ms step_avg:585.44ms +grad accum step:5048/14336 +step:20192/57344 train_time:11821973ms step_avg:585.48ms +step:20193/57344 train_time:11821990ms step_avg:585.45ms +step:20194/57344 train_time:11822233ms step_avg:585.43ms +step:20195/57344 train_time:11822775ms step_avg:585.43ms +grad accum step:5049/14336 +step:20196/57344 train_time:11824097ms step_avg:585.47ms +step:20197/57344 train_time:11824114ms step_avg:585.44ms +step:20198/57344 train_time:11824364ms step_avg:585.42ms +step:20199/57344 train_time:11824915ms step_avg:585.42ms +grad accum step:5050/14336 +step:20200/57344 train_time:11826198ms step_avg:585.46ms +step:20201/57344 train_time:11826215ms step_avg:585.43ms +step:20202/57344 train_time:11826467ms step_avg:585.41ms +step:20203/57344 train_time:11827032ms step_avg:585.41ms +grad accum step:5051/14336 +step:20204/57344 train_time:11828395ms step_avg:585.45ms +step:20205/57344 train_time:11828413ms step_avg:585.42ms +step:20206/57344 train_time:11828667ms step_avg:585.40ms +step:20207/57344 train_time:11829247ms step_avg:585.40ms +grad accum step:5052/14336 +step:20208/57344 train_time:11830570ms step_avg:585.44ms +step:20209/57344 train_time:11830587ms step_avg:585.41ms +step:20210/57344 train_time:11830833ms step_avg:585.39ms +step:20211/57344 train_time:11831384ms step_avg:585.39ms +grad accum step:5053/14336 +step:20212/57344 train_time:11832676ms step_avg:585.43ms +step:20213/57344 train_time:11832693ms step_avg:585.40ms +step:20214/57344 train_time:11832937ms step_avg:585.38ms +step:20215/57344 train_time:11833478ms step_avg:585.38ms +grad accum step:5054/14336 +step:20216/57344 train_time:11834753ms step_avg:585.42ms +step:20217/57344 train_time:11834770ms step_avg:585.39ms +step:20218/57344 train_time:11835023ms step_avg:585.37ms +step:20219/57344 train_time:11835583ms step_avg:585.37ms +grad accum step:5055/14336 +step:20220/57344 train_time:11836875ms step_avg:585.40ms +step:20221/57344 train_time:11836892ms step_avg:585.38ms +step:20222/57344 train_time:11837138ms step_avg:585.36ms +step:20223/57344 train_time:11837687ms step_avg:585.36ms +grad accum step:5056/14336 +step:20224/57344 train_time:11839001ms step_avg:585.39ms +step:20224/57344 val_loss:7.019801 train_time:11839001ms step_avg:585.39ms +step:20225/57344 train_time:11839014ms step_avg:585.37ms +step:20226/57344 train_time:11839238ms step_avg:585.35ms +step:20227/57344 train_time:11839791ms step_avg:585.35ms +grad accum step:5057/14336 +step:20228/57344 train_time:11841115ms step_avg:585.38ms +step:20229/57344 train_time:11841131ms step_avg:585.35ms +step:20230/57344 train_time:11841374ms step_avg:585.34ms +step:20231/57344 train_time:11841923ms step_avg:585.34ms +grad accum step:5058/14336 +step:20232/57344 train_time:11843227ms step_avg:585.37ms +step:20233/57344 train_time:11843244ms step_avg:585.34ms +step:20234/57344 train_time:11843490ms step_avg:585.33ms +step:20235/57344 train_time:11844028ms step_avg:585.32ms +grad accum step:5059/14336 +step:20236/57344 train_time:11853813ms step_avg:585.78ms +step:20237/57344 train_time:11853830ms step_avg:585.75ms +step:20238/57344 train_time:11854074ms step_avg:585.73ms +step:20239/57344 train_time:11854621ms step_avg:585.73ms +grad accum step:5060/14336 +step:20240/57344 train_time:11855922ms step_avg:585.77ms +step:20241/57344 train_time:11855939ms step_avg:585.74ms +step:20242/57344 train_time:11856182ms step_avg:585.72ms +step:20243/57344 train_time:11856723ms step_avg:585.72ms +grad accum step:5061/14336 +step:20244/57344 train_time:11858024ms step_avg:585.75ms +step:20245/57344 train_time:11858041ms step_avg:585.73ms +step:20246/57344 train_time:11858284ms step_avg:585.71ms +step:20247/57344 train_time:11858838ms step_avg:585.71ms +grad accum step:5062/14336 +step:20248/57344 train_time:11860208ms step_avg:585.75ms +step:20249/57344 train_time:11860222ms step_avg:585.72ms +step:20250/57344 train_time:11860468ms step_avg:585.70ms +step:20251/57344 train_time:11861021ms step_avg:585.70ms +grad accum step:5063/14336 +step:20252/57344 train_time:11862332ms step_avg:585.74ms +step:20253/57344 train_time:11862349ms step_avg:585.71ms +step:20254/57344 train_time:11862598ms step_avg:585.69ms +step:20255/57344 train_time:11863147ms step_avg:585.69ms +grad accum step:5064/14336 +step:20256/57344 train_time:11864439ms step_avg:585.72ms +step:20257/57344 train_time:11864455ms step_avg:585.70ms +step:20258/57344 train_time:11864700ms step_avg:585.68ms +step:20259/57344 train_time:11865245ms step_avg:585.68ms +grad accum step:5065/14336 +step:20260/57344 train_time:11866560ms step_avg:585.71ms +step:20261/57344 train_time:11866577ms step_avg:585.69ms +step:20262/57344 train_time:11866824ms step_avg:585.67ms +step:20263/57344 train_time:11867377ms step_avg:585.67ms +grad accum step:5066/14336 +step:20264/57344 train_time:11868709ms step_avg:585.70ms +step:20265/57344 train_time:11868726ms step_avg:585.68ms +step:20266/57344 train_time:11868971ms step_avg:585.66ms +step:20267/57344 train_time:11869511ms step_avg:585.66ms +grad accum step:5067/14336 +step:20268/57344 train_time:11870804ms step_avg:585.69ms +step:20269/57344 train_time:11870821ms step_avg:585.66ms +step:20270/57344 train_time:11871065ms step_avg:585.65ms +step:20271/57344 train_time:11871612ms step_avg:585.65ms +grad accum step:5068/14336 +step:20272/57344 train_time:11872892ms step_avg:585.68ms +step:20273/57344 train_time:11872909ms step_avg:585.65ms +step:20274/57344 train_time:11873151ms step_avg:585.63ms +step:20275/57344 train_time:11873695ms step_avg:585.63ms +grad accum step:5069/14336 +step:20276/57344 train_time:11876974ms step_avg:585.77ms +step:20277/57344 train_time:11876988ms step_avg:585.74ms +step:20278/57344 train_time:11877239ms step_avg:585.72ms +step:20279/57344 train_time:11877783ms step_avg:585.72ms +grad accum step:5070/14336 +step:20280/57344 train_time:11879062ms step_avg:585.75ms +step:20281/57344 train_time:11879079ms step_avg:585.72ms +step:20282/57344 train_time:11879325ms step_avg:585.71ms +step:20283/57344 train_time:11879872ms step_avg:585.71ms +grad accum step:5071/14336 +step:20284/57344 train_time:11881187ms step_avg:585.74ms +step:20285/57344 train_time:11881204ms step_avg:585.71ms +step:20286/57344 train_time:11881450ms step_avg:585.70ms +step:20287/57344 train_time:11881998ms step_avg:585.70ms +grad accum step:5072/14336 +step:20288/57344 train_time:11883320ms step_avg:585.73ms +step:20288/57344 val_loss:7.012786 train_time:11883321ms step_avg:585.73ms +step:20289/57344 train_time:11883333ms step_avg:585.70ms +step:20290/57344 train_time:11883635ms step_avg:585.69ms +step:20291/57344 train_time:11884181ms step_avg:585.69ms +grad accum step:5073/14336 +step:20292/57344 train_time:11885461ms step_avg:585.72ms +step:20293/57344 train_time:11885478ms step_avg:585.69ms +step:20294/57344 train_time:11885724ms step_avg:585.68ms +step:20295/57344 train_time:11886266ms step_avg:585.67ms +grad accum step:5074/14336 +step:20296/57344 train_time:11887566ms step_avg:585.71ms +step:20297/57344 train_time:11887583ms step_avg:585.68ms +step:20298/57344 train_time:11887829ms step_avg:585.67ms +step:20299/57344 train_time:11888374ms step_avg:585.66ms +grad accum step:5075/14336 +step:20300/57344 train_time:11889650ms step_avg:585.70ms +step:20301/57344 train_time:11889667ms step_avg:585.67ms +step:20302/57344 train_time:11889915ms step_avg:585.65ms +step:20303/57344 train_time:11890465ms step_avg:585.65ms +grad accum step:5076/14336 +step:20304/57344 train_time:11891771ms step_avg:585.69ms +step:20305/57344 train_time:11891788ms step_avg:585.66ms +step:20306/57344 train_time:11892033ms step_avg:585.64ms +step:20307/57344 train_time:11892581ms step_avg:585.64ms +grad accum step:5077/14336 +step:20308/57344 train_time:11893879ms step_avg:585.67ms +step:20309/57344 train_time:11893896ms step_avg:585.65ms +step:20310/57344 train_time:11894149ms step_avg:585.63ms +step:20311/57344 train_time:11894711ms step_avg:585.63ms +grad accum step:5078/14336 +step:20312/57344 train_time:11896037ms step_avg:585.67ms +step:20313/57344 train_time:11896054ms step_avg:585.64ms +step:20314/57344 train_time:11896302ms step_avg:585.62ms +step:20315/57344 train_time:11896854ms step_avg:585.62ms +grad accum step:5079/14336 +step:20316/57344 train_time:11898195ms step_avg:585.66ms +step:20317/57344 train_time:11898212ms step_avg:585.63ms +step:20318/57344 train_time:11898461ms step_avg:585.61ms +step:20319/57344 train_time:11899010ms step_avg:585.61ms +grad accum step:5080/14336 +step:20320/57344 train_time:11900321ms step_avg:585.65ms +step:20321/57344 train_time:11900338ms step_avg:585.62ms +step:20322/57344 train_time:11900585ms step_avg:585.60ms +step:20323/57344 train_time:11901126ms step_avg:585.60ms +grad accum step:5081/14336 +step:20324/57344 train_time:11902415ms step_avg:585.63ms +step:20325/57344 train_time:11902432ms step_avg:585.61ms +step:20326/57344 train_time:11902685ms step_avg:585.59ms +step:20327/57344 train_time:11903241ms step_avg:585.59ms +grad accum step:5082/14336 +step:20328/57344 train_time:11904553ms step_avg:585.62ms +step:20329/57344 train_time:11904570ms step_avg:585.60ms +step:20330/57344 train_time:11904816ms step_avg:585.58ms +step:20331/57344 train_time:11905362ms step_avg:585.58ms +grad accum step:5083/14336 +step:20332/57344 train_time:11906661ms step_avg:585.61ms +step:20333/57344 train_time:11906678ms step_avg:585.58ms +step:20334/57344 train_time:11906925ms step_avg:585.57ms +step:20335/57344 train_time:11907471ms step_avg:585.57ms +grad accum step:5084/14336 +step:20336/57344 train_time:11908828ms step_avg:585.60ms +step:20337/57344 train_time:11908845ms step_avg:585.58ms +step:20338/57344 train_time:11909096ms step_avg:585.56ms +step:20339/57344 train_time:11909657ms step_avg:585.56ms +grad accum step:5085/14336 +step:20340/57344 train_time:11910979ms step_avg:585.59ms +step:20341/57344 train_time:11910996ms step_avg:585.57ms +step:20342/57344 train_time:11911239ms step_avg:585.55ms +step:20343/57344 train_time:11911779ms step_avg:585.55ms +grad accum step:5086/14336 +step:20344/57344 train_time:11913065ms step_avg:585.58ms +step:20345/57344 train_time:11913082ms step_avg:585.55ms +step:20346/57344 train_time:11913329ms step_avg:585.54ms +step:20347/57344 train_time:11913872ms step_avg:585.53ms +grad accum step:5087/14336 +step:20348/57344 train_time:11915154ms step_avg:585.57ms +step:20349/57344 train_time:11915170ms step_avg:585.54ms +step:20350/57344 train_time:11915422ms step_avg:585.52ms +step:20351/57344 train_time:11915976ms step_avg:585.52ms +grad accum step:5088/14336 +step:20352/57344 train_time:11917286ms step_avg:585.56ms +step:20352/57344 val_loss:7.009954 train_time:11917286ms step_avg:585.56ms +step:20353/57344 train_time:11917299ms step_avg:585.53ms +step:20354/57344 train_time:11917522ms step_avg:585.51ms +step:20355/57344 train_time:11918075ms step_avg:585.51ms +grad accum step:5089/14336 +step:20356/57344 train_time:11919393ms step_avg:585.55ms +step:20357/57344 train_time:11919410ms step_avg:585.52ms +step:20358/57344 train_time:11919663ms step_avg:585.50ms +step:20359/57344 train_time:11920218ms step_avg:585.50ms +grad accum step:5090/14336 +step:20360/57344 train_time:11921509ms step_avg:585.54ms +step:20361/57344 train_time:11921526ms step_avg:585.51ms +step:20362/57344 train_time:11921774ms step_avg:585.49ms +step:20363/57344 train_time:11922326ms step_avg:585.49ms +grad accum step:5091/14336 +step:20364/57344 train_time:11923646ms step_avg:585.53ms +step:20365/57344 train_time:11923662ms step_avg:585.50ms +step:20366/57344 train_time:11923906ms step_avg:585.48ms +step:20367/57344 train_time:11924447ms step_avg:585.48ms +grad accum step:5092/14336 +step:20368/57344 train_time:11925726ms step_avg:585.51ms +step:20369/57344 train_time:11925743ms step_avg:585.48ms +step:20370/57344 train_time:11925990ms step_avg:585.47ms +step:20371/57344 train_time:11926539ms step_avg:585.47ms +grad accum step:5093/14336 +step:20372/57344 train_time:11927850ms step_avg:585.50ms +step:20373/57344 train_time:11927868ms step_avg:585.47ms +step:20374/57344 train_time:11928116ms step_avg:585.46ms +step:20375/57344 train_time:11928667ms step_avg:585.46ms +grad accum step:5094/14336 +step:20376/57344 train_time:11929987ms step_avg:585.49ms +step:20377/57344 train_time:11930004ms step_avg:585.46ms +step:20378/57344 train_time:11930254ms step_avg:585.45ms +step:20379/57344 train_time:11930809ms step_avg:585.45ms +grad accum step:5095/14336 +step:20380/57344 train_time:11932124ms step_avg:585.48ms +step:20381/57344 train_time:11932141ms step_avg:585.45ms +step:20382/57344 train_time:11932391ms step_avg:585.44ms +step:20383/57344 train_time:11932939ms step_avg:585.44ms +grad accum step:5096/14336 +step:20384/57344 train_time:11934296ms step_avg:585.47ms +step:20385/57344 train_time:11934313ms step_avg:585.45ms +step:20386/57344 train_time:11934559ms step_avg:585.43ms +step:20387/57344 train_time:11935117ms step_avg:585.43ms +grad accum step:5097/14336 +step:20388/57344 train_time:11936482ms step_avg:585.47ms +step:20389/57344 train_time:11936499ms step_avg:585.44ms +step:20390/57344 train_time:11936750ms step_avg:585.42ms +step:20391/57344 train_time:11937309ms step_avg:585.42ms +grad accum step:5098/14336 +step:20392/57344 train_time:11938586ms step_avg:585.45ms +step:20393/57344 train_time:11938603ms step_avg:585.43ms +step:20394/57344 train_time:11938850ms step_avg:585.41ms +step:20395/57344 train_time:11939397ms step_avg:585.41ms +grad accum step:5099/14336 +step:20396/57344 train_time:11940718ms step_avg:585.44ms +step:20397/57344 train_time:11940734ms step_avg:585.42ms +step:20398/57344 train_time:11940981ms step_avg:585.40ms +step:20399/57344 train_time:11941528ms step_avg:585.40ms +grad accum step:5100/14336 +step:20400/57344 train_time:11942830ms step_avg:585.43ms +step:20401/57344 train_time:11942847ms step_avg:585.40ms +step:20402/57344 train_time:11943098ms step_avg:585.39ms +step:20403/57344 train_time:11943655ms step_avg:585.39ms +grad accum step:5101/14336 +step:20404/57344 train_time:11945006ms step_avg:585.42ms +step:20405/57344 train_time:11945023ms step_avg:585.40ms +step:20406/57344 train_time:11945267ms step_avg:585.38ms +step:20407/57344 train_time:11945807ms step_avg:585.38ms +grad accum step:5102/14336 +step:20408/57344 train_time:11947120ms step_avg:585.41ms +step:20409/57344 train_time:11947137ms step_avg:585.39ms +step:20410/57344 train_time:11947388ms step_avg:585.37ms +step:20411/57344 train_time:11947950ms step_avg:585.37ms +grad accum step:5103/14336 +step:20412/57344 train_time:11949240ms step_avg:585.40ms +step:20413/57344 train_time:11949257ms step_avg:585.37ms +step:20414/57344 train_time:11949501ms step_avg:585.36ms +step:20415/57344 train_time:11950045ms step_avg:585.36ms +grad accum step:5104/14336 +step:20416/57344 train_time:11951349ms step_avg:585.39ms +step:20416/57344 val_loss:7.017318 train_time:11951349ms step_avg:585.39ms +step:20417/57344 train_time:11951844ms step_avg:585.39ms +step:20418/57344 train_time:11951868ms step_avg:585.36ms +step:20419/57344 train_time:11952416ms step_avg:585.36ms +grad accum step:5105/14336 +step:20420/57344 train_time:11953714ms step_avg:585.39ms +step:20421/57344 train_time:11953731ms step_avg:585.36ms +step:20422/57344 train_time:11953976ms step_avg:585.35ms +step:20423/57344 train_time:11954523ms step_avg:585.35ms +grad accum step:5106/14336 +step:20424/57344 train_time:11955847ms step_avg:585.38ms +step:20425/57344 train_time:11955864ms step_avg:585.35ms +step:20426/57344 train_time:11956110ms step_avg:585.34ms +step:20427/57344 train_time:11956651ms step_avg:585.34ms +grad accum step:5107/14336 +step:20428/57344 train_time:11957928ms step_avg:585.37ms +step:20429/57344 train_time:11957945ms step_avg:585.34ms +step:20430/57344 train_time:11958197ms step_avg:585.33ms +step:20431/57344 train_time:11958761ms step_avg:585.32ms +grad accum step:5108/14336 +step:20432/57344 train_time:11960049ms step_avg:585.36ms +step:20433/57344 train_time:11960066ms step_avg:585.33ms +step:20434/57344 train_time:11960315ms step_avg:585.31ms +step:20435/57344 train_time:11960865ms step_avg:585.31ms +grad accum step:5109/14336 +step:20436/57344 train_time:11962162ms step_avg:585.35ms +step:20437/57344 train_time:11962179ms step_avg:585.32ms +step:20438/57344 train_time:11962426ms step_avg:585.30ms +step:20439/57344 train_time:11962976ms step_avg:585.30ms +grad accum step:5110/14336 +step:20440/57344 train_time:11964356ms step_avg:585.34ms +step:20441/57344 train_time:11964372ms step_avg:585.31ms +step:20442/57344 train_time:11964638ms step_avg:585.30ms +step:20443/57344 train_time:11965222ms step_avg:585.30ms +grad accum step:5111/14336 +step:20444/57344 train_time:11966515ms step_avg:585.33ms +step:20445/57344 train_time:11966532ms step_avg:585.30ms +step:20446/57344 train_time:11966785ms step_avg:585.29ms +step:20447/57344 train_time:11967358ms step_avg:585.29ms +grad accum step:5112/14336 +step:20448/57344 train_time:11968678ms step_avg:585.32ms +step:20449/57344 train_time:11968695ms step_avg:585.29ms +step:20450/57344 train_time:11968948ms step_avg:585.28ms +step:20451/57344 train_time:11969507ms step_avg:585.28ms +grad accum step:5113/14336 +step:20452/57344 train_time:11970810ms step_avg:585.31ms +step:20453/57344 train_time:11970827ms step_avg:585.28ms +step:20454/57344 train_time:11971079ms step_avg:585.27ms +step:20455/57344 train_time:11971638ms step_avg:585.27ms +grad accum step:5114/14336 +step:20456/57344 train_time:11972966ms step_avg:585.30ms +step:20457/57344 train_time:11972983ms step_avg:585.28ms +step:20458/57344 train_time:11973231ms step_avg:585.26ms +step:20459/57344 train_time:11973774ms step_avg:585.26ms +grad accum step:5115/14336 +step:20460/57344 train_time:11975074ms step_avg:585.29ms +step:20461/57344 train_time:11975091ms step_avg:585.26ms +step:20462/57344 train_time:11975340ms step_avg:585.25ms +step:20463/57344 train_time:11975881ms step_avg:585.25ms +grad accum step:5116/14336 +step:20464/57344 train_time:11977193ms step_avg:585.28ms +step:20465/57344 train_time:11977210ms step_avg:585.25ms +step:20466/57344 train_time:11977465ms step_avg:585.24ms +step:20467/57344 train_time:11978029ms step_avg:585.24ms +grad accum step:5117/14336 +step:20468/57344 train_time:11979327ms step_avg:585.27ms +step:20469/57344 train_time:11979344ms step_avg:585.24ms +step:20470/57344 train_time:11979594ms step_avg:585.23ms +step:20471/57344 train_time:11980147ms step_avg:585.23ms +grad accum step:5118/14336 +step:20472/57344 train_time:11981456ms step_avg:585.26ms +step:20473/57344 train_time:11981473ms step_avg:585.23ms +step:20474/57344 train_time:11981728ms step_avg:585.22ms +step:20475/57344 train_time:11982298ms step_avg:585.22ms +grad accum step:5119/14336 +step:20476/57344 train_time:11983630ms step_avg:585.25ms +step:20477/57344 train_time:11983647ms step_avg:585.22ms +step:20478/57344 train_time:11983898ms step_avg:585.21ms +step:20479/57344 train_time:11984457ms step_avg:585.21ms +grad accum step:5120/14336 +step:20480/57344 train_time:11985753ms step_avg:585.24ms +step:20480/57344 val_loss:7.022078 train_time:11985754ms step_avg:585.24ms +step:20481/57344 train_time:11985810ms step_avg:585.22ms +step:20482/57344 train_time:11985989ms step_avg:585.20ms +step:20483/57344 train_time:11986536ms step_avg:585.19ms +grad accum step:5121/14336 +step:20484/57344 train_time:11987843ms step_avg:585.23ms +step:20485/57344 train_time:11987860ms step_avg:585.20ms +step:20486/57344 train_time:11988106ms step_avg:585.19ms +step:20487/57344 train_time:11988650ms step_avg:585.18ms +grad accum step:5122/14336 +step:20488/57344 train_time:11989952ms step_avg:585.22ms +step:20489/57344 train_time:11989969ms step_avg:585.19ms +step:20490/57344 train_time:11990220ms step_avg:585.17ms +step:20491/57344 train_time:11990780ms step_avg:585.17ms +grad accum step:5123/14336 +step:20492/57344 train_time:11992087ms step_avg:585.21ms +step:20493/57344 train_time:11992104ms step_avg:585.18ms +step:20494/57344 train_time:11992355ms step_avg:585.16ms +step:20495/57344 train_time:11992920ms step_avg:585.16ms +grad accum step:5124/14336 +step:20496/57344 train_time:11994231ms step_avg:585.20ms +step:20497/57344 train_time:11994249ms step_avg:585.17ms +step:20498/57344 train_time:11994498ms step_avg:585.15ms +step:20499/57344 train_time:11995049ms step_avg:585.15ms +grad accum step:5125/14336 +step:20500/57344 train_time:11996361ms step_avg:585.19ms +step:20501/57344 train_time:11996378ms step_avg:585.16ms +step:20502/57344 train_time:11996623ms step_avg:585.14ms +step:20503/57344 train_time:11997173ms step_avg:585.14ms +grad accum step:5126/14336 +step:20504/57344 train_time:11998470ms step_avg:585.18ms +step:20505/57344 train_time:11998487ms step_avg:585.15ms +step:20506/57344 train_time:11998737ms step_avg:585.13ms +step:20507/57344 train_time:11999282ms step_avg:585.13ms +grad accum step:5127/14336 +step:20508/57344 train_time:12000635ms step_avg:585.17ms +step:20509/57344 train_time:12000652ms step_avg:585.14ms +step:20510/57344 train_time:12000906ms step_avg:585.12ms +step:20511/57344 train_time:12001469ms step_avg:585.12ms +grad accum step:5128/14336 +step:20512/57344 train_time:12002827ms step_avg:585.16ms +step:20513/57344 train_time:12002845ms step_avg:585.13ms +step:20514/57344 train_time:12003092ms step_avg:585.12ms +step:20515/57344 train_time:12003643ms step_avg:585.12ms +grad accum step:5129/14336 +step:20516/57344 train_time:12004943ms step_avg:585.15ms +step:20517/57344 train_time:12004960ms step_avg:585.12ms +step:20518/57344 train_time:12005208ms step_avg:585.11ms +step:20519/57344 train_time:12005753ms step_avg:585.10ms +grad accum step:5130/14336 +step:20520/57344 train_time:12007051ms step_avg:585.14ms +step:20521/57344 train_time:12007068ms step_avg:585.11ms +step:20522/57344 train_time:12007317ms step_avg:585.09ms +step:20523/57344 train_time:12007861ms step_avg:585.09ms +grad accum step:5131/14336 +step:20524/57344 train_time:12009145ms step_avg:585.13ms +step:20525/57344 train_time:12009162ms step_avg:585.10ms +step:20526/57344 train_time:12009417ms step_avg:585.08ms +step:20527/57344 train_time:12009988ms step_avg:585.08ms +grad accum step:5132/14336 +step:20528/57344 train_time:12011268ms step_avg:585.12ms +step:20529/57344 train_time:12011286ms step_avg:585.09ms +step:20530/57344 train_time:12011530ms step_avg:585.07ms +step:20531/57344 train_time:12012075ms step_avg:585.07ms +grad accum step:5133/14336 +step:20532/57344 train_time:12013385ms step_avg:585.11ms +step:20533/57344 train_time:12013401ms step_avg:585.08ms +step:20534/57344 train_time:12013647ms step_avg:585.06ms +step:20535/57344 train_time:12014185ms step_avg:585.06ms +grad accum step:5134/14336 +step:20536/57344 train_time:12015485ms step_avg:585.09ms +step:20537/57344 train_time:12015502ms step_avg:585.07ms +step:20538/57344 train_time:12015748ms step_avg:585.05ms +step:20539/57344 train_time:12016294ms step_avg:585.05ms +grad accum step:5135/14336 +step:20540/57344 train_time:12017641ms step_avg:585.08ms +step:20541/57344 train_time:12017658ms step_avg:585.06ms +step:20542/57344 train_time:12017903ms step_avg:585.04ms +step:20543/57344 train_time:12018448ms step_avg:585.04ms +grad accum step:5136/14336 +step:20544/57344 train_time:12019788ms step_avg:585.08ms +step:20544/57344 val_loss:7.030802 train_time:12019788ms step_avg:585.08ms +step:20545/57344 train_time:12019801ms step_avg:585.05ms +step:20546/57344 train_time:12020027ms step_avg:585.03ms +step:20547/57344 train_time:12020582ms step_avg:585.03ms +grad accum step:5137/14336 +step:20548/57344 train_time:12021886ms step_avg:585.06ms +step:20549/57344 train_time:12021903ms step_avg:585.04ms +step:20550/57344 train_time:12022153ms step_avg:585.02ms +step:20551/57344 train_time:12022698ms step_avg:585.02ms +grad accum step:5138/14336 +step:20552/57344 train_time:12023989ms step_avg:585.05ms +step:20553/57344 train_time:12024006ms step_avg:585.02ms +step:20554/57344 train_time:12024256ms step_avg:585.01ms +step:20555/57344 train_time:12024801ms step_avg:585.01ms +grad accum step:5139/14336 +step:20556/57344 train_time:12026110ms step_avg:585.04ms +step:20557/57344 train_time:12026127ms step_avg:585.01ms +step:20558/57344 train_time:12026375ms step_avg:585.00ms +step:20559/57344 train_time:12026929ms step_avg:585.00ms +grad accum step:5140/14336 +step:20560/57344 train_time:12028246ms step_avg:585.03ms +step:20561/57344 train_time:12028263ms step_avg:585.00ms +step:20562/57344 train_time:12028515ms step_avg:584.99ms +step:20563/57344 train_time:12029072ms step_avg:584.99ms +grad accum step:5141/14336 +step:20564/57344 train_time:12030399ms step_avg:585.02ms +step:20565/57344 train_time:12030416ms step_avg:584.99ms +step:20566/57344 train_time:12030664ms step_avg:584.98ms +step:20567/57344 train_time:12031209ms step_avg:584.98ms +grad accum step:5142/14336 +step:20568/57344 train_time:12032525ms step_avg:585.01ms +step:20569/57344 train_time:12032542ms step_avg:584.98ms +step:20570/57344 train_time:12032786ms step_avg:584.97ms +step:20571/57344 train_time:12033336ms step_avg:584.97ms +grad accum step:5143/14336 +step:20572/57344 train_time:12034658ms step_avg:585.00ms +step:20573/57344 train_time:12034675ms step_avg:584.97ms +step:20574/57344 train_time:12034919ms step_avg:584.96ms +step:20575/57344 train_time:12035468ms step_avg:584.96ms +grad accum step:5144/14336 +step:20576/57344 train_time:12036831ms step_avg:584.99ms +step:20577/57344 train_time:12036848ms step_avg:584.97ms +step:20578/57344 train_time:12037093ms step_avg:584.95ms +step:20579/57344 train_time:12037635ms step_avg:584.95ms +grad accum step:5145/14336 +step:20580/57344 train_time:12038962ms step_avg:584.98ms +step:20581/57344 train_time:12038978ms step_avg:584.96ms +step:20582/57344 train_time:12039227ms step_avg:584.94ms +step:20583/57344 train_time:12039775ms step_avg:584.94ms +grad accum step:5146/14336 +step:20584/57344 train_time:12041123ms step_avg:584.97ms +step:20585/57344 train_time:12041140ms step_avg:584.95ms +step:20586/57344 train_time:12041395ms step_avg:584.93ms +step:20587/57344 train_time:12041970ms step_avg:584.93ms +grad accum step:5147/14336 +step:20588/57344 train_time:12043274ms step_avg:584.97ms +step:20589/57344 train_time:12043291ms step_avg:584.94ms +step:20590/57344 train_time:12043540ms step_avg:584.92ms +step:20591/57344 train_time:12044089ms step_avg:584.92ms +grad accum step:5148/14336 +step:20592/57344 train_time:12045399ms step_avg:584.96ms +step:20593/57344 train_time:12045416ms step_avg:584.93ms +step:20594/57344 train_time:12045661ms step_avg:584.91ms +step:20595/57344 train_time:12046203ms step_avg:584.91ms +grad accum step:5149/14336 +step:20596/57344 train_time:12047530ms step_avg:584.95ms +step:20597/57344 train_time:12047547ms step_avg:584.92ms +step:20598/57344 train_time:12047799ms step_avg:584.90ms +step:20599/57344 train_time:12048361ms step_avg:584.90ms +grad accum step:5150/14336 +step:20600/57344 train_time:12049666ms step_avg:584.94ms +step:20601/57344 train_time:12049683ms step_avg:584.91ms +step:20602/57344 train_time:12049931ms step_avg:584.89ms +step:20603/57344 train_time:12050479ms step_avg:584.89ms +grad accum step:5151/14336 +step:20604/57344 train_time:12051776ms step_avg:584.92ms +step:20605/57344 train_time:12051792ms step_avg:584.90ms +step:20606/57344 train_time:12052043ms step_avg:584.88ms +step:20607/57344 train_time:12052602ms step_avg:584.88ms +grad accum step:5152/14336 +step:20608/57344 train_time:12053933ms step_avg:584.92ms +step:20608/57344 val_loss:7.019706 train_time:12053934ms step_avg:584.92ms +step:20609/57344 train_time:12053947ms step_avg:584.89ms +step:20610/57344 train_time:12054172ms step_avg:584.87ms +step:20611/57344 train_time:12054725ms step_avg:584.87ms +grad accum step:5153/14336 +step:20612/57344 train_time:12056031ms step_avg:584.90ms +step:20613/57344 train_time:12056048ms step_avg:584.88ms +step:20614/57344 train_time:12056303ms step_avg:584.86ms +step:20615/57344 train_time:12056873ms step_avg:584.86ms +grad accum step:5154/14336 +step:20616/57344 train_time:12058183ms step_avg:584.89ms +step:20617/57344 train_time:12058200ms step_avg:584.87ms +step:20618/57344 train_time:12058457ms step_avg:584.85ms +step:20619/57344 train_time:12059029ms step_avg:584.85ms +grad accum step:5155/14336 +step:20620/57344 train_time:12060306ms step_avg:584.88ms +step:20621/57344 train_time:12060323ms step_avg:584.86ms +step:20622/57344 train_time:12060572ms step_avg:584.84ms +step:20623/57344 train_time:12061127ms step_avg:584.84ms +grad accum step:5156/14336 +step:20624/57344 train_time:12062448ms step_avg:584.87ms +step:20625/57344 train_time:12062465ms step_avg:584.85ms +step:20626/57344 train_time:12062712ms step_avg:584.83ms +step:20627/57344 train_time:12063265ms step_avg:584.83ms +grad accum step:5157/14336 +step:20628/57344 train_time:12064580ms step_avg:584.86ms +step:20629/57344 train_time:12064597ms step_avg:584.84ms +step:20630/57344 train_time:12064843ms step_avg:584.82ms +step:20631/57344 train_time:12065397ms step_avg:584.82ms +grad accum step:5158/14336 +step:20632/57344 train_time:12066734ms step_avg:584.86ms +step:20633/57344 train_time:12066750ms step_avg:584.83ms +step:20634/57344 train_time:12066998ms step_avg:584.81ms +step:20635/57344 train_time:12067542ms step_avg:584.81ms +grad accum step:5159/14336 +step:20636/57344 train_time:12068932ms step_avg:584.85ms +step:20637/57344 train_time:12068949ms step_avg:584.82ms +step:20638/57344 train_time:12069207ms step_avg:584.81ms +step:20639/57344 train_time:12069783ms step_avg:584.80ms +grad accum step:5160/14336 +step:20640/57344 train_time:12071078ms step_avg:584.84ms +step:20641/57344 train_time:12071095ms step_avg:584.81ms +step:20642/57344 train_time:12071346ms step_avg:584.80ms +step:20643/57344 train_time:12071902ms step_avg:584.79ms +grad accum step:5161/14336 +step:20644/57344 train_time:12073212ms step_avg:584.83ms +step:20645/57344 train_time:12073229ms step_avg:584.80ms +step:20646/57344 train_time:12073496ms step_avg:584.79ms +step:20647/57344 train_time:12074098ms step_avg:584.79ms +grad accum step:5162/14336 +step:20648/57344 train_time:12075423ms step_avg:584.82ms +step:20649/57344 train_time:12075440ms step_avg:584.80ms +step:20650/57344 train_time:12075685ms step_avg:584.78ms +step:20651/57344 train_time:12076234ms step_avg:584.78ms +grad accum step:5163/14336 +step:20652/57344 train_time:12077540ms step_avg:584.81ms +step:20653/57344 train_time:12077558ms step_avg:584.78ms +step:20654/57344 train_time:12077807ms step_avg:584.77ms +step:20655/57344 train_time:12078366ms step_avg:584.77ms +grad accum step:5164/14336 +step:20656/57344 train_time:12079674ms step_avg:584.80ms +step:20657/57344 train_time:12079689ms step_avg:584.77ms +step:20658/57344 train_time:12079935ms step_avg:584.76ms +step:20659/57344 train_time:12080479ms step_avg:584.76ms +grad accum step:5165/14336 +step:20660/57344 train_time:12081802ms step_avg:584.79ms +step:20661/57344 train_time:12081819ms step_avg:584.76ms +step:20662/57344 train_time:12082077ms step_avg:584.75ms +step:20663/57344 train_time:12082653ms step_avg:584.75ms +grad accum step:5166/14336 +step:20664/57344 train_time:12083963ms step_avg:584.78ms +step:20665/57344 train_time:12083979ms step_avg:584.76ms +step:20666/57344 train_time:12084227ms step_avg:584.74ms +step:20667/57344 train_time:12084772ms step_avg:584.74ms +grad accum step:5167/14336 +step:20668/57344 train_time:12086070ms step_avg:584.77ms +step:20669/57344 train_time:12086087ms step_avg:584.74ms +step:20670/57344 train_time:12086332ms step_avg:584.73ms +step:20671/57344 train_time:12086879ms step_avg:584.73ms +grad accum step:5168/14336 +step:20672/57344 train_time:12088202ms step_avg:584.76ms +step:20672/57344 val_loss:7.024773 train_time:12088203ms step_avg:584.76ms +step:20673/57344 train_time:12088215ms step_avg:584.73ms +step:20674/57344 train_time:12088436ms step_avg:584.72ms +step:20675/57344 train_time:12088978ms step_avg:584.71ms +grad accum step:5169/14336 +step:20676/57344 train_time:12090271ms step_avg:584.75ms +step:20677/57344 train_time:12090289ms step_avg:584.72ms +step:20678/57344 train_time:12090539ms step_avg:584.71ms +step:20679/57344 train_time:12091087ms step_avg:584.70ms +grad accum step:5170/14336 +step:20680/57344 train_time:12092382ms step_avg:584.74ms +step:20681/57344 train_time:12092399ms step_avg:584.71ms +step:20682/57344 train_time:12092643ms step_avg:584.69ms +step:20683/57344 train_time:12093190ms step_avg:584.69ms +grad accum step:5171/14336 +step:20684/57344 train_time:12094489ms step_avg:584.73ms +step:20685/57344 train_time:12094507ms step_avg:584.70ms +step:20686/57344 train_time:12094750ms step_avg:584.68ms +step:20687/57344 train_time:12095298ms step_avg:584.68ms +grad accum step:5172/14336 +step:20688/57344 train_time:12096582ms step_avg:584.71ms +step:20689/57344 train_time:12096599ms step_avg:584.69ms +step:20690/57344 train_time:12096844ms step_avg:584.67ms +step:20691/57344 train_time:12097392ms step_avg:584.67ms +grad accum step:5173/14336 +step:20692/57344 train_time:12098720ms step_avg:584.71ms +step:20693/57344 train_time:12098737ms step_avg:584.68ms +step:20694/57344 train_time:12098983ms step_avg:584.66ms +step:20695/57344 train_time:12099524ms step_avg:584.66ms +grad accum step:5174/14336 +step:20696/57344 train_time:12100859ms step_avg:584.70ms +step:20697/57344 train_time:12100876ms step_avg:584.67ms +step:20698/57344 train_time:12101123ms step_avg:584.65ms +step:20699/57344 train_time:12101667ms step_avg:584.65ms +grad accum step:5175/14336 +step:20700/57344 train_time:12102978ms step_avg:584.68ms +step:20701/57344 train_time:12102995ms step_avg:584.66ms +step:20702/57344 train_time:12103245ms step_avg:584.64ms +step:20703/57344 train_time:12103804ms step_avg:584.64ms +grad accum step:5176/14336 +step:20704/57344 train_time:12105135ms step_avg:584.68ms +step:20705/57344 train_time:12105152ms step_avg:584.65ms +step:20706/57344 train_time:12105403ms step_avg:584.63ms +step:20707/57344 train_time:12105973ms step_avg:584.63ms +grad accum step:5177/14336 +step:20708/57344 train_time:12107311ms step_avg:584.67ms +step:20709/57344 train_time:12107329ms step_avg:584.64ms +step:20710/57344 train_time:12107573ms step_avg:584.62ms +step:20711/57344 train_time:12108129ms step_avg:584.62ms +grad accum step:5178/14336 +step:20712/57344 train_time:12109458ms step_avg:584.66ms +step:20713/57344 train_time:12109475ms step_avg:584.63ms +step:20714/57344 train_time:12109720ms step_avg:584.62ms +step:20715/57344 train_time:12110265ms step_avg:584.61ms +grad accum step:5179/14336 +step:20716/57344 train_time:12111563ms step_avg:584.65ms +step:20717/57344 train_time:12111580ms step_avg:584.62ms +step:20718/57344 train_time:12111827ms step_avg:584.60ms +step:20719/57344 train_time:12112373ms step_avg:584.60ms +grad accum step:5180/14336 +step:20720/57344 train_time:12113699ms step_avg:584.64ms +step:20721/57344 train_time:12113716ms step_avg:584.61ms +step:20722/57344 train_time:12113959ms step_avg:584.59ms +step:20723/57344 train_time:12114510ms step_avg:584.59ms +grad accum step:5181/14336 +step:20724/57344 train_time:12115853ms step_avg:584.63ms +step:20725/57344 train_time:12115870ms step_avg:584.60ms +step:20726/57344 train_time:12116126ms step_avg:584.59ms +step:20727/57344 train_time:12116695ms step_avg:584.59ms +grad accum step:5182/14336 +step:20728/57344 train_time:12118037ms step_avg:584.62ms +step:20729/57344 train_time:12118054ms step_avg:584.59ms +step:20730/57344 train_time:12118301ms step_avg:584.58ms +step:20731/57344 train_time:12118847ms step_avg:584.58ms +grad accum step:5183/14336 +step:20732/57344 train_time:12120154ms step_avg:584.61ms +step:20733/57344 train_time:12120171ms step_avg:584.58ms +step:20734/57344 train_time:12120418ms step_avg:584.57ms +step:20735/57344 train_time:12120967ms step_avg:584.57ms +grad accum step:5184/14336 +step:20736/57344 train_time:12122280ms step_avg:584.60ms +step:20736/57344 val_loss:7.032852 train_time:12122281ms step_avg:584.60ms +step:20737/57344 train_time:12122293ms step_avg:584.57ms +step:20738/57344 train_time:12122521ms step_avg:584.56ms +step:20739/57344 train_time:12123086ms step_avg:584.56ms +grad accum step:5185/14336 +step:20740/57344 train_time:12124390ms step_avg:584.59ms +step:20741/57344 train_time:12124407ms step_avg:584.56ms +step:20742/57344 train_time:12124652ms step_avg:584.55ms +step:20743/57344 train_time:12125204ms step_avg:584.54ms +grad accum step:5186/14336 +step:20744/57344 train_time:12126552ms step_avg:584.58ms +step:20745/57344 train_time:12126568ms step_avg:584.55ms +step:20746/57344 train_time:12126824ms step_avg:584.54ms +step:20747/57344 train_time:12127393ms step_avg:584.54ms +grad accum step:5187/14336 +step:20748/57344 train_time:12128670ms step_avg:584.57ms +step:20749/57344 train_time:12128687ms step_avg:584.54ms +step:20750/57344 train_time:12128935ms step_avg:584.53ms +step:20751/57344 train_time:12129481ms step_avg:584.53ms +grad accum step:5188/14336 +step:20752/57344 train_time:12130773ms step_avg:584.56ms +step:20753/57344 train_time:12130790ms step_avg:584.53ms +step:20754/57344 train_time:12131038ms step_avg:584.52ms +step:20755/57344 train_time:12131586ms step_avg:584.51ms +grad accum step:5189/14336 +step:20756/57344 train_time:12132880ms step_avg:584.55ms +step:20757/57344 train_time:12132897ms step_avg:584.52ms +step:20758/57344 train_time:12133143ms step_avg:584.50ms +step:20759/57344 train_time:12133692ms step_avg:584.50ms +grad accum step:5190/14336 +step:20760/57344 train_time:12135014ms step_avg:584.54ms +step:20761/57344 train_time:12135031ms step_avg:584.51ms +step:20762/57344 train_time:12135286ms step_avg:584.50ms +step:20763/57344 train_time:12135852ms step_avg:584.49ms +grad accum step:5191/14336 +step:20764/57344 train_time:12137127ms step_avg:584.53ms +step:20765/57344 train_time:12137144ms step_avg:584.50ms +step:20766/57344 train_time:12137387ms step_avg:584.48ms +step:20767/57344 train_time:12137932ms step_avg:584.48ms +grad accum step:5192/14336 +step:20768/57344 train_time:12139229ms step_avg:584.52ms +step:20769/57344 train_time:12139246ms step_avg:584.49ms +step:20770/57344 train_time:12139493ms step_avg:584.47ms +step:20771/57344 train_time:12140040ms step_avg:584.47ms +grad accum step:5193/14336 +step:20772/57344 train_time:12141332ms step_avg:584.50ms +step:20773/57344 train_time:12141349ms step_avg:584.48ms +step:20774/57344 train_time:12141596ms step_avg:584.46ms +step:20775/57344 train_time:12142147ms step_avg:584.46ms +grad accum step:5194/14336 +step:20776/57344 train_time:12143441ms step_avg:584.49ms +step:20777/57344 train_time:12143458ms step_avg:584.47ms +step:20778/57344 train_time:12143704ms step_avg:584.45ms +step:20779/57344 train_time:12144246ms step_avg:584.45ms +grad accum step:5195/14336 +step:20780/57344 train_time:12145563ms step_avg:584.48ms +step:20781/57344 train_time:12145579ms step_avg:584.46ms +step:20782/57344 train_time:12145834ms step_avg:584.44ms +step:20783/57344 train_time:12146400ms step_avg:584.44ms +grad accum step:5196/14336 +step:20784/57344 train_time:12147742ms step_avg:584.48ms +step:20785/57344 train_time:12147759ms step_avg:584.45ms +step:20786/57344 train_time:12148006ms step_avg:584.43ms +step:20787/57344 train_time:12148561ms step_avg:584.43ms +grad accum step:5197/14336 +step:20788/57344 train_time:12149864ms step_avg:584.47ms +step:20789/57344 train_time:12149881ms step_avg:584.44ms +step:20790/57344 train_time:12150128ms step_avg:584.42ms +step:20791/57344 train_time:12150674ms step_avg:584.42ms +grad accum step:5198/14336 +step:20792/57344 train_time:12151974ms step_avg:584.45ms +step:20793/57344 train_time:12151991ms step_avg:584.43ms +step:20794/57344 train_time:12152239ms step_avg:584.41ms +step:20795/57344 train_time:12152784ms step_avg:584.41ms +grad accum step:5199/14336 +step:20796/57344 train_time:12154085ms step_avg:584.44ms +step:20797/57344 train_time:12154102ms step_avg:584.42ms +step:20798/57344 train_time:12154350ms step_avg:584.40ms +step:20799/57344 train_time:12154900ms step_avg:584.40ms +grad accum step:5200/14336 +step:20800/57344 train_time:12156197ms step_avg:584.43ms +step:20800/57344 val_loss:7.021889 train_time:12156197ms step_avg:584.43ms +step:20801/57344 train_time:12156210ms step_avg:584.41ms +step:20802/57344 train_time:12156434ms step_avg:584.39ms +step:20803/57344 train_time:12156981ms step_avg:584.39ms +grad accum step:5201/14336 +step:20804/57344 train_time:12158280ms step_avg:584.42ms +step:20805/57344 train_time:12158297ms step_avg:584.39ms +step:20806/57344 train_time:12158543ms step_avg:584.38ms +step:20807/57344 train_time:12159086ms step_avg:584.37ms +grad accum step:5202/14336 +step:20808/57344 train_time:12160421ms step_avg:584.41ms +step:20809/57344 train_time:12160438ms step_avg:584.38ms +step:20810/57344 train_time:12160686ms step_avg:584.37ms +step:20811/57344 train_time:12161236ms step_avg:584.37ms +grad accum step:5203/14336 +step:20812/57344 train_time:12162524ms step_avg:584.40ms +step:20813/57344 train_time:12162542ms step_avg:584.37ms +step:20814/57344 train_time:12162790ms step_avg:584.36ms +step:20815/57344 train_time:12163349ms step_avg:584.35ms +grad accum step:5204/14336 +step:20816/57344 train_time:12164673ms step_avg:584.39ms +step:20817/57344 train_time:12164690ms step_avg:584.36ms +step:20818/57344 train_time:12164936ms step_avg:584.35ms +step:20819/57344 train_time:12165485ms step_avg:584.35ms +grad accum step:5205/14336 +step:20820/57344 train_time:12166795ms step_avg:584.38ms +step:20821/57344 train_time:12166812ms step_avg:584.35ms +step:20822/57344 train_time:12167059ms step_avg:584.34ms +step:20823/57344 train_time:12167608ms step_avg:584.33ms +grad accum step:5206/14336 +step:20824/57344 train_time:12168923ms step_avg:584.37ms +step:20825/57344 train_time:12168939ms step_avg:584.34ms +step:20826/57344 train_time:12169186ms step_avg:584.33ms +step:20827/57344 train_time:12169728ms step_avg:584.32ms +grad accum step:5207/14336 +step:20828/57344 train_time:12171023ms step_avg:584.36ms +step:20829/57344 train_time:12171040ms step_avg:584.33ms +step:20830/57344 train_time:12171285ms step_avg:584.32ms +step:20831/57344 train_time:12171833ms step_avg:584.31ms +grad accum step:5208/14336 +step:20832/57344 train_time:12173156ms step_avg:584.35ms +step:20833/57344 train_time:12173173ms step_avg:584.32ms +step:20834/57344 train_time:12173417ms step_avg:584.31ms +step:20835/57344 train_time:12173954ms step_avg:584.30ms +grad accum step:5209/14336 +step:20836/57344 train_time:12175248ms step_avg:584.34ms +step:20837/57344 train_time:12175265ms step_avg:584.31ms +step:20838/57344 train_time:12175508ms step_avg:584.29ms +step:20839/57344 train_time:12176041ms step_avg:584.29ms +grad accum step:5210/14336 +step:20840/57344 train_time:12177366ms step_avg:584.33ms +step:20841/57344 train_time:12177382ms step_avg:584.30ms +step:20842/57344 train_time:12177626ms step_avg:584.28ms +step:20843/57344 train_time:12178165ms step_avg:584.28ms +grad accum step:5211/14336 +step:20844/57344 train_time:12179482ms step_avg:584.32ms +step:20845/57344 train_time:12179499ms step_avg:584.29ms +step:20846/57344 train_time:12179743ms step_avg:584.27ms +step:20847/57344 train_time:12180286ms step_avg:584.27ms +grad accum step:5212/14336 +step:20848/57344 train_time:12181601ms step_avg:584.31ms +step:20849/57344 train_time:12181618ms step_avg:584.28ms +step:20850/57344 train_time:12181868ms step_avg:584.26ms +step:20851/57344 train_time:12182416ms step_avg:584.26ms +grad accum step:5213/14336 +step:20852/57344 train_time:12183716ms step_avg:584.29ms +step:20853/57344 train_time:12183733ms step_avg:584.27ms +step:20854/57344 train_time:12183981ms step_avg:584.25ms +step:20855/57344 train_time:12184533ms step_avg:584.25ms +grad accum step:5214/14336 +step:20856/57344 train_time:12185808ms step_avg:584.28ms +step:20857/57344 train_time:12185825ms step_avg:584.26ms +step:20858/57344 train_time:12186071ms step_avg:584.24ms +step:20859/57344 train_time:12186613ms step_avg:584.24ms +grad accum step:5215/14336 +step:20860/57344 train_time:12187925ms step_avg:584.27ms +step:20861/57344 train_time:12187942ms step_avg:584.25ms +step:20862/57344 train_time:12188193ms step_avg:584.23ms +step:20863/57344 train_time:12188750ms step_avg:584.23ms +grad accum step:5216/14336 +step:20864/57344 train_time:12190068ms step_avg:584.26ms +step:20864/57344 val_loss:7.018207 train_time:12190069ms step_avg:584.26ms +step:20865/57344 train_time:12190081ms step_avg:584.24ms +step:20866/57344 train_time:12190304ms step_avg:584.22ms +step:20867/57344 train_time:12190852ms step_avg:584.22ms +grad accum step:5217/14336 +step:20868/57344 train_time:12192126ms step_avg:584.25ms +step:20869/57344 train_time:12192143ms step_avg:584.22ms +step:20870/57344 train_time:12192400ms step_avg:584.21ms +step:20871/57344 train_time:12192963ms step_avg:584.21ms +grad accum step:5218/14336 +step:20872/57344 train_time:12194283ms step_avg:584.24ms +step:20873/57344 train_time:12194299ms step_avg:584.21ms +step:20874/57344 train_time:12194549ms step_avg:584.20ms +step:20875/57344 train_time:12195106ms step_avg:584.20ms +grad accum step:5219/14336 +step:20876/57344 train_time:12196396ms step_avg:584.23ms +step:20877/57344 train_time:12196413ms step_avg:584.20ms +step:20878/57344 train_time:12196673ms step_avg:584.19ms +step:20879/57344 train_time:12197262ms step_avg:584.19ms +grad accum step:5220/14336 +step:20880/57344 train_time:12198607ms step_avg:584.22ms +step:20881/57344 train_time:12198624ms step_avg:584.20ms +step:20882/57344 train_time:12198869ms step_avg:584.18ms +step:20883/57344 train_time:12199409ms step_avg:584.18ms +grad accum step:5221/14336 +step:20884/57344 train_time:12200688ms step_avg:584.21ms +step:20885/57344 train_time:12200705ms step_avg:584.19ms +step:20886/57344 train_time:12200948ms step_avg:584.17ms +step:20887/57344 train_time:12201496ms step_avg:584.17ms +grad accum step:5222/14336 +step:20888/57344 train_time:12202827ms step_avg:584.20ms +step:20889/57344 train_time:12202844ms step_avg:584.18ms +step:20890/57344 train_time:12203089ms step_avg:584.16ms +step:20891/57344 train_time:12203639ms step_avg:584.16ms +grad accum step:5223/14336 +step:20892/57344 train_time:12204938ms step_avg:584.19ms +step:20893/57344 train_time:12204955ms step_avg:584.16ms +step:20894/57344 train_time:12205201ms step_avg:584.15ms +step:20895/57344 train_time:12205742ms step_avg:584.15ms +grad accum step:5224/14336 +step:20896/57344 train_time:12207044ms step_avg:584.18ms +step:20897/57344 train_time:12207061ms step_avg:584.15ms +step:20898/57344 train_time:12207310ms step_avg:584.14ms +step:20899/57344 train_time:12207852ms step_avg:584.14ms +grad accum step:5225/14336 +step:20900/57344 train_time:12209227ms step_avg:584.17ms +step:20901/57344 train_time:12209244ms step_avg:584.15ms +step:20902/57344 train_time:12209500ms step_avg:584.13ms +step:20903/57344 train_time:12210074ms step_avg:584.13ms +grad accum step:5226/14336 +step:20904/57344 train_time:12211370ms step_avg:584.16ms +step:20905/57344 train_time:12211386ms step_avg:584.14ms +step:20906/57344 train_time:12211634ms step_avg:584.12ms +step:20907/57344 train_time:12212184ms step_avg:584.12ms +grad accum step:5227/14336 +step:20908/57344 train_time:12213526ms step_avg:584.16ms +step:20909/57344 train_time:12213543ms step_avg:584.13ms +step:20910/57344 train_time:12213792ms step_avg:584.11ms +step:20911/57344 train_time:12214339ms step_avg:584.11ms +grad accum step:5228/14336 +step:20912/57344 train_time:12215638ms step_avg:584.14ms +step:20913/57344 train_time:12215655ms step_avg:584.12ms +step:20914/57344 train_time:12215904ms step_avg:584.10ms +step:20915/57344 train_time:12216457ms step_avg:584.10ms +grad accum step:5229/14336 +step:20916/57344 train_time:12217760ms step_avg:584.13ms +step:20917/57344 train_time:12217777ms step_avg:584.11ms +step:20918/57344 train_time:12218029ms step_avg:584.09ms +step:20919/57344 train_time:12218583ms step_avg:584.09ms +grad accum step:5230/14336 +step:20920/57344 train_time:12219891ms step_avg:584.12ms +step:20921/57344 train_time:12219908ms step_avg:584.10ms +step:20922/57344 train_time:12220159ms step_avg:584.08ms +step:20923/57344 train_time:12220722ms step_avg:584.08ms +grad accum step:5231/14336 +step:20924/57344 train_time:12235017ms step_avg:584.74ms +step:20925/57344 train_time:12235030ms step_avg:584.71ms +step:20926/57344 train_time:12235327ms step_avg:584.69ms +step:20927/57344 train_time:12235853ms step_avg:584.69ms +grad accum step:5232/14336 +step:20928/57344 train_time:12237157ms step_avg:584.73ms +step:20928/57344 val_loss:7.020712 train_time:12237157ms step_avg:584.73ms +step:20929/57344 train_time:12237170ms step_avg:584.70ms +step:20930/57344 train_time:12237400ms step_avg:584.68ms +step:20931/57344 train_time:12237967ms step_avg:584.68ms +grad accum step:5233/14336 +step:20932/57344 train_time:12239288ms step_avg:584.72ms +step:20933/57344 train_time:12239305ms step_avg:584.69ms +step:20934/57344 train_time:12239552ms step_avg:584.67ms +step:20935/57344 train_time:12240103ms step_avg:584.67ms +grad accum step:5234/14336 +step:20936/57344 train_time:12241408ms step_avg:584.71ms +step:20937/57344 train_time:12241425ms step_avg:584.68ms +step:20938/57344 train_time:12241672ms step_avg:584.66ms +step:20939/57344 train_time:12242217ms step_avg:584.66ms +grad accum step:5235/14336 +step:20940/57344 train_time:12243507ms step_avg:584.69ms +step:20941/57344 train_time:12243524ms step_avg:584.67ms +step:20942/57344 train_time:12243773ms step_avg:584.65ms +step:20943/57344 train_time:12244318ms step_avg:584.65ms +grad accum step:5236/14336 +step:20944/57344 train_time:12245631ms step_avg:584.68ms +step:20945/57344 train_time:12245648ms step_avg:584.66ms +step:20946/57344 train_time:12245893ms step_avg:584.64ms +step:20947/57344 train_time:12246442ms step_avg:584.64ms +grad accum step:5237/14336 +step:20948/57344 train_time:12247758ms step_avg:584.67ms +step:20949/57344 train_time:12247775ms step_avg:584.65ms +step:20950/57344 train_time:12248023ms step_avg:584.63ms +step:20951/57344 train_time:12248570ms step_avg:584.63ms +grad accum step:5238/14336 +step:20952/57344 train_time:12249865ms step_avg:584.66ms +step:20953/57344 train_time:12249882ms step_avg:584.64ms +step:20954/57344 train_time:12250130ms step_avg:584.62ms +step:20955/57344 train_time:12250678ms step_avg:584.62ms +grad accum step:5239/14336 +step:20956/57344 train_time:12251992ms step_avg:584.65ms +step:20957/57344 train_time:12252009ms step_avg:584.63ms +step:20958/57344 train_time:12252255ms step_avg:584.61ms +step:20959/57344 train_time:12252794ms step_avg:584.61ms +grad accum step:5240/14336 +step:20960/57344 train_time:12254086ms step_avg:584.64ms +step:20961/57344 train_time:12254103ms step_avg:584.61ms +step:20962/57344 train_time:12254350ms step_avg:584.60ms +step:20963/57344 train_time:12254900ms step_avg:584.60ms +grad accum step:5241/14336 +step:20964/57344 train_time:12256211ms step_avg:584.63ms +step:20965/57344 train_time:12256228ms step_avg:584.60ms +step:20966/57344 train_time:12256471ms step_avg:584.59ms +step:20967/57344 train_time:12257013ms step_avg:584.59ms +grad accum step:5242/14336 +step:20968/57344 train_time:12258382ms step_avg:584.62ms +step:20969/57344 train_time:12258399ms step_avg:584.60ms +step:20970/57344 train_time:12258645ms step_avg:584.58ms +step:20971/57344 train_time:12259195ms step_avg:584.58ms +grad accum step:5243/14336 +step:20972/57344 train_time:12260503ms step_avg:584.61ms +step:20973/57344 train_time:12260520ms step_avg:584.59ms +step:20974/57344 train_time:12260763ms step_avg:584.57ms +step:20975/57344 train_time:12261298ms step_avg:584.57ms +grad accum step:5244/14336 +step:20976/57344 train_time:12262616ms step_avg:584.60ms +step:20977/57344 train_time:12262633ms step_avg:584.58ms +step:20978/57344 train_time:12262884ms step_avg:584.56ms +step:20979/57344 train_time:12263440ms step_avg:584.56ms +grad accum step:5245/14336 +step:20980/57344 train_time:12264754ms step_avg:584.59ms +step:20981/57344 train_time:12264771ms step_avg:584.57ms +step:20982/57344 train_time:12265023ms step_avg:584.55ms +step:20983/57344 train_time:12265580ms step_avg:584.55ms +grad accum step:5246/14336 +step:20984/57344 train_time:12266911ms step_avg:584.58ms +step:20985/57344 train_time:12266928ms step_avg:584.56ms +step:20986/57344 train_time:12267174ms step_avg:584.54ms +step:20987/57344 train_time:12267723ms step_avg:584.54ms +grad accum step:5247/14336 +step:20988/57344 train_time:12269014ms step_avg:584.57ms +step:20989/57344 train_time:12269031ms step_avg:584.55ms +step:20990/57344 train_time:12269280ms step_avg:584.53ms +step:20991/57344 train_time:12269828ms step_avg:584.53ms +grad accum step:5248/14336 +step:20992/57344 train_time:12271103ms step_avg:584.56ms +step:20992/57344 val_loss:7.029232 train_time:12271103ms step_avg:584.56ms +step:20993/57344 train_time:12271504ms step_avg:584.55ms +step:20994/57344 train_time:12271550ms step_avg:584.53ms +step:20995/57344 train_time:12272102ms step_avg:584.52ms +grad accum step:5249/14336 +step:20996/57344 train_time:12273433ms step_avg:584.56ms +step:20997/57344 train_time:12273445ms step_avg:584.53ms +step:20998/57344 train_time:12273682ms step_avg:584.52ms +step:20999/57344 train_time:12274241ms step_avg:584.52ms +grad accum step:5250/14336 +step:21000/57344 train_time:12275557ms step_avg:584.55ms +step:21001/57344 train_time:12275574ms step_avg:584.52ms +step:21002/57344 train_time:12275820ms step_avg:584.51ms +step:21003/57344 train_time:12276373ms step_avg:584.51ms +grad accum step:5251/14336 +step:21004/57344 train_time:12277661ms step_avg:584.54ms +step:21005/57344 train_time:12277678ms step_avg:584.51ms +step:21006/57344 train_time:12277925ms step_avg:584.50ms +step:21007/57344 train_time:12278474ms step_avg:584.49ms +grad accum step:5252/14336 +step:21008/57344 train_time:12279766ms step_avg:584.53ms +step:21009/57344 train_time:12279783ms step_avg:584.50ms +step:21010/57344 train_time:12280037ms step_avg:584.49ms +step:21011/57344 train_time:12280601ms step_avg:584.48ms +grad accum step:5253/14336 +step:21012/57344 train_time:12281909ms step_avg:584.52ms +step:21013/57344 train_time:12281926ms step_avg:584.49ms +step:21014/57344 train_time:12282169ms step_avg:584.48ms +step:21015/57344 train_time:12282721ms step_avg:584.47ms +grad accum step:5254/14336 +step:21016/57344 train_time:12284038ms step_avg:584.51ms +step:21017/57344 train_time:12284054ms step_avg:584.48ms +step:21018/57344 train_time:12284309ms step_avg:584.47ms +step:21019/57344 train_time:12284873ms step_avg:584.47ms +grad accum step:5255/14336 +step:21020/57344 train_time:12286166ms step_avg:584.50ms +step:21021/57344 train_time:12286183ms step_avg:584.47ms +step:21022/57344 train_time:12286429ms step_avg:584.46ms +step:21023/57344 train_time:12286973ms step_avg:584.45ms +grad accum step:5256/14336 +step:21024/57344 train_time:12288269ms step_avg:584.49ms +step:21025/57344 train_time:12288286ms step_avg:584.46ms +step:21026/57344 train_time:12288533ms step_avg:584.44ms +step:21027/57344 train_time:12289076ms step_avg:584.44ms +grad accum step:5257/14336 +step:21028/57344 train_time:12290434ms step_avg:584.48ms +step:21029/57344 train_time:12290451ms step_avg:584.45ms +step:21030/57344 train_time:12290700ms step_avg:584.44ms +step:21031/57344 train_time:12291251ms step_avg:584.43ms +grad accum step:5258/14336 +step:21032/57344 train_time:12292592ms step_avg:584.47ms +step:21033/57344 train_time:12292609ms step_avg:584.44ms +step:21034/57344 train_time:12292855ms step_avg:584.43ms +step:21035/57344 train_time:12293404ms step_avg:584.43ms +grad accum step:5259/14336 +step:21036/57344 train_time:12294729ms step_avg:584.46ms +step:21037/57344 train_time:12294746ms step_avg:584.43ms +step:21038/57344 train_time:12295001ms step_avg:584.42ms +step:21039/57344 train_time:12295563ms step_avg:584.42ms +grad accum step:5260/14336 +step:21040/57344 train_time:12296857ms step_avg:584.45ms +step:21041/57344 train_time:12296874ms step_avg:584.42ms +step:21042/57344 train_time:12297121ms step_avg:584.41ms +step:21043/57344 train_time:12297690ms step_avg:584.41ms +grad accum step:5261/14336 +step:21044/57344 train_time:12299049ms step_avg:584.44ms +step:21045/57344 train_time:12299066ms step_avg:584.42ms +step:21046/57344 train_time:12299315ms step_avg:584.40ms +step:21047/57344 train_time:12299869ms step_avg:584.40ms +grad accum step:5262/14336 +step:21048/57344 train_time:12301200ms step_avg:584.44ms +step:21049/57344 train_time:12301217ms step_avg:584.41ms +step:21050/57344 train_time:12301459ms step_avg:584.39ms +step:21051/57344 train_time:12302001ms step_avg:584.39ms +grad accum step:5263/14336 +step:21052/57344 train_time:12303281ms step_avg:584.42ms +step:21053/57344 train_time:12303298ms step_avg:584.40ms +step:21054/57344 train_time:12303546ms step_avg:584.38ms +step:21055/57344 train_time:12304096ms step_avg:584.38ms +grad accum step:5264/14336 +step:21056/57344 train_time:12305392ms step_avg:584.41ms +step:21056/57344 val_loss:7.028652 train_time:12305393ms step_avg:584.41ms +step:21057/57344 train_time:12305405ms step_avg:584.39ms +step:21058/57344 train_time:12305626ms step_avg:584.37ms +step:21059/57344 train_time:12306168ms step_avg:584.37ms +grad accum step:5265/14336 +step:21060/57344 train_time:12307459ms step_avg:584.40ms +step:21061/57344 train_time:12307476ms step_avg:584.37ms +step:21062/57344 train_time:12307725ms step_avg:584.36ms +step:21063/57344 train_time:12308291ms step_avg:584.36ms +grad accum step:5266/14336 +step:21064/57344 train_time:12309630ms step_avg:584.39ms +step:21065/57344 train_time:12309647ms step_avg:584.36ms +step:21066/57344 train_time:12309894ms step_avg:584.35ms +step:21067/57344 train_time:12310438ms step_avg:584.35ms +grad accum step:5267/14336 +step:21068/57344 train_time:12311739ms step_avg:584.38ms +step:21069/57344 train_time:12311756ms step_avg:584.35ms +step:21070/57344 train_time:12312003ms step_avg:584.34ms +step:21071/57344 train_time:12312547ms step_avg:584.34ms +grad accum step:5268/14336 +step:21072/57344 train_time:12313860ms step_avg:584.37ms +step:21073/57344 train_time:12313877ms step_avg:584.34ms +step:21074/57344 train_time:12314133ms step_avg:584.33ms +step:21075/57344 train_time:12314704ms step_avg:584.33ms +grad accum step:5269/14336 +step:21076/57344 train_time:12315996ms step_avg:584.36ms +step:21077/57344 train_time:12316013ms step_avg:584.33ms +step:21078/57344 train_time:12316263ms step_avg:584.32ms +step:21079/57344 train_time:12316814ms step_avg:584.32ms +grad accum step:5270/14336 +step:21080/57344 train_time:12318140ms step_avg:584.35ms +step:21081/57344 train_time:12318156ms step_avg:584.33ms +step:21082/57344 train_time:12318411ms step_avg:584.31ms +step:21083/57344 train_time:12318981ms step_avg:584.31ms +grad accum step:5271/14336 +step:21084/57344 train_time:12320263ms step_avg:584.34ms +step:21085/57344 train_time:12320280ms step_avg:584.31ms +step:21086/57344 train_time:12320528ms step_avg:584.30ms +step:21087/57344 train_time:12321074ms step_avg:584.30ms +grad accum step:5272/14336 +step:21088/57344 train_time:12322376ms step_avg:584.33ms +step:21089/57344 train_time:12322393ms step_avg:584.30ms +step:21090/57344 train_time:12322642ms step_avg:584.29ms +step:21091/57344 train_time:12323196ms step_avg:584.29ms +grad accum step:5273/14336 +step:21092/57344 train_time:12324505ms step_avg:584.32ms +step:21093/57344 train_time:12324522ms step_avg:584.29ms +step:21094/57344 train_time:12324783ms step_avg:584.28ms +step:21095/57344 train_time:12325364ms step_avg:584.28ms +grad accum step:5274/14336 +step:21096/57344 train_time:12326662ms step_avg:584.31ms +step:21097/57344 train_time:12326678ms step_avg:584.29ms +step:21098/57344 train_time:12326924ms step_avg:584.27ms +step:21099/57344 train_time:12327473ms step_avg:584.27ms +grad accum step:5275/14336 +step:21100/57344 train_time:12328788ms step_avg:584.30ms +step:21101/57344 train_time:12328804ms step_avg:584.28ms +step:21102/57344 train_time:12329049ms step_avg:584.26ms +step:21103/57344 train_time:12329594ms step_avg:584.26ms +grad accum step:5276/14336 +step:21104/57344 train_time:12330870ms step_avg:584.29ms +step:21105/57344 train_time:12330887ms step_avg:584.26ms +step:21106/57344 train_time:12331133ms step_avg:584.25ms +step:21107/57344 train_time:12331683ms step_avg:584.25ms +grad accum step:5277/14336 +step:21108/57344 train_time:12332982ms step_avg:584.28ms +step:21109/57344 train_time:12332999ms step_avg:584.25ms +step:21110/57344 train_time:12333247ms step_avg:584.24ms +step:21111/57344 train_time:12333805ms step_avg:584.24ms +grad accum step:5278/14336 +step:21112/57344 train_time:12335116ms step_avg:584.27ms +step:21113/57344 train_time:12335133ms step_avg:584.24ms +step:21114/57344 train_time:12335385ms step_avg:584.23ms +step:21115/57344 train_time:12335944ms step_avg:584.23ms +grad accum step:5279/14336 +step:21116/57344 train_time:12337253ms step_avg:584.26ms +step:21117/57344 train_time:12337270ms step_avg:584.23ms +step:21118/57344 train_time:12337519ms step_avg:584.22ms +step:21119/57344 train_time:12338072ms step_avg:584.22ms +grad accum step:5280/14336 +step:21120/57344 train_time:12339382ms step_avg:584.25ms +step:21120/57344 val_loss:7.018156 train_time:12339382ms step_avg:584.25ms +step:21121/57344 train_time:12339395ms step_avg:584.22ms +step:21122/57344 train_time:12339618ms step_avg:584.21ms +step:21123/57344 train_time:12340166ms step_avg:584.21ms +grad accum step:5281/14336 +step:21124/57344 train_time:12341464ms step_avg:584.24ms +step:21125/57344 train_time:12341481ms step_avg:584.21ms +step:21126/57344 train_time:12341727ms step_avg:584.20ms +step:21127/57344 train_time:12342274ms step_avg:584.19ms +grad accum step:5282/14336 +step:21128/57344 train_time:12343569ms step_avg:584.23ms +step:21129/57344 train_time:12343586ms step_avg:584.20ms +step:21130/57344 train_time:12343832ms step_avg:584.19ms +step:21131/57344 train_time:12344396ms step_avg:584.18ms +grad accum step:5283/14336 +step:21132/57344 train_time:12345715ms step_avg:584.22ms +step:21133/57344 train_time:12345732ms step_avg:584.19ms +step:21134/57344 train_time:12345981ms step_avg:584.18ms +step:21135/57344 train_time:12346534ms step_avg:584.17ms +grad accum step:5284/14336 +step:21136/57344 train_time:12347825ms step_avg:584.21ms +step:21137/57344 train_time:12347842ms step_avg:584.18ms +step:21138/57344 train_time:12348089ms step_avg:584.17ms +step:21139/57344 train_time:12348652ms step_avg:584.16ms +grad accum step:5285/14336 +step:21140/57344 train_time:12349982ms step_avg:584.20ms +step:21141/57344 train_time:12349999ms step_avg:584.17ms +step:21142/57344 train_time:12350253ms step_avg:584.16ms +step:21143/57344 train_time:12350821ms step_avg:584.16ms +grad accum step:5286/14336 +step:21144/57344 train_time:12352095ms step_avg:584.19ms +step:21145/57344 train_time:12352112ms step_avg:584.16ms +step:21146/57344 train_time:12352361ms step_avg:584.15ms +step:21147/57344 train_time:12352915ms step_avg:584.15ms +grad accum step:5287/14336 +step:21148/57344 train_time:12354244ms step_avg:584.18ms +step:21149/57344 train_time:12354261ms step_avg:584.15ms +step:21150/57344 train_time:12354510ms step_avg:584.14ms +step:21151/57344 train_time:12355066ms step_avg:584.14ms +grad accum step:5288/14336 +step:21152/57344 train_time:12356382ms step_avg:584.17ms +step:21153/57344 train_time:12356399ms step_avg:584.14ms +step:21154/57344 train_time:12356643ms step_avg:584.13ms +step:21155/57344 train_time:12357182ms step_avg:584.13ms +grad accum step:5289/14336 +step:21156/57344 train_time:12358484ms step_avg:584.16ms +step:21157/57344 train_time:12358501ms step_avg:584.13ms +step:21158/57344 train_time:12358750ms step_avg:584.12ms +step:21159/57344 train_time:12359322ms step_avg:584.12ms +grad accum step:5290/14336 +step:21160/57344 train_time:12360687ms step_avg:584.15ms +step:21161/57344 train_time:12360704ms step_avg:584.13ms +step:21162/57344 train_time:12360955ms step_avg:584.11ms +step:21163/57344 train_time:12361509ms step_avg:584.11ms +grad accum step:5291/14336 +step:21164/57344 train_time:12362820ms step_avg:584.14ms +step:21165/57344 train_time:12362837ms step_avg:584.12ms +step:21166/57344 train_time:12363082ms step_avg:584.10ms +step:21167/57344 train_time:12363626ms step_avg:584.10ms +grad accum step:5292/14336 +step:21168/57344 train_time:12364942ms step_avg:584.13ms +step:21169/57344 train_time:12364959ms step_avg:584.11ms +step:21170/57344 train_time:12365203ms step_avg:584.09ms +step:21171/57344 train_time:12365747ms step_avg:584.09ms +grad accum step:5293/14336 +step:21172/57344 train_time:12367047ms step_avg:584.12ms +step:21173/57344 train_time:12367064ms step_avg:584.10ms +step:21174/57344 train_time:12367317ms step_avg:584.08ms +step:21175/57344 train_time:12367879ms step_avg:584.08ms +grad accum step:5294/14336 +step:21176/57344 train_time:12369200ms step_avg:584.11ms +step:21177/57344 train_time:12369217ms step_avg:584.09ms +step:21178/57344 train_time:12369464ms step_avg:584.07ms +step:21179/57344 train_time:12370017ms step_avg:584.07ms +grad accum step:5295/14336 +step:21180/57344 train_time:12371319ms step_avg:584.10ms +step:21181/57344 train_time:12371336ms step_avg:584.08ms +step:21182/57344 train_time:12371580ms step_avg:584.06ms +step:21183/57344 train_time:12372130ms step_avg:584.06ms +grad accum step:5296/14336 +step:21184/57344 train_time:12373423ms step_avg:584.09ms +step:21184/57344 val_loss:7.007094 train_time:12373424ms step_avg:584.09ms +step:21185/57344 train_time:12373436ms step_avg:584.07ms +step:21186/57344 train_time:12373661ms step_avg:584.05ms +step:21187/57344 train_time:12374212ms step_avg:584.05ms +grad accum step:5297/14336 +step:21188/57344 train_time:12375498ms step_avg:584.08ms +step:21189/57344 train_time:12375515ms step_avg:584.05ms +step:21190/57344 train_time:12375764ms step_avg:584.04ms +step:21191/57344 train_time:12376319ms step_avg:584.04ms +grad accum step:5298/14336 +step:21192/57344 train_time:12377648ms step_avg:584.07ms +step:21193/57344 train_time:12377664ms step_avg:584.04ms +step:21194/57344 train_time:12377912ms step_avg:584.03ms +step:21195/57344 train_time:12378451ms step_avg:584.03ms +grad accum step:5299/14336 +step:21196/57344 train_time:12379733ms step_avg:584.06ms +step:21197/57344 train_time:12379750ms step_avg:584.03ms +step:21198/57344 train_time:12380003ms step_avg:584.02ms +step:21199/57344 train_time:12380573ms step_avg:584.02ms +grad accum step:5300/14336 +step:21200/57344 train_time:12381873ms step_avg:584.05ms +step:21201/57344 train_time:12381890ms step_avg:584.02ms +step:21202/57344 train_time:12382140ms step_avg:584.01ms +step:21203/57344 train_time:12382686ms step_avg:584.01ms +grad accum step:5301/14336 +step:21204/57344 train_time:12383995ms step_avg:584.04ms +step:21205/57344 train_time:12384012ms step_avg:584.01ms +step:21206/57344 train_time:12384262ms step_avg:584.00ms +step:21207/57344 train_time:12384814ms step_avg:584.00ms +grad accum step:5302/14336 +step:21208/57344 train_time:12386105ms step_avg:584.03ms +step:21209/57344 train_time:12386122ms step_avg:584.00ms +step:21210/57344 train_time:12386367ms step_avg:583.99ms +step:21211/57344 train_time:12386918ms step_avg:583.99ms +grad accum step:5303/14336 +step:21212/57344 train_time:12388242ms step_avg:584.02ms +step:21213/57344 train_time:12388259ms step_avg:583.99ms +step:21214/57344 train_time:12388506ms step_avg:583.98ms +step:21215/57344 train_time:12389045ms step_avg:583.98ms +grad accum step:5304/14336 +step:21216/57344 train_time:12390401ms step_avg:584.01ms +step:21217/57344 train_time:12390418ms step_avg:583.99ms +step:21218/57344 train_time:12390665ms step_avg:583.97ms +step:21219/57344 train_time:12391221ms step_avg:583.97ms +grad accum step:5305/14336 +step:21220/57344 train_time:12392528ms step_avg:584.00ms +step:21221/57344 train_time:12392545ms step_avg:583.98ms +step:21222/57344 train_time:12392794ms step_avg:583.96ms +step:21223/57344 train_time:12393351ms step_avg:583.96ms +grad accum step:5306/14336 +step:21224/57344 train_time:12394696ms step_avg:583.99ms +step:21225/57344 train_time:12394713ms step_avg:583.97ms +step:21226/57344 train_time:12394959ms step_avg:583.95ms +step:21227/57344 train_time:12395509ms step_avg:583.95ms +grad accum step:5307/14336 +step:21228/57344 train_time:12396848ms step_avg:583.99ms +step:21229/57344 train_time:12396865ms step_avg:583.96ms +step:21230/57344 train_time:12397111ms step_avg:583.94ms +step:21231/57344 train_time:12397659ms step_avg:583.94ms +grad accum step:5308/14336 +step:21232/57344 train_time:12398938ms step_avg:583.97ms +step:21233/57344 train_time:12398955ms step_avg:583.95ms +step:21234/57344 train_time:12399202ms step_avg:583.93ms +step:21235/57344 train_time:12399754ms step_avg:583.93ms +grad accum step:5309/14336 +step:21236/57344 train_time:12401092ms step_avg:583.97ms +step:21237/57344 train_time:12401108ms step_avg:583.94ms +step:21238/57344 train_time:12401357ms step_avg:583.92ms +step:21239/57344 train_time:12401901ms step_avg:583.92ms +grad accum step:5310/14336 +step:21240/57344 train_time:12403179ms step_avg:583.95ms +step:21241/57344 train_time:12403196ms step_avg:583.93ms +step:21242/57344 train_time:12403445ms step_avg:583.91ms +step:21243/57344 train_time:12403990ms step_avg:583.91ms +grad accum step:5311/14336 +step:21244/57344 train_time:12405267ms step_avg:583.94ms +step:21245/57344 train_time:12405283ms step_avg:583.92ms +step:21246/57344 train_time:12405527ms step_avg:583.90ms +step:21247/57344 train_time:12406067ms step_avg:583.90ms +grad accum step:5312/14336 +step:21248/57344 train_time:12407455ms step_avg:583.94ms +step:21248/57344 val_loss:7.012620 train_time:12407456ms step_avg:583.94ms +step:21249/57344 train_time:12407869ms step_avg:583.93ms +step:21250/57344 train_time:12408065ms step_avg:583.91ms +step:21251/57344 train_time:12408618ms step_avg:583.91ms +grad accum step:5313/14336 +step:21252/57344 train_time:12409959ms step_avg:583.94ms +step:21253/57344 train_time:12409971ms step_avg:583.92ms +step:21254/57344 train_time:12410210ms step_avg:583.90ms +step:21255/57344 train_time:12410756ms step_avg:583.90ms +grad accum step:5314/14336 +step:21256/57344 train_time:12412058ms step_avg:583.93ms +step:21257/57344 train_time:12412075ms step_avg:583.91ms +step:21258/57344 train_time:12412321ms step_avg:583.89ms +step:21259/57344 train_time:12412867ms step_avg:583.89ms +grad accum step:5315/14336 +step:21260/57344 train_time:12414201ms step_avg:583.92ms +step:21261/57344 train_time:12414218ms step_avg:583.90ms +step:21262/57344 train_time:12414473ms step_avg:583.88ms +step:21263/57344 train_time:12415036ms step_avg:583.88ms +grad accum step:5316/14336 +step:21264/57344 train_time:12416351ms step_avg:583.91ms +step:21265/57344 train_time:12416368ms step_avg:583.89ms +step:21266/57344 train_time:12416613ms step_avg:583.87ms +step:21267/57344 train_time:12417161ms step_avg:583.87ms +grad accum step:5317/14336 +step:21268/57344 train_time:12418471ms step_avg:583.90ms +step:21269/57344 train_time:12418488ms step_avg:583.88ms +step:21270/57344 train_time:12418748ms step_avg:583.86ms +step:21271/57344 train_time:12419319ms step_avg:583.86ms +grad accum step:5318/14336 +step:21272/57344 train_time:12420648ms step_avg:583.90ms +step:21273/57344 train_time:12420665ms step_avg:583.87ms +step:21274/57344 train_time:12420908ms step_avg:583.85ms +step:21275/57344 train_time:12421435ms step_avg:583.85ms +grad accum step:5319/14336 +step:21276/57344 train_time:12422723ms step_avg:583.88ms +step:21277/57344 train_time:12422740ms step_avg:583.86ms +step:21278/57344 train_time:12422988ms step_avg:583.84ms +step:21279/57344 train_time:12423534ms step_avg:583.84ms +grad accum step:5320/14336 +step:21280/57344 train_time:12424852ms step_avg:583.87ms +step:21281/57344 train_time:12424869ms step_avg:583.85ms +step:21282/57344 train_time:12425120ms step_avg:583.83ms +step:21283/57344 train_time:12425682ms step_avg:583.83ms +grad accum step:5321/14336 +step:21284/57344 train_time:12427001ms step_avg:583.87ms +step:21285/57344 train_time:12427018ms step_avg:583.84ms +step:21286/57344 train_time:12427265ms step_avg:583.82ms +step:21287/57344 train_time:12427821ms step_avg:583.82ms +grad accum step:5322/14336 +step:21288/57344 train_time:12429131ms step_avg:583.86ms +step:21289/57344 train_time:12429148ms step_avg:583.83ms +step:21290/57344 train_time:12429397ms step_avg:583.81ms +step:21291/57344 train_time:12429943ms step_avg:583.81ms +grad accum step:5323/14336 +step:21292/57344 train_time:12431258ms step_avg:583.85ms +step:21293/57344 train_time:12431275ms step_avg:583.82ms +step:21294/57344 train_time:12431522ms step_avg:583.80ms +step:21295/57344 train_time:12432063ms step_avg:583.80ms +grad accum step:5324/14336 +step:21296/57344 train_time:12433362ms step_avg:583.84ms +step:21297/57344 train_time:12433379ms step_avg:583.81ms +step:21298/57344 train_time:12433627ms step_avg:583.79ms +step:21299/57344 train_time:12434170ms step_avg:583.79ms +grad accum step:5325/14336 +step:21300/57344 train_time:12435465ms step_avg:583.82ms +step:21301/57344 train_time:12435482ms step_avg:583.80ms +step:21302/57344 train_time:12435732ms step_avg:583.78ms +step:21303/57344 train_time:12436276ms step_avg:583.78ms +grad accum step:5326/14336 +step:21304/57344 train_time:12437587ms step_avg:583.81ms +step:21305/57344 train_time:12437604ms step_avg:583.79ms +step:21306/57344 train_time:12437854ms step_avg:583.77ms +step:21307/57344 train_time:12438398ms step_avg:583.77ms +grad accum step:5327/14336 +step:21308/57344 train_time:12439718ms step_avg:583.81ms +step:21309/57344 train_time:12439735ms step_avg:583.78ms +step:21310/57344 train_time:12439981ms step_avg:583.76ms +step:21311/57344 train_time:12440525ms step_avg:583.76ms +grad accum step:5328/14336 +step:21312/57344 train_time:12441925ms step_avg:583.80ms +step:21312/57344 val_loss:7.001192 train_time:12441926ms step_avg:583.80ms +step:21313/57344 train_time:12441939ms step_avg:583.77ms +step:21314/57344 train_time:12442164ms step_avg:583.76ms +step:21315/57344 train_time:12442714ms step_avg:583.75ms +grad accum step:5329/14336 +step:21316/57344 train_time:12444068ms step_avg:583.79ms +step:21317/57344 train_time:12444085ms step_avg:583.76ms +step:21318/57344 train_time:12444329ms step_avg:583.75ms +step:21319/57344 train_time:12444871ms step_avg:583.75ms +grad accum step:5330/14336 +step:21320/57344 train_time:12446211ms step_avg:583.78ms +step:21321/57344 train_time:12446228ms step_avg:583.75ms +step:21322/57344 train_time:12446474ms step_avg:583.74ms +step:21323/57344 train_time:12447019ms step_avg:583.74ms +grad accum step:5331/14336 +step:21324/57344 train_time:12448320ms step_avg:583.77ms +step:21325/57344 train_time:12448337ms step_avg:583.74ms +step:21326/57344 train_time:12448588ms step_avg:583.73ms +step:21327/57344 train_time:12449147ms step_avg:583.73ms +grad accum step:5332/14336 +step:21328/57344 train_time:12450469ms step_avg:583.76ms +step:21329/57344 train_time:12450486ms step_avg:583.74ms +step:21330/57344 train_time:12450735ms step_avg:583.72ms +step:21331/57344 train_time:12451289ms step_avg:583.72ms +grad accum step:5333/14336 +step:21332/57344 train_time:12452584ms step_avg:583.75ms +step:21333/57344 train_time:12452600ms step_avg:583.72ms +step:21334/57344 train_time:12452846ms step_avg:583.71ms +step:21335/57344 train_time:12453391ms step_avg:583.71ms +grad accum step:5334/14336 +step:21336/57344 train_time:12454691ms step_avg:583.74ms +step:21337/57344 train_time:12454708ms step_avg:583.71ms +step:21338/57344 train_time:12454964ms step_avg:583.70ms +step:21339/57344 train_time:12455529ms step_avg:583.70ms +grad accum step:5335/14336 +step:21340/57344 train_time:12456820ms step_avg:583.73ms +step:21341/57344 train_time:12456837ms step_avg:583.70ms +step:21342/57344 train_time:12457082ms step_avg:583.69ms +step:21343/57344 train_time:12457629ms step_avg:583.69ms +grad accum step:5336/14336 +step:21344/57344 train_time:12458932ms step_avg:583.72ms +step:21345/57344 train_time:12458948ms step_avg:583.69ms +step:21346/57344 train_time:12459194ms step_avg:583.68ms +step:21347/57344 train_time:12459741ms step_avg:583.68ms +grad accum step:5337/14336 +step:21348/57344 train_time:12461026ms step_avg:583.71ms +step:21349/57344 train_time:12461043ms step_avg:583.68ms +step:21350/57344 train_time:12461291ms step_avg:583.67ms +step:21351/57344 train_time:12461843ms step_avg:583.67ms +grad accum step:5338/14336 +step:21352/57344 train_time:12463147ms step_avg:583.70ms +step:21353/57344 train_time:12463164ms step_avg:583.67ms +step:21354/57344 train_time:12463411ms step_avg:583.66ms +step:21355/57344 train_time:12463946ms step_avg:583.65ms +grad accum step:5339/14336 +step:21356/57344 train_time:12465256ms step_avg:583.69ms +step:21357/57344 train_time:12465273ms step_avg:583.66ms +step:21358/57344 train_time:12465522ms step_avg:583.65ms +step:21359/57344 train_time:12466078ms step_avg:583.65ms +grad accum step:5340/14336 +step:21360/57344 train_time:12467387ms step_avg:583.68ms +step:21361/57344 train_time:12467404ms step_avg:583.65ms +step:21362/57344 train_time:12467653ms step_avg:583.64ms +step:21363/57344 train_time:12468197ms step_avg:583.64ms +grad accum step:5341/14336 +step:21364/57344 train_time:12469514ms step_avg:583.67ms +step:21365/57344 train_time:12469531ms step_avg:583.64ms +step:21366/57344 train_time:12469782ms step_avg:583.63ms +step:21367/57344 train_time:12470341ms step_avg:583.63ms +grad accum step:5342/14336 +step:21368/57344 train_time:12471646ms step_avg:583.66ms +step:21369/57344 train_time:12471663ms step_avg:583.63ms +step:21370/57344 train_time:12471912ms step_avg:583.62ms +step:21371/57344 train_time:12472467ms step_avg:583.62ms +grad accum step:5343/14336 +step:21372/57344 train_time:12473858ms step_avg:583.65ms +step:21373/57344 train_time:12473875ms step_avg:583.63ms +step:21374/57344 train_time:12474142ms step_avg:583.61ms +step:21375/57344 train_time:12474755ms step_avg:583.61ms +grad accum step:5344/14336 +step:21376/57344 train_time:12476082ms step_avg:583.65ms +step:21376/57344 val_loss:7.009300 train_time:12476082ms step_avg:583.65ms +step:21377/57344 train_time:12476095ms step_avg:583.62ms +step:21378/57344 train_time:12476321ms step_avg:583.61ms +step:21379/57344 train_time:12476867ms step_avg:583.60ms +grad accum step:5345/14336 +step:21380/57344 train_time:12478160ms step_avg:583.64ms +step:21381/57344 train_time:12478177ms step_avg:583.61ms +step:21382/57344 train_time:12478430ms step_avg:583.60ms +step:21383/57344 train_time:12478990ms step_avg:583.59ms +grad accum step:5346/14336 +step:21384/57344 train_time:12480319ms step_avg:583.63ms +step:21385/57344 train_time:12480336ms step_avg:583.60ms +step:21386/57344 train_time:12480582ms step_avg:583.59ms +step:21387/57344 train_time:12481125ms step_avg:583.58ms +grad accum step:5347/14336 +step:21388/57344 train_time:12482439ms step_avg:583.62ms +step:21389/57344 train_time:12482456ms step_avg:583.59ms +step:21390/57344 train_time:12482702ms step_avg:583.58ms +step:21391/57344 train_time:12483256ms step_avg:583.58ms +grad accum step:5348/14336 +step:21392/57344 train_time:12484551ms step_avg:583.61ms +step:21393/57344 train_time:12484568ms step_avg:583.58ms +step:21394/57344 train_time:12484818ms step_avg:583.57ms +step:21395/57344 train_time:12485374ms step_avg:583.57ms +grad accum step:5349/14336 +step:21396/57344 train_time:12486688ms step_avg:583.60ms +step:21397/57344 train_time:12486705ms step_avg:583.57ms +step:21398/57344 train_time:12486953ms step_avg:583.56ms +step:21399/57344 train_time:12487499ms step_avg:583.56ms +grad accum step:5350/14336 +step:21400/57344 train_time:12488808ms step_avg:583.59ms +step:21401/57344 train_time:12488825ms step_avg:583.56ms +step:21402/57344 train_time:12489071ms step_avg:583.55ms +step:21403/57344 train_time:12489622ms step_avg:583.55ms +grad accum step:5351/14336 +step:21404/57344 train_time:12490980ms step_avg:583.58ms +step:21405/57344 train_time:12490997ms step_avg:583.56ms +step:21406/57344 train_time:12491245ms step_avg:583.54ms +step:21407/57344 train_time:12491797ms step_avg:583.54ms +grad accum step:5352/14336 +step:21408/57344 train_time:12493121ms step_avg:583.57ms +step:21409/57344 train_time:12493138ms step_avg:583.55ms +step:21410/57344 train_time:12493384ms step_avg:583.53ms +step:21411/57344 train_time:12493928ms step_avg:583.53ms +grad accum step:5353/14336 +step:21412/57344 train_time:12495232ms step_avg:583.56ms +step:21413/57344 train_time:12495248ms step_avg:583.54ms +step:21414/57344 train_time:12495497ms step_avg:583.52ms +step:21415/57344 train_time:12496039ms step_avg:583.52ms +grad accum step:5354/14336 +step:21416/57344 train_time:12497328ms step_avg:583.55ms +step:21417/57344 train_time:12497345ms step_avg:583.52ms +step:21418/57344 train_time:12497591ms step_avg:583.51ms +step:21419/57344 train_time:12498140ms step_avg:583.51ms +grad accum step:5355/14336 +step:21420/57344 train_time:12499447ms step_avg:583.54ms +step:21421/57344 train_time:12499464ms step_avg:583.51ms +step:21422/57344 train_time:12499715ms step_avg:583.50ms +step:21423/57344 train_time:12500275ms step_avg:583.50ms +grad accum step:5356/14336 +step:21424/57344 train_time:12501597ms step_avg:583.53ms +step:21425/57344 train_time:12501614ms step_avg:583.51ms +step:21426/57344 train_time:12501861ms step_avg:583.49ms +step:21427/57344 train_time:12502417ms step_avg:583.49ms +grad accum step:5357/14336 +step:21428/57344 train_time:12503778ms step_avg:583.53ms +step:21429/57344 train_time:12503795ms step_avg:583.50ms +step:21430/57344 train_time:12504038ms step_avg:583.48ms +step:21431/57344 train_time:12504581ms step_avg:583.48ms +grad accum step:5358/14336 +step:21432/57344 train_time:12505884ms step_avg:583.51ms +step:21433/57344 train_time:12505901ms step_avg:583.49ms +step:21434/57344 train_time:12506150ms step_avg:583.47ms +step:21435/57344 train_time:12506702ms step_avg:583.47ms +grad accum step:5359/14336 +step:21436/57344 train_time:12508016ms step_avg:583.51ms +step:21437/57344 train_time:12508033ms step_avg:583.48ms +step:21438/57344 train_time:12508282ms step_avg:583.46ms +step:21439/57344 train_time:12508841ms step_avg:583.46ms +grad accum step:5360/14336 +step:21440/57344 train_time:12510175ms step_avg:583.50ms +step:21440/57344 val_loss:6.929464 train_time:12510176ms step_avg:583.50ms +step:21441/57344 train_time:12510188ms step_avg:583.47ms +step:21442/57344 train_time:12510412ms step_avg:583.45ms +step:21443/57344 train_time:12510964ms step_avg:583.45ms +grad accum step:5361/14336 +step:21444/57344 train_time:12512270ms step_avg:583.49ms +step:21445/57344 train_time:12512287ms step_avg:583.46ms +step:21446/57344 train_time:12512534ms step_avg:583.44ms +step:21447/57344 train_time:12513080ms step_avg:583.44ms +grad accum step:5362/14336 +step:21448/57344 train_time:12514399ms step_avg:583.48ms +step:21449/57344 train_time:12514416ms step_avg:583.45ms +step:21450/57344 train_time:12514664ms step_avg:583.43ms +step:21451/57344 train_time:12515224ms step_avg:583.43ms +grad accum step:5363/14336 +step:21452/57344 train_time:12516595ms step_avg:583.47ms +step:21453/57344 train_time:12516612ms step_avg:583.44ms +step:21454/57344 train_time:12516861ms step_avg:583.43ms +step:21455/57344 train_time:12517406ms step_avg:583.43ms +grad accum step:5364/14336 +step:21456/57344 train_time:12551587ms step_avg:584.99ms +step:21457/57344 train_time:12555363ms step_avg:585.14ms +step:21458/57344 train_time:12555615ms step_avg:585.13ms +step:21459/57344 train_time:12555628ms step_avg:585.10ms +grad accum step:5365/14336 +step:21460/57344 train_time:12556902ms step_avg:585.13ms +step:21461/57344 train_time:12556919ms step_avg:585.10ms +step:21462/57344 train_time:12557162ms step_avg:585.09ms +step:21463/57344 train_time:12557718ms step_avg:585.09ms +grad accum step:5366/14336 +step:21464/57344 train_time:12559028ms step_avg:585.12ms +step:21465/57344 train_time:12559045ms step_avg:585.09ms +step:21466/57344 train_time:12559291ms step_avg:585.08ms +step:21467/57344 train_time:12559840ms step_avg:585.08ms +grad accum step:5367/14336 +step:21468/57344 train_time:12561116ms step_avg:585.11ms +step:21469/57344 train_time:12561132ms step_avg:585.08ms +step:21470/57344 train_time:12561373ms step_avg:585.07ms +step:21471/57344 train_time:12561904ms step_avg:585.06ms +grad accum step:5368/14336 +step:21472/57344 train_time:12563202ms step_avg:585.10ms +step:21473/57344 train_time:12563219ms step_avg:585.07ms +step:21474/57344 train_time:12563456ms step_avg:585.05ms +step:21475/57344 train_time:12563983ms step_avg:585.05ms +grad accum step:5369/14336 +step:21476/57344 train_time:12565282ms step_avg:585.08ms +step:21477/57344 train_time:12565299ms step_avg:585.06ms +step:21478/57344 train_time:12565563ms step_avg:585.04ms +step:21479/57344 train_time:12566158ms step_avg:585.04ms +grad accum step:5370/14336 +step:21480/57344 train_time:12567475ms step_avg:585.08ms +step:21481/57344 train_time:12567492ms step_avg:585.05ms +step:21482/57344 train_time:12567738ms step_avg:585.04ms +step:21483/57344 train_time:12568285ms step_avg:585.03ms +grad accum step:5371/14336 +step:21484/57344 train_time:12569559ms step_avg:585.07ms +step:21485/57344 train_time:12569576ms step_avg:585.04ms +step:21486/57344 train_time:12569826ms step_avg:585.02ms +step:21487/57344 train_time:12570375ms step_avg:585.02ms +grad accum step:5372/14336 +step:21488/57344 train_time:12571650ms step_avg:585.05ms +step:21489/57344 train_time:12571667ms step_avg:585.03ms +step:21490/57344 train_time:12571913ms step_avg:585.01ms +step:21491/57344 train_time:12572460ms step_avg:585.01ms +grad accum step:5373/14336 +step:21492/57344 train_time:12573741ms step_avg:585.04ms +step:21493/57344 train_time:12573758ms step_avg:585.02ms +step:21494/57344 train_time:12574008ms step_avg:585.00ms +step:21495/57344 train_time:12574558ms step_avg:585.00ms +grad accum step:5374/14336 +step:21496/57344 train_time:12575849ms step_avg:585.03ms +step:21497/57344 train_time:12575866ms step_avg:585.01ms +step:21498/57344 train_time:12576109ms step_avg:584.99ms +step:21499/57344 train_time:12576654ms step_avg:584.99ms +grad accum step:5375/14336 +step:21500/57344 train_time:12577940ms step_avg:585.02ms +step:21501/57344 train_time:12577957ms step_avg:584.99ms +step:21502/57344 train_time:12578205ms step_avg:584.98ms +step:21503/57344 train_time:12578759ms step_avg:584.98ms +grad accum step:5376/14336 +step:21504/57344 train_time:12580067ms step_avg:585.01ms +step:21504/57344 val_loss:6.897208 train_time:12580068ms step_avg:585.01ms +step:21505/57344 train_time:12580081ms step_avg:584.98ms +step:21506/57344 train_time:12580303ms step_avg:584.97ms +step:21507/57344 train_time:12580838ms step_avg:584.96ms +grad accum step:5377/14336 +step:21508/57344 train_time:12582159ms step_avg:585.00ms +step:21509/57344 train_time:12582175ms step_avg:584.97ms +step:21510/57344 train_time:12582420ms step_avg:584.96ms +step:21511/57344 train_time:12582978ms step_avg:584.96ms +grad accum step:5378/14336 +step:21512/57344 train_time:12584343ms step_avg:584.99ms +step:21513/57344 train_time:12584360ms step_avg:584.97ms +step:21514/57344 train_time:12584607ms step_avg:584.95ms +step:21515/57344 train_time:12585153ms step_avg:584.95ms +grad accum step:5379/14336 +step:21516/57344 train_time:12586427ms step_avg:584.98ms +step:21517/57344 train_time:12586444ms step_avg:584.95ms +step:21518/57344 train_time:12586696ms step_avg:584.94ms +step:21519/57344 train_time:12587256ms step_avg:584.94ms +grad accum step:5380/14336 +step:21520/57344 train_time:12588551ms step_avg:584.97ms +step:21521/57344 train_time:12588568ms step_avg:584.94ms +step:21522/57344 train_time:12588820ms step_avg:584.93ms +step:21523/57344 train_time:12589384ms step_avg:584.93ms +grad accum step:5381/14336 +step:21524/57344 train_time:12590705ms step_avg:584.96ms +step:21525/57344 train_time:12590721ms step_avg:584.93ms +step:21526/57344 train_time:12590967ms step_avg:584.92ms +step:21527/57344 train_time:12591514ms step_avg:584.92ms +grad accum step:5382/14336 +step:21528/57344 train_time:12592791ms step_avg:584.95ms +step:21529/57344 train_time:12592808ms step_avg:584.92ms +step:21530/57344 train_time:12593059ms step_avg:584.91ms +step:21531/57344 train_time:12593605ms step_avg:584.91ms +grad accum step:5383/14336 +step:21532/57344 train_time:12594879ms step_avg:584.94ms +step:21533/57344 train_time:12594896ms step_avg:584.91ms +step:21534/57344 train_time:12595151ms step_avg:584.90ms +step:21535/57344 train_time:12595718ms step_avg:584.90ms +grad accum step:5384/14336 +step:21536/57344 train_time:12597016ms step_avg:584.93ms +step:21537/57344 train_time:12597033ms step_avg:584.90ms +step:21538/57344 train_time:12597285ms step_avg:584.89ms +step:21539/57344 train_time:12597845ms step_avg:584.89ms +grad accum step:5385/14336 +step:21540/57344 train_time:12599132ms step_avg:584.92ms +step:21541/57344 train_time:12599148ms step_avg:584.89ms +step:21542/57344 train_time:12599394ms step_avg:584.88ms +step:21543/57344 train_time:12599935ms step_avg:584.87ms +grad accum step:5386/14336 +step:21544/57344 train_time:12601213ms step_avg:584.91ms +step:21545/57344 train_time:12601230ms step_avg:584.88ms +step:21546/57344 train_time:12601476ms step_avg:584.86ms +step:21547/57344 train_time:12602028ms step_avg:584.86ms +grad accum step:5387/14336 +step:21548/57344 train_time:12603320ms step_avg:584.90ms +step:21549/57344 train_time:12603337ms step_avg:584.87ms +step:21550/57344 train_time:12603586ms step_avg:584.85ms +step:21551/57344 train_time:12604139ms step_avg:584.85ms +grad accum step:5388/14336 +step:21552/57344 train_time:12605462ms step_avg:584.89ms +step:21553/57344 train_time:12605479ms step_avg:584.86ms +step:21554/57344 train_time:12605725ms step_avg:584.84ms +step:21555/57344 train_time:12606263ms step_avg:584.84ms +grad accum step:5389/14336 +step:21556/57344 train_time:12607542ms step_avg:584.87ms +step:21557/57344 train_time:12607559ms step_avg:584.85ms +step:21558/57344 train_time:12607805ms step_avg:584.83ms +step:21559/57344 train_time:12608342ms step_avg:584.83ms +grad accum step:5390/14336 +step:21560/57344 train_time:12609617ms step_avg:584.86ms +step:21561/57344 train_time:12609634ms step_avg:584.84ms +step:21562/57344 train_time:12609885ms step_avg:584.82ms +step:21563/57344 train_time:12610445ms step_avg:584.82ms +grad accum step:5391/14336 +step:21564/57344 train_time:12611725ms step_avg:584.85ms +step:21565/57344 train_time:12611742ms step_avg:584.82ms +step:21566/57344 train_time:12611991ms step_avg:584.81ms +step:21567/57344 train_time:12612542ms step_avg:584.81ms +grad accum step:5392/14336 +step:21568/57344 train_time:12613846ms step_avg:584.84ms +step:21568/57344 val_loss:6.871654 train_time:12613846ms step_avg:584.84ms +step:21569/57344 train_time:12613859ms step_avg:584.81ms +step:21570/57344 train_time:12614084ms step_avg:584.80ms +step:21571/57344 train_time:12614635ms step_avg:584.80ms +grad accum step:5393/14336 +step:21572/57344 train_time:12615933ms step_avg:584.83ms +step:21573/57344 train_time:12615950ms step_avg:584.80ms +step:21574/57344 train_time:12616193ms step_avg:584.79ms +step:21575/57344 train_time:12616733ms step_avg:584.78ms +grad accum step:5394/14336 +step:21576/57344 train_time:12618032ms step_avg:584.82ms +step:21577/57344 train_time:12618049ms step_avg:584.79ms +step:21578/57344 train_time:12618297ms step_avg:584.78ms +step:21579/57344 train_time:12618841ms step_avg:584.77ms +grad accum step:5395/14336 +step:21580/57344 train_time:12620181ms step_avg:584.81ms +step:21581/57344 train_time:12620198ms step_avg:584.78ms +step:21582/57344 train_time:12620442ms step_avg:584.77ms +step:21583/57344 train_time:12620989ms step_avg:584.77ms +grad accum step:5396/14336 +step:21584/57344 train_time:12622290ms step_avg:584.80ms +step:21585/57344 train_time:12622307ms step_avg:584.77ms +step:21586/57344 train_time:12622550ms step_avg:584.76ms +step:21587/57344 train_time:12623098ms step_avg:584.75ms +grad accum step:5397/14336 +step:21588/57344 train_time:12624396ms step_avg:584.79ms +step:21589/57344 train_time:12624413ms step_avg:584.76ms +step:21590/57344 train_time:12624658ms step_avg:584.75ms +step:21591/57344 train_time:12625209ms step_avg:584.74ms +grad accum step:5398/14336 +step:21592/57344 train_time:12626527ms step_avg:584.78ms +step:21593/57344 train_time:12626544ms step_avg:584.75ms +step:21594/57344 train_time:12626790ms step_avg:584.74ms +step:21595/57344 train_time:12627336ms step_avg:584.73ms +grad accum step:5399/14336 +step:21596/57344 train_time:12628627ms step_avg:584.77ms +step:21597/57344 train_time:12628644ms step_avg:584.74ms +step:21598/57344 train_time:12628891ms step_avg:584.73ms +step:21599/57344 train_time:12629432ms step_avg:584.72ms +grad accum step:5400/14336 +step:21600/57344 train_time:12630749ms step_avg:584.76ms +step:21601/57344 train_time:12630766ms step_avg:584.73ms +step:21602/57344 train_time:12631015ms step_avg:584.72ms +step:21603/57344 train_time:12631560ms step_avg:584.71ms +grad accum step:5401/14336 +step:21604/57344 train_time:12632847ms step_avg:584.75ms +step:21605/57344 train_time:12632863ms step_avg:584.72ms +step:21606/57344 train_time:12633115ms step_avg:584.70ms +step:21607/57344 train_time:12633672ms step_avg:584.70ms +grad accum step:5402/14336 +step:21608/57344 train_time:12634968ms step_avg:584.74ms +step:21609/57344 train_time:12634985ms step_avg:584.71ms +step:21610/57344 train_time:12635239ms step_avg:584.69ms +step:21611/57344 train_time:12635802ms step_avg:584.69ms +grad accum step:5403/14336 +step:21612/57344 train_time:12637110ms step_avg:584.73ms +step:21613/57344 train_time:12637127ms step_avg:584.70ms +step:21614/57344 train_time:12637368ms step_avg:584.68ms +step:21615/57344 train_time:12637901ms step_avg:584.68ms +grad accum step:5404/14336 +step:21616/57344 train_time:12639195ms step_avg:584.71ms +step:21617/57344 train_time:12639212ms step_avg:584.69ms +step:21618/57344 train_time:12639457ms step_avg:584.67ms +step:21619/57344 train_time:12640003ms step_avg:584.67ms +grad accum step:5405/14336 +step:21620/57344 train_time:12641298ms step_avg:584.70ms +step:21621/57344 train_time:12641314ms step_avg:584.68ms +step:21622/57344 train_time:12641558ms step_avg:584.66ms +step:21623/57344 train_time:12642098ms step_avg:584.66ms +grad accum step:5406/14336 +step:21624/57344 train_time:12643383ms step_avg:584.69ms +step:21625/57344 train_time:12643400ms step_avg:584.67ms +step:21626/57344 train_time:12643647ms step_avg:584.65ms +step:21627/57344 train_time:12644210ms step_avg:584.65ms +grad accum step:5407/14336 +step:21628/57344 train_time:12645564ms step_avg:584.68ms +step:21629/57344 train_time:12645581ms step_avg:584.66ms +step:21630/57344 train_time:12645832ms step_avg:584.64ms +step:21631/57344 train_time:12646397ms step_avg:584.64ms +grad accum step:5408/14336 +step:21632/57344 train_time:12647731ms step_avg:584.68ms +step:21632/57344 val_loss:6.841313 train_time:12647731ms step_avg:584.68ms +step:21633/57344 train_time:12647744ms step_avg:584.65ms +step:21634/57344 train_time:12647963ms step_avg:584.63ms +step:21635/57344 train_time:12648510ms step_avg:584.63ms +grad accum step:5409/14336 +step:21636/57344 train_time:12649832ms step_avg:584.67ms +step:21637/57344 train_time:12649849ms step_avg:584.64ms +step:21638/57344 train_time:12650099ms step_avg:584.62ms +step:21639/57344 train_time:12650656ms step_avg:584.62ms +grad accum step:5410/14336 +step:21640/57344 train_time:12651943ms step_avg:584.66ms +step:21641/57344 train_time:12651960ms step_avg:584.63ms +step:21642/57344 train_time:12652208ms step_avg:584.61ms +step:21643/57344 train_time:12652757ms step_avg:584.61ms +grad accum step:5411/14336 +step:21644/57344 train_time:12654205ms step_avg:584.65ms +step:21645/57344 train_time:12654221ms step_avg:584.63ms +step:21646/57344 train_time:12654490ms step_avg:584.61ms +step:21647/57344 train_time:12655094ms step_avg:584.61ms +grad accum step:5412/14336 +step:21648/57344 train_time:12656419ms step_avg:584.65ms +step:21649/57344 train_time:12656436ms step_avg:584.62ms +step:21650/57344 train_time:12656680ms step_avg:584.60ms +step:21651/57344 train_time:12657216ms step_avg:584.60ms +grad accum step:5413/14336 +step:21652/57344 train_time:12658522ms step_avg:584.64ms +step:21653/57344 train_time:12658539ms step_avg:584.61ms +step:21654/57344 train_time:12658789ms step_avg:584.59ms +step:21655/57344 train_time:12659344ms step_avg:584.59ms +grad accum step:5414/14336 +step:21656/57344 train_time:12660638ms step_avg:584.62ms +step:21657/57344 train_time:12660655ms step_avg:584.60ms +step:21658/57344 train_time:12660905ms step_avg:584.58ms +step:21659/57344 train_time:12661458ms step_avg:584.58ms +grad accum step:5415/14336 +step:21660/57344 train_time:12662800ms step_avg:584.62ms +step:21661/57344 train_time:12662817ms step_avg:584.59ms +step:21662/57344 train_time:12663063ms step_avg:584.57ms +step:21663/57344 train_time:12663612ms step_avg:584.57ms +grad accum step:5416/14336 +step:21664/57344 train_time:12664942ms step_avg:584.61ms +step:21665/57344 train_time:12664959ms step_avg:584.58ms +step:21666/57344 train_time:12665204ms step_avg:584.57ms +step:21667/57344 train_time:12665754ms step_avg:584.56ms +grad accum step:5417/14336 +step:21668/57344 train_time:12667049ms step_avg:584.60ms +step:21669/57344 train_time:12667066ms step_avg:584.57ms +step:21670/57344 train_time:12667317ms step_avg:584.56ms +step:21671/57344 train_time:12667873ms step_avg:584.55ms +grad accum step:5418/14336 +step:21672/57344 train_time:12669192ms step_avg:584.59ms +step:21673/57344 train_time:12669208ms step_avg:584.56ms +step:21674/57344 train_time:12669459ms step_avg:584.55ms +step:21675/57344 train_time:12670017ms step_avg:584.55ms +grad accum step:5419/14336 +step:21676/57344 train_time:12671364ms step_avg:584.58ms +step:21677/57344 train_time:12671380ms step_avg:584.55ms +step:21678/57344 train_time:12671626ms step_avg:584.54ms +step:21679/57344 train_time:12672165ms step_avg:584.54ms +grad accum step:5420/14336 +step:21680/57344 train_time:12673465ms step_avg:584.57ms +step:21681/57344 train_time:12673482ms step_avg:584.54ms +step:21682/57344 train_time:12673740ms step_avg:584.53ms +step:21683/57344 train_time:12674313ms step_avg:584.53ms +grad accum step:5421/14336 +step:21684/57344 train_time:12675640ms step_avg:584.56ms +step:21685/57344 train_time:12675657ms step_avg:584.54ms +step:21686/57344 train_time:12675904ms step_avg:584.52ms +step:21687/57344 train_time:12676454ms step_avg:584.52ms +grad accum step:5422/14336 +step:21688/57344 train_time:12677735ms step_avg:584.55ms +step:21689/57344 train_time:12677752ms step_avg:584.52ms +step:21690/57344 train_time:12677996ms step_avg:584.51ms +step:21691/57344 train_time:12678536ms step_avg:584.51ms +grad accum step:5423/14336 +step:21692/57344 train_time:12679829ms step_avg:584.54ms +step:21693/57344 train_time:12679846ms step_avg:584.51ms +step:21694/57344 train_time:12680095ms step_avg:584.50ms +step:21695/57344 train_time:12680661ms step_avg:584.50ms +grad accum step:5424/14336 +step:21696/57344 train_time:12682028ms step_avg:584.53ms +step:21696/57344 val_loss:6.823667 train_time:12682029ms step_avg:584.53ms +step:21697/57344 train_time:12682041ms step_avg:584.51ms +step:21698/57344 train_time:12682268ms step_avg:584.49ms +step:21699/57344 train_time:12682831ms step_avg:584.49ms +grad accum step:5425/14336 +step:21700/57344 train_time:12684200ms step_avg:584.53ms +step:21701/57344 train_time:12684217ms step_avg:584.50ms +step:21702/57344 train_time:12684468ms step_avg:584.48ms +step:21703/57344 train_time:12685016ms step_avg:584.48ms +grad accum step:5426/14336 +step:21704/57344 train_time:12686314ms step_avg:584.52ms +step:21705/57344 train_time:12686331ms step_avg:584.49ms +step:21706/57344 train_time:12686579ms step_avg:584.47ms +step:21707/57344 train_time:12687133ms step_avg:584.47ms +grad accum step:5427/14336 +step:21708/57344 train_time:12688438ms step_avg:584.51ms +step:21709/57344 train_time:12688456ms step_avg:584.48ms +step:21710/57344 train_time:12688703ms step_avg:584.46ms +step:21711/57344 train_time:12689253ms step_avg:584.46ms +grad accum step:5428/14336 +step:21712/57344 train_time:12690549ms step_avg:584.49ms +step:21713/57344 train_time:12690566ms step_avg:584.47ms +step:21714/57344 train_time:12690823ms step_avg:584.45ms +step:21715/57344 train_time:12691395ms step_avg:584.45ms +grad accum step:5429/14336 +step:21716/57344 train_time:12692713ms step_avg:584.49ms +step:21717/57344 train_time:12692730ms step_avg:584.46ms +step:21718/57344 train_time:12692985ms step_avg:584.45ms +step:21719/57344 train_time:12693552ms step_avg:584.44ms +grad accum step:5430/14336 +step:21720/57344 train_time:12694910ms step_avg:584.48ms +step:21721/57344 train_time:12694926ms step_avg:584.45ms +step:21722/57344 train_time:12695172ms step_avg:584.44ms +step:21723/57344 train_time:12695718ms step_avg:584.44ms +grad accum step:5431/14336 +step:21724/57344 train_time:12697043ms step_avg:584.47ms +step:21725/57344 train_time:12697060ms step_avg:584.44ms +step:21726/57344 train_time:12697307ms step_avg:584.43ms +step:21727/57344 train_time:12697858ms step_avg:584.43ms +grad accum step:5432/14336 +step:21728/57344 train_time:12699179ms step_avg:584.46ms +step:21729/57344 train_time:12699196ms step_avg:584.44ms +step:21730/57344 train_time:12699444ms step_avg:584.42ms +step:21731/57344 train_time:12699997ms step_avg:584.42ms +grad accum step:5433/14336 +step:21732/57344 train_time:12701319ms step_avg:584.45ms +step:21733/57344 train_time:12701336ms step_avg:584.43ms +step:21734/57344 train_time:12701581ms step_avg:584.41ms +step:21735/57344 train_time:12702127ms step_avg:584.41ms +grad accum step:5434/14336 +step:21736/57344 train_time:12703461ms step_avg:584.44ms +step:21737/57344 train_time:12703478ms step_avg:584.42ms +step:21738/57344 train_time:12703728ms step_avg:584.40ms +step:21739/57344 train_time:12704285ms step_avg:584.40ms +grad accum step:5435/14336 +step:21740/57344 train_time:12705588ms step_avg:584.43ms +step:21741/57344 train_time:12705605ms step_avg:584.41ms +step:21742/57344 train_time:12705853ms step_avg:584.39ms +step:21743/57344 train_time:12706405ms step_avg:584.39ms +grad accum step:5436/14336 +step:21744/57344 train_time:12707711ms step_avg:584.42ms +step:21745/57344 train_time:12707728ms step_avg:584.40ms +step:21746/57344 train_time:12707979ms step_avg:584.38ms +step:21747/57344 train_time:12708522ms step_avg:584.38ms +grad accum step:5437/14336 +step:21748/57344 train_time:12709805ms step_avg:584.41ms +step:21749/57344 train_time:12709823ms step_avg:584.39ms +step:21750/57344 train_time:12710069ms step_avg:584.37ms +step:21751/57344 train_time:12710618ms step_avg:584.37ms +grad accum step:5438/14336 +step:21752/57344 train_time:12711936ms step_avg:584.40ms +step:21753/57344 train_time:12711952ms step_avg:584.38ms +step:21754/57344 train_time:12712202ms step_avg:584.36ms +step:21755/57344 train_time:12712756ms step_avg:584.36ms +grad accum step:5439/14336 +step:21756/57344 train_time:12714086ms step_avg:584.39ms +step:21757/57344 train_time:12714103ms step_avg:584.37ms +step:21758/57344 train_time:12714361ms step_avg:584.35ms +step:21759/57344 train_time:12714947ms step_avg:584.35ms +grad accum step:5440/14336 +step:21760/57344 train_time:12716265ms step_avg:584.39ms +step:21760/57344 val_loss:6.782926 train_time:12716265ms step_avg:584.39ms +step:21761/57344 train_time:12716278ms step_avg:584.36ms +step:21762/57344 train_time:12716505ms step_avg:584.34ms +step:21763/57344 train_time:12717055ms step_avg:584.34ms +grad accum step:5441/14336 +step:21764/57344 train_time:12718356ms step_avg:584.38ms +step:21765/57344 train_time:12718372ms step_avg:584.35ms +step:21766/57344 train_time:12718622ms step_avg:584.33ms +step:21767/57344 train_time:12719173ms step_avg:584.33ms +grad accum step:5442/14336 +step:21768/57344 train_time:12720489ms step_avg:584.37ms +step:21769/57344 train_time:12720506ms step_avg:584.34ms +step:21770/57344 train_time:12720766ms step_avg:584.33ms +step:21771/57344 train_time:12721343ms step_avg:584.33ms +grad accum step:5443/14336 +step:21772/57344 train_time:12722683ms step_avg:584.36ms +step:21773/57344 train_time:12722700ms step_avg:584.33ms +step:21774/57344 train_time:12722953ms step_avg:584.32ms +step:21775/57344 train_time:12723507ms step_avg:584.32ms +grad accum step:5444/14336 +step:21776/57344 train_time:12724823ms step_avg:584.35ms +step:21777/57344 train_time:12724840ms step_avg:584.32ms +step:21778/57344 train_time:12725087ms step_avg:584.31ms +step:21779/57344 train_time:12725636ms step_avg:584.31ms +grad accum step:5445/14336 +step:21780/57344 train_time:12726975ms step_avg:584.34ms +step:21781/57344 train_time:12726991ms step_avg:584.32ms +step:21782/57344 train_time:12727237ms step_avg:584.30ms +step:21783/57344 train_time:12727777ms step_avg:584.30ms +grad accum step:5446/14336 +step:21784/57344 train_time:12729094ms step_avg:584.33ms +step:21785/57344 train_time:12729111ms step_avg:584.31ms +step:21786/57344 train_time:12729360ms step_avg:584.29ms +step:21787/57344 train_time:12729906ms step_avg:584.29ms +grad accum step:5447/14336 +step:21788/57344 train_time:12731239ms step_avg:584.32ms +step:21789/57344 train_time:12731256ms step_avg:584.30ms +step:21790/57344 train_time:12731505ms step_avg:584.28ms +step:21791/57344 train_time:12732061ms step_avg:584.28ms +grad accum step:5448/14336 +step:21792/57344 train_time:12733368ms step_avg:584.31ms +step:21793/57344 train_time:12733384ms step_avg:584.29ms +step:21794/57344 train_time:12733633ms step_avg:584.27ms +step:21795/57344 train_time:12734177ms step_avg:584.27ms +grad accum step:5449/14336 +step:21796/57344 train_time:12735486ms step_avg:584.30ms +step:21797/57344 train_time:12735503ms step_avg:584.28ms +step:21798/57344 train_time:12735755ms step_avg:584.26ms +step:21799/57344 train_time:12736313ms step_avg:584.26ms +grad accum step:5450/14336 +step:21800/57344 train_time:12737591ms step_avg:584.29ms +step:21801/57344 train_time:12737608ms step_avg:584.27ms +step:21802/57344 train_time:12737860ms step_avg:584.25ms +step:21803/57344 train_time:12738413ms step_avg:584.25ms +grad accum step:5451/14336 +step:21804/57344 train_time:12739725ms step_avg:584.28ms +step:21805/57344 train_time:12739741ms step_avg:584.26ms +step:21806/57344 train_time:12739990ms step_avg:584.24ms +step:21807/57344 train_time:12740545ms step_avg:584.24ms +grad accum step:5452/14336 +step:21808/57344 train_time:12741883ms step_avg:584.28ms +step:21809/57344 train_time:12741900ms step_avg:584.25ms +step:21810/57344 train_time:12742147ms step_avg:584.23ms +step:21811/57344 train_time:12742695ms step_avg:584.23ms +grad accum step:5453/14336 +step:21812/57344 train_time:12744000ms step_avg:584.27ms +step:21813/57344 train_time:12744017ms step_avg:584.24ms +step:21814/57344 train_time:12744264ms step_avg:584.22ms +step:21815/57344 train_time:12744812ms step_avg:584.22ms +grad accum step:5454/14336 +step:21816/57344 train_time:12746111ms step_avg:584.26ms +step:21817/57344 train_time:12746128ms step_avg:584.23ms +step:21818/57344 train_time:12746377ms step_avg:584.21ms +step:21819/57344 train_time:12746926ms step_avg:584.21ms +grad accum step:5455/14336 +step:21820/57344 train_time:12748231ms step_avg:584.25ms +step:21821/57344 train_time:12748248ms step_avg:584.22ms +step:21822/57344 train_time:12748498ms step_avg:584.20ms +step:21823/57344 train_time:12749051ms step_avg:584.20ms +grad accum step:5456/14336 +step:21824/57344 train_time:12750400ms step_avg:584.24ms +step:21824/57344 val_loss:6.754741 train_time:12750401ms step_avg:584.24ms +step:21825/57344 train_time:12750413ms step_avg:584.21ms +step:21826/57344 train_time:12750639ms step_avg:584.19ms +step:21827/57344 train_time:12751199ms step_avg:584.19ms +grad accum step:5457/14336 +step:21828/57344 train_time:12752518ms step_avg:584.23ms +step:21829/57344 train_time:12752535ms step_avg:584.20ms +step:21830/57344 train_time:12752781ms step_avg:584.19ms +step:21831/57344 train_time:12753327ms step_avg:584.18ms +grad accum step:5458/14336 +step:21832/57344 train_time:12754643ms step_avg:584.22ms +step:21833/57344 train_time:12754660ms step_avg:584.19ms +step:21834/57344 train_time:12754912ms step_avg:584.18ms +step:21835/57344 train_time:12755475ms step_avg:584.18ms +grad accum step:5459/14336 +step:21836/57344 train_time:12756771ms step_avg:584.21ms +step:21837/57344 train_time:12756787ms step_avg:584.18ms +step:21838/57344 train_time:12757034ms step_avg:584.17ms +step:21839/57344 train_time:12757582ms step_avg:584.17ms +grad accum step:5460/14336 +step:21840/57344 train_time:12758892ms step_avg:584.20ms +step:21841/57344 train_time:12758907ms step_avg:584.17ms +step:21842/57344 train_time:12759154ms step_avg:584.16ms +step:21843/57344 train_time:12759700ms step_avg:584.16ms +grad accum step:5461/14336 +step:21844/57344 train_time:12761001ms step_avg:584.19ms +step:21845/57344 train_time:12761018ms step_avg:584.16ms +step:21846/57344 train_time:12761266ms step_avg:584.15ms +step:21847/57344 train_time:12761820ms step_avg:584.15ms +grad accum step:5462/14336 +step:21848/57344 train_time:12763144ms step_avg:584.18ms +step:21849/57344 train_time:12763158ms step_avg:584.15ms +step:21850/57344 train_time:12763410ms step_avg:584.14ms +step:21851/57344 train_time:12763967ms step_avg:584.14ms +grad accum step:5463/14336 +step:21852/57344 train_time:12765268ms step_avg:584.17ms +step:21853/57344 train_time:12765280ms step_avg:584.14ms +step:21854/57344 train_time:12765529ms step_avg:584.13ms +step:21855/57344 train_time:12766078ms step_avg:584.13ms +grad accum step:5464/14336 +step:21856/57344 train_time:12767426ms step_avg:584.16ms +step:21857/57344 train_time:12767442ms step_avg:584.14ms +step:21858/57344 train_time:12767688ms step_avg:584.12ms +step:21859/57344 train_time:12768243ms step_avg:584.12ms +grad accum step:5465/14336 +step:21860/57344 train_time:12769538ms step_avg:584.15ms +step:21861/57344 train_time:12769555ms step_avg:584.12ms +step:21862/57344 train_time:12769810ms step_avg:584.11ms +step:21863/57344 train_time:12770368ms step_avg:584.11ms +grad accum step:5466/14336 +step:21864/57344 train_time:12771685ms step_avg:584.14ms +step:21865/57344 train_time:12771702ms step_avg:584.12ms +step:21866/57344 train_time:12771951ms step_avg:584.10ms +step:21867/57344 train_time:12772504ms step_avg:584.10ms +grad accum step:5467/14336 +step:21868/57344 train_time:12773812ms step_avg:584.13ms +step:21869/57344 train_time:12773828ms step_avg:584.11ms +step:21870/57344 train_time:12774080ms step_avg:584.09ms +step:21871/57344 train_time:12774636ms step_avg:584.09ms +grad accum step:5468/14336 +step:21872/57344 train_time:12775926ms step_avg:584.12ms +step:21873/57344 train_time:12775943ms step_avg:584.10ms +step:21874/57344 train_time:12776188ms step_avg:584.08ms +step:21875/57344 train_time:12776730ms step_avg:584.08ms +grad accum step:5469/14336 +step:21876/57344 train_time:12778029ms step_avg:584.11ms +step:21877/57344 train_time:12778046ms step_avg:584.09ms +step:21878/57344 train_time:12778295ms step_avg:584.07ms +step:21879/57344 train_time:12778856ms step_avg:584.07ms +grad accum step:5470/14336 +step:21880/57344 train_time:12780153ms step_avg:584.10ms +step:21881/57344 train_time:12780170ms step_avg:584.08ms +step:21882/57344 train_time:12780419ms step_avg:584.06ms +step:21883/57344 train_time:12780977ms step_avg:584.06ms +grad accum step:5471/14336 +step:21884/57344 train_time:12782298ms step_avg:584.09ms +step:21885/57344 train_time:12782315ms step_avg:584.07ms +step:21886/57344 train_time:12782567ms step_avg:584.05ms +step:21887/57344 train_time:12783130ms step_avg:584.05ms +grad accum step:5472/14336 +step:21888/57344 train_time:12784452ms step_avg:584.08ms +step:21888/57344 val_loss:6.727941 train_time:12784452ms step_avg:584.08ms +step:21889/57344 train_time:12784465ms step_avg:584.06ms +step:21890/57344 train_time:12784688ms step_avg:584.04ms +step:21891/57344 train_time:12785238ms step_avg:584.04ms +grad accum step:5473/14336 +step:21892/57344 train_time:12786543ms step_avg:584.07ms +step:21893/57344 train_time:12786560ms step_avg:584.05ms +step:21894/57344 train_time:12786811ms step_avg:584.03ms +step:21895/57344 train_time:12787370ms step_avg:584.03ms +grad accum step:5474/14336 +step:21896/57344 train_time:12788689ms step_avg:584.07ms +step:21897/57344 train_time:12788706ms step_avg:584.04ms +step:21898/57344 train_time:12788951ms step_avg:584.02ms +step:21899/57344 train_time:12789497ms step_avg:584.02ms +grad accum step:5475/14336 +step:21900/57344 train_time:12790794ms step_avg:584.05ms +step:21901/57344 train_time:12790810ms step_avg:584.03ms +step:21902/57344 train_time:12791056ms step_avg:584.01ms +step:21903/57344 train_time:12791602ms step_avg:584.01ms +grad accum step:5476/14336 +step:21904/57344 train_time:12792961ms step_avg:584.05ms +step:21905/57344 train_time:12792988ms step_avg:584.02ms +step:21906/57344 train_time:12793229ms step_avg:584.01ms +step:21907/57344 train_time:12793785ms step_avg:584.00ms +grad accum step:5477/14336 +step:21908/57344 train_time:12795073ms step_avg:584.04ms +step:21909/57344 train_time:12795090ms step_avg:584.01ms +step:21910/57344 train_time:12795340ms step_avg:584.00ms +step:21911/57344 train_time:12795884ms step_avg:583.99ms +grad accum step:5478/14336 +step:21912/57344 train_time:12797211ms step_avg:584.03ms +step:21913/57344 train_time:12797228ms step_avg:584.00ms +step:21914/57344 train_time:12797478ms step_avg:583.99ms +step:21915/57344 train_time:12798039ms step_avg:583.99ms +grad accum step:5479/14336 +step:21916/57344 train_time:12799326ms step_avg:584.02ms +step:21917/57344 train_time:12799342ms step_avg:583.99ms +step:21918/57344 train_time:12799592ms step_avg:583.98ms +step:21919/57344 train_time:12800139ms step_avg:583.97ms +grad accum step:5480/14336 +step:21920/57344 train_time:12801460ms step_avg:584.01ms +step:21921/57344 train_time:12801478ms step_avg:583.98ms +step:21922/57344 train_time:12801727ms step_avg:583.97ms +step:21923/57344 train_time:12802278ms step_avg:583.97ms +grad accum step:5481/14336 +step:21924/57344 train_time:12803569ms step_avg:584.00ms +step:21925/57344 train_time:12803586ms step_avg:583.97ms +step:21926/57344 train_time:12803830ms step_avg:583.96ms +step:21927/57344 train_time:12804379ms step_avg:583.95ms +grad accum step:5482/14336 +step:21928/57344 train_time:12805670ms step_avg:583.99ms +step:21929/57344 train_time:12805686ms step_avg:583.96ms +step:21930/57344 train_time:12805935ms step_avg:583.95ms +step:21931/57344 train_time:12806482ms step_avg:583.94ms +grad accum step:5483/14336 +step:21932/57344 train_time:12807775ms step_avg:583.98ms +step:21933/57344 train_time:12807792ms step_avg:583.95ms +step:21934/57344 train_time:12808038ms step_avg:583.94ms +step:21935/57344 train_time:12808586ms step_avg:583.93ms +grad accum step:5484/14336 +step:21936/57344 train_time:12809899ms step_avg:583.97ms +step:21937/57344 train_time:12809916ms step_avg:583.94ms +step:21938/57344 train_time:12810164ms step_avg:583.93ms +step:21939/57344 train_time:12810708ms step_avg:583.92ms +grad accum step:5485/14336 +step:21940/57344 train_time:12812019ms step_avg:583.96ms +step:21941/57344 train_time:12812036ms step_avg:583.93ms +step:21942/57344 train_time:12812282ms step_avg:583.92ms +step:21943/57344 train_time:12812827ms step_avg:583.91ms +grad accum step:5486/14336 +step:21944/57344 train_time:12814128ms step_avg:583.95ms +step:21945/57344 train_time:12814145ms step_avg:583.92ms +step:21946/57344 train_time:12814392ms step_avg:583.91ms +step:21947/57344 train_time:12814938ms step_avg:583.90ms +grad accum step:5487/14336 +step:21948/57344 train_time:12816232ms step_avg:583.94ms +step:21949/57344 train_time:12816249ms step_avg:583.91ms +step:21950/57344 train_time:12816502ms step_avg:583.90ms +step:21951/57344 train_time:12817065ms step_avg:583.89ms +grad accum step:5488/14336 +step:21952/57344 train_time:12818406ms step_avg:583.93ms +step:21952/57344 val_loss:6.702205 train_time:12818406ms step_avg:583.93ms +step:21953/57344 train_time:12818419ms step_avg:583.90ms +step:21954/57344 train_time:12818646ms step_avg:583.89ms +step:21955/57344 train_time:12819198ms step_avg:583.89ms +grad accum step:5489/14336 +step:21956/57344 train_time:12820525ms step_avg:583.92ms +step:21957/57344 train_time:12820541ms step_avg:583.89ms +step:21958/57344 train_time:12820790ms step_avg:583.88ms +step:21959/57344 train_time:12821348ms step_avg:583.88ms +grad accum step:5490/14336 +step:21960/57344 train_time:12822658ms step_avg:583.91ms +step:21961/57344 train_time:12822675ms step_avg:583.88ms +step:21962/57344 train_time:12822922ms step_avg:583.87ms +step:21963/57344 train_time:12823467ms step_avg:583.87ms +grad accum step:5491/14336 +step:21964/57344 train_time:12824805ms step_avg:583.90ms +step:21965/57344 train_time:12824822ms step_avg:583.88ms +step:21966/57344 train_time:12825065ms step_avg:583.86ms +step:21967/57344 train_time:12825599ms step_avg:583.86ms +grad accum step:5492/14336 +step:21968/57344 train_time:12826880ms step_avg:583.89ms +step:21969/57344 train_time:12826897ms step_avg:583.86ms +step:21970/57344 train_time:12827145ms step_avg:583.85ms +step:21971/57344 train_time:12827690ms step_avg:583.85ms +grad accum step:5493/14336 +step:21972/57344 train_time:12829002ms step_avg:583.88ms +step:21973/57344 train_time:12829019ms step_avg:583.85ms +step:21974/57344 train_time:12829264ms step_avg:583.84ms +step:21975/57344 train_time:12829808ms step_avg:583.84ms +grad accum step:5494/14336 +step:21976/57344 train_time:12831169ms step_avg:583.87ms +step:21977/57344 train_time:12831186ms step_avg:583.85ms +step:21978/57344 train_time:12831435ms step_avg:583.83ms +step:21979/57344 train_time:12831988ms step_avg:583.83ms +grad accum step:5495/14336 +step:21980/57344 train_time:12833332ms step_avg:583.86ms +step:21981/57344 train_time:12833349ms step_avg:583.84ms +step:21982/57344 train_time:12833603ms step_avg:583.82ms +step:21983/57344 train_time:12834164ms step_avg:583.82ms +grad accum step:5496/14336 +step:21984/57344 train_time:12835478ms step_avg:583.86ms +step:21985/57344 train_time:12835495ms step_avg:583.83ms +step:21986/57344 train_time:12835746ms step_avg:583.81ms +step:21987/57344 train_time:12836306ms step_avg:583.81ms +grad accum step:5497/14336 +step:21988/57344 train_time:12837610ms step_avg:583.85ms +step:21989/57344 train_time:12837627ms step_avg:583.82ms +step:21990/57344 train_time:12837877ms step_avg:583.81ms +step:21991/57344 train_time:12838427ms step_avg:583.80ms +grad accum step:5498/14336 +step:21992/57344 train_time:12839716ms step_avg:583.84ms +step:21993/57344 train_time:12839733ms step_avg:583.81ms +step:21994/57344 train_time:12839980ms step_avg:583.79ms +step:21995/57344 train_time:12840529ms step_avg:583.79ms +grad accum step:5499/14336 +step:21996/57344 train_time:12841834ms step_avg:583.83ms +step:21997/57344 train_time:12841851ms step_avg:583.80ms +step:21998/57344 train_time:12842101ms step_avg:583.78ms +step:21999/57344 train_time:12842655ms step_avg:583.78ms +grad accum step:5500/14336 +step:22000/57344 train_time:12843996ms step_avg:583.82ms +step:22001/57344 train_time:12844013ms step_avg:583.79ms +step:22002/57344 train_time:12844257ms step_avg:583.78ms +step:22003/57344 train_time:12844802ms step_avg:583.78ms +grad accum step:5501/14336 +step:22004/57344 train_time:12846146ms step_avg:583.81ms +step:22005/57344 train_time:12846162ms step_avg:583.78ms +step:22006/57344 train_time:12846412ms step_avg:583.77ms +step:22007/57344 train_time:12846964ms step_avg:583.77ms +grad accum step:5502/14336 +step:22008/57344 train_time:12848242ms step_avg:583.80ms +step:22009/57344 train_time:12848259ms step_avg:583.77ms +step:22010/57344 train_time:12848508ms step_avg:583.76ms +step:22011/57344 train_time:12849053ms step_avg:583.76ms +grad accum step:5503/14336 +step:22012/57344 train_time:12850350ms step_avg:583.79ms +step:22013/57344 train_time:12850366ms step_avg:583.76ms +step:22014/57344 train_time:12850616ms step_avg:583.75ms +step:22015/57344 train_time:12851168ms step_avg:583.75ms +grad accum step:5504/14336 +step:22016/57344 train_time:12852467ms step_avg:583.78ms +step:22016/57344 val_loss:6.669182 train_time:12852467ms step_avg:583.78ms +step:22017/57344 train_time:12852480ms step_avg:583.75ms +step:22018/57344 train_time:12852703ms step_avg:583.74ms +step:22019/57344 train_time:12853244ms step_avg:583.73ms +grad accum step:5505/14336 +step:22020/57344 train_time:12854541ms step_avg:583.77ms +step:22021/57344 train_time:12854558ms step_avg:583.74ms +step:22022/57344 train_time:12854806ms step_avg:583.73ms +step:22023/57344 train_time:12855345ms step_avg:583.72ms +grad accum step:5506/14336 +step:22024/57344 train_time:12856651ms step_avg:583.76ms +step:22025/57344 train_time:12856668ms step_avg:583.73ms +step:22026/57344 train_time:12856919ms step_avg:583.72ms +step:22027/57344 train_time:12857480ms step_avg:583.71ms +grad accum step:5507/14336 +step:22028/57344 train_time:12858783ms step_avg:583.75ms +step:22029/57344 train_time:12858800ms step_avg:583.72ms +step:22030/57344 train_time:12859050ms step_avg:583.71ms +step:22031/57344 train_time:12859607ms step_avg:583.71ms +grad accum step:5508/14336 +step:22032/57344 train_time:12860952ms step_avg:583.74ms +step:22033/57344 train_time:12860968ms step_avg:583.71ms +step:22034/57344 train_time:12861219ms step_avg:583.70ms +step:22035/57344 train_time:12861775ms step_avg:583.70ms +grad accum step:5509/14336 +step:22036/57344 train_time:12863115ms step_avg:583.73ms +step:22037/57344 train_time:12863132ms step_avg:583.71ms +step:22038/57344 train_time:12863379ms step_avg:583.69ms +step:22039/57344 train_time:12863927ms step_avg:583.69ms +grad accum step:5510/14336 +step:22040/57344 train_time:12865251ms step_avg:583.72ms +step:22041/57344 train_time:12865268ms step_avg:583.70ms +step:22042/57344 train_time:12865516ms step_avg:583.68ms +step:22043/57344 train_time:12866068ms step_avg:583.68ms +grad accum step:5511/14336 +step:22044/57344 train_time:12867382ms step_avg:583.71ms +step:22045/57344 train_time:12867398ms step_avg:583.69ms +step:22046/57344 train_time:12867653ms step_avg:583.67ms +step:22047/57344 train_time:12868226ms step_avg:583.67ms +grad accum step:5512/14336 +step:22048/57344 train_time:12869543ms step_avg:583.71ms +step:22049/57344 train_time:12869561ms step_avg:583.68ms +step:22050/57344 train_time:12869818ms step_avg:583.67ms +step:22051/57344 train_time:12870396ms step_avg:583.66ms +grad accum step:5513/14336 +step:22052/57344 train_time:12871733ms step_avg:583.70ms +step:22053/57344 train_time:12871750ms step_avg:583.67ms +step:22054/57344 train_time:12871995ms step_avg:583.66ms +step:22055/57344 train_time:12872538ms step_avg:583.66ms +grad accum step:5514/14336 +step:22056/57344 train_time:12873882ms step_avg:583.69ms +step:22057/57344 train_time:12873899ms step_avg:583.67ms +step:22058/57344 train_time:12874152ms step_avg:583.65ms +step:22059/57344 train_time:12874715ms step_avg:583.65ms +grad accum step:5515/14336 +step:22060/57344 train_time:12876039ms step_avg:583.68ms +step:22061/57344 train_time:12876056ms step_avg:583.66ms +step:22062/57344 train_time:12876304ms step_avg:583.64ms +step:22063/57344 train_time:12876848ms step_avg:583.64ms +grad accum step:5516/14336 +step:22064/57344 train_time:12878172ms step_avg:583.67ms +step:22065/57344 train_time:12878189ms step_avg:583.65ms +step:22066/57344 train_time:12878436ms step_avg:583.63ms +step:22067/57344 train_time:12878985ms step_avg:583.63ms +grad accum step:5517/14336 +step:22068/57344 train_time:12880272ms step_avg:583.66ms +step:22069/57344 train_time:12880289ms step_avg:583.64ms +step:22070/57344 train_time:12880534ms step_avg:583.62ms +step:22071/57344 train_time:12881083ms step_avg:583.62ms +grad accum step:5518/14336 +step:22072/57344 train_time:12882401ms step_avg:583.65ms +step:22073/57344 train_time:12882418ms step_avg:583.63ms +step:22074/57344 train_time:12882662ms step_avg:583.61ms +step:22075/57344 train_time:12883205ms step_avg:583.61ms +grad accum step:5519/14336 +step:22076/57344 train_time:12884510ms step_avg:583.64ms +step:22077/57344 train_time:12884527ms step_avg:583.62ms +step:22078/57344 train_time:12884773ms step_avg:583.60ms +step:22079/57344 train_time:12885325ms step_avg:583.60ms +grad accum step:5520/14336 +step:22080/57344 train_time:12886693ms step_avg:583.64ms +step:22080/57344 val_loss:6.655616 train_time:12886693ms step_avg:583.64ms +step:22081/57344 train_time:12886706ms step_avg:583.61ms +step:22082/57344 train_time:12886933ms step_avg:583.59ms +step:22083/57344 train_time:12887488ms step_avg:583.59ms +grad accum step:5521/14336 +step:22084/57344 train_time:12888793ms step_avg:583.63ms +step:22085/57344 train_time:12888810ms step_avg:583.60ms +step:22086/57344 train_time:12889061ms step_avg:583.59ms +step:22087/57344 train_time:12889616ms step_avg:583.58ms +grad accum step:5522/14336 +step:22088/57344 train_time:12890898ms step_avg:583.62ms +step:22089/57344 train_time:12890915ms step_avg:583.59ms +step:22090/57344 train_time:12891166ms step_avg:583.57ms +step:22091/57344 train_time:12891729ms step_avg:583.57ms +grad accum step:5523/14336 +step:22092/57344 train_time:12893080ms step_avg:583.61ms +step:22093/57344 train_time:12893097ms step_avg:583.58ms +step:22094/57344 train_time:12893344ms step_avg:583.57ms +step:22095/57344 train_time:12893896ms step_avg:583.57ms +grad accum step:5524/14336 +step:22096/57344 train_time:12895207ms step_avg:583.60ms +step:22097/57344 train_time:12895224ms step_avg:583.57ms +step:22098/57344 train_time:12895472ms step_avg:583.56ms +step:22099/57344 train_time:12896022ms step_avg:583.56ms +grad accum step:5525/14336 +step:22100/57344 train_time:12897309ms step_avg:583.59ms +step:22101/57344 train_time:12897326ms step_avg:583.56ms +step:22102/57344 train_time:12897573ms step_avg:583.55ms +step:22103/57344 train_time:12898115ms step_avg:583.55ms +grad accum step:5526/14336 +step:22104/57344 train_time:12899391ms step_avg:583.58ms +step:22105/57344 train_time:12899408ms step_avg:583.55ms +step:22106/57344 train_time:12899655ms step_avg:583.54ms +step:22107/57344 train_time:12900199ms step_avg:583.53ms +grad accum step:5527/14336 +step:22108/57344 train_time:12901494ms step_avg:583.57ms +step:22109/57344 train_time:12901512ms step_avg:583.54ms +step:22110/57344 train_time:12901759ms step_avg:583.53ms +step:22111/57344 train_time:12902315ms step_avg:583.52ms +grad accum step:5528/14336 +step:22112/57344 train_time:12903632ms step_avg:583.56ms +step:22113/57344 train_time:12903649ms step_avg:583.53ms +step:22114/57344 train_time:12903896ms step_avg:583.52ms +step:22115/57344 train_time:12904449ms step_avg:583.52ms +grad accum step:5529/14336 +step:22116/57344 train_time:12905763ms step_avg:583.55ms +step:22117/57344 train_time:12905780ms step_avg:583.52ms +step:22118/57344 train_time:12906027ms step_avg:583.51ms +step:22119/57344 train_time:12906578ms step_avg:583.51ms +grad accum step:5530/14336 +step:22120/57344 train_time:12907889ms step_avg:583.54ms +step:22121/57344 train_time:12907906ms step_avg:583.51ms +step:22122/57344 train_time:12908154ms step_avg:583.50ms +step:22123/57344 train_time:12908702ms step_avg:583.50ms +grad accum step:5531/14336 +step:22124/57344 train_time:12910017ms step_avg:583.53ms +step:22125/57344 train_time:12910034ms step_avg:583.50ms +step:22126/57344 train_time:12910286ms step_avg:583.49ms +step:22127/57344 train_time:12910846ms step_avg:583.49ms +grad accum step:5532/14336 +step:22128/57344 train_time:12912145ms step_avg:583.52ms +step:22129/57344 train_time:12912162ms step_avg:583.50ms +step:22130/57344 train_time:12912409ms step_avg:583.48ms +step:22131/57344 train_time:12912966ms step_avg:583.48ms +grad accum step:5533/14336 +step:22132/57344 train_time:12914294ms step_avg:583.51ms +step:22133/57344 train_time:12914311ms step_avg:583.49ms +step:22134/57344 train_time:12914563ms step_avg:583.47ms +step:22135/57344 train_time:12915127ms step_avg:583.47ms +grad accum step:5534/14336 +step:22136/57344 train_time:12916499ms step_avg:583.51ms +step:22137/57344 train_time:12916515ms step_avg:583.48ms +step:22138/57344 train_time:12916759ms step_avg:583.47ms +step:22139/57344 train_time:12917303ms step_avg:583.46ms +grad accum step:5535/14336 +step:22140/57344 train_time:12918586ms step_avg:583.50ms +step:22141/57344 train_time:12918603ms step_avg:583.47ms +step:22142/57344 train_time:12918851ms step_avg:583.45ms +step:22143/57344 train_time:12919400ms step_avg:583.45ms +grad accum step:5536/14336 +step:22144/57344 train_time:12920713ms step_avg:583.49ms +step:22144/57344 val_loss:6.619743 train_time:12920714ms step_avg:583.49ms +step:22145/57344 train_time:12920726ms step_avg:583.46ms +step:22146/57344 train_time:12920949ms step_avg:583.44ms +step:22147/57344 train_time:12921486ms step_avg:583.44ms +grad accum step:5537/14336 +step:22148/57344 train_time:12922780ms step_avg:583.47ms +step:22149/57344 train_time:12922797ms step_avg:583.45ms +step:22150/57344 train_time:12923047ms step_avg:583.43ms +step:22151/57344 train_time:12923605ms step_avg:583.43ms +grad accum step:5538/14336 +step:22152/57344 train_time:12924951ms step_avg:583.47ms +step:22153/57344 train_time:12924968ms step_avg:583.44ms +step:22154/57344 train_time:12925218ms step_avg:583.43ms +step:22155/57344 train_time:12925777ms step_avg:583.42ms +grad accum step:5539/14336 +step:22156/57344 train_time:12927089ms step_avg:583.46ms +step:22157/57344 train_time:12927106ms step_avg:583.43ms +step:22158/57344 train_time:12927359ms step_avg:583.42ms +step:22159/57344 train_time:12927925ms step_avg:583.42ms +grad accum step:5540/14336 +step:22160/57344 train_time:12929252ms step_avg:583.45ms +step:22161/57344 train_time:12929269ms step_avg:583.42ms +step:22162/57344 train_time:12929517ms step_avg:583.41ms +step:22163/57344 train_time:12930074ms step_avg:583.41ms +grad accum step:5541/14336 +step:22164/57344 train_time:12931413ms step_avg:583.44ms +step:22165/57344 train_time:12931430ms step_avg:583.42ms +step:22166/57344 train_time:12931677ms step_avg:583.40ms +step:22167/57344 train_time:12932226ms step_avg:583.40ms +grad accum step:5542/14336 +step:22168/57344 train_time:12933521ms step_avg:583.43ms +step:22169/57344 train_time:12933538ms step_avg:583.41ms +step:22170/57344 train_time:12933788ms step_avg:583.39ms +step:22171/57344 train_time:12934342ms step_avg:583.39ms +grad accum step:5543/14336 +step:22172/57344 train_time:12935675ms step_avg:583.42ms +step:22173/57344 train_time:12935691ms step_avg:583.40ms +step:22174/57344 train_time:12935937ms step_avg:583.38ms +step:22175/57344 train_time:12936476ms step_avg:583.38ms +grad accum step:5544/14336 +step:22176/57344 train_time:12937760ms step_avg:583.41ms +step:22177/57344 train_time:12937777ms step_avg:583.39ms +step:22178/57344 train_time:12938021ms step_avg:583.37ms +step:22179/57344 train_time:12938569ms step_avg:583.37ms +grad accum step:5545/14336 +step:22180/57344 train_time:12939906ms step_avg:583.40ms +step:22181/57344 train_time:12939923ms step_avg:583.38ms +step:22182/57344 train_time:12940166ms step_avg:583.36ms +step:22183/57344 train_time:12940708ms step_avg:583.36ms +grad accum step:5546/14336 +step:22184/57344 train_time:12941993ms step_avg:583.39ms +step:22185/57344 train_time:12942010ms step_avg:583.37ms +step:22186/57344 train_time:12942257ms step_avg:583.35ms +step:22187/57344 train_time:12942798ms step_avg:583.35ms +grad accum step:5547/14336 +step:22188/57344 train_time:12944117ms step_avg:583.38ms +step:22189/57344 train_time:12944133ms step_avg:583.36ms +step:22190/57344 train_time:12944378ms step_avg:583.34ms +step:22191/57344 train_time:12944925ms step_avg:583.34ms +grad accum step:5548/14336 +step:22192/57344 train_time:12946216ms step_avg:583.37ms +step:22193/57344 train_time:12946233ms step_avg:583.35ms +step:22194/57344 train_time:12946480ms step_avg:583.33ms +step:22195/57344 train_time:12947031ms step_avg:583.33ms +grad accum step:5549/14336 +step:22196/57344 train_time:12948362ms step_avg:583.36ms +step:22197/57344 train_time:12948379ms step_avg:583.34ms +step:22198/57344 train_time:12948627ms step_avg:583.32ms +step:22199/57344 train_time:12949178ms step_avg:583.32ms +grad accum step:5550/14336 +step:22200/57344 train_time:12950471ms step_avg:583.35ms +step:22201/57344 train_time:12950488ms step_avg:583.33ms +step:22202/57344 train_time:12950750ms step_avg:583.31ms +step:22203/57344 train_time:12951340ms step_avg:583.31ms +grad accum step:5551/14336 +step:22204/57344 train_time:12952664ms step_avg:583.35ms +step:22205/57344 train_time:12952681ms step_avg:583.32ms +step:22206/57344 train_time:12952927ms step_avg:583.31ms +step:22207/57344 train_time:12953475ms step_avg:583.31ms +grad accum step:5552/14336 +step:22208/57344 train_time:12954797ms step_avg:583.34ms +step:22208/57344 val_loss:6.601605 train_time:12954797ms step_avg:583.34ms +step:22209/57344 train_time:12954810ms step_avg:583.31ms +step:22210/57344 train_time:12955032ms step_avg:583.30ms +step:22211/57344 train_time:12955572ms step_avg:583.30ms +grad accum step:5553/14336 +step:22212/57344 train_time:12956916ms step_avg:583.33ms +step:22213/57344 train_time:12956933ms step_avg:583.30ms +step:22214/57344 train_time:12957187ms step_avg:583.29ms +step:22215/57344 train_time:12957745ms step_avg:583.29ms +grad accum step:5554/14336 +step:22216/57344 train_time:12959056ms step_avg:583.32ms +step:22217/57344 train_time:12959073ms step_avg:583.30ms +step:22218/57344 train_time:12959324ms step_avg:583.28ms +step:22219/57344 train_time:12959885ms step_avg:583.28ms +grad accum step:5555/14336 +step:22220/57344 train_time:12961219ms step_avg:583.31ms +step:22221/57344 train_time:12961236ms step_avg:583.29ms +step:22222/57344 train_time:12961491ms step_avg:583.27ms +step:22223/57344 train_time:12962059ms step_avg:583.27ms +grad accum step:5556/14336 +step:22224/57344 train_time:12963410ms step_avg:583.31ms +step:22225/57344 train_time:12963427ms step_avg:583.28ms +step:22226/57344 train_time:12963671ms step_avg:583.27ms +step:22227/57344 train_time:12964210ms step_avg:583.26ms +grad accum step:5557/14336 +step:22228/57344 train_time:12965486ms step_avg:583.30ms +step:22229/57344 train_time:12965503ms step_avg:583.27ms +step:22230/57344 train_time:12965751ms step_avg:583.25ms +step:22231/57344 train_time:12966301ms step_avg:583.25ms +grad accum step:5558/14336 +step:22232/57344 train_time:12967598ms step_avg:583.29ms +step:22233/57344 train_time:12967615ms step_avg:583.26ms +step:22234/57344 train_time:12967863ms step_avg:583.24ms +step:22235/57344 train_time:12968406ms step_avg:583.24ms +grad accum step:5559/14336 +step:22236/57344 train_time:12969719ms step_avg:583.28ms +step:22237/57344 train_time:12969736ms step_avg:583.25ms +step:22238/57344 train_time:12969980ms step_avg:583.23ms +step:22239/57344 train_time:12970528ms step_avg:583.23ms +grad accum step:5560/14336 +step:22240/57344 train_time:12971881ms step_avg:583.27ms +step:22241/57344 train_time:12971898ms step_avg:583.24ms +step:22242/57344 train_time:12972155ms step_avg:583.23ms +step:22243/57344 train_time:12972723ms step_avg:583.23ms +grad accum step:5561/14336 +step:22244/57344 train_time:12974041ms step_avg:583.26ms +step:22245/57344 train_time:12974058ms step_avg:583.23ms +step:22246/57344 train_time:12974306ms step_avg:583.22ms +step:22247/57344 train_time:12974865ms step_avg:583.22ms +grad accum step:5562/14336 +step:22248/57344 train_time:12976163ms step_avg:583.25ms +step:22249/57344 train_time:12976179ms step_avg:583.23ms +step:22250/57344 train_time:12976430ms step_avg:583.21ms +step:22251/57344 train_time:12976984ms step_avg:583.21ms +grad accum step:5563/14336 +step:22252/57344 train_time:12978300ms step_avg:583.24ms +step:22253/57344 train_time:12978317ms step_avg:583.22ms +step:22254/57344 train_time:12978558ms step_avg:583.20ms +step:22255/57344 train_time:12979098ms step_avg:583.20ms +grad accum step:5564/14336 +step:22256/57344 train_time:12980399ms step_avg:583.23ms +step:22257/57344 train_time:12980416ms step_avg:583.21ms +step:22258/57344 train_time:12980679ms step_avg:583.19ms +step:22259/57344 train_time:12981265ms step_avg:583.19ms +grad accum step:5565/14336 +step:22260/57344 train_time:12982561ms step_avg:583.22ms +step:22261/57344 train_time:12982578ms step_avg:583.20ms +step:22262/57344 train_time:12982828ms step_avg:583.18ms +step:22263/57344 train_time:12983389ms step_avg:583.18ms +grad accum step:5566/14336 +step:22264/57344 train_time:12984699ms step_avg:583.22ms +step:22265/57344 train_time:12984716ms step_avg:583.19ms +step:22266/57344 train_time:12984967ms step_avg:583.17ms +step:22267/57344 train_time:12985523ms step_avg:583.17ms +grad accum step:5567/14336 +step:22268/57344 train_time:12986837ms step_avg:583.21ms +step:22269/57344 train_time:12986854ms step_avg:583.18ms +step:22270/57344 train_time:12987105ms step_avg:583.17ms +step:22271/57344 train_time:12987658ms step_avg:583.16ms +grad accum step:5568/14336 +step:22272/57344 train_time:12988988ms step_avg:583.20ms +step:22272/57344 val_loss:6.576770 train_time:12988989ms step_avg:583.20ms +step:22273/57344 train_time:12989001ms step_avg:583.17ms +step:22274/57344 train_time:12989225ms step_avg:583.16ms +step:22275/57344 train_time:12989777ms step_avg:583.15ms +grad accum step:5569/14336 +step:22276/57344 train_time:12991074ms step_avg:583.19ms +step:22277/57344 train_time:12991091ms step_avg:583.16ms +step:22278/57344 train_time:12991341ms step_avg:583.15ms +step:22279/57344 train_time:12991894ms step_avg:583.15ms +grad accum step:5570/14336 +step:22280/57344 train_time:12993218ms step_avg:583.18ms +step:22281/57344 train_time:12993235ms step_avg:583.15ms +step:22282/57344 train_time:12993481ms step_avg:583.14ms +step:22283/57344 train_time:12994031ms step_avg:583.14ms +grad accum step:5571/14336 +step:22284/57344 train_time:12995324ms step_avg:583.17ms +step:22285/57344 train_time:12995341ms step_avg:583.14ms +step:22286/57344 train_time:12995593ms step_avg:583.13ms +step:22287/57344 train_time:12996155ms step_avg:583.13ms +grad accum step:5572/14336 +step:22288/57344 train_time:12997454ms step_avg:583.16ms +step:22289/57344 train_time:12997471ms step_avg:583.13ms +step:22290/57344 train_time:12997722ms step_avg:583.12ms +step:22291/57344 train_time:12998278ms step_avg:583.12ms +grad accum step:5573/14336 +step:22292/57344 train_time:12999575ms step_avg:583.15ms +step:22293/57344 train_time:12999592ms step_avg:583.12ms +step:22294/57344 train_time:12999844ms step_avg:583.11ms +step:22295/57344 train_time:13000404ms step_avg:583.11ms +grad accum step:5574/14336 +step:22296/57344 train_time:13001750ms step_avg:583.14ms +step:22297/57344 train_time:13001766ms step_avg:583.12ms +step:22298/57344 train_time:13002022ms step_avg:583.10ms +step:22299/57344 train_time:13002595ms step_avg:583.10ms +grad accum step:5575/14336 +step:22300/57344 train_time:13003894ms step_avg:583.13ms +step:22301/57344 train_time:13003911ms step_avg:583.11ms +step:22302/57344 train_time:13004164ms step_avg:583.09ms +step:22303/57344 train_time:13004729ms step_avg:583.09ms +grad accum step:5576/14336 +step:22304/57344 train_time:13006084ms step_avg:583.13ms +step:22305/57344 train_time:13006101ms step_avg:583.10ms +step:22306/57344 train_time:13006349ms step_avg:583.09ms +step:22307/57344 train_time:13006891ms step_avg:583.09ms +grad accum step:5577/14336 +step:22308/57344 train_time:13008229ms step_avg:583.12ms +step:22309/57344 train_time:13008246ms step_avg:583.09ms +step:22310/57344 train_time:13008491ms step_avg:583.08ms +step:22311/57344 train_time:13009052ms step_avg:583.08ms +grad accum step:5578/14336 +step:22312/57344 train_time:13010414ms step_avg:583.11ms +step:22313/57344 train_time:13010431ms step_avg:583.09ms +step:22314/57344 train_time:13010678ms step_avg:583.07ms +step:22315/57344 train_time:13011226ms step_avg:583.07ms +grad accum step:5579/14336 +step:22316/57344 train_time:13012541ms step_avg:583.10ms +step:22317/57344 train_time:13012558ms step_avg:583.08ms +step:22318/57344 train_time:13012804ms step_avg:583.06ms +step:22319/57344 train_time:13013355ms step_avg:583.06ms +grad accum step:5580/14336 +step:22320/57344 train_time:13014699ms step_avg:583.10ms +step:22321/57344 train_time:13014716ms step_avg:583.07ms +step:22322/57344 train_time:13014978ms step_avg:583.06ms +step:22323/57344 train_time:13015568ms step_avg:583.06ms +grad accum step:5581/14336 +step:22324/57344 train_time:13016902ms step_avg:583.09ms +step:22325/57344 train_time:13016919ms step_avg:583.06ms +step:22326/57344 train_time:13017170ms step_avg:583.05ms +step:22327/57344 train_time:13017722ms step_avg:583.05ms +grad accum step:5582/14336 +step:22328/57344 train_time:13019079ms step_avg:583.08ms +step:22329/57344 train_time:13019096ms step_avg:583.06ms +step:22330/57344 train_time:13019348ms step_avg:583.04ms +step:22331/57344 train_time:13019920ms step_avg:583.04ms +grad accum step:5583/14336 +step:22332/57344 train_time:13021288ms step_avg:583.08ms +step:22333/57344 train_time:13021305ms step_avg:583.05ms +step:22334/57344 train_time:13021548ms step_avg:583.04ms +step:22335/57344 train_time:13022093ms step_avg:583.04ms +grad accum step:5584/14336 +step:22336/57344 train_time:13023399ms step_avg:583.07ms +step:22336/57344 val_loss:6.559417 train_time:13023400ms step_avg:583.07ms +step:22337/57344 train_time:13023412ms step_avg:583.04ms +step:22338/57344 train_time:13023634ms step_avg:583.03ms +step:22339/57344 train_time:13024174ms step_avg:583.02ms +grad accum step:5585/14336 +step:22340/57344 train_time:13025472ms step_avg:583.06ms +step:22341/57344 train_time:13025489ms step_avg:583.03ms +step:22342/57344 train_time:13025736ms step_avg:583.02ms +step:22343/57344 train_time:13026284ms step_avg:583.01ms +grad accum step:5586/14336 +step:22344/57344 train_time:13027571ms step_avg:583.05ms +step:22345/57344 train_time:13027588ms step_avg:583.02ms +step:22346/57344 train_time:13027839ms step_avg:583.01ms +step:22347/57344 train_time:13028391ms step_avg:583.00ms +grad accum step:5587/14336 +step:22348/57344 train_time:13029742ms step_avg:583.04ms +step:22349/57344 train_time:13029759ms step_avg:583.01ms +step:22350/57344 train_time:13030004ms step_avg:583.00ms +step:22351/57344 train_time:13030560ms step_avg:583.00ms +grad accum step:5588/14336 +step:22352/57344 train_time:13031890ms step_avg:583.03ms +step:22353/57344 train_time:13031907ms step_avg:583.00ms +step:22354/57344 train_time:13032166ms step_avg:582.99ms +step:22355/57344 train_time:13032762ms step_avg:582.99ms +grad accum step:5589/14336 +step:22356/57344 train_time:13034118ms step_avg:583.03ms +step:22357/57344 train_time:13034135ms step_avg:583.00ms +step:22358/57344 train_time:13034387ms step_avg:582.99ms +step:22359/57344 train_time:13034947ms step_avg:582.98ms +grad accum step:5590/14336 +step:22360/57344 train_time:13036225ms step_avg:583.02ms +step:22361/57344 train_time:13036242ms step_avg:582.99ms +step:22362/57344 train_time:13036495ms step_avg:582.98ms +step:22363/57344 train_time:13037058ms step_avg:582.97ms +grad accum step:5591/14336 +step:22364/57344 train_time:13038374ms step_avg:583.01ms +step:22365/57344 train_time:13038391ms step_avg:582.98ms +step:22366/57344 train_time:13038641ms step_avg:582.97ms +step:22367/57344 train_time:13039192ms step_avg:582.97ms +grad accum step:5592/14336 +step:22368/57344 train_time:13040482ms step_avg:583.00ms +step:22369/57344 train_time:13040498ms step_avg:582.97ms +step:22370/57344 train_time:13040751ms step_avg:582.96ms +step:22371/57344 train_time:13041312ms step_avg:582.96ms +grad accum step:5593/14336 +step:22372/57344 train_time:13042609ms step_avg:582.99ms +step:22373/57344 train_time:13042625ms step_avg:582.96ms +step:22374/57344 train_time:13042877ms step_avg:582.95ms +step:22375/57344 train_time:13043436ms step_avg:582.95ms +grad accum step:5594/14336 +step:22376/57344 train_time:13044764ms step_avg:582.98ms +step:22377/57344 train_time:13044781ms step_avg:582.95ms +step:22378/57344 train_time:13045031ms step_avg:582.94ms +step:22379/57344 train_time:13045587ms step_avg:582.94ms +grad accum step:5595/14336 +step:22380/57344 train_time:13046904ms step_avg:582.97ms +step:22381/57344 train_time:13046920ms step_avg:582.95ms +step:22382/57344 train_time:13047173ms step_avg:582.93ms +step:22383/57344 train_time:13047736ms step_avg:582.93ms +grad accum step:5596/14336 +step:22384/57344 train_time:13049039ms step_avg:582.96ms +step:22385/57344 train_time:13049056ms step_avg:582.94ms +step:22386/57344 train_time:13049303ms step_avg:582.92ms +step:22387/57344 train_time:13049851ms step_avg:582.92ms +grad accum step:5597/14336 +step:22388/57344 train_time:13051163ms step_avg:582.95ms +step:22389/57344 train_time:13051180ms step_avg:582.93ms +step:22390/57344 train_time:13051429ms step_avg:582.91ms +step:22391/57344 train_time:13051984ms step_avg:582.91ms +grad accum step:5598/14336 +step:22392/57344 train_time:13053317ms step_avg:582.95ms +step:22393/57344 train_time:13053334ms step_avg:582.92ms +step:22394/57344 train_time:13053578ms step_avg:582.91ms +step:22395/57344 train_time:13054126ms step_avg:582.90ms +grad accum step:5599/14336 +step:22396/57344 train_time:13055435ms step_avg:582.94ms +step:22397/57344 train_time:13055452ms step_avg:582.91ms +step:22398/57344 train_time:13055698ms step_avg:582.90ms +step:22399/57344 train_time:13056232ms step_avg:582.89ms +grad accum step:5600/14336 +step:22400/57344 train_time:13057561ms step_avg:582.93ms +step:22400/57344 val_loss:6.537297 train_time:13057562ms step_avg:582.93ms +step:22401/57344 train_time:13057574ms step_avg:582.90ms +step:22402/57344 train_time:13057799ms step_avg:582.89ms +step:22403/57344 train_time:13058346ms step_avg:582.88ms +grad accum step:5601/14336 +step:22404/57344 train_time:13059647ms step_avg:582.92ms +step:22405/57344 train_time:13059664ms step_avg:582.89ms +step:22406/57344 train_time:13059910ms step_avg:582.88ms +step:22407/57344 train_time:13060450ms step_avg:582.87ms +grad accum step:5602/14336 +step:22408/57344 train_time:13061738ms step_avg:582.91ms +step:22409/57344 train_time:13061755ms step_avg:582.88ms +step:22410/57344 train_time:13061999ms step_avg:582.86ms +step:22411/57344 train_time:13062537ms step_avg:582.86ms +grad accum step:5603/14336 +step:22412/57344 train_time:13063832ms step_avg:582.89ms +step:22413/57344 train_time:13063849ms step_avg:582.87ms +step:22414/57344 train_time:13064106ms step_avg:582.85ms +step:22415/57344 train_time:13064666ms step_avg:582.85ms +grad accum step:5604/14336 +step:22416/57344 train_time:13065959ms step_avg:582.89ms +step:22417/57344 train_time:13065976ms step_avg:582.86ms +step:22418/57344 train_time:13066229ms step_avg:582.85ms +step:22419/57344 train_time:13066792ms step_avg:582.84ms +grad accum step:5605/14336 +step:22420/57344 train_time:13068110ms step_avg:582.88ms +step:22421/57344 train_time:13068127ms step_avg:582.85ms +step:22422/57344 train_time:13068377ms step_avg:582.84ms +step:22423/57344 train_time:13068931ms step_avg:582.84ms +grad accum step:5606/14336 +step:22424/57344 train_time:13070247ms step_avg:582.87ms +step:22425/57344 train_time:13070264ms step_avg:582.84ms +step:22426/57344 train_time:13070514ms step_avg:582.83ms +step:22427/57344 train_time:13071069ms step_avg:582.83ms +grad accum step:5607/14336 +step:22428/57344 train_time:13072391ms step_avg:582.86ms +step:22429/57344 train_time:13072407ms step_avg:582.84ms +step:22430/57344 train_time:13072662ms step_avg:582.82ms +step:22431/57344 train_time:13073222ms step_avg:582.82ms +grad accum step:5608/14336 +step:22432/57344 train_time:13074524ms step_avg:582.85ms +step:22433/57344 train_time:13074540ms step_avg:582.83ms +step:22434/57344 train_time:13074791ms step_avg:582.81ms +step:22435/57344 train_time:13075359ms step_avg:582.81ms +grad accum step:5609/14336 +step:22436/57344 train_time:13076695ms step_avg:582.84ms +step:22437/57344 train_time:13076712ms step_avg:582.82ms +step:22438/57344 train_time:13076963ms step_avg:582.80ms +step:22439/57344 train_time:13077522ms step_avg:582.80ms +grad accum step:5610/14336 +step:22440/57344 train_time:13078844ms step_avg:582.84ms +step:22441/57344 train_time:13078861ms step_avg:582.81ms +step:22442/57344 train_time:13079107ms step_avg:582.80ms +step:22443/57344 train_time:13079655ms step_avg:582.79ms +grad accum step:5611/14336 +step:22444/57344 train_time:13081005ms step_avg:582.83ms +step:22445/57344 train_time:13081022ms step_avg:582.80ms +step:22446/57344 train_time:13081264ms step_avg:582.79ms +step:22447/57344 train_time:13081802ms step_avg:582.79ms +grad accum step:5612/14336 +step:22448/57344 train_time:13083095ms step_avg:582.82ms +step:22449/57344 train_time:13083112ms step_avg:582.79ms +step:22450/57344 train_time:13083361ms step_avg:582.78ms +step:22451/57344 train_time:13083916ms step_avg:582.78ms +grad accum step:5613/14336 +step:22452/57344 train_time:13085244ms step_avg:582.81ms +step:22453/57344 train_time:13085259ms step_avg:582.78ms +step:22454/57344 train_time:13085512ms step_avg:582.77ms +step:22455/57344 train_time:13086073ms step_avg:582.77ms +grad accum step:5614/14336 +step:22456/57344 train_time:13087385ms step_avg:582.80ms +step:22457/57344 train_time:13087402ms step_avg:582.78ms +step:22458/57344 train_time:13087651ms step_avg:582.76ms +step:22459/57344 train_time:13088215ms step_avg:582.76ms +grad accum step:5615/14336 +step:22460/57344 train_time:13089538ms step_avg:582.79ms +step:22461/57344 train_time:13089555ms step_avg:582.77ms +step:22462/57344 train_time:13089799ms step_avg:582.75ms +step:22463/57344 train_time:13090338ms step_avg:582.75ms +grad accum step:5616/14336 +step:22464/57344 train_time:13091632ms step_avg:582.78ms +step:22464/57344 val_loss:6.502707 train_time:13091632ms step_avg:582.78ms +step:22465/57344 train_time:13091645ms step_avg:582.76ms +step:22466/57344 train_time:13091866ms step_avg:582.74ms +step:22467/57344 train_time:13092409ms step_avg:582.74ms +grad accum step:5617/14336 +step:22468/57344 train_time:13093708ms step_avg:582.77ms +step:22469/57344 train_time:13093725ms step_avg:582.75ms +step:22470/57344 train_time:13093971ms step_avg:582.73ms +step:22471/57344 train_time:13094515ms step_avg:582.73ms +grad accum step:5618/14336 +step:22472/57344 train_time:13095799ms step_avg:582.76ms +step:22473/57344 train_time:13095815ms step_avg:582.74ms +step:22474/57344 train_time:13096062ms step_avg:582.72ms +step:22475/57344 train_time:13096603ms step_avg:582.72ms +grad accum step:5619/14336 +step:22476/57344 train_time:13097904ms step_avg:582.75ms +step:22477/57344 train_time:13097921ms step_avg:582.73ms +step:22478/57344 train_time:13098164ms step_avg:582.71ms +step:22479/57344 train_time:13098706ms step_avg:582.71ms +grad accum step:5620/14336 +step:22480/57344 train_time:13100011ms step_avg:582.74ms +step:22481/57344 train_time:13100027ms step_avg:582.72ms +step:22482/57344 train_time:13100276ms step_avg:582.70ms +step:22483/57344 train_time:13100826ms step_avg:582.70ms +grad accum step:5621/14336 +step:22484/57344 train_time:13102148ms step_avg:582.73ms +step:22485/57344 train_time:13102164ms step_avg:582.71ms +step:22486/57344 train_time:13102420ms step_avg:582.69ms +step:22487/57344 train_time:13102992ms step_avg:582.69ms +grad accum step:5622/14336 +step:22488/57344 train_time:13104274ms step_avg:582.72ms +step:22489/57344 train_time:13104290ms step_avg:582.70ms +step:22490/57344 train_time:13104549ms step_avg:582.68ms +step:22491/57344 train_time:13105131ms step_avg:582.68ms +grad accum step:5623/14336 +step:22492/57344 train_time:13106442ms step_avg:582.72ms +step:22493/57344 train_time:13106458ms step_avg:582.69ms +step:22494/57344 train_time:13106712ms step_avg:582.68ms +step:22495/57344 train_time:13107268ms step_avg:582.67ms +grad accum step:5624/14336 +step:22496/57344 train_time:13108610ms step_avg:582.71ms +step:22497/57344 train_time:13108627ms step_avg:582.68ms +step:22498/57344 train_time:13108874ms step_avg:582.67ms +step:22499/57344 train_time:13109419ms step_avg:582.67ms +grad accum step:5625/14336 +step:22500/57344 train_time:13110721ms step_avg:582.70ms +step:22501/57344 train_time:13110738ms step_avg:582.67ms +step:22502/57344 train_time:13110986ms step_avg:582.66ms +step:22503/57344 train_time:13111537ms step_avg:582.66ms +grad accum step:5626/14336 +step:22504/57344 train_time:13112852ms step_avg:582.69ms +step:22505/57344 train_time:13112869ms step_avg:582.66ms +step:22506/57344 train_time:13113120ms step_avg:582.65ms +step:22507/57344 train_time:13113678ms step_avg:582.65ms +grad accum step:5627/14336 +step:22508/57344 train_time:13114980ms step_avg:582.68ms +step:22509/57344 train_time:13114996ms step_avg:582.66ms +step:22510/57344 train_time:13115243ms step_avg:582.64ms +step:22511/57344 train_time:13115787ms step_avg:582.64ms +grad accum step:5628/14336 +step:22512/57344 train_time:13117060ms step_avg:582.67ms +step:22513/57344 train_time:13117077ms step_avg:582.64ms +step:22514/57344 train_time:13117324ms step_avg:582.63ms +step:22515/57344 train_time:13117868ms step_avg:582.63ms +grad accum step:5629/14336 +step:22516/57344 train_time:13119194ms step_avg:582.66ms +step:22517/57344 train_time:13119211ms step_avg:582.64ms +step:22518/57344 train_time:13119458ms step_avg:582.62ms +step:22519/57344 train_time:13120012ms step_avg:582.62ms +grad accum step:5630/14336 +step:22520/57344 train_time:13121332ms step_avg:582.65ms +step:22521/57344 train_time:13121349ms step_avg:582.63ms +step:22522/57344 train_time:13121601ms step_avg:582.61ms +step:22523/57344 train_time:13122164ms step_avg:582.61ms +grad accum step:5631/14336 +step:22524/57344 train_time:13123485ms step_avg:582.64ms +step:22525/57344 train_time:13123502ms step_avg:582.62ms +step:22526/57344 train_time:13123748ms step_avg:582.60ms +step:22527/57344 train_time:13124295ms step_avg:582.60ms +grad accum step:5632/14336 +step:22528/57344 train_time:13125592ms step_avg:582.63ms +step:22528/57344 val_loss:6.483665 train_time:13125593ms step_avg:582.63ms +step:22529/57344 train_time:13125605ms step_avg:582.61ms +step:22530/57344 train_time:13125830ms step_avg:582.59ms +step:22531/57344 train_time:13126379ms step_avg:582.59ms +grad accum step:5633/14336 +step:22532/57344 train_time:13127699ms step_avg:582.62ms +step:22533/57344 train_time:13127716ms step_avg:582.60ms +step:22534/57344 train_time:13127984ms step_avg:582.59ms +step:22535/57344 train_time:13128598ms step_avg:582.59ms +grad accum step:5634/14336 +step:22536/57344 train_time:13129988ms step_avg:582.62ms +step:22537/57344 train_time:13130005ms step_avg:582.60ms +step:22538/57344 train_time:13130251ms step_avg:582.58ms +step:22539/57344 train_time:13130801ms step_avg:582.58ms +grad accum step:5635/14336 +step:22540/57344 train_time:13132119ms step_avg:582.61ms +step:22541/57344 train_time:13132135ms step_avg:582.59ms +step:22542/57344 train_time:13132384ms step_avg:582.57ms +step:22543/57344 train_time:13132933ms step_avg:582.57ms +grad accum step:5636/14336 +step:22544/57344 train_time:13134246ms step_avg:582.60ms +step:22545/57344 train_time:13134263ms step_avg:582.58ms +step:22546/57344 train_time:13134513ms step_avg:582.57ms +step:22547/57344 train_time:13135070ms step_avg:582.56ms +grad accum step:5637/14336 +step:22548/57344 train_time:13136356ms step_avg:582.60ms +step:22549/57344 train_time:13136373ms step_avg:582.57ms +step:22550/57344 train_time:13136620ms step_avg:582.56ms +step:22551/57344 train_time:13137176ms step_avg:582.55ms +grad accum step:5638/14336 +step:22552/57344 train_time:13138512ms step_avg:582.59ms +step:22553/57344 train_time:13138528ms step_avg:582.56ms +step:22554/57344 train_time:13138781ms step_avg:582.55ms +step:22555/57344 train_time:13139338ms step_avg:582.55ms +grad accum step:5639/14336 +step:22556/57344 train_time:13140625ms step_avg:582.58ms +step:22557/57344 train_time:13140642ms step_avg:582.55ms +step:22558/57344 train_time:13140892ms step_avg:582.54ms +step:22559/57344 train_time:13141452ms step_avg:582.54ms +grad accum step:5640/14336 +step:22560/57344 train_time:13142751ms step_avg:582.57ms +step:22561/57344 train_time:13142768ms step_avg:582.54ms +step:22562/57344 train_time:13143022ms step_avg:582.53ms +step:22563/57344 train_time:13143586ms step_avg:582.53ms +grad accum step:5641/14336 +step:22564/57344 train_time:13144900ms step_avg:582.56ms +step:22565/57344 train_time:13144917ms step_avg:582.54ms +step:22566/57344 train_time:13145168ms step_avg:582.52ms +step:22567/57344 train_time:13145726ms step_avg:582.52ms +grad accum step:5642/14336 +step:22568/57344 train_time:13147102ms step_avg:582.56ms +step:22569/57344 train_time:13147119ms step_avg:582.53ms +step:22570/57344 train_time:13147365ms step_avg:582.52ms +step:22571/57344 train_time:13147922ms step_avg:582.51ms +grad accum step:5643/14336 +step:22572/57344 train_time:13149240ms step_avg:582.55ms +step:22573/57344 train_time:13149257ms step_avg:582.52ms +step:22574/57344 train_time:13149504ms step_avg:582.51ms +step:22575/57344 train_time:13150051ms step_avg:582.51ms +grad accum step:5644/14336 +step:22576/57344 train_time:13151324ms step_avg:582.54ms +step:22577/57344 train_time:13151341ms step_avg:582.51ms +step:22578/57344 train_time:13151588ms step_avg:582.50ms +step:22579/57344 train_time:13152133ms step_avg:582.49ms +grad accum step:5645/14336 +step:22580/57344 train_time:13153410ms step_avg:582.52ms +step:22581/57344 train_time:13153427ms step_avg:582.50ms +step:22582/57344 train_time:13153678ms step_avg:582.49ms +step:22583/57344 train_time:13154242ms step_avg:582.48ms +grad accum step:5646/14336 +step:22584/57344 train_time:13155565ms step_avg:582.52ms +step:22585/57344 train_time:13155583ms step_avg:582.49ms +step:22586/57344 train_time:13155837ms step_avg:582.48ms +step:22587/57344 train_time:13156404ms step_avg:582.48ms +grad accum step:5647/14336 +step:22588/57344 train_time:13157702ms step_avg:582.51ms +step:22589/57344 train_time:13157719ms step_avg:582.48ms +step:22590/57344 train_time:13157971ms step_avg:582.47ms +step:22591/57344 train_time:13158535ms step_avg:582.47ms +grad accum step:5648/14336 +step:22592/57344 train_time:13159907ms step_avg:582.50ms +step:22592/57344 val_loss:6.478091 train_time:13159907ms step_avg:582.50ms +step:22593/57344 train_time:13159920ms step_avg:582.48ms +step:22594/57344 train_time:13160148ms step_avg:582.46ms +step:22595/57344 train_time:13160709ms step_avg:582.46ms +grad accum step:5649/14336 +step:22596/57344 train_time:13162036ms step_avg:582.49ms +step:22597/57344 train_time:13162053ms step_avg:582.47ms +step:22598/57344 train_time:13162300ms step_avg:582.45ms +step:22599/57344 train_time:13162847ms step_avg:582.45ms +grad accum step:5650/14336 +step:22600/57344 train_time:13164164ms step_avg:582.49ms +step:22601/57344 train_time:13164181ms step_avg:582.46ms +step:22602/57344 train_time:13164426ms step_avg:582.45ms +step:22603/57344 train_time:13164975ms step_avg:582.44ms +grad accum step:5651/14336 +step:22604/57344 train_time:13166272ms step_avg:582.48ms +step:22605/57344 train_time:13166288ms step_avg:582.45ms +step:22606/57344 train_time:13166540ms step_avg:582.44ms +step:22607/57344 train_time:13167107ms step_avg:582.43ms +grad accum step:5652/14336 +step:22608/57344 train_time:13168439ms step_avg:582.47ms +step:22609/57344 train_time:13168456ms step_avg:582.44ms +step:22610/57344 train_time:13168707ms step_avg:582.43ms +step:22611/57344 train_time:13169261ms step_avg:582.43ms +grad accum step:5653/14336 +step:22612/57344 train_time:13170584ms step_avg:582.46ms +step:22613/57344 train_time:13170601ms step_avg:582.43ms +step:22614/57344 train_time:13170855ms step_avg:582.42ms +step:22615/57344 train_time:13171419ms step_avg:582.42ms +grad accum step:5654/14336 +step:22616/57344 train_time:13172709ms step_avg:582.45ms +step:22617/57344 train_time:13172726ms step_avg:582.43ms +step:22618/57344 train_time:13172971ms step_avg:582.41ms +step:22619/57344 train_time:13173512ms step_avg:582.41ms +grad accum step:5655/14336 +step:22620/57344 train_time:13174834ms step_avg:582.44ms +step:22621/57344 train_time:13174848ms step_avg:582.42ms +step:22622/57344 train_time:13175095ms step_avg:582.40ms +step:22623/57344 train_time:13175641ms step_avg:582.40ms +grad accum step:5656/14336 +step:22624/57344 train_time:13176944ms step_avg:582.43ms +step:22625/57344 train_time:13176961ms step_avg:582.41ms +step:22626/57344 train_time:13177209ms step_avg:582.39ms +step:22627/57344 train_time:13177758ms step_avg:582.39ms +grad accum step:5657/14336 +step:22628/57344 train_time:13179036ms step_avg:582.42ms +step:22629/57344 train_time:13179053ms step_avg:582.40ms +step:22630/57344 train_time:13179301ms step_avg:582.38ms +step:22631/57344 train_time:13179846ms step_avg:582.38ms +grad accum step:5658/14336 +step:22632/57344 train_time:13181146ms step_avg:582.41ms +step:22633/57344 train_time:13181162ms step_avg:582.39ms +step:22634/57344 train_time:13181407ms step_avg:582.37ms +step:22635/57344 train_time:13181947ms step_avg:582.37ms +grad accum step:5659/14336 +step:22636/57344 train_time:13183263ms step_avg:582.40ms +step:22637/57344 train_time:13183280ms step_avg:582.38ms +step:22638/57344 train_time:13183528ms step_avg:582.36ms +step:22639/57344 train_time:13184094ms step_avg:582.36ms +grad accum step:5660/14336 +step:22640/57344 train_time:13185441ms step_avg:582.40ms +step:22641/57344 train_time:13185458ms step_avg:582.37ms +step:22642/57344 train_time:13185706ms step_avg:582.36ms +step:22643/57344 train_time:13186257ms step_avg:582.35ms +grad accum step:5661/14336 +step:22644/57344 train_time:13187568ms step_avg:582.39ms +step:22645/57344 train_time:13187585ms step_avg:582.36ms +step:22646/57344 train_time:13187838ms step_avg:582.35ms +step:22647/57344 train_time:13188398ms step_avg:582.35ms +grad accum step:5662/14336 +step:22648/57344 train_time:13189720ms step_avg:582.38ms +step:22649/57344 train_time:13189737ms step_avg:582.35ms +step:22650/57344 train_time:13189986ms step_avg:582.34ms +step:22651/57344 train_time:13190544ms step_avg:582.34ms +grad accum step:5663/14336 +step:22652/57344 train_time:13191833ms step_avg:582.37ms +step:22653/57344 train_time:13191850ms step_avg:582.34ms +step:22654/57344 train_time:13192094ms step_avg:582.33ms +step:22655/57344 train_time:13192646ms step_avg:582.33ms +grad accum step:5664/14336 +step:22656/57344 train_time:13193947ms step_avg:582.36ms +step:22656/57344 val_loss:6.442571 train_time:13193948ms step_avg:582.36ms +step:22657/57344 train_time:13193960ms step_avg:582.33ms +step:22658/57344 train_time:13194198ms step_avg:582.32ms +step:22659/57344 train_time:13194778ms step_avg:582.32ms +grad accum step:5665/14336 +step:22660/57344 train_time:13196058ms step_avg:582.35ms +step:22661/57344 train_time:13196075ms step_avg:582.33ms +step:22662/57344 train_time:13196326ms step_avg:582.31ms +step:22663/57344 train_time:13196882ms step_avg:582.31ms +grad accum step:5666/14336 +step:22664/57344 train_time:13198197ms step_avg:582.34ms +step:22665/57344 train_time:13198213ms step_avg:582.32ms +step:22666/57344 train_time:13198462ms step_avg:582.30ms +step:22667/57344 train_time:13199007ms step_avg:582.30ms +grad accum step:5667/14336 +step:22668/57344 train_time:13200319ms step_avg:582.33ms +step:22669/57344 train_time:13200336ms step_avg:582.31ms +step:22670/57344 train_time:13200585ms step_avg:582.29ms +step:22671/57344 train_time:13201133ms step_avg:582.29ms +grad accum step:5668/14336 +step:22672/57344 train_time:13202414ms step_avg:582.32ms +step:22673/57344 train_time:13202431ms step_avg:582.30ms +step:22674/57344 train_time:13202680ms step_avg:582.28ms +step:22675/57344 train_time:13203238ms step_avg:582.28ms +grad accum step:5669/14336 +step:22676/57344 train_time:13204557ms step_avg:582.31ms +step:22677/57344 train_time:13204574ms step_avg:582.29ms +step:22678/57344 train_time:13204828ms step_avg:582.27ms +step:22679/57344 train_time:13205398ms step_avg:582.27ms +grad accum step:5670/14336 +step:22680/57344 train_time:13206712ms step_avg:582.31ms +step:22681/57344 train_time:13206729ms step_avg:582.28ms +step:22682/57344 train_time:13206977ms step_avg:582.27ms +step:22683/57344 train_time:13207523ms step_avg:582.27ms +grad accum step:5671/14336 +step:22684/57344 train_time:13208847ms step_avg:582.30ms +step:22685/57344 train_time:13208864ms step_avg:582.27ms +step:22686/57344 train_time:13209115ms step_avg:582.26ms +step:22687/57344 train_time:13209675ms step_avg:582.26ms +grad accum step:5672/14336 +step:22688/57344 train_time:13210990ms step_avg:582.29ms +step:22689/57344 train_time:13211007ms step_avg:582.26ms +step:22690/57344 train_time:13211255ms step_avg:582.25ms +step:22691/57344 train_time:13211807ms step_avg:582.25ms +grad accum step:5673/14336 +step:22692/57344 train_time:13213123ms step_avg:582.28ms +step:22693/57344 train_time:13213140ms step_avg:582.26ms +step:22694/57344 train_time:13213384ms step_avg:582.24ms +step:22695/57344 train_time:13213930ms step_avg:582.24ms +grad accum step:5674/14336 +step:22696/57344 train_time:13215208ms step_avg:582.27ms +step:22697/57344 train_time:13215225ms step_avg:582.25ms +step:22698/57344 train_time:13215473ms step_avg:582.23ms +step:22699/57344 train_time:13216023ms step_avg:582.23ms +grad accum step:5675/14336 +step:22700/57344 train_time:13217331ms step_avg:582.26ms +step:22701/57344 train_time:13217348ms step_avg:582.24ms +step:22702/57344 train_time:13217599ms step_avg:582.22ms +step:22703/57344 train_time:13218150ms step_avg:582.22ms +grad accum step:5676/14336 +step:22704/57344 train_time:13219434ms step_avg:582.25ms +step:22705/57344 train_time:13219451ms step_avg:582.23ms +step:22706/57344 train_time:13219702ms step_avg:582.21ms +step:22707/57344 train_time:13220253ms step_avg:582.21ms +grad accum step:5677/14336 +step:22708/57344 train_time:13221560ms step_avg:582.24ms +step:22709/57344 train_time:13221576ms step_avg:582.22ms +step:22710/57344 train_time:13221823ms step_avg:582.20ms +step:22711/57344 train_time:13222367ms step_avg:582.20ms +grad accum step:5678/14336 +step:22712/57344 train_time:13223663ms step_avg:582.23ms +step:22713/57344 train_time:13223680ms step_avg:582.21ms +step:22714/57344 train_time:13223929ms step_avg:582.19ms +step:22715/57344 train_time:13224484ms step_avg:582.19ms +grad accum step:5679/14336 +step:22716/57344 train_time:13225850ms step_avg:582.23ms +step:22717/57344 train_time:13225867ms step_avg:582.20ms +step:22718/57344 train_time:13226124ms step_avg:582.19ms +step:22719/57344 train_time:13226705ms step_avg:582.19ms +grad accum step:5680/14336 +step:22720/57344 train_time:13228052ms step_avg:582.22ms +step:22720/57344 val_loss:6.423617 train_time:13228052ms step_avg:582.22ms +step:22721/57344 train_time:13228065ms step_avg:582.20ms +step:22722/57344 train_time:13228298ms step_avg:582.18ms +step:22723/57344 train_time:13228861ms step_avg:582.18ms +grad accum step:5681/14336 +step:22724/57344 train_time:13230139ms step_avg:582.21ms +step:22725/57344 train_time:13230156ms step_avg:582.19ms +step:22726/57344 train_time:13230413ms step_avg:582.17ms +step:22727/57344 train_time:13230984ms step_avg:582.17ms +grad accum step:5682/14336 +step:22728/57344 train_time:13232279ms step_avg:582.20ms +step:22729/57344 train_time:13232296ms step_avg:582.18ms +step:22730/57344 train_time:13232546ms step_avg:582.16ms +step:22731/57344 train_time:13233099ms step_avg:582.16ms +grad accum step:5683/14336 +step:22732/57344 train_time:13234387ms step_avg:582.19ms +step:22733/57344 train_time:13234404ms step_avg:582.17ms +step:22734/57344 train_time:13234650ms step_avg:582.15ms +step:22735/57344 train_time:13235200ms step_avg:582.15ms +grad accum step:5684/14336 +step:22736/57344 train_time:13236508ms step_avg:582.18ms +step:22737/57344 train_time:13236525ms step_avg:582.16ms +step:22738/57344 train_time:13237453ms step_avg:582.17ms +step:22739/57344 train_time:13238790ms step_avg:582.21ms +grad accum step:5685/14336 +step:22740/57344 train_time:13239764ms step_avg:582.22ms +step:22741/57344 train_time:13239777ms step_avg:582.20ms +step:22742/57344 train_time:13240002ms step_avg:582.18ms +step:22743/57344 train_time:13240538ms step_avg:582.18ms +grad accum step:5686/14336 +step:22744/57344 train_time:13241815ms step_avg:582.21ms +step:22745/57344 train_time:13241832ms step_avg:582.19ms +step:22746/57344 train_time:13242081ms step_avg:582.17ms +step:22747/57344 train_time:13242627ms step_avg:582.17ms +grad accum step:5687/14336 +step:22748/57344 train_time:13243915ms step_avg:582.20ms +step:22749/57344 train_time:13243932ms step_avg:582.18ms +step:22750/57344 train_time:13244182ms step_avg:582.16ms +step:22751/57344 train_time:13244729ms step_avg:582.16ms +grad accum step:5688/14336 +step:22752/57344 train_time:13246026ms step_avg:582.19ms +step:22753/57344 train_time:13246043ms step_avg:582.17ms +step:22754/57344 train_time:13246305ms step_avg:582.15ms +step:22755/57344 train_time:13246895ms step_avg:582.15ms +grad accum step:5689/14336 +step:22756/57344 train_time:13248217ms step_avg:582.19ms +step:22757/57344 train_time:13248234ms step_avg:582.16ms +step:22758/57344 train_time:13248480ms step_avg:582.15ms +step:22759/57344 train_time:13249017ms step_avg:582.14ms +grad accum step:5690/14336 +step:22760/57344 train_time:13250371ms step_avg:582.18ms +step:22761/57344 train_time:13250388ms step_avg:582.15ms +step:22762/57344 train_time:13250638ms step_avg:582.14ms +step:22763/57344 train_time:13251194ms step_avg:582.14ms +grad accum step:5691/14336 +step:22764/57344 train_time:13252476ms step_avg:582.17ms +step:22765/57344 train_time:13252493ms step_avg:582.14ms +step:22766/57344 train_time:13252742ms step_avg:582.13ms +step:22767/57344 train_time:13253295ms step_avg:582.13ms +grad accum step:5692/14336 +step:22768/57344 train_time:13254596ms step_avg:582.16ms +step:22769/57344 train_time:13254613ms step_avg:582.13ms +step:22770/57344 train_time:13254863ms step_avg:582.12ms +step:22771/57344 train_time:13255413ms step_avg:582.12ms +grad accum step:5693/14336 +step:22772/57344 train_time:13256731ms step_avg:582.15ms +step:22773/57344 train_time:13256747ms step_avg:582.13ms +step:22774/57344 train_time:13256995ms step_avg:582.11ms +step:22775/57344 train_time:13257549ms step_avg:582.11ms +grad accum step:5694/14336 +step:22776/57344 train_time:13258852ms step_avg:582.14ms +step:22777/57344 train_time:13258869ms step_avg:582.12ms +step:22778/57344 train_time:13259120ms step_avg:582.10ms +step:22779/57344 train_time:13259674ms step_avg:582.10ms +grad accum step:5695/14336 +step:22780/57344 train_time:13260946ms step_avg:582.13ms +step:22781/57344 train_time:13260963ms step_avg:582.11ms +step:22782/57344 train_time:13261211ms step_avg:582.09ms +step:22783/57344 train_time:13261769ms step_avg:582.09ms +grad accum step:5696/14336 +step:22784/57344 train_time:13263096ms step_avg:582.12ms +step:22784/57344 val_loss:6.400871 train_time:13263097ms step_avg:582.12ms +step:22785/57344 train_time:13263109ms step_avg:582.10ms +step:22786/57344 train_time:13263334ms step_avg:582.08ms +step:22787/57344 train_time:13263890ms step_avg:582.08ms +grad accum step:5697/14336 +step:22788/57344 train_time:13265201ms step_avg:582.11ms +step:22789/57344 train_time:13265218ms step_avg:582.09ms +step:22790/57344 train_time:13265466ms step_avg:582.07ms +step:22791/57344 train_time:13266008ms step_avg:582.07ms +grad accum step:5698/14336 +step:22792/57344 train_time:13267341ms step_avg:582.11ms +step:22793/57344 train_time:13267358ms step_avg:582.08ms +step:22794/57344 train_time:13267608ms step_avg:582.07ms +step:22795/57344 train_time:13268158ms step_avg:582.06ms +grad accum step:5699/14336 +step:22796/57344 train_time:13269477ms step_avg:582.10ms +step:22797/57344 train_time:13269494ms step_avg:582.07ms +step:22798/57344 train_time:13269747ms step_avg:582.06ms +step:22799/57344 train_time:13270307ms step_avg:582.06ms +grad accum step:5700/14336 +step:22800/57344 train_time:13271670ms step_avg:582.09ms +step:22801/57344 train_time:13271687ms step_avg:582.07ms +step:22802/57344 train_time:13271941ms step_avg:582.05ms +step:22803/57344 train_time:13272503ms step_avg:582.05ms +grad accum step:5701/14336 +step:22804/57344 train_time:13273818ms step_avg:582.08ms +step:22805/57344 train_time:13273834ms step_avg:582.06ms +step:22806/57344 train_time:13274084ms step_avg:582.04ms +step:22807/57344 train_time:13274649ms step_avg:582.04ms +grad accum step:5702/14336 +step:22808/57344 train_time:13275967ms step_avg:582.08ms +step:22809/57344 train_time:13275983ms step_avg:582.05ms +step:22810/57344 train_time:13276234ms step_avg:582.04ms +step:22811/57344 train_time:13276788ms step_avg:582.03ms +grad accum step:5703/14336 +step:22812/57344 train_time:13278078ms step_avg:582.07ms +step:22813/57344 train_time:13278095ms step_avg:582.04ms +step:22814/57344 train_time:13278341ms step_avg:582.03ms +step:22815/57344 train_time:13278886ms step_avg:582.02ms +grad accum step:5704/14336 +step:22816/57344 train_time:13280192ms step_avg:582.06ms +step:22817/57344 train_time:13280209ms step_avg:582.03ms +step:22818/57344 train_time:13280455ms step_avg:582.02ms +step:22819/57344 train_time:13280996ms step_avg:582.01ms +grad accum step:5705/14336 +step:22820/57344 train_time:13282298ms step_avg:582.05ms +step:22821/57344 train_time:13282314ms step_avg:582.02ms +step:22822/57344 train_time:13282566ms step_avg:582.01ms +step:22823/57344 train_time:13283121ms step_avg:582.01ms +grad accum step:5706/14336 +step:22824/57344 train_time:13284416ms step_avg:582.04ms +step:22825/57344 train_time:13284433ms step_avg:582.01ms +step:22826/57344 train_time:13284681ms step_avg:582.00ms +step:22827/57344 train_time:13285231ms step_avg:582.00ms +grad accum step:5707/14336 +step:22828/57344 train_time:13286540ms step_avg:582.03ms +step:22829/57344 train_time:13286557ms step_avg:582.00ms +step:22830/57344 train_time:13286804ms step_avg:581.99ms +step:22831/57344 train_time:13287346ms step_avg:581.99ms +grad accum step:5708/14336 +step:22832/57344 train_time:13288644ms step_avg:582.02ms +step:22833/57344 train_time:13288661ms step_avg:581.99ms +step:22834/57344 train_time:13288917ms step_avg:581.98ms +step:22835/57344 train_time:13289483ms step_avg:581.98ms +grad accum step:5709/14336 +step:22836/57344 train_time:13290771ms step_avg:582.01ms +step:22837/57344 train_time:13290787ms step_avg:581.98ms +step:22838/57344 train_time:13291038ms step_avg:581.97ms +step:22839/57344 train_time:13291594ms step_avg:581.97ms +grad accum step:5710/14336 +step:22840/57344 train_time:13292937ms step_avg:582.00ms +step:22841/57344 train_time:13292954ms step_avg:581.98ms +step:22842/57344 train_time:13293208ms step_avg:581.96ms +step:22843/57344 train_time:13293781ms step_avg:581.96ms +grad accum step:5711/14336 +step:22844/57344 train_time:13295137ms step_avg:582.00ms +step:22845/57344 train_time:13295153ms step_avg:581.97ms +step:22846/57344 train_time:13295407ms step_avg:581.96ms +step:22847/57344 train_time:13295966ms step_avg:581.96ms +grad accum step:5712/14336 +step:22848/57344 train_time:13297265ms step_avg:581.99ms +step:22848/57344 val_loss:6.387420 train_time:13297265ms step_avg:581.99ms +step:22849/57344 train_time:13297278ms step_avg:581.96ms +step:22850/57344 train_time:13297498ms step_avg:581.95ms +step:22851/57344 train_time:13298040ms step_avg:581.95ms +grad accum step:5713/14336 +step:22852/57344 train_time:13299319ms step_avg:581.98ms +step:22853/57344 train_time:13299336ms step_avg:581.95ms +step:22854/57344 train_time:13299582ms step_avg:581.94ms +step:22855/57344 train_time:13300133ms step_avg:581.94ms +grad accum step:5714/14336 +step:22856/57344 train_time:13301470ms step_avg:581.97ms +step:22857/57344 train_time:13301487ms step_avg:581.94ms +step:22858/57344 train_time:13301742ms step_avg:581.93ms +step:22859/57344 train_time:13302305ms step_avg:581.93ms +grad accum step:5715/14336 +step:22860/57344 train_time:13303604ms step_avg:581.96ms +step:22861/57344 train_time:13303621ms step_avg:581.94ms +step:22862/57344 train_time:13303865ms step_avg:581.92ms +step:22863/57344 train_time:13304413ms step_avg:581.92ms +grad accum step:5716/14336 +step:22864/57344 train_time:13305716ms step_avg:581.95ms +step:22865/57344 train_time:13305733ms step_avg:581.93ms +step:22866/57344 train_time:13305980ms step_avg:581.91ms +step:22867/57344 train_time:13306522ms step_avg:581.91ms +grad accum step:5717/14336 +step:22868/57344 train_time:13307845ms step_avg:581.94ms +step:22869/57344 train_time:13307862ms step_avg:581.92ms +step:22870/57344 train_time:13308109ms step_avg:581.90ms +step:22871/57344 train_time:13308660ms step_avg:581.90ms +grad accum step:5718/14336 +step:22872/57344 train_time:13309990ms step_avg:581.93ms +step:22873/57344 train_time:13310007ms step_avg:581.91ms +step:22874/57344 train_time:13310253ms step_avg:581.89ms +step:22875/57344 train_time:13310802ms step_avg:581.89ms +grad accum step:5719/14336 +step:22876/57344 train_time:13312098ms step_avg:581.92ms +step:22877/57344 train_time:13312115ms step_avg:581.90ms +step:22878/57344 train_time:13312361ms step_avg:581.88ms +step:22879/57344 train_time:13312909ms step_avg:581.88ms +grad accum step:5720/14336 +step:22880/57344 train_time:13314224ms step_avg:581.92ms +step:22881/57344 train_time:13314241ms step_avg:581.89ms +step:22882/57344 train_time:13314488ms step_avg:581.88ms +step:22883/57344 train_time:13315030ms step_avg:581.87ms +grad accum step:5721/14336 +step:22884/57344 train_time:13316327ms step_avg:581.91ms +step:22885/57344 train_time:13316344ms step_avg:581.88ms +step:22886/57344 train_time:13316588ms step_avg:581.87ms +step:22887/57344 train_time:13317130ms step_avg:581.86ms +grad accum step:5722/14336 +step:22888/57344 train_time:13318486ms step_avg:581.90ms +step:22889/57344 train_time:13318503ms step_avg:581.87ms +step:22890/57344 train_time:13318751ms step_avg:581.86ms +step:22891/57344 train_time:13319304ms step_avg:581.86ms +grad accum step:5723/14336 +step:22892/57344 train_time:13320598ms step_avg:581.89ms +step:22893/57344 train_time:13320615ms step_avg:581.86ms +step:22894/57344 train_time:13320864ms step_avg:581.85ms +step:22895/57344 train_time:13321419ms step_avg:581.85ms +grad accum step:5724/14336 +step:22896/57344 train_time:13322742ms step_avg:581.88ms +step:22897/57344 train_time:13322759ms step_avg:581.86ms +step:22898/57344 train_time:13323010ms step_avg:581.84ms +step:22899/57344 train_time:13323575ms step_avg:581.84ms +grad accum step:5725/14336 +step:22900/57344 train_time:13324901ms step_avg:581.87ms +step:22901/57344 train_time:13324919ms step_avg:581.85ms +step:22902/57344 train_time:13325166ms step_avg:581.83ms +step:22903/57344 train_time:13325716ms step_avg:581.83ms +grad accum step:5726/14336 +step:22904/57344 train_time:13327030ms step_avg:581.86ms +step:22905/57344 train_time:13327047ms step_avg:581.84ms +step:22906/57344 train_time:13327298ms step_avg:581.83ms +step:22907/57344 train_time:13327845ms step_avg:581.82ms +grad accum step:5727/14336 +step:22908/57344 train_time:13329143ms step_avg:581.86ms +step:22909/57344 train_time:13329159ms step_avg:581.83ms +step:22910/57344 train_time:13329407ms step_avg:581.82ms +step:22911/57344 train_time:13329950ms step_avg:581.81ms +grad accum step:5728/14336 +step:22912/57344 train_time:13331269ms step_avg:581.85ms +step:22912/57344 val_loss:6.355561 train_time:13331270ms step_avg:581.85ms +step:22913/57344 train_time:13331282ms step_avg:581.82ms +step:22914/57344 train_time:13331504ms step_avg:581.81ms +step:22915/57344 train_time:13332055ms step_avg:581.80ms +grad accum step:5729/14336 +step:22916/57344 train_time:13333352ms step_avg:581.84ms +step:22917/57344 train_time:13333369ms step_avg:581.81ms +step:22918/57344 train_time:13333628ms step_avg:581.80ms +step:22919/57344 train_time:13334207ms step_avg:581.80ms +grad accum step:5730/14336 +step:22920/57344 train_time:13335492ms step_avg:581.83ms +step:22921/57344 train_time:13335509ms step_avg:581.80ms +step:22922/57344 train_time:13335756ms step_avg:581.79ms +step:22923/57344 train_time:13336304ms step_avg:581.79ms +grad accum step:5731/14336 +step:22924/57344 train_time:13337603ms step_avg:581.82ms +step:22925/57344 train_time:13337620ms step_avg:581.79ms +step:22926/57344 train_time:13337868ms step_avg:581.78ms +step:22927/57344 train_time:13338419ms step_avg:581.78ms +grad accum step:5732/14336 +step:22928/57344 train_time:13339741ms step_avg:581.81ms +step:22929/57344 train_time:13339758ms step_avg:581.79ms +step:22930/57344 train_time:13340005ms step_avg:581.77ms +step:22931/57344 train_time:13340549ms step_avg:581.77ms +grad accum step:5733/14336 +step:22932/57344 train_time:13341866ms step_avg:581.80ms +step:22933/57344 train_time:13341882ms step_avg:581.78ms +step:22934/57344 train_time:13342132ms step_avg:581.76ms +step:22935/57344 train_time:13342685ms step_avg:581.76ms +grad accum step:5734/14336 +step:22936/57344 train_time:13344005ms step_avg:581.79ms +step:22937/57344 train_time:13344022ms step_avg:581.77ms +step:22938/57344 train_time:13344268ms step_avg:581.75ms +step:22939/57344 train_time:13344815ms step_avg:581.75ms +grad accum step:5735/14336 +step:22940/57344 train_time:13346121ms step_avg:581.78ms +step:22941/57344 train_time:13346138ms step_avg:581.76ms +step:22942/57344 train_time:13346386ms step_avg:581.74ms +step:22943/57344 train_time:13346938ms step_avg:581.74ms +grad accum step:5736/14336 +step:22944/57344 train_time:13348239ms step_avg:581.77ms +step:22945/57344 train_time:13348256ms step_avg:581.75ms +step:22946/57344 train_time:13348510ms step_avg:581.74ms +step:22947/57344 train_time:13349071ms step_avg:581.73ms +grad accum step:5737/14336 +step:22948/57344 train_time:13350404ms step_avg:581.77ms +step:22949/57344 train_time:13350420ms step_avg:581.74ms +step:22950/57344 train_time:13350665ms step_avg:581.73ms +step:22951/57344 train_time:13351205ms step_avg:581.73ms +grad accum step:5738/14336 +step:22952/57344 train_time:13352482ms step_avg:581.76ms +step:22953/57344 train_time:13352499ms step_avg:581.73ms +step:22954/57344 train_time:13352752ms step_avg:581.72ms +step:22955/57344 train_time:13353316ms step_avg:581.72ms +grad accum step:5739/14336 +step:22956/57344 train_time:13354619ms step_avg:581.75ms +step:22957/57344 train_time:13354636ms step_avg:581.72ms +step:22958/57344 train_time:13354892ms step_avg:581.71ms +step:22959/57344 train_time:13355464ms step_avg:581.71ms +grad accum step:5740/14336 +step:22960/57344 train_time:13356777ms step_avg:581.74ms +step:22961/57344 train_time:13356794ms step_avg:581.72ms +step:22962/57344 train_time:13357049ms step_avg:581.70ms +step:22963/57344 train_time:13357612ms step_avg:581.70ms +grad accum step:5741/14336 +step:22964/57344 train_time:13358909ms step_avg:581.73ms +step:22965/57344 train_time:13358926ms step_avg:581.71ms +step:22966/57344 train_time:13359170ms step_avg:581.69ms +step:22967/57344 train_time:13359710ms step_avg:581.69ms +grad accum step:5742/14336 +step:22968/57344 train_time:13361005ms step_avg:581.72ms +step:22969/57344 train_time:13361022ms step_avg:581.70ms +step:22970/57344 train_time:13361281ms step_avg:581.68ms +step:22971/57344 train_time:13361868ms step_avg:581.68ms +grad accum step:5743/14336 +step:22972/57344 train_time:13363223ms step_avg:581.72ms +step:22973/57344 train_time:13363239ms step_avg:581.69ms +step:22974/57344 train_time:13363496ms step_avg:581.68ms +step:22975/57344 train_time:13364066ms step_avg:581.68ms +grad accum step:5744/14336 +step:22976/57344 train_time:13365365ms step_avg:581.71ms +step:22976/57344 val_loss:6.372940 train_time:13365365ms step_avg:581.71ms +step:22977/57344 train_time:13365378ms step_avg:581.69ms +step:22978/57344 train_time:13365600ms step_avg:581.67ms +step:22979/57344 train_time:13366153ms step_avg:581.67ms +grad accum step:5745/14336 +step:22980/57344 train_time:13367514ms step_avg:581.70ms +step:22981/57344 train_time:13367531ms step_avg:581.68ms +step:22982/57344 train_time:13367779ms step_avg:581.66ms +step:22983/57344 train_time:13368324ms step_avg:581.66ms +grad accum step:5746/14336 +step:22984/57344 train_time:13369636ms step_avg:581.69ms +step:22985/57344 train_time:13369653ms step_avg:581.67ms +step:22986/57344 train_time:13369904ms step_avg:581.65ms +step:22987/57344 train_time:13370458ms step_avg:581.65ms +grad accum step:5747/14336 +step:22988/57344 train_time:13371754ms step_avg:581.68ms +step:22989/57344 train_time:13371771ms step_avg:581.66ms +step:22990/57344 train_time:13372016ms step_avg:581.64ms +step:22991/57344 train_time:13372559ms step_avg:581.64ms +grad accum step:5748/14336 +step:22992/57344 train_time:13373861ms step_avg:581.67ms +step:22993/57344 train_time:13373878ms step_avg:581.65ms +step:22994/57344 train_time:13374123ms step_avg:581.64ms +step:22995/57344 train_time:13374663ms step_avg:581.63ms +grad accum step:5749/14336 +step:22996/57344 train_time:13375935ms step_avg:581.66ms +step:22997/57344 train_time:13375952ms step_avg:581.64ms +step:22998/57344 train_time:13376200ms step_avg:581.62ms +step:22999/57344 train_time:13376751ms step_avg:581.62ms +grad accum step:5750/14336 +step:23000/57344 train_time:13378027ms step_avg:581.65ms +step:23001/57344 train_time:13378043ms step_avg:581.63ms +step:23002/57344 train_time:13378292ms step_avg:581.61ms +step:23003/57344 train_time:13378839ms step_avg:581.61ms +grad accum step:5751/14336 +step:23004/57344 train_time:13380161ms step_avg:581.64ms +step:23005/57344 train_time:13380178ms step_avg:581.62ms +step:23006/57344 train_time:13380429ms step_avg:581.61ms +step:23007/57344 train_time:13380990ms step_avg:581.61ms +grad accum step:5752/14336 +step:23008/57344 train_time:13382317ms step_avg:581.64ms +step:23009/57344 train_time:13382334ms step_avg:581.61ms +step:23010/57344 train_time:13382583ms step_avg:581.60ms +step:23011/57344 train_time:13383144ms step_avg:581.60ms +grad accum step:5753/14336 +step:23012/57344 train_time:13384507ms step_avg:581.63ms +step:23013/57344 train_time:13384524ms step_avg:581.61ms +step:23014/57344 train_time:13384781ms step_avg:581.59ms +step:23015/57344 train_time:13385354ms step_avg:581.59ms +grad accum step:5754/14336 +step:23016/57344 train_time:13386647ms step_avg:581.62ms +step:23017/57344 train_time:13386664ms step_avg:581.60ms +step:23018/57344 train_time:13386908ms step_avg:581.58ms +step:23019/57344 train_time:13387453ms step_avg:581.58ms +grad accum step:5755/14336 +step:23020/57344 train_time:13388734ms step_avg:581.61ms +step:23021/57344 train_time:13388750ms step_avg:581.59ms +step:23022/57344 train_time:13388999ms step_avg:581.57ms +step:23023/57344 train_time:13389549ms step_avg:581.57ms +grad accum step:5756/14336 +step:23024/57344 train_time:13390881ms step_avg:581.61ms +step:23025/57344 train_time:13390898ms step_avg:581.58ms +step:23026/57344 train_time:13391145ms step_avg:581.57ms +step:23027/57344 train_time:13391689ms step_avg:581.56ms +grad accum step:5757/14336 +step:23028/57344 train_time:13393045ms step_avg:581.60ms +step:23029/57344 train_time:13393062ms step_avg:581.57ms +step:23030/57344 train_time:13393318ms step_avg:581.56ms +step:23031/57344 train_time:13393880ms step_avg:581.56ms +grad accum step:5758/14336 +step:23032/57344 train_time:13395172ms step_avg:581.59ms +step:23033/57344 train_time:13395189ms step_avg:581.57ms +step:23034/57344 train_time:13395438ms step_avg:581.55ms +step:23035/57344 train_time:13395996ms step_avg:581.55ms +grad accum step:5759/14336 +step:23036/57344 train_time:13397366ms step_avg:581.58ms +step:23037/57344 train_time:13397383ms step_avg:581.56ms +step:23038/57344 train_time:13397630ms step_avg:581.54ms +step:23039/57344 train_time:13398179ms step_avg:581.54ms +grad accum step:5760/14336 +step:23040/57344 train_time:13399520ms step_avg:581.58ms +step:23040/57344 val_loss:6.555444 train_time:13399520ms step_avg:581.58ms +step:23041/57344 train_time:13399533ms step_avg:581.55ms +step:23042/57344 train_time:13399765ms step_avg:581.54ms +step:23043/57344 train_time:13400334ms step_avg:581.54ms +grad accum step:5761/14336 +step:23044/57344 train_time:13401668ms step_avg:581.57ms +step:23045/57344 train_time:13401685ms step_avg:581.54ms +step:23046/57344 train_time:13401932ms step_avg:581.53ms +step:23047/57344 train_time:13402480ms step_avg:581.53ms +grad accum step:5762/14336 +step:23048/57344 train_time:13403805ms step_avg:581.56ms +step:23049/57344 train_time:13403821ms step_avg:581.54ms +step:23050/57344 train_time:13404070ms step_avg:581.52ms +step:23051/57344 train_time:13404616ms step_avg:581.52ms +grad accum step:5763/14336 +step:23052/57344 train_time:13405921ms step_avg:581.55ms +step:23053/57344 train_time:13405938ms step_avg:581.53ms +step:23054/57344 train_time:13406186ms step_avg:581.51ms +step:23055/57344 train_time:13406732ms step_avg:581.51ms +grad accum step:5764/14336 +step:23056/57344 train_time:13408005ms step_avg:581.54ms +step:23057/57344 train_time:13408022ms step_avg:581.52ms +step:23058/57344 train_time:13408269ms step_avg:581.50ms +step:23059/57344 train_time:13408815ms step_avg:581.50ms +grad accum step:5765/14336 +step:23060/57344 train_time:13410109ms step_avg:581.53ms +step:23061/57344 train_time:13410126ms step_avg:581.51ms +step:23062/57344 train_time:13410371ms step_avg:581.49ms +step:23063/57344 train_time:13410915ms step_avg:581.49ms +grad accum step:5766/14336 +step:23064/57344 train_time:13412232ms step_avg:581.52ms +step:23065/57344 train_time:13412249ms step_avg:581.50ms +step:23066/57344 train_time:13412498ms step_avg:581.48ms +step:23067/57344 train_time:13413052ms step_avg:581.48ms +grad accum step:5767/14336 +step:23068/57344 train_time:13414394ms step_avg:581.52ms +step:23069/57344 train_time:13414411ms step_avg:581.49ms +step:23070/57344 train_time:13414666ms step_avg:581.48ms +step:23071/57344 train_time:13415230ms step_avg:581.48ms +grad accum step:5768/14336 +step:23072/57344 train_time:13416566ms step_avg:581.51ms +step:23073/57344 train_time:13416583ms step_avg:581.48ms +step:23074/57344 train_time:13416832ms step_avg:581.47ms +step:23075/57344 train_time:13417379ms step_avg:581.47ms +grad accum step:5769/14336 +step:23076/57344 train_time:13418676ms step_avg:581.50ms +step:23077/57344 train_time:13418693ms step_avg:581.47ms +step:23078/57344 train_time:13418943ms step_avg:581.46ms +step:23079/57344 train_time:13419500ms step_avg:581.46ms +grad accum step:5770/14336 +step:23080/57344 train_time:13420795ms step_avg:581.49ms +step:23081/57344 train_time:13420812ms step_avg:581.47ms +step:23082/57344 train_time:13421060ms step_avg:581.45ms +step:23083/57344 train_time:13421608ms step_avg:581.45ms +grad accum step:5771/14336 +step:23084/57344 train_time:13422906ms step_avg:581.48ms +step:23085/57344 train_time:13422923ms step_avg:581.46ms +step:23086/57344 train_time:13423170ms step_avg:581.44ms +step:23087/57344 train_time:13423718ms step_avg:581.44ms +grad accum step:5772/14336 +step:23088/57344 train_time:13425039ms step_avg:581.47ms +step:23089/57344 train_time:13425055ms step_avg:581.45ms +step:23090/57344 train_time:13425307ms step_avg:581.43ms +step:23091/57344 train_time:13425862ms step_avg:581.43ms +grad accum step:5773/14336 +step:23092/57344 train_time:13427140ms step_avg:581.46ms +step:23093/57344 train_time:13427156ms step_avg:581.44ms +step:23094/57344 train_time:13427402ms step_avg:581.42ms +step:23095/57344 train_time:13427951ms step_avg:581.42ms +grad accum step:5774/14336 +step:23096/57344 train_time:13429264ms step_avg:581.45ms +step:23097/57344 train_time:13429281ms step_avg:581.43ms +step:23098/57344 train_time:13429537ms step_avg:581.42ms +step:23099/57344 train_time:13430102ms step_avg:581.41ms +grad accum step:5775/14336 +step:23100/57344 train_time:13431401ms step_avg:581.45ms +step:23101/57344 train_time:13431419ms step_avg:581.42ms +step:23102/57344 train_time:13431664ms step_avg:581.41ms +step:23103/57344 train_time:13432208ms step_avg:581.41ms +grad accum step:5776/14336 +step:23104/57344 train_time:13433496ms step_avg:581.44ms +step:23104/57344 val_loss:6.459857 train_time:13433496ms step_avg:581.44ms +step:23105/57344 train_time:13433509ms step_avg:581.41ms +step:23106/57344 train_time:13433732ms step_avg:581.40ms +step:23107/57344 train_time:13434273ms step_avg:581.39ms +grad accum step:5777/14336 +step:23108/57344 train_time:13435588ms step_avg:581.43ms +step:23109/57344 train_time:13435605ms step_avg:581.40ms +step:23110/57344 train_time:13435853ms step_avg:581.39ms +step:23111/57344 train_time:13436399ms step_avg:581.39ms +grad accum step:5778/14336 +step:23112/57344 train_time:13437710ms step_avg:581.42ms +step:23113/57344 train_time:13437727ms step_avg:581.39ms +step:23114/57344 train_time:13437979ms step_avg:581.38ms +step:23115/57344 train_time:13438550ms step_avg:581.38ms +grad accum step:5779/14336 +step:23116/57344 train_time:13439917ms step_avg:581.41ms +step:23117/57344 train_time:13439933ms step_avg:581.39ms +step:23118/57344 train_time:13440181ms step_avg:581.37ms +step:23119/57344 train_time:13440734ms step_avg:581.37ms +grad accum step:5780/14336 +step:23120/57344 train_time:13442038ms step_avg:581.40ms +step:23121/57344 train_time:13442055ms step_avg:581.38ms +step:23122/57344 train_time:13442304ms step_avg:581.36ms +step:23123/57344 train_time:13442850ms step_avg:581.36ms +grad accum step:5781/14336 +step:23124/57344 train_time:13444147ms step_avg:581.39ms +step:23125/57344 train_time:13444164ms step_avg:581.37ms +step:23126/57344 train_time:13444411ms step_avg:581.35ms +step:23127/57344 train_time:13444956ms step_avg:581.35ms +grad accum step:5782/14336 +step:23128/57344 train_time:13446274ms step_avg:581.39ms +step:23129/57344 train_time:13446291ms step_avg:581.36ms +step:23130/57344 train_time:13446539ms step_avg:581.35ms +step:23131/57344 train_time:13447086ms step_avg:581.34ms +grad accum step:5783/14336 +step:23132/57344 train_time:13448391ms step_avg:581.38ms +step:23133/57344 train_time:13448409ms step_avg:581.35ms +step:23134/57344 train_time:13448669ms step_avg:581.34ms +step:23135/57344 train_time:13449254ms step_avg:581.34ms +grad accum step:5784/14336 +step:23136/57344 train_time:13450594ms step_avg:581.37ms +step:23137/57344 train_time:13450611ms step_avg:581.35ms +step:23138/57344 train_time:13450861ms step_avg:581.33ms +step:23139/57344 train_time:13451422ms step_avg:581.33ms +grad accum step:5785/14336 +step:23140/57344 train_time:13452717ms step_avg:581.36ms +step:23141/57344 train_time:13452734ms step_avg:581.34ms +step:23142/57344 train_time:13452985ms step_avg:581.32ms +step:23143/57344 train_time:13453547ms step_avg:581.32ms +grad accum step:5786/14336 +step:23144/57344 train_time:13454841ms step_avg:581.35ms +step:23145/57344 train_time:13454858ms step_avg:581.33ms +step:23146/57344 train_time:13455105ms step_avg:581.31ms +step:23147/57344 train_time:13455653ms step_avg:581.31ms +grad accum step:5787/14336 +step:23148/57344 train_time:13456969ms step_avg:581.34ms +step:23149/57344 train_time:13456986ms step_avg:581.32ms +step:23150/57344 train_time:13457235ms step_avg:581.31ms +step:23151/57344 train_time:13457784ms step_avg:581.30ms +grad accum step:5788/14336 +step:23152/57344 train_time:13459103ms step_avg:581.34ms +step:23153/57344 train_time:13459120ms step_avg:581.31ms +step:23154/57344 train_time:13459369ms step_avg:581.30ms +step:23155/57344 train_time:13459910ms step_avg:581.30ms +grad accum step:5789/14336 +step:23156/57344 train_time:13461186ms step_avg:581.33ms +step:23157/57344 train_time:13461203ms step_avg:581.30ms +step:23158/57344 train_time:13461455ms step_avg:581.29ms +step:23159/57344 train_time:13462011ms step_avg:581.29ms +grad accum step:5790/14336 +step:23160/57344 train_time:13463317ms step_avg:581.32ms +step:23161/57344 train_time:13463334ms step_avg:581.29ms +step:23162/57344 train_time:13463585ms step_avg:581.28ms +step:23163/57344 train_time:13464139ms step_avg:581.28ms +grad accum step:5791/14336 +step:23164/57344 train_time:13465426ms step_avg:581.31ms +step:23165/57344 train_time:13465443ms step_avg:581.28ms +step:23166/57344 train_time:13465695ms step_avg:581.27ms +step:23167/57344 train_time:13466245ms step_avg:581.27ms +grad accum step:5792/14336 +step:23168/57344 train_time:13467557ms step_avg:581.30ms +step:23168/57344 val_loss:6.471203 train_time:13467558ms step_avg:581.30ms +step:23169/57344 train_time:13467570ms step_avg:581.28ms +step:23170/57344 train_time:13467798ms step_avg:581.26ms +step:23171/57344 train_time:13468358ms step_avg:581.26ms +grad accum step:5793/14336 +step:23172/57344 train_time:13469656ms step_avg:581.29ms +step:23173/57344 train_time:13469673ms step_avg:581.27ms +step:23174/57344 train_time:13469919ms step_avg:581.25ms +step:23175/57344 train_time:13470461ms step_avg:581.25ms +grad accum step:5794/14336 +step:23176/57344 train_time:13471775ms step_avg:581.28ms +step:23177/57344 train_time:13471792ms step_avg:581.26ms +step:23178/57344 train_time:13472041ms step_avg:581.24ms +step:23179/57344 train_time:13472590ms step_avg:581.24ms +grad accum step:5795/14336 +step:23180/57344 train_time:13473904ms step_avg:581.27ms +step:23181/57344 train_time:13473921ms step_avg:581.25ms +step:23182/57344 train_time:13474170ms step_avg:581.23ms +step:23183/57344 train_time:13474719ms step_avg:581.23ms +grad accum step:5796/14336 +step:23184/57344 train_time:13476030ms step_avg:581.26ms +step:23185/57344 train_time:13476047ms step_avg:581.24ms +step:23186/57344 train_time:13476295ms step_avg:581.23ms +step:23187/57344 train_time:13476847ms step_avg:581.22ms +grad accum step:5797/14336 +step:23188/57344 train_time:13478160ms step_avg:581.26ms +step:23189/57344 train_time:13478176ms step_avg:581.23ms +step:23190/57344 train_time:13478426ms step_avg:581.22ms +step:23191/57344 train_time:13478989ms step_avg:581.22ms +grad accum step:5798/14336 +step:23192/57344 train_time:13480327ms step_avg:581.25ms +step:23193/57344 train_time:13480344ms step_avg:581.22ms +step:23194/57344 train_time:13480590ms step_avg:581.21ms +step:23195/57344 train_time:13481135ms step_avg:581.21ms +grad accum step:5799/14336 +step:23196/57344 train_time:13482456ms step_avg:581.24ms +step:23197/57344 train_time:13482473ms step_avg:581.22ms +step:23198/57344 train_time:13482725ms step_avg:581.20ms +step:23199/57344 train_time:13483292ms step_avg:581.20ms +grad accum step:5800/14336 +step:23200/57344 train_time:13484599ms step_avg:581.23ms +step:23201/57344 train_time:13484616ms step_avg:581.21ms +step:23202/57344 train_time:13484866ms step_avg:581.19ms +step:23203/57344 train_time:13485424ms step_avg:581.19ms +grad accum step:5801/14336 +step:23204/57344 train_time:13486728ms step_avg:581.22ms +step:23205/57344 train_time:13486745ms step_avg:581.20ms +step:23206/57344 train_time:13486993ms step_avg:581.19ms +step:23207/57344 train_time:13487538ms step_avg:581.18ms +grad accum step:5802/14336 +step:23208/57344 train_time:13488841ms step_avg:581.22ms +step:23209/57344 train_time:13488858ms step_avg:581.19ms +step:23210/57344 train_time:13489106ms step_avg:581.18ms +step:23211/57344 train_time:13489656ms step_avg:581.18ms +grad accum step:5803/14336 +step:23212/57344 train_time:13490964ms step_avg:581.21ms +step:23213/57344 train_time:13490981ms step_avg:581.18ms +step:23214/57344 train_time:13491237ms step_avg:581.17ms +step:23215/57344 train_time:13491797ms step_avg:581.17ms +grad accum step:5804/14336 +step:23216/57344 train_time:13493076ms step_avg:581.20ms +step:23217/57344 train_time:13493093ms step_avg:581.17ms +step:23218/57344 train_time:13493337ms step_avg:581.16ms +step:23219/57344 train_time:13493890ms step_avg:581.16ms +grad accum step:5805/14336 +step:23220/57344 train_time:13495228ms step_avg:581.19ms +step:23221/57344 train_time:13495245ms step_avg:581.17ms +step:23222/57344 train_time:13495497ms step_avg:581.15ms +step:23223/57344 train_time:13496054ms step_avg:581.15ms +grad accum step:5806/14336 +step:23224/57344 train_time:13497357ms step_avg:581.18ms +step:23225/57344 train_time:13497374ms step_avg:581.16ms +step:23226/57344 train_time:13497621ms step_avg:581.14ms +step:23227/57344 train_time:13498173ms step_avg:581.14ms +grad accum step:5807/14336 +step:23228/57344 train_time:13499485ms step_avg:581.17ms +step:23229/57344 train_time:13499502ms step_avg:581.15ms +step:23230/57344 train_time:13499751ms step_avg:581.13ms +step:23231/57344 train_time:13500299ms step_avg:581.13ms +grad accum step:5808/14336 +step:23232/57344 train_time:13501596ms step_avg:581.16ms +step:23232/57344 val_loss:6.495924 train_time:13501596ms step_avg:581.16ms +step:23233/57344 train_time:13501609ms step_avg:581.14ms +step:23234/57344 train_time:13501835ms step_avg:581.12ms +step:23235/57344 train_time:13502389ms step_avg:581.12ms +grad accum step:5809/14336 +step:23236/57344 train_time:13503730ms step_avg:581.16ms +step:23237/57344 train_time:13503747ms step_avg:581.13ms +step:23238/57344 train_time:13503991ms step_avg:581.12ms +step:23239/57344 train_time:13504544ms step_avg:581.12ms +grad accum step:5810/14336 +step:23240/57344 train_time:13505837ms step_avg:581.15ms +step:23241/57344 train_time:13505854ms step_avg:581.12ms +step:23242/57344 train_time:13506102ms step_avg:581.11ms +step:23243/57344 train_time:13506650ms step_avg:581.11ms +grad accum step:5811/14336 +step:23244/57344 train_time:13507942ms step_avg:581.14ms +step:23245/57344 train_time:13507959ms step_avg:581.11ms +step:23246/57344 train_time:13508205ms step_avg:581.10ms +step:23247/57344 train_time:13508750ms step_avg:581.10ms +grad accum step:5812/14336 +step:23248/57344 train_time:13510032ms step_avg:581.13ms +step:23249/57344 train_time:13510049ms step_avg:581.10ms +step:23250/57344 train_time:13510296ms step_avg:581.09ms +step:23251/57344 train_time:13510840ms step_avg:581.09ms +grad accum step:5813/14336 +step:23252/57344 train_time:13512120ms step_avg:581.12ms +step:23253/57344 train_time:13512137ms step_avg:581.09ms +step:23254/57344 train_time:13512386ms step_avg:581.08ms +step:23255/57344 train_time:13512939ms step_avg:581.08ms +grad accum step:5814/14336 +step:23256/57344 train_time:13514278ms step_avg:581.11ms +step:23257/57344 train_time:13514295ms step_avg:581.09ms +step:23258/57344 train_time:13514541ms step_avg:581.07ms +step:23259/57344 train_time:13515098ms step_avg:581.07ms +grad accum step:5815/14336 +step:23260/57344 train_time:13516420ms step_avg:581.10ms +step:23261/57344 train_time:13516437ms step_avg:581.08ms +step:23262/57344 train_time:13516687ms step_avg:581.06ms +step:23263/57344 train_time:13517242ms step_avg:581.06ms +grad accum step:5816/14336 +step:23264/57344 train_time:13518560ms step_avg:581.09ms +step:23265/57344 train_time:13518577ms step_avg:581.07ms +step:23266/57344 train_time:13518826ms step_avg:581.06ms +step:23267/57344 train_time:13519373ms step_avg:581.05ms +grad accum step:5817/14336 +step:23268/57344 train_time:13520714ms step_avg:581.09ms +step:23269/57344 train_time:13520730ms step_avg:581.06ms +step:23270/57344 train_time:13520976ms step_avg:581.05ms +step:23271/57344 train_time:13521522ms step_avg:581.05ms +grad accum step:5818/14336 +step:23272/57344 train_time:13522837ms step_avg:581.08ms +step:23273/57344 train_time:13522854ms step_avg:581.05ms +step:23274/57344 train_time:13523107ms step_avg:581.04ms +step:23275/57344 train_time:13523670ms step_avg:581.04ms +grad accum step:5819/14336 +step:23276/57344 train_time:13524997ms step_avg:581.07ms +step:23277/57344 train_time:13525013ms step_avg:581.05ms +step:23278/57344 train_time:13525259ms step_avg:581.03ms +step:23279/57344 train_time:13525797ms step_avg:581.03ms +grad accum step:5820/14336 +step:23280/57344 train_time:13527103ms step_avg:581.06ms +step:23281/57344 train_time:13527120ms step_avg:581.04ms +step:23282/57344 train_time:13527369ms step_avg:581.02ms +step:23283/57344 train_time:13527909ms step_avg:581.02ms +grad accum step:5821/14336 +step:23284/57344 train_time:13529186ms step_avg:581.05ms +step:23285/57344 train_time:13529204ms step_avg:581.03ms +step:23286/57344 train_time:13529450ms step_avg:581.01ms +step:23287/57344 train_time:13529997ms step_avg:581.01ms +grad accum step:5822/14336 +step:23288/57344 train_time:13531306ms step_avg:581.04ms +step:23289/57344 train_time:13531322ms step_avg:581.02ms +step:23290/57344 train_time:13531570ms step_avg:581.00ms +step:23291/57344 train_time:13532114ms step_avg:581.00ms +grad accum step:5823/14336 +step:23292/57344 train_time:13533415ms step_avg:581.03ms +step:23293/57344 train_time:13533432ms step_avg:581.01ms +step:23294/57344 train_time:13533682ms step_avg:580.99ms +step:23295/57344 train_time:13534230ms step_avg:580.99ms +grad accum step:5824/14336 +step:23296/57344 train_time:13535567ms step_avg:581.03ms +step:23296/57344 val_loss:6.520335 train_time:13535567ms step_avg:581.03ms +step:23297/57344 train_time:13535580ms step_avg:581.00ms +step:23298/57344 train_time:13535804ms step_avg:580.99ms +step:23299/57344 train_time:13536356ms step_avg:580.98ms +grad accum step:5825/14336 +step:23300/57344 train_time:13537651ms step_avg:581.02ms +step:23301/57344 train_time:13537668ms step_avg:580.99ms +step:23302/57344 train_time:13537914ms step_avg:580.98ms +step:23303/57344 train_time:13538460ms step_avg:580.97ms +grad accum step:5826/14336 +step:23304/57344 train_time:13539760ms step_avg:581.01ms +step:23305/57344 train_time:13539777ms step_avg:580.98ms +step:23306/57344 train_time:13540020ms step_avg:580.97ms +step:23307/57344 train_time:13540563ms step_avg:580.97ms +grad accum step:5827/14336 +step:23308/57344 train_time:13541867ms step_avg:581.00ms +step:23309/57344 train_time:13541884ms step_avg:580.97ms +step:23310/57344 train_time:13542133ms step_avg:580.96ms +step:23311/57344 train_time:13542675ms step_avg:580.96ms +grad accum step:5828/14336 +step:23312/57344 train_time:13543953ms step_avg:580.99ms +step:23313/57344 train_time:13543970ms step_avg:580.96ms +step:23314/57344 train_time:13544216ms step_avg:580.95ms +step:23315/57344 train_time:13544758ms step_avg:580.95ms +grad accum step:5829/14336 +step:23316/57344 train_time:13546061ms step_avg:580.98ms +step:23317/57344 train_time:13546078ms step_avg:580.95ms +step:23318/57344 train_time:13546325ms step_avg:580.94ms +step:23319/57344 train_time:13546871ms step_avg:580.94ms +grad accum step:5830/14336 +step:23320/57344 train_time:13548163ms step_avg:580.97ms +step:23321/57344 train_time:13548180ms step_avg:580.94ms +step:23322/57344 train_time:13548430ms step_avg:580.93ms +step:23323/57344 train_time:13548981ms step_avg:580.93ms +grad accum step:5831/14336 +step:23324/57344 train_time:13550259ms step_avg:580.96ms +step:23325/57344 train_time:13550276ms step_avg:580.93ms +step:23326/57344 train_time:13550529ms step_avg:580.92ms +step:23327/57344 train_time:13551090ms step_avg:580.92ms +grad accum step:5832/14336 +step:23328/57344 train_time:13552429ms step_avg:580.95ms +step:23329/57344 train_time:13552445ms step_avg:580.93ms +step:23330/57344 train_time:13552692ms step_avg:580.91ms +step:23331/57344 train_time:13553246ms step_avg:580.91ms +grad accum step:5833/14336 +step:23332/57344 train_time:13554559ms step_avg:580.94ms +step:23333/57344 train_time:13554575ms step_avg:580.92ms +step:23334/57344 train_time:13554821ms step_avg:580.90ms +step:23335/57344 train_time:13555369ms step_avg:580.90ms +grad accum step:5834/14336 +step:23336/57344 train_time:13556723ms step_avg:580.94ms +step:23337/57344 train_time:13556740ms step_avg:580.91ms +step:23338/57344 train_time:13556994ms step_avg:580.90ms +step:23339/57344 train_time:13557563ms step_avg:580.90ms +grad accum step:5835/14336 +step:23340/57344 train_time:13558884ms step_avg:580.93ms +step:23341/57344 train_time:13558900ms step_avg:580.90ms +step:23342/57344 train_time:13559152ms step_avg:580.89ms +step:23343/57344 train_time:13559713ms step_avg:580.89ms +grad accum step:5836/14336 +step:23344/57344 train_time:13561034ms step_avg:580.92ms +step:23345/57344 train_time:13561051ms step_avg:580.90ms +step:23346/57344 train_time:13561300ms step_avg:580.88ms +step:23347/57344 train_time:13561858ms step_avg:580.88ms +grad accum step:5837/14336 +step:23348/57344 train_time:13563158ms step_avg:580.91ms +step:23349/57344 train_time:13563175ms step_avg:580.89ms +step:23350/57344 train_time:13563423ms step_avg:580.87ms +step:23351/57344 train_time:13563967ms step_avg:580.87ms +grad accum step:5838/14336 +step:23352/57344 train_time:13565238ms step_avg:580.90ms +step:23353/57344 train_time:13565255ms step_avg:580.88ms +step:23354/57344 train_time:13565505ms step_avg:580.86ms +step:23355/57344 train_time:13566074ms step_avg:580.86ms +grad accum step:5839/14336 +step:23356/57344 train_time:13568570ms step_avg:580.95ms +step:23357/57344 train_time:13568583ms step_avg:580.92ms +step:23358/57344 train_time:13568798ms step_avg:580.91ms +step:23359/57344 train_time:13569350ms step_avg:580.90ms +grad accum step:5840/14336 +step:23360/57344 train_time:13570654ms step_avg:580.94ms +step:23360/57344 val_loss:6.560977 train_time:13570655ms step_avg:580.94ms +step:23361/57344 train_time:13570667ms step_avg:580.91ms +step:23362/57344 train_time:13570895ms step_avg:580.90ms +step:23363/57344 train_time:13571452ms step_avg:580.90ms +grad accum step:5841/14336 +step:23364/57344 train_time:13572758ms step_avg:580.93ms +step:23365/57344 train_time:13572775ms step_avg:580.90ms +step:23366/57344 train_time:13573023ms step_avg:580.89ms +step:23367/57344 train_time:13573570ms step_avg:580.89ms +grad accum step:5842/14336 +step:23368/57344 train_time:13574845ms step_avg:580.92ms +step:23369/57344 train_time:13574862ms step_avg:580.89ms +step:23370/57344 train_time:13575110ms step_avg:580.88ms +step:23371/57344 train_time:13575658ms step_avg:580.88ms +grad accum step:5843/14336 +step:23372/57344 train_time:13576970ms step_avg:580.91ms +step:23373/57344 train_time:13576987ms step_avg:580.88ms +step:23374/57344 train_time:13577232ms step_avg:580.87ms +step:23375/57344 train_time:13577789ms step_avg:580.87ms +grad accum step:5844/14336 +step:23376/57344 train_time:13579109ms step_avg:580.90ms +step:23377/57344 train_time:13579125ms step_avg:580.88ms +step:23378/57344 train_time:13579369ms step_avg:580.86ms +step:23379/57344 train_time:13579917ms step_avg:580.86ms +grad accum step:5845/14336 +step:23380/57344 train_time:13581215ms step_avg:580.89ms +step:23381/57344 train_time:13581233ms step_avg:580.87ms +step:23382/57344 train_time:13581477ms step_avg:580.85ms +step:23383/57344 train_time:13582020ms step_avg:580.85ms +grad accum step:5846/14336 +step:23384/57344 train_time:13583325ms step_avg:580.88ms +step:23385/57344 train_time:13583342ms step_avg:580.86ms +step:23386/57344 train_time:13583588ms step_avg:580.84ms +step:23387/57344 train_time:13584138ms step_avg:580.84ms +grad accum step:5847/14336 +step:23388/57344 train_time:13585462ms step_avg:580.87ms +step:23389/57344 train_time:13585479ms step_avg:580.85ms +step:23390/57344 train_time:13585728ms step_avg:580.83ms +step:23391/57344 train_time:13586285ms step_avg:580.83ms +grad accum step:5848/14336 +step:23392/57344 train_time:13587586ms step_avg:580.86ms +step:23393/57344 train_time:13587603ms step_avg:580.84ms +step:23394/57344 train_time:13587852ms step_avg:580.83ms +step:23395/57344 train_time:13588393ms step_avg:580.82ms +grad accum step:5849/14336 +step:23396/57344 train_time:13589668ms step_avg:580.85ms +step:23397/57344 train_time:13589685ms step_avg:580.83ms +step:23398/57344 train_time:13589941ms step_avg:580.82ms +step:23399/57344 train_time:13590513ms step_avg:580.82ms +grad accum step:5850/14336 +step:23400/57344 train_time:13591801ms step_avg:580.85ms +step:23401/57344 train_time:13591818ms step_avg:580.82ms +step:23402/57344 train_time:13592063ms step_avg:580.81ms +step:23403/57344 train_time:13592603ms step_avg:580.81ms +grad accum step:5851/14336 +step:23404/57344 train_time:13593923ms step_avg:580.84ms +step:23405/57344 train_time:13593940ms step_avg:580.81ms +step:23406/57344 train_time:13594186ms step_avg:580.80ms +step:23407/57344 train_time:13594730ms step_avg:580.80ms +grad accum step:5852/14336 +step:23408/57344 train_time:13596026ms step_avg:580.83ms +step:23409/57344 train_time:13596043ms step_avg:580.80ms +step:23410/57344 train_time:13596290ms step_avg:580.79ms +step:23411/57344 train_time:13596833ms step_avg:580.79ms +grad accum step:5853/14336 +step:23412/57344 train_time:13598114ms step_avg:580.82ms +step:23413/57344 train_time:13598131ms step_avg:580.79ms +step:23414/57344 train_time:13598377ms step_avg:580.78ms +step:23415/57344 train_time:13598924ms step_avg:580.78ms +grad accum step:5854/14336 +step:23416/57344 train_time:13600297ms step_avg:580.81ms +step:23417/57344 train_time:13600313ms step_avg:580.79ms +step:23418/57344 train_time:13600565ms step_avg:580.77ms +step:23419/57344 train_time:13601126ms step_avg:580.77ms +grad accum step:5855/14336 +step:23420/57344 train_time:13602439ms step_avg:580.80ms +step:23421/57344 train_time:13602455ms step_avg:580.78ms +step:23422/57344 train_time:13602710ms step_avg:580.77ms +step:23423/57344 train_time:13603287ms step_avg:580.77ms +grad accum step:5856/14336 +step:23424/57344 train_time:13604604ms step_avg:580.80ms +step:23424/57344 val_loss:6.593538 train_time:13604604ms step_avg:580.80ms +step:23425/57344 train_time:13604617ms step_avg:580.77ms +step:23426/57344 train_time:13604841ms step_avg:580.76ms +step:23427/57344 train_time:13605386ms step_avg:580.76ms +grad accum step:5857/14336 +step:23428/57344 train_time:13606708ms step_avg:580.79ms +step:23429/57344 train_time:13606725ms step_avg:580.76ms +step:23430/57344 train_time:13606973ms step_avg:580.75ms +step:23431/57344 train_time:13607527ms step_avg:580.75ms +grad accum step:5858/14336 +step:23432/57344 train_time:13608835ms step_avg:580.78ms +step:23433/57344 train_time:13608851ms step_avg:580.76ms +step:23434/57344 train_time:13609101ms step_avg:580.74ms +step:23435/57344 train_time:13609658ms step_avg:580.74ms +grad accum step:5859/14336 +step:23436/57344 train_time:13611028ms step_avg:580.77ms +step:23437/57344 train_time:13611045ms step_avg:580.75ms +step:23438/57344 train_time:13611291ms step_avg:580.74ms +step:23439/57344 train_time:13611848ms step_avg:580.74ms +grad accum step:5860/14336 +step:23440/57344 train_time:13613178ms step_avg:580.77ms +step:23441/57344 train_time:13613195ms step_avg:580.74ms +step:23442/57344 train_time:13613451ms step_avg:580.73ms +step:23443/57344 train_time:13614032ms step_avg:580.73ms +grad accum step:5861/14336 +step:23444/57344 train_time:13615344ms step_avg:580.76ms +step:23445/57344 train_time:13615361ms step_avg:580.74ms +step:23446/57344 train_time:13615611ms step_avg:580.72ms +step:23447/57344 train_time:13616162ms step_avg:580.72ms +grad accum step:5862/14336 +step:23448/57344 train_time:13617505ms step_avg:580.75ms +step:23449/57344 train_time:13617522ms step_avg:580.73ms +step:23450/57344 train_time:13617766ms step_avg:580.71ms +step:23451/57344 train_time:13618306ms step_avg:580.71ms +grad accum step:5863/14336 +step:23452/57344 train_time:13619601ms step_avg:580.74ms +step:23453/57344 train_time:13619618ms step_avg:580.72ms +step:23454/57344 train_time:13619863ms step_avg:580.71ms +step:23455/57344 train_time:13620410ms step_avg:580.70ms +grad accum step:5864/14336 +step:23456/57344 train_time:13621692ms step_avg:580.73ms +step:23457/57344 train_time:13621708ms step_avg:580.71ms +step:23458/57344 train_time:13621954ms step_avg:580.70ms +step:23459/57344 train_time:13622498ms step_avg:580.69ms +grad accum step:5865/14336 +step:23460/57344 train_time:13623792ms step_avg:580.72ms +step:23461/57344 train_time:13623809ms step_avg:580.70ms +step:23462/57344 train_time:13624054ms step_avg:580.69ms +step:23463/57344 train_time:13624597ms step_avg:580.68ms +grad accum step:5866/14336 +step:23464/57344 train_time:13625879ms step_avg:580.71ms +step:23465/57344 train_time:13625896ms step_avg:580.69ms +step:23466/57344 train_time:13626145ms step_avg:580.68ms +step:23467/57344 train_time:13626687ms step_avg:580.67ms +grad accum step:5867/14336 +step:23468/57344 train_time:13628006ms step_avg:580.71ms +step:23469/57344 train_time:13628023ms step_avg:580.68ms +step:23470/57344 train_time:13628265ms step_avg:580.67ms +step:23471/57344 train_time:13628808ms step_avg:580.67ms +grad accum step:5868/14336 +step:23472/57344 train_time:13630130ms step_avg:580.70ms +step:23473/57344 train_time:13630147ms step_avg:580.67ms +step:23474/57344 train_time:13630392ms step_avg:580.66ms +step:23475/57344 train_time:13630933ms step_avg:580.66ms +grad accum step:5869/14336 +step:23476/57344 train_time:13632248ms step_avg:580.69ms +step:23477/57344 train_time:13632265ms step_avg:580.66ms +step:23478/57344 train_time:13632516ms step_avg:580.65ms +step:23479/57344 train_time:13633081ms step_avg:580.65ms +grad accum step:5870/14336 +step:23480/57344 train_time:13634410ms step_avg:580.68ms +step:23481/57344 train_time:13634427ms step_avg:580.66ms +step:23482/57344 train_time:13634686ms step_avg:580.64ms +step:23483/57344 train_time:13635270ms step_avg:580.64ms +grad accum step:5871/14336 +step:23484/57344 train_time:13636556ms step_avg:580.67ms +step:23485/57344 train_time:13636573ms step_avg:580.65ms +step:23486/57344 train_time:13636817ms step_avg:580.64ms +step:23487/57344 train_time:13637362ms step_avg:580.63ms +grad accum step:5872/14336 +step:23488/57344 train_time:13638685ms step_avg:580.67ms +step:23488/57344 val_loss:6.620380 train_time:13638686ms step_avg:580.67ms +step:23489/57344 train_time:13638698ms step_avg:580.64ms +step:23490/57344 train_time:13638922ms step_avg:580.63ms +step:23491/57344 train_time:13639476ms step_avg:580.63ms +grad accum step:5873/14336 +step:23492/57344 train_time:13640776ms step_avg:580.66ms +step:23493/57344 train_time:13640793ms step_avg:580.63ms +step:23494/57344 train_time:13641037ms step_avg:580.62ms +step:23495/57344 train_time:13641584ms step_avg:580.62ms +grad accum step:5874/14336 +step:23496/57344 train_time:13642931ms step_avg:580.65ms +step:23497/57344 train_time:13642948ms step_avg:580.63ms +step:23498/57344 train_time:13643201ms step_avg:580.61ms +step:23499/57344 train_time:13643760ms step_avg:580.61ms +grad accum step:5875/14336 +step:23500/57344 train_time:13645059ms step_avg:580.64ms +step:23501/57344 train_time:13645077ms step_avg:580.62ms +step:23502/57344 train_time:13645325ms step_avg:580.60ms +step:23503/57344 train_time:13645873ms step_avg:580.60ms +grad accum step:5876/14336 +step:23504/57344 train_time:13647212ms step_avg:580.63ms +step:23505/57344 train_time:13647229ms step_avg:580.61ms +step:23506/57344 train_time:13647474ms step_avg:580.60ms +step:23507/57344 train_time:13648016ms step_avg:580.59ms +grad accum step:5877/14336 +step:23508/57344 train_time:13649312ms step_avg:580.62ms +step:23509/57344 train_time:13649329ms step_avg:580.60ms +step:23510/57344 train_time:13649577ms step_avg:580.59ms +step:23511/57344 train_time:13650132ms step_avg:580.58ms +grad accum step:5878/14336 +step:23512/57344 train_time:13651440ms step_avg:580.62ms +step:23513/57344 train_time:13651457ms step_avg:580.59ms +step:23514/57344 train_time:13651703ms step_avg:580.58ms +step:23515/57344 train_time:13652247ms step_avg:580.58ms +grad accum step:5879/14336 +step:23516/57344 train_time:13653529ms step_avg:580.61ms +step:23517/57344 train_time:13653546ms step_avg:580.58ms +step:23518/57344 train_time:13653799ms step_avg:580.57ms +step:23519/57344 train_time:13654357ms step_avg:580.57ms +grad accum step:5880/14336 +step:23520/57344 train_time:13655653ms step_avg:580.60ms +step:23521/57344 train_time:13655670ms step_avg:580.57ms +step:23522/57344 train_time:13655918ms step_avg:580.56ms +step:23523/57344 train_time:13656477ms step_avg:580.56ms +grad accum step:5881/14336 +step:23524/57344 train_time:13657817ms step_avg:580.59ms +step:23525/57344 train_time:13657834ms step_avg:580.57ms +step:23526/57344 train_time:13658083ms step_avg:580.55ms +step:23527/57344 train_time:13658631ms step_avg:580.55ms +grad accum step:5882/14336 +step:23528/57344 train_time:13659920ms step_avg:580.58ms +step:23529/57344 train_time:13659937ms step_avg:580.56ms +step:23530/57344 train_time:13660183ms step_avg:580.54ms +step:23531/57344 train_time:13660729ms step_avg:580.54ms +grad accum step:5883/14336 +step:23532/57344 train_time:13662048ms step_avg:580.57ms +step:23533/57344 train_time:13662064ms step_avg:580.55ms +step:23534/57344 train_time:13662318ms step_avg:580.54ms +step:23535/57344 train_time:13662878ms step_avg:580.53ms +grad accum step:5884/14336 +step:23536/57344 train_time:13664170ms step_avg:580.56ms +step:23537/57344 train_time:13664187ms step_avg:580.54ms +step:23538/57344 train_time:13664434ms step_avg:580.53ms +step:23539/57344 train_time:13664971ms step_avg:580.52ms +grad accum step:5885/14336 +step:23540/57344 train_time:13666251ms step_avg:580.55ms +step:23541/57344 train_time:13666268ms step_avg:580.53ms +step:23542/57344 train_time:13666511ms step_avg:580.52ms +step:23543/57344 train_time:13667055ms step_avg:580.51ms +grad accum step:5886/14336 +step:23544/57344 train_time:13668397ms step_avg:580.55ms +step:23545/57344 train_time:13668410ms step_avg:580.52ms +step:23546/57344 train_time:13668635ms step_avg:580.51ms +step:23547/57344 train_time:13669182ms step_avg:580.51ms +grad accum step:5887/14336 +step:23548/57344 train_time:13670464ms step_avg:580.54ms +step:23549/57344 train_time:13670481ms step_avg:580.51ms +step:23550/57344 train_time:13670727ms step_avg:580.50ms +step:23551/57344 train_time:13671271ms step_avg:580.50ms +grad accum step:5888/14336 +step:23552/57344 train_time:13672575ms step_avg:580.53ms +step:23552/57344 val_loss:6.638693 train_time:13672576ms step_avg:580.53ms +step:23553/57344 train_time:13673642ms step_avg:580.55ms +step:23554/57344 train_time:13673870ms step_avg:580.53ms +step:23555/57344 train_time:13674105ms step_avg:580.52ms +grad accum step:5889/14336 +step:23556/57344 train_time:13675912ms step_avg:580.57ms +step:23557/57344 train_time:13675924ms step_avg:580.55ms +step:23558/57344 train_time:13676135ms step_avg:580.53ms +step:23559/57344 train_time:13676697ms step_avg:580.53ms +grad accum step:5890/14336 +step:23560/57344 train_time:13678025ms step_avg:580.56ms +step:23561/57344 train_time:13678042ms step_avg:580.54ms +step:23562/57344 train_time:13678288ms step_avg:580.52ms +step:23563/57344 train_time:13678837ms step_avg:580.52ms +grad accum step:5891/14336 +step:23564/57344 train_time:13680160ms step_avg:580.55ms +step:23565/57344 train_time:13680177ms step_avg:580.53ms +step:23566/57344 train_time:13680425ms step_avg:580.52ms +step:23567/57344 train_time:13680966ms step_avg:580.51ms +grad accum step:5892/14336 +step:23568/57344 train_time:13682269ms step_avg:580.54ms +step:23569/57344 train_time:13682286ms step_avg:580.52ms +step:23570/57344 train_time:13682535ms step_avg:580.51ms +step:23571/57344 train_time:13683087ms step_avg:580.51ms +grad accum step:5893/14336 +step:23572/57344 train_time:13684385ms step_avg:580.54ms +step:23573/57344 train_time:13684401ms step_avg:580.51ms +step:23574/57344 train_time:13684653ms step_avg:580.50ms +step:23575/57344 train_time:13685208ms step_avg:580.50ms +grad accum step:5894/14336 +step:23576/57344 train_time:13686504ms step_avg:580.53ms +step:23577/57344 train_time:13686521ms step_avg:580.50ms +step:23578/57344 train_time:13686777ms step_avg:580.49ms +step:23579/57344 train_time:13687355ms step_avg:580.49ms +grad accum step:5895/14336 +step:23580/57344 train_time:13688703ms step_avg:580.52ms +step:23581/57344 train_time:13688719ms step_avg:580.50ms +step:23582/57344 train_time:13688965ms step_avg:580.48ms +step:23583/57344 train_time:13689498ms step_avg:580.48ms +grad accum step:5896/14336 +step:23584/57344 train_time:13690814ms step_avg:580.51ms +step:23585/57344 train_time:13690831ms step_avg:580.49ms +step:23586/57344 train_time:13691077ms step_avg:580.47ms +step:23587/57344 train_time:13691616ms step_avg:580.47ms +grad accum step:5897/14336 +step:23588/57344 train_time:13692949ms step_avg:580.50ms +step:23589/57344 train_time:13692965ms step_avg:580.48ms +step:23590/57344 train_time:13693217ms step_avg:580.47ms +step:23591/57344 train_time:13693771ms step_avg:580.47ms +grad accum step:5898/14336 +step:23592/57344 train_time:13695058ms step_avg:580.50ms +step:23593/57344 train_time:13695075ms step_avg:580.47ms +step:23594/57344 train_time:13695322ms step_avg:580.46ms +step:23595/57344 train_time:13695870ms step_avg:580.46ms +grad accum step:5899/14336 +step:23596/57344 train_time:13697176ms step_avg:580.49ms +step:23597/57344 train_time:13697193ms step_avg:580.46ms +step:23598/57344 train_time:13697441ms step_avg:580.45ms +step:23599/57344 train_time:13697993ms step_avg:580.45ms +grad accum step:5900/14336 +step:23600/57344 train_time:13699288ms step_avg:580.48ms +step:23601/57344 train_time:13699305ms step_avg:580.45ms +step:23602/57344 train_time:13699554ms step_avg:580.44ms +step:23603/57344 train_time:13700108ms step_avg:580.44ms +grad accum step:5901/14336 +step:23604/57344 train_time:13701406ms step_avg:580.47ms +step:23605/57344 train_time:13701424ms step_avg:580.45ms +step:23606/57344 train_time:13701674ms step_avg:580.43ms +step:23607/57344 train_time:13702229ms step_avg:580.43ms +grad accum step:5902/14336 +step:23608/57344 train_time:13703555ms step_avg:580.46ms +step:23609/57344 train_time:13703572ms step_avg:580.44ms +step:23610/57344 train_time:13703822ms step_avg:580.42ms +step:23611/57344 train_time:13704380ms step_avg:580.42ms +grad accum step:5903/14336 +step:23612/57344 train_time:13705679ms step_avg:580.45ms +step:23613/57344 train_time:13705696ms step_avg:580.43ms +step:23614/57344 train_time:13705945ms step_avg:580.42ms +step:23615/57344 train_time:13706499ms step_avg:580.41ms +grad accum step:5904/14336 +step:23616/57344 train_time:13707829ms step_avg:580.45ms +step:23616/57344 val_loss:6.671075 train_time:13707830ms step_avg:580.45ms +step:23617/57344 train_time:13707842ms step_avg:580.42ms +step:23618/57344 train_time:13708069ms step_avg:580.41ms +step:23619/57344 train_time:13708616ms step_avg:580.41ms +grad accum step:5905/14336 +step:23620/57344 train_time:13709925ms step_avg:580.44ms +step:23621/57344 train_time:13709943ms step_avg:580.41ms +step:23622/57344 train_time:13710195ms step_avg:580.40ms +step:23623/57344 train_time:13710754ms step_avg:580.40ms +grad accum step:5906/14336 +step:23624/57344 train_time:13712098ms step_avg:580.43ms +step:23625/57344 train_time:13712114ms step_avg:580.41ms +step:23626/57344 train_time:13712361ms step_avg:580.39ms +step:23627/57344 train_time:13712919ms step_avg:580.39ms +grad accum step:5907/14336 +step:23628/57344 train_time:13714253ms step_avg:580.42ms +step:23629/57344 train_time:13714270ms step_avg:580.40ms +step:23630/57344 train_time:13714513ms step_avg:580.39ms +step:23631/57344 train_time:13715050ms step_avg:580.38ms +grad accum step:5908/14336 +step:23632/57344 train_time:13716372ms step_avg:580.42ms +step:23633/57344 train_time:13716389ms step_avg:580.39ms +step:23634/57344 train_time:13716648ms step_avg:580.38ms +step:23635/57344 train_time:13717233ms step_avg:580.38ms +grad accum step:5909/14336 +step:23636/57344 train_time:13718664ms step_avg:580.41ms +step:23637/57344 train_time:13718680ms step_avg:580.39ms +step:23638/57344 train_time:13718928ms step_avg:580.38ms +step:23639/57344 train_time:13719471ms step_avg:580.37ms +grad accum step:5910/14336 +step:23640/57344 train_time:13720758ms step_avg:580.40ms +step:23641/57344 train_time:13720775ms step_avg:580.38ms +step:23642/57344 train_time:13721024ms step_avg:580.37ms +step:23643/57344 train_time:13721566ms step_avg:580.36ms +grad accum step:5911/14336 +step:23644/57344 train_time:13722864ms step_avg:580.40ms +step:23645/57344 train_time:13722881ms step_avg:580.37ms +step:23646/57344 train_time:13723130ms step_avg:580.36ms +step:23647/57344 train_time:13723676ms step_avg:580.36ms +grad accum step:5912/14336 +step:23648/57344 train_time:13724983ms step_avg:580.39ms +step:23649/57344 train_time:13725000ms step_avg:580.36ms +step:23650/57344 train_time:13725246ms step_avg:580.35ms +step:23651/57344 train_time:13725796ms step_avg:580.35ms +grad accum step:5913/14336 +step:23652/57344 train_time:13727130ms step_avg:580.38ms +step:23653/57344 train_time:13727147ms step_avg:580.36ms +step:23654/57344 train_time:13727391ms step_avg:580.34ms +step:23655/57344 train_time:13727937ms step_avg:580.34ms +grad accum step:5914/14336 +step:23656/57344 train_time:13729210ms step_avg:580.37ms +step:23657/57344 train_time:13729227ms step_avg:580.35ms +step:23658/57344 train_time:13729480ms step_avg:580.33ms +step:23659/57344 train_time:13730043ms step_avg:580.33ms +grad accum step:5915/14336 +step:23660/57344 train_time:13731366ms step_avg:580.36ms +step:23661/57344 train_time:13731383ms step_avg:580.34ms +step:23662/57344 train_time:13731639ms step_avg:580.32ms +step:23663/57344 train_time:13732207ms step_avg:580.32ms +grad accum step:5916/14336 +step:23664/57344 train_time:13733560ms step_avg:580.36ms +step:23665/57344 train_time:13733577ms step_avg:580.33ms +step:23666/57344 train_time:13733828ms step_avg:580.32ms +step:23667/57344 train_time:13734385ms step_avg:580.32ms +grad accum step:5917/14336 +step:23668/57344 train_time:13735666ms step_avg:580.35ms +step:23669/57344 train_time:13735683ms step_avg:580.32ms +step:23670/57344 train_time:13735928ms step_avg:580.31ms +step:23671/57344 train_time:13736474ms step_avg:580.31ms +grad accum step:5918/14336 +step:23672/57344 train_time:13737814ms step_avg:580.34ms +step:23673/57344 train_time:13737830ms step_avg:580.32ms +step:23674/57344 train_time:13738079ms step_avg:580.30ms +step:23675/57344 train_time:13738625ms step_avg:580.30ms +grad accum step:5919/14336 +step:23676/57344 train_time:13739918ms step_avg:580.33ms +step:23677/57344 train_time:13739935ms step_avg:580.31ms +step:23678/57344 train_time:13740184ms step_avg:580.29ms +step:23679/57344 train_time:13740741ms step_avg:580.29ms +grad accum step:5920/14336 +step:23680/57344 train_time:13742032ms step_avg:580.32ms +step:23680/57344 val_loss:6.708018 train_time:13742033ms step_avg:580.32ms +step:23681/57344 train_time:13742045ms step_avg:580.30ms +step:23682/57344 train_time:13742270ms step_avg:580.28ms +step:23683/57344 train_time:13742812ms step_avg:580.28ms +grad accum step:5921/14336 +step:23684/57344 train_time:13744084ms step_avg:580.31ms +step:23685/57344 train_time:13744101ms step_avg:580.29ms +step:23686/57344 train_time:13744368ms step_avg:580.27ms +step:23687/57344 train_time:13744971ms step_avg:580.27ms +grad accum step:5922/14336 +step:23688/57344 train_time:13746294ms step_avg:580.31ms +step:23689/57344 train_time:13746311ms step_avg:580.28ms +step:23690/57344 train_time:13746571ms step_avg:580.27ms +step:23691/57344 train_time:13747145ms step_avg:580.27ms +grad accum step:5923/14336 +step:23692/57344 train_time:13748473ms step_avg:580.30ms +step:23693/57344 train_time:13748490ms step_avg:580.28ms +step:23694/57344 train_time:13748733ms step_avg:580.26ms +step:23695/57344 train_time:13749279ms step_avg:580.26ms +grad accum step:5924/14336 +step:23696/57344 train_time:13750624ms step_avg:580.29ms +step:23697/57344 train_time:13750640ms step_avg:580.27ms +step:23698/57344 train_time:13750887ms step_avg:580.26ms +step:23699/57344 train_time:13751439ms step_avg:580.25ms +grad accum step:5925/14336 +step:23700/57344 train_time:13752763ms step_avg:580.29ms +step:23701/57344 train_time:13752780ms step_avg:580.26ms +step:23702/57344 train_time:13753031ms step_avg:580.25ms +step:23703/57344 train_time:13753588ms step_avg:580.25ms +grad accum step:5926/14336 +step:23704/57344 train_time:13754868ms step_avg:580.28ms +step:23705/57344 train_time:13754885ms step_avg:580.25ms +step:23706/57344 train_time:13755139ms step_avg:580.24ms +step:23707/57344 train_time:13755696ms step_avg:580.24ms +grad accum step:5927/14336 +step:23708/57344 train_time:13756972ms step_avg:580.27ms +step:23709/57344 train_time:13756988ms step_avg:580.24ms +step:23710/57344 train_time:13757245ms step_avg:580.23ms +step:23711/57344 train_time:13757808ms step_avg:580.23ms +grad accum step:5928/14336 +step:23712/57344 train_time:13759120ms step_avg:580.26ms +step:23713/57344 train_time:13759136ms step_avg:580.24ms +step:23714/57344 train_time:13759389ms step_avg:580.22ms +step:23715/57344 train_time:13759960ms step_avg:580.22ms +grad accum step:5929/14336 +step:23716/57344 train_time:13761278ms step_avg:580.25ms +step:23717/57344 train_time:13761295ms step_avg:580.23ms +step:23718/57344 train_time:13761538ms step_avg:580.21ms +step:23719/57344 train_time:13762087ms step_avg:580.21ms +grad accum step:5930/14336 +step:23720/57344 train_time:13763409ms step_avg:580.24ms +step:23721/57344 train_time:13763426ms step_avg:580.22ms +step:23722/57344 train_time:13763667ms step_avg:580.21ms +step:23723/57344 train_time:13764213ms step_avg:580.21ms +grad accum step:5931/14336 +step:23724/57344 train_time:13765530ms step_avg:580.24ms +step:23725/57344 train_time:13765547ms step_avg:580.21ms +step:23726/57344 train_time:13765794ms step_avg:580.20ms +step:23727/57344 train_time:13766344ms step_avg:580.20ms +grad accum step:5932/14336 +step:23728/57344 train_time:13767641ms step_avg:580.23ms +step:23729/57344 train_time:13767658ms step_avg:580.20ms +step:23730/57344 train_time:13767903ms step_avg:580.19ms +step:23731/57344 train_time:13768443ms step_avg:580.19ms +grad accum step:5933/14336 +step:23732/57344 train_time:13769787ms step_avg:580.22ms +step:23733/57344 train_time:13769804ms step_avg:580.20ms +step:23734/57344 train_time:13770050ms step_avg:580.18ms +step:23735/57344 train_time:13770587ms step_avg:580.18ms +grad accum step:5934/14336 +step:23736/57344 train_time:13771882ms step_avg:580.21ms +step:23737/57344 train_time:13771899ms step_avg:580.19ms +step:23738/57344 train_time:13772149ms step_avg:580.17ms +step:23739/57344 train_time:13772698ms step_avg:580.17ms +grad accum step:5935/14336 +step:23740/57344 train_time:13774001ms step_avg:580.20ms +step:23741/57344 train_time:13774018ms step_avg:580.18ms +step:23742/57344 train_time:13774263ms step_avg:580.16ms +step:23743/57344 train_time:13774803ms step_avg:580.16ms +grad accum step:5936/14336 +step:23744/57344 train_time:13776076ms step_avg:580.19ms +step:23744/57344 val_loss:6.725985 train_time:13776077ms step_avg:580.19ms +step:23745/57344 train_time:13776090ms step_avg:580.17ms +step:23746/57344 train_time:13776312ms step_avg:580.15ms +step:23747/57344 train_time:13776846ms step_avg:580.15ms +grad accum step:5937/14336 +step:23748/57344 train_time:13778123ms step_avg:580.18ms +step:23749/57344 train_time:13778140ms step_avg:580.16ms +step:23750/57344 train_time:13778382ms step_avg:580.14ms +step:23751/57344 train_time:13778928ms step_avg:580.14ms +grad accum step:5938/14336 +step:23752/57344 train_time:13780248ms step_avg:580.17ms +step:23753/57344 train_time:13780265ms step_avg:580.15ms +step:23754/57344 train_time:13780511ms step_avg:580.13ms +step:23755/57344 train_time:13781055ms step_avg:580.13ms +grad accum step:5939/14336 +step:23756/57344 train_time:13782350ms step_avg:580.16ms +step:23757/57344 train_time:13782366ms step_avg:580.14ms +step:23758/57344 train_time:13782614ms step_avg:580.13ms +step:23759/57344 train_time:13783164ms step_avg:580.12ms +grad accum step:5940/14336 +step:23760/57344 train_time:13784470ms step_avg:580.15ms +step:23761/57344 train_time:13784487ms step_avg:580.13ms +step:23762/57344 train_time:13784736ms step_avg:580.12ms +step:23763/57344 train_time:13785282ms step_avg:580.12ms +grad accum step:5941/14336 +step:23764/57344 train_time:13786581ms step_avg:580.15ms +step:23765/57344 train_time:13786598ms step_avg:580.12ms +step:23766/57344 train_time:13786845ms step_avg:580.11ms +step:23767/57344 train_time:13787388ms step_avg:580.11ms +grad accum step:5942/14336 +step:23768/57344 train_time:13788676ms step_avg:580.14ms +step:23769/57344 train_time:13788693ms step_avg:580.11ms +step:23770/57344 train_time:13788940ms step_avg:580.10ms +step:23771/57344 train_time:13789492ms step_avg:580.10ms +grad accum step:5943/14336 +step:23772/57344 train_time:13790836ms step_avg:580.13ms +step:23773/57344 train_time:13790853ms step_avg:580.11ms +step:23774/57344 train_time:13791104ms step_avg:580.09ms +step:23775/57344 train_time:13791661ms step_avg:580.09ms +grad accum step:5944/14336 +step:23776/57344 train_time:13792967ms step_avg:580.12ms +step:23777/57344 train_time:13792984ms step_avg:580.10ms +step:23778/57344 train_time:13793231ms step_avg:580.08ms +step:23779/57344 train_time:13793775ms step_avg:580.08ms +grad accum step:5945/14336 +step:23780/57344 train_time:13795065ms step_avg:580.11ms +step:23781/57344 train_time:13795082ms step_avg:580.09ms +step:23782/57344 train_time:13795332ms step_avg:580.07ms +step:23783/57344 train_time:13795889ms step_avg:580.07ms +grad accum step:5946/14336 +step:23784/57344 train_time:13797216ms step_avg:580.10ms +step:23785/57344 train_time:13797233ms step_avg:580.08ms +step:23786/57344 train_time:13797479ms step_avg:580.07ms +step:23787/57344 train_time:13798020ms step_avg:580.07ms +grad accum step:5947/14336 +step:23788/57344 train_time:13799309ms step_avg:580.10ms +step:23789/57344 train_time:13799326ms step_avg:580.07ms +step:23790/57344 train_time:13799575ms step_avg:580.06ms +step:23791/57344 train_time:13800133ms step_avg:580.06ms +grad accum step:5948/14336 +step:23792/57344 train_time:13801497ms step_avg:580.09ms +step:23793/57344 train_time:13801514ms step_avg:580.07ms +step:23794/57344 train_time:13801767ms step_avg:580.05ms +step:23795/57344 train_time:13802332ms step_avg:580.05ms +grad accum step:5949/14336 +step:23796/57344 train_time:13803630ms step_avg:580.08ms +step:23797/57344 train_time:13803646ms step_avg:580.06ms +step:23798/57344 train_time:13803895ms step_avg:580.04ms +step:23799/57344 train_time:13804448ms step_avg:580.04ms +grad accum step:5950/14336 +step:23800/57344 train_time:13805753ms step_avg:580.07ms +step:23801/57344 train_time:13805770ms step_avg:580.05ms +step:23802/57344 train_time:13806016ms step_avg:580.04ms +step:23803/57344 train_time:13806559ms step_avg:580.03ms +grad accum step:5951/14336 +step:23804/57344 train_time:13807863ms step_avg:580.06ms +step:23805/57344 train_time:13807879ms step_avg:580.04ms +step:23806/57344 train_time:13808124ms step_avg:580.03ms +step:23807/57344 train_time:13808667ms step_avg:580.03ms +grad accum step:5952/14336 +step:23808/57344 train_time:13809949ms step_avg:580.05ms +step:23808/57344 val_loss:6.749393 train_time:13809949ms step_avg:580.05ms +step:23809/57344 train_time:13809962ms step_avg:580.03ms +step:23810/57344 train_time:13810185ms step_avg:580.02ms +step:23811/57344 train_time:13810728ms step_avg:580.01ms +grad accum step:5953/14336 +step:23812/57344 train_time:13812025ms step_avg:580.04ms +step:23813/57344 train_time:13812041ms step_avg:580.02ms +step:23814/57344 train_time:13812291ms step_avg:580.01ms +step:23815/57344 train_time:13812845ms step_avg:580.01ms +grad accum step:5954/14336 +step:23816/57344 train_time:13814188ms step_avg:580.04ms +step:23817/57344 train_time:13814205ms step_avg:580.01ms +step:23818/57344 train_time:13814467ms step_avg:580.00ms +step:23819/57344 train_time:13815078ms step_avg:580.00ms +grad accum step:5955/14336 +step:23820/57344 train_time:13816477ms step_avg:580.04ms +step:23821/57344 train_time:13816494ms step_avg:580.01ms +step:23822/57344 train_time:13816744ms step_avg:580.00ms +step:23823/57344 train_time:13817302ms step_avg:580.00ms +grad accum step:5956/14336 +step:23824/57344 train_time:13818594ms step_avg:580.03ms +step:23825/57344 train_time:13818612ms step_avg:580.00ms +step:23826/57344 train_time:13818862ms step_avg:579.99ms +step:23827/57344 train_time:13819408ms step_avg:579.99ms +grad accum step:5957/14336 +step:23828/57344 train_time:13820720ms step_avg:580.02ms +step:23829/57344 train_time:13820737ms step_avg:580.00ms +step:23830/57344 train_time:13820981ms step_avg:579.98ms +step:23831/57344 train_time:13821531ms step_avg:579.98ms +grad accum step:5958/14336 +step:23832/57344 train_time:13822823ms step_avg:580.01ms +step:23833/57344 train_time:13822840ms step_avg:579.99ms +step:23834/57344 train_time:13823087ms step_avg:579.97ms +step:23835/57344 train_time:13823633ms step_avg:579.97ms +grad accum step:5959/14336 +step:23836/57344 train_time:13824926ms step_avg:580.00ms +step:23837/57344 train_time:13824943ms step_avg:579.98ms +step:23838/57344 train_time:13825192ms step_avg:579.96ms +step:23839/57344 train_time:13825741ms step_avg:579.96ms +grad accum step:5960/14336 +step:23840/57344 train_time:13827052ms step_avg:579.99ms +step:23841/57344 train_time:13827068ms step_avg:579.97ms +step:23842/57344 train_time:13827314ms step_avg:579.96ms +step:23843/57344 train_time:13827859ms step_avg:579.95ms +grad accum step:5961/14336 +step:23844/57344 train_time:13829223ms step_avg:579.99ms +step:23845/57344 train_time:13829239ms step_avg:579.96ms +step:23846/57344 train_time:13829488ms step_avg:579.95ms +step:23847/57344 train_time:13830051ms step_avg:579.95ms +grad accum step:5962/14336 +step:23848/57344 train_time:13831383ms step_avg:579.98ms +step:23849/57344 train_time:13831400ms step_avg:579.96ms +step:23850/57344 train_time:13831643ms step_avg:579.94ms +step:23851/57344 train_time:13832186ms step_avg:579.94ms +grad accum step:5963/14336 +step:23852/57344 train_time:13833467ms step_avg:579.97ms +step:23853/57344 train_time:13833484ms step_avg:579.95ms +step:23854/57344 train_time:13833742ms step_avg:579.93ms +step:23855/57344 train_time:13834321ms step_avg:579.93ms +grad accum step:5964/14336 +step:23856/57344 train_time:13835635ms step_avg:579.96ms +step:23857/57344 train_time:13835652ms step_avg:579.94ms +step:23858/57344 train_time:13835898ms step_avg:579.93ms +step:23859/57344 train_time:13836444ms step_avg:579.93ms +grad accum step:5965/14336 +step:23860/57344 train_time:13837760ms step_avg:579.96ms +step:23861/57344 train_time:13837777ms step_avg:579.93ms +step:23862/57344 train_time:13838021ms step_avg:579.92ms +step:23863/57344 train_time:13838563ms step_avg:579.92ms +grad accum step:5966/14336 +step:23864/57344 train_time:13839898ms step_avg:579.95ms +step:23865/57344 train_time:13839915ms step_avg:579.93ms +step:23866/57344 train_time:13840162ms step_avg:579.91ms +step:23867/57344 train_time:13840717ms step_avg:579.91ms +grad accum step:5967/14336 +step:23868/57344 train_time:13842047ms step_avg:579.94ms +step:23869/57344 train_time:13842064ms step_avg:579.92ms +step:23870/57344 train_time:13842309ms step_avg:579.90ms +step:23871/57344 train_time:13842847ms step_avg:579.90ms +grad accum step:5968/14336 +step:23872/57344 train_time:13844173ms step_avg:579.93ms +step:23872/57344 val_loss:6.786535 train_time:13844173ms step_avg:579.93ms +step:23873/57344 train_time:13844359ms step_avg:579.92ms +step:23874/57344 train_time:13844462ms step_avg:579.90ms +step:23875/57344 train_time:13845001ms step_avg:579.90ms +grad accum step:5969/14336 +step:23876/57344 train_time:13846312ms step_avg:579.93ms +step:23877/57344 train_time:13846329ms step_avg:579.90ms +step:23878/57344 train_time:13846577ms step_avg:579.89ms +step:23879/57344 train_time:13847131ms step_avg:579.89ms +grad accum step:5970/14336 +step:23880/57344 train_time:13848469ms step_avg:579.92ms +step:23881/57344 train_time:13848486ms step_avg:579.90ms +step:23882/57344 train_time:13848746ms step_avg:579.88ms +step:23883/57344 train_time:13849323ms step_avg:579.88ms +grad accum step:5971/14336 +step:23884/57344 train_time:13850617ms step_avg:579.91ms +step:23885/57344 train_time:13850633ms step_avg:579.89ms +step:23886/57344 train_time:13850887ms step_avg:579.87ms +step:23887/57344 train_time:13851462ms step_avg:579.87ms +grad accum step:5972/14336 +step:23888/57344 train_time:13852775ms step_avg:579.91ms +step:23889/57344 train_time:13852792ms step_avg:579.88ms +step:23890/57344 train_time:13853040ms step_avg:579.87ms +step:23891/57344 train_time:13853599ms step_avg:579.87ms +grad accum step:5973/14336 +step:23892/57344 train_time:13854928ms step_avg:579.90ms +step:23893/57344 train_time:13854945ms step_avg:579.87ms +step:23894/57344 train_time:13855193ms step_avg:579.86ms +step:23895/57344 train_time:13855737ms step_avg:579.86ms +grad accum step:5974/14336 +step:23896/57344 train_time:13857061ms step_avg:579.89ms +step:23897/57344 train_time:13857078ms step_avg:579.87ms +step:23898/57344 train_time:13857322ms step_avg:579.85ms +step:23899/57344 train_time:13857869ms step_avg:579.85ms +grad accum step:5975/14336 +step:23900/57344 train_time:13859172ms step_avg:579.88ms +step:23901/57344 train_time:13859189ms step_avg:579.86ms +step:23902/57344 train_time:13859440ms step_avg:579.84ms +step:23903/57344 train_time:13859997ms step_avg:579.84ms +grad accum step:5976/14336 +step:23904/57344 train_time:13861317ms step_avg:579.87ms +step:23905/57344 train_time:13861334ms step_avg:579.85ms +step:23906/57344 train_time:13861579ms step_avg:579.84ms +step:23907/57344 train_time:13862119ms step_avg:579.84ms +grad accum step:5977/14336 +step:23908/57344 train_time:13863419ms step_avg:579.87ms +step:23909/57344 train_time:13863436ms step_avg:579.84ms +step:23910/57344 train_time:13863684ms step_avg:579.83ms +step:23911/57344 train_time:13864233ms step_avg:579.83ms +grad accum step:5978/14336 +step:23912/57344 train_time:13865516ms step_avg:579.86ms +step:23913/57344 train_time:13865533ms step_avg:579.83ms +step:23914/57344 train_time:13865776ms step_avg:579.82ms +step:23915/57344 train_time:13866315ms step_avg:579.82ms +grad accum step:5979/14336 +step:23916/57344 train_time:13867667ms step_avg:579.85ms +step:23917/57344 train_time:13867684ms step_avg:579.83ms +step:23918/57344 train_time:13867938ms step_avg:579.81ms +step:23919/57344 train_time:13868507ms step_avg:579.81ms +grad accum step:5980/14336 +step:23920/57344 train_time:13869805ms step_avg:579.84ms +step:23921/57344 train_time:13869822ms step_avg:579.82ms +step:23922/57344 train_time:13870071ms step_avg:579.80ms +step:23923/57344 train_time:13870629ms step_avg:579.80ms +grad accum step:5981/14336 +step:23924/57344 train_time:13871908ms step_avg:579.83ms +step:23925/57344 train_time:13871925ms step_avg:579.81ms +step:23926/57344 train_time:13872168ms step_avg:579.79ms +step:23927/57344 train_time:13872715ms step_avg:579.79ms +grad accum step:5982/14336 +step:23928/57344 train_time:13873992ms step_avg:579.82ms +step:23929/57344 train_time:13874009ms step_avg:579.80ms +step:23930/57344 train_time:13874256ms step_avg:579.79ms +step:23931/57344 train_time:13874803ms step_avg:579.78ms +grad accum step:5983/14336 +step:23932/57344 train_time:13876123ms step_avg:579.81ms +step:23933/57344 train_time:13876140ms step_avg:579.79ms +step:23934/57344 train_time:13876383ms step_avg:579.78ms +step:23935/57344 train_time:13876927ms step_avg:579.78ms +grad accum step:5984/14336 +step:23936/57344 train_time:13878235ms step_avg:579.81ms +step:23936/57344 val_loss:6.791564 train_time:13878236ms step_avg:579.81ms +step:23937/57344 train_time:13878248ms step_avg:579.78ms +step:23938/57344 train_time:13878472ms step_avg:579.77ms +step:23939/57344 train_time:13879012ms step_avg:579.77ms +grad accum step:5985/14336 +step:23940/57344 train_time:13880303ms step_avg:579.80ms +step:23941/57344 train_time:13880320ms step_avg:579.77ms +step:23942/57344 train_time:13880568ms step_avg:579.76ms +step:23943/57344 train_time:13881114ms step_avg:579.76ms +grad accum step:5986/14336 +step:23944/57344 train_time:13882429ms step_avg:579.79ms +step:23945/57344 train_time:13882446ms step_avg:579.76ms +step:23946/57344 train_time:13882701ms step_avg:579.75ms +step:23947/57344 train_time:13883266ms step_avg:579.75ms +grad accum step:5987/14336 +step:23948/57344 train_time:13884574ms step_avg:579.78ms +step:23949/57344 train_time:13884591ms step_avg:579.76ms +step:23950/57344 train_time:13884839ms step_avg:579.74ms +step:23951/57344 train_time:13885382ms step_avg:579.74ms +grad accum step:5988/14336 +step:23952/57344 train_time:13886669ms step_avg:579.77ms +step:23953/57344 train_time:13886686ms step_avg:579.75ms +step:23954/57344 train_time:13886934ms step_avg:579.73ms +step:23955/57344 train_time:13887485ms step_avg:579.73ms +grad accum step:5989/14336 +step:23956/57344 train_time:13888788ms step_avg:579.76ms +step:23957/57344 train_time:13888805ms step_avg:579.74ms +step:23958/57344 train_time:13889052ms step_avg:579.73ms +step:23959/57344 train_time:13889595ms step_avg:579.72ms +grad accum step:5990/14336 +step:23960/57344 train_time:13890907ms step_avg:579.75ms +step:23961/57344 train_time:13890924ms step_avg:579.73ms +step:23962/57344 train_time:13891172ms step_avg:579.72ms +step:23963/57344 train_time:13891721ms step_avg:579.72ms +grad accum step:5991/14336 +step:23964/57344 train_time:13893116ms step_avg:579.75ms +step:23965/57344 train_time:13893133ms step_avg:579.73ms +step:23966/57344 train_time:13893381ms step_avg:579.71ms +step:23967/57344 train_time:13893929ms step_avg:579.71ms +grad accum step:5992/14336 +step:23968/57344 train_time:13895310ms step_avg:579.74ms +step:23969/57344 train_time:13895326ms step_avg:579.72ms +step:23970/57344 train_time:13895575ms step_avg:579.71ms +step:23971/57344 train_time:13896122ms step_avg:579.71ms +grad accum step:5993/14336 +step:23972/57344 train_time:13897425ms step_avg:579.74ms +step:23973/57344 train_time:13897442ms step_avg:579.71ms +step:23974/57344 train_time:13897691ms step_avg:579.70ms +step:23975/57344 train_time:13898241ms step_avg:579.70ms +grad accum step:5994/14336 +step:23976/57344 train_time:13899517ms step_avg:579.73ms +step:23977/57344 train_time:13899533ms step_avg:579.70ms +step:23978/57344 train_time:13899779ms step_avg:579.69ms +step:23979/57344 train_time:13900324ms step_avg:579.69ms +grad accum step:5995/14336 +step:23980/57344 train_time:13901686ms step_avg:579.72ms +step:23981/57344 train_time:13901703ms step_avg:579.70ms +step:23982/57344 train_time:13901959ms step_avg:579.68ms +step:23983/57344 train_time:13902537ms step_avg:579.68ms +grad accum step:5996/14336 +step:23984/57344 train_time:13903872ms step_avg:579.71ms +step:23985/57344 train_time:13903889ms step_avg:579.69ms +step:23986/57344 train_time:13904133ms step_avg:579.68ms +step:23987/57344 train_time:13904679ms step_avg:579.68ms +grad accum step:5997/14336 +step:23988/57344 train_time:13905977ms step_avg:579.71ms +step:23989/57344 train_time:13905994ms step_avg:579.68ms +step:23990/57344 train_time:13906242ms step_avg:579.67ms +step:23991/57344 train_time:13906791ms step_avg:579.67ms +grad accum step:5998/14336 +step:23992/57344 train_time:13908120ms step_avg:579.70ms +step:23993/57344 train_time:13908137ms step_avg:579.67ms +step:23994/57344 train_time:13908386ms step_avg:579.66ms +step:23995/57344 train_time:13908942ms step_avg:579.66ms +grad accum step:5999/14336 +step:23996/57344 train_time:13910248ms step_avg:579.69ms +step:23997/57344 train_time:13910265ms step_avg:579.67ms +step:23998/57344 train_time:13910510ms step_avg:579.65ms +step:23999/57344 train_time:13911058ms step_avg:579.65ms +grad accum step:6000/14336 +step:24000/57344 train_time:13912361ms step_avg:579.68ms +step:24000/57344 val_loss:6.813724 train_time:13912361ms step_avg:579.68ms +step:24001/57344 train_time:13912374ms step_avg:579.66ms +step:24002/57344 train_time:13912593ms step_avg:579.64ms +step:24003/57344 train_time:13913129ms step_avg:579.64ms +grad accum step:6001/14336 +step:24004/57344 train_time:13914423ms step_avg:579.67ms +step:24005/57344 train_time:13914440ms step_avg:579.65ms +step:24006/57344 train_time:13914687ms step_avg:579.63ms +step:24007/57344 train_time:13915237ms step_avg:579.63ms +grad accum step:6002/14336 +step:24008/57344 train_time:13916531ms step_avg:579.66ms +step:24009/57344 train_time:13916548ms step_avg:579.64ms +step:24010/57344 train_time:13916795ms step_avg:579.62ms +step:24011/57344 train_time:13917340ms step_avg:579.62ms +grad accum step:6003/14336 +step:24012/57344 train_time:13918647ms step_avg:579.65ms +step:24013/57344 train_time:13918663ms step_avg:579.63ms +step:24014/57344 train_time:13918910ms step_avg:579.62ms +step:24015/57344 train_time:13919451ms step_avg:579.61ms +grad accum step:6004/14336 +step:24016/57344 train_time:13920737ms step_avg:579.64ms +step:24017/57344 train_time:13920754ms step_avg:579.62ms +step:24018/57344 train_time:13921001ms step_avg:579.61ms +step:24019/57344 train_time:13921550ms step_avg:579.61ms +grad accum step:6005/14336 +step:24020/57344 train_time:13922849ms step_avg:579.64ms +step:24021/57344 train_time:13922865ms step_avg:579.61ms +step:24022/57344 train_time:13923113ms step_avg:579.60ms +step:24023/57344 train_time:13923663ms step_avg:579.60ms +grad accum step:6006/14336 +step:24024/57344 train_time:13924975ms step_avg:579.63ms +step:24025/57344 train_time:13924992ms step_avg:579.60ms +step:24026/57344 train_time:13925238ms step_avg:579.59ms +step:24027/57344 train_time:13925785ms step_avg:579.59ms +grad accum step:6007/14336 +step:24028/57344 train_time:13927057ms step_avg:579.62ms +step:24029/57344 train_time:13927074ms step_avg:579.59ms +step:24030/57344 train_time:13927324ms step_avg:579.58ms +step:24031/57344 train_time:13927877ms step_avg:579.58ms +grad accum step:6008/14336 +step:24032/57344 train_time:13929233ms step_avg:579.61ms +step:24033/57344 train_time:13929249ms step_avg:579.59ms +step:24034/57344 train_time:13929494ms step_avg:579.57ms +step:24035/57344 train_time:13930032ms step_avg:579.57ms +grad accum step:6009/14336 +step:24036/57344 train_time:13931327ms step_avg:579.60ms +step:24037/57344 train_time:13931344ms step_avg:579.58ms +step:24038/57344 train_time:13931592ms step_avg:579.57ms +step:24039/57344 train_time:13932145ms step_avg:579.56ms +grad accum step:6010/14336 +step:24040/57344 train_time:13933465ms step_avg:579.60ms +step:24041/57344 train_time:13933481ms step_avg:579.57ms +step:24042/57344 train_time:13933733ms step_avg:579.56ms +step:24043/57344 train_time:13934290ms step_avg:579.56ms +grad accum step:6011/14336 +step:24044/57344 train_time:13935565ms step_avg:579.59ms +step:24045/57344 train_time:13935581ms step_avg:579.56ms +step:24046/57344 train_time:13935833ms step_avg:579.55ms +step:24047/57344 train_time:13936394ms step_avg:579.55ms +grad accum step:6012/14336 +step:24048/57344 train_time:13937756ms step_avg:579.58ms +step:24049/57344 train_time:13937773ms step_avg:579.56ms +step:24050/57344 train_time:13938020ms step_avg:579.54ms +step:24051/57344 train_time:13938565ms step_avg:579.54ms +grad accum step:6013/14336 +step:24052/57344 train_time:13939845ms step_avg:579.57ms +step:24053/57344 train_time:13939862ms step_avg:579.55ms +step:24054/57344 train_time:13940112ms step_avg:579.53ms +step:24055/57344 train_time:13940668ms step_avg:579.53ms +grad accum step:6014/14336 +step:24056/57344 train_time:13942010ms step_avg:579.56ms +step:24057/57344 train_time:13942027ms step_avg:579.54ms +step:24058/57344 train_time:13942274ms step_avg:579.53ms +step:24059/57344 train_time:13942830ms step_avg:579.53ms +grad accum step:6015/14336 +step:24060/57344 train_time:13944146ms step_avg:579.56ms +step:24061/57344 train_time:13944163ms step_avg:579.53ms +step:24062/57344 train_time:13944407ms step_avg:579.52ms +step:24063/57344 train_time:13944958ms step_avg:579.52ms +grad accum step:6016/14336 +step:24064/57344 train_time:13946256ms step_avg:579.55ms +step:24064/57344 val_loss:6.825016 train_time:13946256ms step_avg:579.55ms +step:24065/57344 train_time:13946269ms step_avg:579.52ms +step:24066/57344 train_time:13946490ms step_avg:579.51ms +step:24067/57344 train_time:13947041ms step_avg:579.51ms +grad accum step:6017/14336 +step:24068/57344 train_time:13948354ms step_avg:579.54ms +step:24069/57344 train_time:13948371ms step_avg:579.52ms +step:24070/57344 train_time:13948619ms step_avg:579.50ms +step:24071/57344 train_time:13949175ms step_avg:579.50ms +grad accum step:6018/14336 +step:24072/57344 train_time:13950465ms step_avg:579.53ms +step:24073/57344 train_time:13950482ms step_avg:579.51ms +step:24074/57344 train_time:13950730ms step_avg:579.49ms +step:24075/57344 train_time:13951272ms step_avg:579.49ms +grad accum step:6019/14336 +step:24076/57344 train_time:13952556ms step_avg:579.52ms +step:24077/57344 train_time:13952572ms step_avg:579.50ms +step:24078/57344 train_time:13952814ms step_avg:579.48ms +step:24079/57344 train_time:13953361ms step_avg:579.48ms +grad accum step:6020/14336 +step:24080/57344 train_time:13954724ms step_avg:579.52ms +step:24081/57344 train_time:13954741ms step_avg:579.49ms +step:24082/57344 train_time:13954990ms step_avg:579.48ms +step:24083/57344 train_time:13955545ms step_avg:579.48ms +grad accum step:6021/14336 +step:24084/57344 train_time:13956866ms step_avg:579.51ms +step:24085/57344 train_time:13956883ms step_avg:579.48ms +step:24086/57344 train_time:13957146ms step_avg:579.47ms +step:24087/57344 train_time:13957729ms step_avg:579.47ms +grad accum step:6022/14336 +step:24088/57344 train_time:13959034ms step_avg:579.50ms +step:24089/57344 train_time:13959051ms step_avg:579.48ms +step:24090/57344 train_time:13959308ms step_avg:579.46ms +step:24091/57344 train_time:13959883ms step_avg:579.46ms +grad accum step:6023/14336 +step:24092/57344 train_time:13961193ms step_avg:579.49ms +step:24093/57344 train_time:13961210ms step_avg:579.47ms +step:24094/57344 train_time:13961455ms step_avg:579.46ms +step:24095/57344 train_time:13962006ms step_avg:579.46ms +grad accum step:6024/14336 +step:24096/57344 train_time:13963303ms step_avg:579.49ms +step:24097/57344 train_time:13963320ms step_avg:579.46ms +step:24098/57344 train_time:13963573ms step_avg:579.45ms +step:24099/57344 train_time:13964133ms step_avg:579.45ms +grad accum step:6025/14336 +step:24100/57344 train_time:13965427ms step_avg:579.48ms +step:24101/57344 train_time:13965444ms step_avg:579.45ms +step:24102/57344 train_time:13965692ms step_avg:579.44ms +step:24103/57344 train_time:13966240ms step_avg:579.44ms +grad accum step:6026/14336 +step:24104/57344 train_time:13967568ms step_avg:579.47ms +step:24105/57344 train_time:13967585ms step_avg:579.45ms +step:24106/57344 train_time:13967830ms step_avg:579.43ms +step:24107/57344 train_time:13968382ms step_avg:579.43ms +grad accum step:6027/14336 +step:24108/57344 train_time:13969714ms step_avg:579.46ms +step:24109/57344 train_time:13969731ms step_avg:579.44ms +step:24110/57344 train_time:13969979ms step_avg:579.43ms +step:24111/57344 train_time:13970523ms step_avg:579.43ms +grad accum step:6028/14336 +step:24112/57344 train_time:13971880ms step_avg:579.46ms +step:24113/57344 train_time:13971898ms step_avg:579.43ms +step:24114/57344 train_time:13972144ms step_avg:579.42ms +step:24115/57344 train_time:13972698ms step_avg:579.42ms +grad accum step:6029/14336 +step:24116/57344 train_time:13973996ms step_avg:579.45ms +step:24117/57344 train_time:13974013ms step_avg:579.43ms +step:24118/57344 train_time:13974256ms step_avg:579.41ms +step:24119/57344 train_time:13974801ms step_avg:579.41ms +grad accum step:6030/14336 +step:24120/57344 train_time:13976102ms step_avg:579.44ms +step:24121/57344 train_time:13976119ms step_avg:579.42ms +step:24122/57344 train_time:13976367ms step_avg:579.40ms +step:24123/57344 train_time:13976922ms step_avg:579.40ms +grad accum step:6031/14336 +step:24124/57344 train_time:13978233ms step_avg:579.43ms +step:24125/57344 train_time:13978250ms step_avg:579.41ms +step:24126/57344 train_time:13978497ms step_avg:579.40ms +step:24127/57344 train_time:13979049ms step_avg:579.39ms +grad accum step:6032/14336 +step:24128/57344 train_time:13980344ms step_avg:579.42ms +step:24128/57344 val_loss:6.843540 train_time:13980345ms step_avg:579.42ms +step:24129/57344 train_time:13980358ms step_avg:579.40ms +step:24130/57344 train_time:13980580ms step_avg:579.39ms +step:24131/57344 train_time:13981124ms step_avg:579.38ms +grad accum step:6033/14336 +step:24132/57344 train_time:13982447ms step_avg:579.42ms +step:24133/57344 train_time:13982464ms step_avg:579.39ms +step:24134/57344 train_time:13982709ms step_avg:579.38ms +step:24135/57344 train_time:13983259ms step_avg:579.38ms +grad accum step:6034/14336 +step:24136/57344 train_time:13984576ms step_avg:579.41ms +step:24137/57344 train_time:13984593ms step_avg:579.38ms +step:24138/57344 train_time:13984844ms step_avg:579.37ms +step:24139/57344 train_time:13985397ms step_avg:579.37ms +grad accum step:6035/14336 +step:24140/57344 train_time:13986679ms step_avg:579.40ms +step:24141/57344 train_time:13986696ms step_avg:579.38ms +step:24142/57344 train_time:13986945ms step_avg:579.36ms +step:24143/57344 train_time:13987492ms step_avg:579.36ms +grad accum step:6036/14336 +step:24144/57344 train_time:13988851ms step_avg:579.39ms +step:24145/57344 train_time:13988867ms step_avg:579.37ms +step:24146/57344 train_time:13989115ms step_avg:579.36ms +step:24147/57344 train_time:13989667ms step_avg:579.35ms +grad accum step:6037/14336 +step:24148/57344 train_time:13990950ms step_avg:579.38ms +step:24149/57344 train_time:13990967ms step_avg:579.36ms +step:24150/57344 train_time:13991218ms step_avg:579.35ms +step:24151/57344 train_time:13991774ms step_avg:579.35ms +grad accum step:6038/14336 +step:24152/57344 train_time:13993055ms step_avg:579.37ms +step:24153/57344 train_time:13993072ms step_avg:579.35ms +step:24154/57344 train_time:13993322ms step_avg:579.34ms +step:24155/57344 train_time:13993888ms step_avg:579.34ms +grad accum step:6039/14336 +step:24156/57344 train_time:13995202ms step_avg:579.37ms +step:24157/57344 train_time:13995219ms step_avg:579.34ms +step:24158/57344 train_time:13995466ms step_avg:579.33ms +step:24159/57344 train_time:13996015ms step_avg:579.33ms +grad accum step:6040/14336 +step:24160/57344 train_time:13997329ms step_avg:579.36ms +step:24161/57344 train_time:13997346ms step_avg:579.34ms +step:24162/57344 train_time:13997590ms step_avg:579.32ms +step:24163/57344 train_time:13998137ms step_avg:579.32ms +grad accum step:6041/14336 +step:24164/57344 train_time:13999442ms step_avg:579.35ms +step:24165/57344 train_time:13999459ms step_avg:579.33ms +step:24166/57344 train_time:13999716ms step_avg:579.31ms +step:24167/57344 train_time:14000288ms step_avg:579.31ms +grad accum step:6042/14336 +step:24168/57344 train_time:14001598ms step_avg:579.34ms +step:24169/57344 train_time:14001615ms step_avg:579.32ms +step:24170/57344 train_time:14001871ms step_avg:579.31ms +step:24171/57344 train_time:14002436ms step_avg:579.31ms +grad accum step:6043/14336 +step:24172/57344 train_time:14003790ms step_avg:579.34ms +step:24173/57344 train_time:14003807ms step_avg:579.32ms +step:24174/57344 train_time:14004054ms step_avg:579.30ms +step:24175/57344 train_time:14004602ms step_avg:579.30ms +grad accum step:6044/14336 +step:24176/57344 train_time:14005909ms step_avg:579.33ms +step:24177/57344 train_time:14005926ms step_avg:579.31ms +step:24178/57344 train_time:14006174ms step_avg:579.29ms +step:24179/57344 train_time:14006721ms step_avg:579.29ms +grad accum step:6045/14336 +step:24180/57344 train_time:14008019ms step_avg:579.32ms +step:24181/57344 train_time:14008035ms step_avg:579.30ms +step:24182/57344 train_time:14008281ms step_avg:579.29ms +step:24183/57344 train_time:14008828ms step_avg:579.28ms +grad accum step:6046/14336 +step:24184/57344 train_time:14010152ms step_avg:579.31ms +step:24185/57344 train_time:14010169ms step_avg:579.29ms +step:24186/57344 train_time:14010415ms step_avg:579.28ms +step:24187/57344 train_time:14010959ms step_avg:579.28ms +grad accum step:6047/14336 +step:24188/57344 train_time:14012279ms step_avg:579.31ms +step:24189/57344 train_time:14012295ms step_avg:579.28ms +step:24190/57344 train_time:14012546ms step_avg:579.27ms +step:24191/57344 train_time:14013103ms step_avg:579.27ms +grad accum step:6048/14336 +step:24192/57344 train_time:14014404ms step_avg:579.30ms +step:24192/57344 val_loss:6.861302 train_time:14014405ms step_avg:579.30ms +step:24193/57344 train_time:14014418ms step_avg:579.28ms +step:24194/57344 train_time:14014637ms step_avg:579.26ms +step:24195/57344 train_time:14015183ms step_avg:579.26ms +grad accum step:6049/14336 +step:24196/57344 train_time:14016462ms step_avg:579.29ms +step:24197/57344 train_time:14016479ms step_avg:579.27ms +step:24198/57344 train_time:14016728ms step_avg:579.25ms +step:24199/57344 train_time:14017273ms step_avg:579.25ms +grad accum step:6050/14336 +step:24200/57344 train_time:14018564ms step_avg:579.28ms +step:24201/57344 train_time:14018581ms step_avg:579.26ms +step:24202/57344 train_time:14018829ms step_avg:579.24ms +step:24203/57344 train_time:14019376ms step_avg:579.24ms +grad accum step:6051/14336 +step:24204/57344 train_time:14020690ms step_avg:579.27ms +step:24205/57344 train_time:14020708ms step_avg:579.25ms +step:24206/57344 train_time:14020959ms step_avg:579.23ms +step:24207/57344 train_time:14021509ms step_avg:579.23ms +grad accum step:6052/14336 +step:24208/57344 train_time:14022819ms step_avg:579.26ms +step:24209/57344 train_time:14022836ms step_avg:579.24ms +step:24210/57344 train_time:14023081ms step_avg:579.23ms +step:24211/57344 train_time:14023625ms step_avg:579.23ms +grad accum step:6053/14336 +step:24212/57344 train_time:14024927ms step_avg:579.26ms +step:24213/57344 train_time:14024944ms step_avg:579.23ms +step:24214/57344 train_time:14025195ms step_avg:579.22ms +step:24215/57344 train_time:14025755ms step_avg:579.22ms +grad accum step:6054/14336 +step:24216/57344 train_time:14027043ms step_avg:579.25ms +step:24217/57344 train_time:14027059ms step_avg:579.22ms +step:24218/57344 train_time:14027304ms step_avg:579.21ms +step:24219/57344 train_time:14027852ms step_avg:579.21ms +grad accum step:6055/14336 +step:24220/57344 train_time:14029147ms step_avg:579.24ms +step:24221/57344 train_time:14029164ms step_avg:579.21ms +step:24222/57344 train_time:14029415ms step_avg:579.20ms +step:24223/57344 train_time:14029968ms step_avg:579.20ms +grad accum step:6056/14336 +step:24224/57344 train_time:14031284ms step_avg:579.23ms +step:24225/57344 train_time:14031300ms step_avg:579.21ms +step:24226/57344 train_time:14031545ms step_avg:579.19ms +step:24227/57344 train_time:14032090ms step_avg:579.19ms +grad accum step:6057/14336 +step:24228/57344 train_time:14033389ms step_avg:579.22ms +step:24229/57344 train_time:14033406ms step_avg:579.20ms +step:24230/57344 train_time:14033656ms step_avg:579.19ms +step:24231/57344 train_time:14034204ms step_avg:579.18ms +grad accum step:6058/14336 +step:24232/57344 train_time:14035503ms step_avg:579.21ms +step:24233/57344 train_time:14035520ms step_avg:579.19ms +step:24234/57344 train_time:14035768ms step_avg:579.18ms +step:24235/57344 train_time:14036320ms step_avg:579.18ms +grad accum step:6059/14336 +step:24236/57344 train_time:14037616ms step_avg:579.21ms +step:24237/57344 train_time:14037633ms step_avg:579.18ms +step:24238/57344 train_time:14037883ms step_avg:579.17ms +step:24239/57344 train_time:14038437ms step_avg:579.17ms +grad accum step:6060/14336 +step:24240/57344 train_time:14039730ms step_avg:579.20ms +step:24241/57344 train_time:14039747ms step_avg:579.17ms +step:24242/57344 train_time:14039995ms step_avg:579.16ms +step:24243/57344 train_time:14040545ms step_avg:579.16ms +grad accum step:6061/14336 +step:24244/57344 train_time:14041872ms step_avg:579.19ms +step:24245/57344 train_time:14041889ms step_avg:579.17ms +step:24246/57344 train_time:14042140ms step_avg:579.15ms +step:24247/57344 train_time:14042695ms step_avg:579.15ms +grad accum step:6062/14336 +step:24248/57344 train_time:14044017ms step_avg:579.18ms +step:24249/57344 train_time:14044034ms step_avg:579.16ms +step:24250/57344 train_time:14044279ms step_avg:579.15ms +step:24251/57344 train_time:14044833ms step_avg:579.14ms +grad accum step:6063/14336 +step:24252/57344 train_time:14046146ms step_avg:579.17ms +step:24253/57344 train_time:14046163ms step_avg:579.15ms +step:24254/57344 train_time:14046403ms step_avg:579.14ms +step:24255/57344 train_time:14046936ms step_avg:579.14ms +grad accum step:6064/14336 +step:24256/57344 train_time:14048287ms step_avg:579.17ms +step:24256/57344 val_loss:6.871196 train_time:14048288ms step_avg:579.17ms +step:24257/57344 train_time:14048300ms step_avg:579.14ms +step:24258/57344 train_time:14048531ms step_avg:579.13ms +step:24259/57344 train_time:14049100ms step_avg:579.13ms +grad accum step:6065/14336 +step:24260/57344 train_time:14050409ms step_avg:579.16ms +step:24261/57344 train_time:14050426ms step_avg:579.14ms +step:24262/57344 train_time:14050673ms step_avg:579.12ms +step:24263/57344 train_time:14051221ms step_avg:579.12ms +grad accum step:6066/14336 +step:24264/57344 train_time:14052533ms step_avg:579.15ms +step:24265/57344 train_time:14052550ms step_avg:579.13ms +step:24266/57344 train_time:14052811ms step_avg:579.12ms +step:24267/57344 train_time:14053394ms step_avg:579.12ms +grad accum step:6067/14336 +step:24268/57344 train_time:14054699ms step_avg:579.15ms +step:24269/57344 train_time:14054716ms step_avg:579.12ms +step:24270/57344 train_time:14054960ms step_avg:579.11ms +step:24271/57344 train_time:14055502ms step_avg:579.11ms +grad accum step:6068/14336 +step:24272/57344 train_time:14056804ms step_avg:579.14ms +step:24273/57344 train_time:14056821ms step_avg:579.11ms +step:24274/57344 train_time:14057065ms step_avg:579.10ms +step:24275/57344 train_time:14057609ms step_avg:579.10ms +grad accum step:6069/14336 +step:24276/57344 train_time:14058908ms step_avg:579.13ms +step:24277/57344 train_time:14058925ms step_avg:579.10ms +step:24278/57344 train_time:14059176ms step_avg:579.09ms +step:24279/57344 train_time:14059730ms step_avg:579.09ms +grad accum step:6070/14336 +step:24280/57344 train_time:14061040ms step_avg:579.12ms +step:24281/57344 train_time:14061057ms step_avg:579.10ms +step:24282/57344 train_time:14061303ms step_avg:579.08ms +step:24283/57344 train_time:14061848ms step_avg:579.08ms +grad accum step:6071/14336 +step:24284/57344 train_time:14063132ms step_avg:579.11ms +step:24285/57344 train_time:14063149ms step_avg:579.09ms +step:24286/57344 train_time:14063392ms step_avg:579.07ms +step:24287/57344 train_time:14063937ms step_avg:579.07ms +grad accum step:6072/14336 +step:24288/57344 train_time:14065243ms step_avg:579.10ms +step:24289/57344 train_time:14065259ms step_avg:579.08ms +step:24290/57344 train_time:14065507ms step_avg:579.07ms +step:24291/57344 train_time:14066055ms step_avg:579.06ms +grad accum step:6073/14336 +step:24292/57344 train_time:14067375ms step_avg:579.09ms +step:24293/57344 train_time:14067392ms step_avg:579.07ms +step:24294/57344 train_time:14067637ms step_avg:579.06ms +step:24295/57344 train_time:14068178ms step_avg:579.06ms +grad accum step:6074/14336 +step:24296/57344 train_time:14069473ms step_avg:579.09ms +step:24297/57344 train_time:14069485ms step_avg:579.06ms +step:24298/57344 train_time:14069729ms step_avg:579.05ms +step:24299/57344 train_time:14070277ms step_avg:579.05ms +grad accum step:6075/14336 +step:24300/57344 train_time:14071601ms step_avg:579.08ms +step:24301/57344 train_time:14071618ms step_avg:579.06ms +step:24302/57344 train_time:14071868ms step_avg:579.04ms +step:24303/57344 train_time:14072421ms step_avg:579.04ms +grad accum step:6076/14336 +step:24304/57344 train_time:14073738ms step_avg:579.07ms +step:24305/57344 train_time:14073755ms step_avg:579.05ms +step:24306/57344 train_time:14074004ms step_avg:579.03ms +step:24307/57344 train_time:14074555ms step_avg:579.03ms +grad accum step:6077/14336 +step:24308/57344 train_time:14075890ms step_avg:579.06ms +step:24309/57344 train_time:14075907ms step_avg:579.04ms +step:24310/57344 train_time:14076156ms step_avg:579.03ms +step:24311/57344 train_time:14076699ms step_avg:579.03ms +grad accum step:6078/14336 +step:24312/57344 train_time:14077984ms step_avg:579.05ms +step:24313/57344 train_time:14078001ms step_avg:579.03ms +step:24314/57344 train_time:14078252ms step_avg:579.02ms +step:24315/57344 train_time:14078816ms step_avg:579.02ms +grad accum step:6079/14336 +step:24316/57344 train_time:14080132ms step_avg:579.05ms +step:24317/57344 train_time:14080149ms step_avg:579.02ms +step:24318/57344 train_time:14080396ms step_avg:579.01ms +step:24319/57344 train_time:14080953ms step_avg:579.01ms +grad accum step:6080/14336 +step:24320/57344 train_time:14082306ms step_avg:579.04ms +step:24320/57344 val_loss:6.885727 train_time:14082307ms step_avg:579.04ms +step:24321/57344 train_time:14082319ms step_avg:579.02ms +step:24322/57344 train_time:14082547ms step_avg:579.00ms +step:24323/57344 train_time:14083098ms step_avg:579.00ms +grad accum step:6081/14336 +step:24324/57344 train_time:14084414ms step_avg:579.03ms +step:24325/57344 train_time:14084430ms step_avg:579.01ms +step:24326/57344 train_time:14084677ms step_avg:579.00ms +step:24327/57344 train_time:14085217ms step_avg:579.00ms +grad accum step:6082/14336 +step:24328/57344 train_time:14086494ms step_avg:579.02ms +step:24329/57344 train_time:14086512ms step_avg:579.00ms +step:24330/57344 train_time:14086759ms step_avg:578.99ms +step:24331/57344 train_time:14087309ms step_avg:578.99ms +grad accum step:6083/14336 +step:24332/57344 train_time:14088604ms step_avg:579.02ms +step:24333/57344 train_time:14088621ms step_avg:578.99ms +step:24334/57344 train_time:14088873ms step_avg:578.98ms +step:24335/57344 train_time:14089435ms step_avg:578.98ms +grad accum step:6084/14336 +step:24336/57344 train_time:14090757ms step_avg:579.01ms +step:24337/57344 train_time:14090774ms step_avg:578.99ms +step:24338/57344 train_time:14091025ms step_avg:578.97ms +step:24339/57344 train_time:14091591ms step_avg:578.97ms +grad accum step:6085/14336 +step:24340/57344 train_time:14092892ms step_avg:579.00ms +step:24341/57344 train_time:14092909ms step_avg:578.98ms +step:24342/57344 train_time:14093153ms step_avg:578.96ms +step:24343/57344 train_time:14093692ms step_avg:578.96ms +grad accum step:6086/14336 +step:24344/57344 train_time:14094998ms step_avg:578.99ms +step:24345/57344 train_time:14095015ms step_avg:578.97ms +step:24346/57344 train_time:14095267ms step_avg:578.96ms +step:24347/57344 train_time:14095825ms step_avg:578.96ms +grad accum step:6087/14336 +step:24348/57344 train_time:14097157ms step_avg:578.99ms +step:24349/57344 train_time:14097174ms step_avg:578.96ms +step:24350/57344 train_time:14097426ms step_avg:578.95ms +step:24351/57344 train_time:14097985ms step_avg:578.95ms +grad accum step:6088/14336 +step:24352/57344 train_time:14099314ms step_avg:578.98ms +step:24353/57344 train_time:14099331ms step_avg:578.96ms +step:24354/57344 train_time:14099578ms step_avg:578.94ms +step:24355/57344 train_time:14100120ms step_avg:578.94ms +grad accum step:6089/14336 +step:24356/57344 train_time:14101394ms step_avg:578.97ms +step:24357/57344 train_time:14101411ms step_avg:578.95ms +step:24358/57344 train_time:14101660ms step_avg:578.93ms +step:24359/57344 train_time:14102215ms step_avg:578.93ms +grad accum step:6090/14336 +step:24360/57344 train_time:14103521ms step_avg:578.96ms +step:24361/57344 train_time:14103537ms step_avg:578.94ms +step:24362/57344 train_time:14103783ms step_avg:578.93ms +step:24363/57344 train_time:14104331ms step_avg:578.92ms +grad accum step:6091/14336 +step:24364/57344 train_time:14105645ms step_avg:578.95ms +step:24365/57344 train_time:14105662ms step_avg:578.93ms +step:24366/57344 train_time:14105913ms step_avg:578.92ms +step:24367/57344 train_time:14106464ms step_avg:578.92ms +grad accum step:6092/14336 +step:24368/57344 train_time:14107770ms step_avg:578.95ms +step:24369/57344 train_time:14107786ms step_avg:578.92ms +step:24370/57344 train_time:14108038ms step_avg:578.91ms +step:24371/57344 train_time:14108605ms step_avg:578.91ms +grad accum step:6093/14336 +step:24372/57344 train_time:14109913ms step_avg:578.94ms +step:24373/57344 train_time:14109930ms step_avg:578.92ms +step:24374/57344 train_time:14110183ms step_avg:578.90ms +step:24375/57344 train_time:14110742ms step_avg:578.90ms +grad accum step:6094/14336 +step:24376/57344 train_time:14112030ms step_avg:578.93ms +step:24377/57344 train_time:14112047ms step_avg:578.91ms +step:24378/57344 train_time:14112290ms step_avg:578.89ms +step:24379/57344 train_time:14112822ms step_avg:578.89ms +grad accum step:6095/14336 +step:24380/57344 train_time:14114121ms step_avg:578.92ms +step:24381/57344 train_time:14114138ms step_avg:578.90ms +step:24382/57344 train_time:14114386ms step_avg:578.89ms +step:24383/57344 train_time:14114933ms step_avg:578.88ms +grad accum step:6096/14336 +step:24384/57344 train_time:14116283ms step_avg:578.92ms +step:24384/57344 val_loss:6.892621 train_time:14116283ms step_avg:578.92ms +step:24385/57344 train_time:14116296ms step_avg:578.89ms +step:24386/57344 train_time:14116519ms step_avg:578.88ms +step:24387/57344 train_time:14117070ms step_avg:578.88ms +grad accum step:6097/14336 +step:24388/57344 train_time:14118413ms step_avg:578.91ms +step:24389/57344 train_time:14118430ms step_avg:578.89ms +step:24390/57344 train_time:14118675ms step_avg:578.87ms +step:24391/57344 train_time:14119219ms step_avg:578.87ms +grad accum step:6098/14336 +step:24392/57344 train_time:14120624ms step_avg:578.90ms +step:24393/57344 train_time:14120641ms step_avg:578.88ms +step:24394/57344 train_time:14120889ms step_avg:578.87ms +step:24395/57344 train_time:14121450ms step_avg:578.87ms +grad accum step:6099/14336 +step:24396/57344 train_time:14122784ms step_avg:578.90ms +step:24397/57344 train_time:14122801ms step_avg:578.87ms +step:24398/57344 train_time:14123051ms step_avg:578.86ms +step:24399/57344 train_time:14123600ms step_avg:578.86ms +grad accum step:6100/14336 +step:24400/57344 train_time:14124909ms step_avg:578.89ms +step:24401/57344 train_time:14124926ms step_avg:578.87ms +step:24402/57344 train_time:14125174ms step_avg:578.85ms +step:24403/57344 train_time:14125725ms step_avg:578.85ms +grad accum step:6101/14336 +step:24404/57344 train_time:14127038ms step_avg:578.88ms +step:24405/57344 train_time:14127055ms step_avg:578.86ms +step:24406/57344 train_time:14127304ms step_avg:578.85ms +step:24407/57344 train_time:14127858ms step_avg:578.84ms +grad accum step:6102/14336 +step:24408/57344 train_time:14129169ms step_avg:578.87ms +step:24409/57344 train_time:14129186ms step_avg:578.85ms +step:24410/57344 train_time:14129437ms step_avg:578.84ms +step:24411/57344 train_time:14130000ms step_avg:578.84ms +grad accum step:6103/14336 +step:24412/57344 train_time:14131334ms step_avg:578.87ms +step:24413/57344 train_time:14131351ms step_avg:578.85ms +step:24414/57344 train_time:14131601ms step_avg:578.83ms +step:24415/57344 train_time:14132157ms step_avg:578.83ms +grad accum step:6104/14336 +step:24416/57344 train_time:14133453ms step_avg:578.86ms +step:24417/57344 train_time:14133470ms step_avg:578.84ms +step:24418/57344 train_time:14133722ms step_avg:578.82ms +step:24419/57344 train_time:14134281ms step_avg:578.82ms +grad accum step:6105/14336 +step:24420/57344 train_time:14135599ms step_avg:578.85ms +step:24421/57344 train_time:14135616ms step_avg:578.83ms +step:24422/57344 train_time:14135866ms step_avg:578.82ms +step:24423/57344 train_time:14136418ms step_avg:578.82ms +grad accum step:6106/14336 +step:24424/57344 train_time:14137730ms step_avg:578.85ms +step:24425/57344 train_time:14137747ms step_avg:578.82ms +step:24426/57344 train_time:14137996ms step_avg:578.81ms +step:24427/57344 train_time:14138552ms step_avg:578.81ms +grad accum step:6107/14336 +step:24428/57344 train_time:14139848ms step_avg:578.84ms +step:24429/57344 train_time:14139865ms step_avg:578.81ms +step:24430/57344 train_time:14140110ms step_avg:578.80ms +step:24431/57344 train_time:14140651ms step_avg:578.80ms +grad accum step:6108/14336 +step:24432/57344 train_time:14142003ms step_avg:578.83ms +step:24433/57344 train_time:14142020ms step_avg:578.81ms +step:24434/57344 train_time:14142264ms step_avg:578.79ms +step:24435/57344 train_time:14142799ms step_avg:578.79ms +grad accum step:6109/14336 +step:24436/57344 train_time:14144069ms step_avg:578.82ms +step:24437/57344 train_time:14144086ms step_avg:578.80ms +step:24438/57344 train_time:14144337ms step_avg:578.78ms +step:24439/57344 train_time:14144898ms step_avg:578.78ms +grad accum step:6110/14336 +step:24440/57344 train_time:14146219ms step_avg:578.81ms +step:24441/57344 train_time:14146235ms step_avg:578.79ms +step:24442/57344 train_time:14146482ms step_avg:578.78ms +step:24443/57344 train_time:14147023ms step_avg:578.78ms +grad accum step:6111/14336 +step:24444/57344 train_time:14148299ms step_avg:578.80ms +step:24445/57344 train_time:14148316ms step_avg:578.78ms +step:24446/57344 train_time:14148564ms step_avg:578.77ms +step:24447/57344 train_time:14149111ms step_avg:578.77ms +grad accum step:6112/14336 +step:24448/57344 train_time:14150403ms step_avg:578.80ms +step:24448/57344 val_loss:6.903215 train_time:14150403ms step_avg:578.80ms +step:24449/57344 train_time:14150416ms step_avg:578.77ms +step:24450/57344 train_time:14150647ms step_avg:578.76ms +step:24451/57344 train_time:14151214ms step_avg:578.76ms +grad accum step:6113/14336 +step:24452/57344 train_time:14152527ms step_avg:578.79ms +step:24453/57344 train_time:14152543ms step_avg:578.77ms +step:24454/57344 train_time:14152797ms step_avg:578.75ms +step:24455/57344 train_time:14153356ms step_avg:578.75ms +grad accum step:6114/14336 +step:24456/57344 train_time:14154633ms step_avg:578.78ms +step:24457/57344 train_time:14154650ms step_avg:578.76ms +step:24458/57344 train_time:14154895ms step_avg:578.74ms +step:24459/57344 train_time:14155439ms step_avg:578.74ms +grad accum step:6115/14336 +step:24460/57344 train_time:14156718ms step_avg:578.77ms +step:24461/57344 train_time:14156735ms step_avg:578.75ms +step:24462/57344 train_time:14156982ms step_avg:578.73ms +step:24463/57344 train_time:14157529ms step_avg:578.73ms +grad accum step:6116/14336 +step:24464/57344 train_time:14158821ms step_avg:578.76ms +step:24465/57344 train_time:14158839ms step_avg:578.74ms +step:24466/57344 train_time:14159095ms step_avg:578.73ms +step:24467/57344 train_time:14159668ms step_avg:578.73ms +grad accum step:6117/14336 +step:24468/57344 train_time:14160972ms step_avg:578.75ms +step:24469/57344 train_time:14160989ms step_avg:578.73ms +step:24470/57344 train_time:14161242ms step_avg:578.72ms +step:24471/57344 train_time:14161800ms step_avg:578.72ms +grad accum step:6118/14336 +step:24472/57344 train_time:14163068ms step_avg:578.75ms +step:24473/57344 train_time:14163085ms step_avg:578.72ms +step:24474/57344 train_time:14163329ms step_avg:578.71ms +step:24475/57344 train_time:14163876ms step_avg:578.71ms +grad accum step:6119/14336 +step:24476/57344 train_time:14165172ms step_avg:578.74ms +step:24477/57344 train_time:14165189ms step_avg:578.71ms +step:24478/57344 train_time:14165437ms step_avg:578.70ms +step:24479/57344 train_time:14165990ms step_avg:578.70ms +grad accum step:6120/14336 +step:24480/57344 train_time:14167318ms step_avg:578.73ms +step:24481/57344 train_time:14167335ms step_avg:578.71ms +step:24482/57344 train_time:14167591ms step_avg:578.69ms +step:24483/57344 train_time:14168165ms step_avg:578.69ms +grad accum step:6121/14336 +step:24484/57344 train_time:14169462ms step_avg:578.72ms +step:24485/57344 train_time:14169479ms step_avg:578.70ms +step:24486/57344 train_time:14169729ms step_avg:578.69ms +step:24487/57344 train_time:14170276ms step_avg:578.69ms +grad accum step:6122/14336 +step:24488/57344 train_time:14171619ms step_avg:578.72ms +step:24489/57344 train_time:14171636ms step_avg:578.69ms +step:24490/57344 train_time:14171888ms step_avg:578.68ms +step:24491/57344 train_time:14172460ms step_avg:578.68ms +grad accum step:6123/14336 +step:24492/57344 train_time:14173795ms step_avg:578.71ms +step:24493/57344 train_time:14173812ms step_avg:578.69ms +step:24494/57344 train_time:14174058ms step_avg:578.67ms +step:24495/57344 train_time:14174605ms step_avg:578.67ms +grad accum step:6124/14336 +step:24496/57344 train_time:14175920ms step_avg:578.70ms +step:24497/57344 train_time:14175936ms step_avg:578.68ms +step:24498/57344 train_time:14176179ms step_avg:578.67ms +step:24499/57344 train_time:14176722ms step_avg:578.67ms +grad accum step:6125/14336 +step:24500/57344 train_time:14178014ms step_avg:578.69ms +step:24501/57344 train_time:14178031ms step_avg:578.67ms +step:24502/57344 train_time:14178278ms step_avg:578.66ms +step:24503/57344 train_time:14178829ms step_avg:578.66ms +grad accum step:6126/14336 +step:24504/57344 train_time:14180141ms step_avg:578.69ms +step:24505/57344 train_time:14180158ms step_avg:578.66ms +step:24506/57344 train_time:14180407ms step_avg:578.65ms +step:24507/57344 train_time:14180959ms step_avg:578.65ms +grad accum step:6127/14336 +step:24508/57344 train_time:14182269ms step_avg:578.68ms +step:24509/57344 train_time:14182286ms step_avg:578.66ms +step:24510/57344 train_time:14182533ms step_avg:578.64ms +step:24511/57344 train_time:14183096ms step_avg:578.64ms +grad accum step:6128/14336 +step:24512/57344 train_time:14184432ms step_avg:578.67ms +step:24512/57344 val_loss:6.909631 train_time:14184433ms step_avg:578.67ms +step:24513/57344 train_time:14184445ms step_avg:578.65ms +step:24514/57344 train_time:14184668ms step_avg:578.64ms +step:24515/57344 train_time:14185219ms step_avg:578.63ms +grad accum step:6129/14336 +step:24516/57344 train_time:14186519ms step_avg:578.66ms +step:24517/57344 train_time:14186536ms step_avg:578.64ms +step:24518/57344 train_time:14186783ms step_avg:578.63ms +step:24519/57344 train_time:14187328ms step_avg:578.63ms +grad accum step:6130/14336 +step:24520/57344 train_time:14188650ms step_avg:578.66ms +step:24521/57344 train_time:14188667ms step_avg:578.63ms +step:24522/57344 train_time:14188916ms step_avg:578.62ms +step:24523/57344 train_time:14189469ms step_avg:578.62ms +grad accum step:6131/14336 +step:24524/57344 train_time:14190761ms step_avg:578.65ms +step:24525/57344 train_time:14190778ms step_avg:578.62ms +step:24526/57344 train_time:14191027ms step_avg:578.61ms +step:24527/57344 train_time:14191580ms step_avg:578.61ms +grad accum step:6132/14336 +step:24528/57344 train_time:14192868ms step_avg:578.64ms +step:24529/57344 train_time:14192885ms step_avg:578.62ms +step:24530/57344 train_time:14193133ms step_avg:578.60ms +step:24531/57344 train_time:14193676ms step_avg:578.60ms +grad accum step:6133/14336 +step:24532/57344 train_time:14194975ms step_avg:578.63ms +step:24533/57344 train_time:14194992ms step_avg:578.61ms +step:24534/57344 train_time:14195240ms step_avg:578.59ms +step:24535/57344 train_time:14195785ms step_avg:578.59ms +grad accum step:6134/14336 +step:24536/57344 train_time:14197096ms step_avg:578.62ms +step:24537/57344 train_time:14197112ms step_avg:578.60ms +step:24538/57344 train_time:14197361ms step_avg:578.59ms +step:24539/57344 train_time:14197909ms step_avg:578.59ms +grad accum step:6135/14336 +step:24540/57344 train_time:14199202ms step_avg:578.61ms +step:24541/57344 train_time:14199219ms step_avg:578.59ms +step:24542/57344 train_time:14199463ms step_avg:578.58ms +step:24543/57344 train_time:14200011ms step_avg:578.58ms +grad accum step:6136/14336 +step:24544/57344 train_time:14201341ms step_avg:578.61ms +step:24545/57344 train_time:14201358ms step_avg:578.58ms +step:24546/57344 train_time:14201609ms step_avg:578.57ms +step:24547/57344 train_time:14202158ms step_avg:578.57ms +grad accum step:6137/14336 +step:24548/57344 train_time:14203477ms step_avg:578.60ms +step:24549/57344 train_time:14203493ms step_avg:578.58ms +step:24550/57344 train_time:14203740ms step_avg:578.56ms +step:24551/57344 train_time:14204287ms step_avg:578.56ms +grad accum step:6138/14336 +step:24552/57344 train_time:14205608ms step_avg:578.59ms +step:24553/57344 train_time:14205625ms step_avg:578.57ms +step:24554/57344 train_time:14205869ms step_avg:578.56ms +step:24555/57344 train_time:14206417ms step_avg:578.55ms +grad accum step:6139/14336 +step:24556/57344 train_time:14207697ms step_avg:578.58ms +step:24557/57344 train_time:14207713ms step_avg:578.56ms +step:24558/57344 train_time:14207958ms step_avg:578.55ms +step:24559/57344 train_time:14208499ms step_avg:578.55ms +grad accum step:6140/14336 +step:24560/57344 train_time:14209800ms step_avg:578.57ms +step:24561/57344 train_time:14209816ms step_avg:578.55ms +step:24562/57344 train_time:14210082ms step_avg:578.54ms +step:24563/57344 train_time:14210695ms step_avg:578.54ms +grad accum step:6141/14336 +step:24564/57344 train_time:14212083ms step_avg:578.57ms +step:24565/57344 train_time:14212100ms step_avg:578.55ms +step:24566/57344 train_time:14212344ms step_avg:578.54ms +step:24567/57344 train_time:14212888ms step_avg:578.54ms +grad accum step:6142/14336 +step:24568/57344 train_time:14214207ms step_avg:578.57ms +step:24569/57344 train_time:14214224ms step_avg:578.54ms +step:24570/57344 train_time:14214485ms step_avg:578.53ms +step:24571/57344 train_time:14215065ms step_avg:578.53ms +grad accum step:6143/14336 +step:24572/57344 train_time:14216374ms step_avg:578.56ms +step:24573/57344 train_time:14216390ms step_avg:578.54ms +step:24574/57344 train_time:14216639ms step_avg:578.52ms +step:24575/57344 train_time:14217186ms step_avg:578.52ms +grad accum step:6144/14336 +step:24576/57344 train_time:14218493ms step_avg:578.55ms +step:24576/57344 val_loss:6.941854 train_time:14218493ms step_avg:578.55ms +step:24577/57344 train_time:14218506ms step_avg:578.53ms +step:24578/57344 train_time:14218738ms step_avg:578.51ms +step:24579/57344 train_time:14219301ms step_avg:578.51ms +grad accum step:6145/14336 +step:24580/57344 train_time:14220579ms step_avg:578.54ms +step:24581/57344 train_time:14220596ms step_avg:578.52ms +step:24582/57344 train_time:14220842ms step_avg:578.51ms +step:24583/57344 train_time:14221385ms step_avg:578.50ms +grad accum step:6146/14336 +step:24584/57344 train_time:14222687ms step_avg:578.53ms +step:24585/57344 train_time:14222703ms step_avg:578.51ms +step:24586/57344 train_time:14222949ms step_avg:578.50ms +step:24587/57344 train_time:14223494ms step_avg:578.50ms +grad accum step:6147/14336 +step:24588/57344 train_time:14224784ms step_avg:578.53ms +step:24589/57344 train_time:14224801ms step_avg:578.50ms +step:24590/57344 train_time:14225043ms step_avg:578.49ms +step:24591/57344 train_time:14225590ms step_avg:578.49ms +grad accum step:6148/14336 +step:24592/57344 train_time:14226944ms step_avg:578.52ms +step:24593/57344 train_time:14226961ms step_avg:578.50ms +step:24594/57344 train_time:14227210ms step_avg:578.48ms +step:24595/57344 train_time:14227758ms step_avg:578.48ms +grad accum step:6149/14336 +step:24596/57344 train_time:14229074ms step_avg:578.51ms +step:24597/57344 train_time:14229091ms step_avg:578.49ms +step:24598/57344 train_time:14229336ms step_avg:578.48ms +step:24599/57344 train_time:14229881ms step_avg:578.47ms +grad accum step:6150/14336 +step:24600/57344 train_time:14231194ms step_avg:578.50ms +step:24601/57344 train_time:14231212ms step_avg:578.48ms +step:24602/57344 train_time:14231463ms step_avg:578.47ms +step:24603/57344 train_time:14232030ms step_avg:578.47ms +grad accum step:6151/14336 +step:24604/57344 train_time:14233343ms step_avg:578.50ms +step:24605/57344 train_time:14233360ms step_avg:578.47ms +step:24606/57344 train_time:14233609ms step_avg:578.46ms +step:24607/57344 train_time:14234159ms step_avg:578.46ms +grad accum step:6152/14336 +step:24608/57344 train_time:14235482ms step_avg:578.49ms +step:24609/57344 train_time:14235498ms step_avg:578.47ms +step:24610/57344 train_time:14235742ms step_avg:578.45ms +step:24611/57344 train_time:14236284ms step_avg:578.45ms +grad accum step:6153/14336 +step:24612/57344 train_time:14237591ms step_avg:578.48ms +step:24613/57344 train_time:14237607ms step_avg:578.46ms +step:24614/57344 train_time:14237857ms step_avg:578.45ms +step:24615/57344 train_time:14238424ms step_avg:578.45ms +grad accum step:6154/14336 +step:24616/57344 train_time:14239750ms step_avg:578.48ms +step:24617/57344 train_time:14239767ms step_avg:578.45ms +step:24618/57344 train_time:14240019ms step_avg:578.44ms +step:24619/57344 train_time:14240573ms step_avg:578.44ms +grad accum step:6155/14336 +step:24620/57344 train_time:14241876ms step_avg:578.47ms +step:24621/57344 train_time:14241893ms step_avg:578.44ms +step:24622/57344 train_time:14242144ms step_avg:578.43ms +step:24623/57344 train_time:14242702ms step_avg:578.43ms +grad accum step:6156/14336 +step:24624/57344 train_time:14243999ms step_avg:578.46ms +step:24625/57344 train_time:14244016ms step_avg:578.44ms +step:24626/57344 train_time:14244263ms step_avg:578.42ms +step:24627/57344 train_time:14244813ms step_avg:578.42ms +grad accum step:6157/14336 +step:24628/57344 train_time:14246121ms step_avg:578.45ms +step:24629/57344 train_time:14246138ms step_avg:578.43ms +step:24630/57344 train_time:14246382ms step_avg:578.42ms +step:24631/57344 train_time:14246931ms step_avg:578.41ms +grad accum step:6158/14336 +step:24632/57344 train_time:14248253ms step_avg:578.44ms +step:24633/57344 train_time:14248269ms step_avg:578.42ms +step:24634/57344 train_time:14248532ms step_avg:578.41ms +step:24635/57344 train_time:14249117ms step_avg:578.41ms +grad accum step:6159/14336 +step:24636/57344 train_time:14250393ms step_avg:578.44ms +step:24637/57344 train_time:14250409ms step_avg:578.41ms +step:24638/57344 train_time:14250657ms step_avg:578.40ms +step:24639/57344 train_time:14251203ms step_avg:578.40ms +grad accum step:6160/14336 +step:24640/57344 train_time:14252530ms step_avg:578.43ms +step:24640/57344 val_loss:6.927251 train_time:14252531ms step_avg:578.43ms +step:24641/57344 train_time:14252543ms step_avg:578.41ms +step:24642/57344 train_time:14252777ms step_avg:578.39ms +step:24643/57344 train_time:14253343ms step_avg:578.39ms +grad accum step:6161/14336 +step:24644/57344 train_time:14254622ms step_avg:578.42ms +step:24645/57344 train_time:14254639ms step_avg:578.40ms +step:24646/57344 train_time:14254890ms step_avg:578.39ms +step:24647/57344 train_time:14255448ms step_avg:578.38ms +grad accum step:6162/14336 +step:24648/57344 train_time:14256746ms step_avg:578.41ms +step:24649/57344 train_time:14256762ms step_avg:578.39ms +step:24650/57344 train_time:14257019ms step_avg:578.38ms +step:24651/57344 train_time:14257587ms step_avg:578.38ms +grad accum step:6163/14336 +step:24652/57344 train_time:14258915ms step_avg:578.41ms +step:24653/57344 train_time:14258932ms step_avg:578.39ms +step:24654/57344 train_time:14259181ms step_avg:578.37ms +step:24655/57344 train_time:14259737ms step_avg:578.37ms +grad accum step:6164/14336 +step:24656/57344 train_time:14261030ms step_avg:578.40ms +step:24657/57344 train_time:14261047ms step_avg:578.38ms +step:24658/57344 train_time:14261294ms step_avg:578.36ms +step:24659/57344 train_time:14261840ms step_avg:578.36ms +grad accum step:6165/14336 +step:24660/57344 train_time:14263135ms step_avg:578.39ms +step:24661/57344 train_time:14263151ms step_avg:578.37ms +step:24662/57344 train_time:14263399ms step_avg:578.36ms +step:24663/57344 train_time:14263947ms step_avg:578.35ms +grad accum step:6166/14336 +step:24664/57344 train_time:14265252ms step_avg:578.38ms +step:24665/57344 train_time:14265269ms step_avg:578.36ms +step:24666/57344 train_time:14265520ms step_avg:578.35ms +step:24667/57344 train_time:14266074ms step_avg:578.35ms +grad accum step:6167/14336 +step:24668/57344 train_time:14267403ms step_avg:578.38ms +step:24669/57344 train_time:14267420ms step_avg:578.35ms +step:24670/57344 train_time:14267664ms step_avg:578.34ms +step:24671/57344 train_time:14268205ms step_avg:578.34ms +grad accum step:6168/14336 +step:24672/57344 train_time:14269481ms step_avg:578.37ms +step:24673/57344 train_time:14269499ms step_avg:578.34ms +step:24674/57344 train_time:14269743ms step_avg:578.33ms +step:24675/57344 train_time:14270296ms step_avg:578.33ms +grad accum step:6169/14336 +step:24676/57344 train_time:14271655ms step_avg:578.36ms +step:24677/57344 train_time:14271671ms step_avg:578.34ms +step:24678/57344 train_time:14271922ms step_avg:578.33ms +step:24679/57344 train_time:14272490ms step_avg:578.33ms +grad accum step:6170/14336 +step:24680/57344 train_time:14273819ms step_avg:578.36ms +step:24681/57344 train_time:14273836ms step_avg:578.33ms +step:24682/57344 train_time:14274085ms step_avg:578.32ms +step:24683/57344 train_time:14274632ms step_avg:578.32ms +grad accum step:6171/14336 +step:24684/57344 train_time:14275950ms step_avg:578.35ms +step:24685/57344 train_time:14275967ms step_avg:578.33ms +step:24686/57344 train_time:14276213ms step_avg:578.31ms +step:24687/57344 train_time:14276760ms step_avg:578.31ms +grad accum step:6172/14336 +step:24688/57344 train_time:14278099ms step_avg:578.34ms +step:24689/57344 train_time:14278116ms step_avg:578.32ms +step:24690/57344 train_time:14278362ms step_avg:578.31ms +step:24691/57344 train_time:14278925ms step_avg:578.30ms +grad accum step:6173/14336 +step:24692/57344 train_time:14280267ms step_avg:578.34ms +step:24693/57344 train_time:14280285ms step_avg:578.31ms +step:24694/57344 train_time:14280527ms step_avg:578.30ms +step:24695/57344 train_time:14281072ms step_avg:578.30ms +grad accum step:6174/14336 +step:24696/57344 train_time:14282371ms step_avg:578.33ms +step:24697/57344 train_time:14282388ms step_avg:578.30ms +step:24698/57344 train_time:14282638ms step_avg:578.29ms +step:24699/57344 train_time:14283190ms step_avg:578.29ms +grad accum step:6175/14336 +step:24700/57344 train_time:14284505ms step_avg:578.32ms +step:24701/57344 train_time:14284522ms step_avg:578.30ms +step:24702/57344 train_time:14284775ms step_avg:578.28ms +step:24703/57344 train_time:14285337ms step_avg:578.28ms +grad accum step:6176/14336 +step:24704/57344 train_time:14286628ms step_avg:578.31ms +step:24704/57344 val_loss:6.931237 train_time:14286629ms step_avg:578.31ms +step:24705/57344 train_time:14286641ms step_avg:578.29ms +step:24706/57344 train_time:14286865ms step_avg:578.28ms +step:24707/57344 train_time:14287414ms step_avg:578.27ms +grad accum step:6177/14336 +step:24708/57344 train_time:14288748ms step_avg:578.30ms +step:24709/57344 train_time:14288765ms step_avg:578.28ms +step:24710/57344 train_time:14289009ms step_avg:578.27ms +step:24711/57344 train_time:14289554ms step_avg:578.27ms +grad accum step:6178/14336 +step:24712/57344 train_time:14290878ms step_avg:578.30ms +step:24713/57344 train_time:14290894ms step_avg:578.27ms +step:24714/57344 train_time:14291139ms step_avg:578.26ms +step:24715/57344 train_time:14291687ms step_avg:578.26ms +grad accum step:6179/14336 +step:24716/57344 train_time:14292995ms step_avg:578.29ms +step:24717/57344 train_time:14293012ms step_avg:578.27ms +step:24718/57344 train_time:14293266ms step_avg:578.25ms +step:24719/57344 train_time:14293836ms step_avg:578.25ms +grad accum step:6180/14336 +step:24720/57344 train_time:14295160ms step_avg:578.28ms +step:24721/57344 train_time:14295177ms step_avg:578.26ms +step:24722/57344 train_time:14295424ms step_avg:578.25ms +step:24723/57344 train_time:14295975ms step_avg:578.25ms +grad accum step:6181/14336 +step:24724/57344 train_time:14297279ms step_avg:578.28ms +step:24725/57344 train_time:14297296ms step_avg:578.25ms +step:24726/57344 train_time:14297551ms step_avg:578.24ms +step:24727/57344 train_time:14298118ms step_avg:578.24ms +grad accum step:6182/14336 +step:24728/57344 train_time:14299429ms step_avg:578.27ms +step:24729/57344 train_time:14299446ms step_avg:578.25ms +step:24730/57344 train_time:14299698ms step_avg:578.23ms +step:24731/57344 train_time:14300252ms step_avg:578.23ms +grad accum step:6183/14336 +step:24732/57344 train_time:14301569ms step_avg:578.26ms +step:24733/57344 train_time:14301586ms step_avg:578.24ms +step:24734/57344 train_time:14301834ms step_avg:578.23ms +step:24735/57344 train_time:14302381ms step_avg:578.22ms +grad accum step:6184/14336 +step:24736/57344 train_time:14303673ms step_avg:578.25ms +step:24737/57344 train_time:14303690ms step_avg:578.23ms +step:24738/57344 train_time:14303947ms step_avg:578.22ms +step:24739/57344 train_time:14304519ms step_avg:578.22ms +grad accum step:6185/14336 +step:24740/57344 train_time:14305839ms step_avg:578.25ms +step:24741/57344 train_time:14305856ms step_avg:578.22ms +step:24742/57344 train_time:14306101ms step_avg:578.21ms +step:24743/57344 train_time:14306647ms step_avg:578.21ms +grad accum step:6186/14336 +step:24744/57344 train_time:14307949ms step_avg:578.24ms +step:24745/57344 train_time:14307966ms step_avg:578.22ms +step:24746/57344 train_time:14308216ms step_avg:578.20ms +step:24747/57344 train_time:14308774ms step_avg:578.20ms +grad accum step:6187/14336 +step:24748/57344 train_time:14310136ms step_avg:578.23ms +step:24749/57344 train_time:14310153ms step_avg:578.21ms +step:24750/57344 train_time:14310396ms step_avg:578.20ms +step:24751/57344 train_time:14310937ms step_avg:578.20ms +grad accum step:6188/14336 +step:24752/57344 train_time:14312257ms step_avg:578.23ms +step:24753/57344 train_time:14312274ms step_avg:578.20ms +step:24754/57344 train_time:14312516ms step_avg:578.19ms +step:24755/57344 train_time:14313053ms step_avg:578.19ms +grad accum step:6189/14336 +step:24756/57344 train_time:14314360ms step_avg:578.22ms +step:24757/57344 train_time:14314377ms step_avg:578.20ms +step:24758/57344 train_time:14314629ms step_avg:578.18ms +step:24759/57344 train_time:14315189ms step_avg:578.18ms +grad accum step:6190/14336 +step:24760/57344 train_time:14316491ms step_avg:578.21ms +step:24761/57344 train_time:14316507ms step_avg:578.19ms +step:24762/57344 train_time:14316756ms step_avg:578.17ms +step:24763/57344 train_time:14317310ms step_avg:578.17ms +grad accum step:6191/14336 +step:24764/57344 train_time:14318601ms step_avg:578.20ms +step:24765/57344 train_time:14318615ms step_avg:578.18ms +step:24766/57344 train_time:14318861ms step_avg:578.17ms +step:24767/57344 train_time:14319404ms step_avg:578.16ms +grad accum step:6192/14336 +step:24768/57344 train_time:14320683ms step_avg:578.19ms +step:24768/57344 val_loss:6.934501 train_time:14320684ms step_avg:578.19ms +step:24769/57344 train_time:14320697ms step_avg:578.17ms +step:24770/57344 train_time:14320922ms step_avg:578.16ms +step:24771/57344 train_time:14321469ms step_avg:578.15ms +grad accum step:6193/14336 +step:24772/57344 train_time:14322825ms step_avg:578.19ms +step:24773/57344 train_time:14322841ms step_avg:578.16ms +step:24774/57344 train_time:14323087ms step_avg:578.15ms +step:24775/57344 train_time:14323649ms step_avg:578.15ms +grad accum step:6194/14336 +step:24776/57344 train_time:14325045ms step_avg:578.18ms +step:24777/57344 train_time:14325061ms step_avg:578.16ms +step:24778/57344 train_time:14325311ms step_avg:578.15ms +step:24779/57344 train_time:14325867ms step_avg:578.15ms +grad accum step:6195/14336 +step:24780/57344 train_time:14327163ms step_avg:578.17ms +step:24781/57344 train_time:14327180ms step_avg:578.15ms +step:24782/57344 train_time:14327423ms step_avg:578.14ms +step:24783/57344 train_time:14327972ms step_avg:578.14ms +grad accum step:6196/14336 +step:24784/57344 train_time:14329313ms step_avg:578.17ms +step:24785/57344 train_time:14329330ms step_avg:578.15ms +step:24786/57344 train_time:14329576ms step_avg:578.13ms +step:24787/57344 train_time:14330122ms step_avg:578.13ms +grad accum step:6197/14336 +step:24788/57344 train_time:14331436ms step_avg:578.16ms +step:24789/57344 train_time:14331453ms step_avg:578.14ms +step:24790/57344 train_time:14331713ms step_avg:578.12ms +step:24791/57344 train_time:14332294ms step_avg:578.12ms +grad accum step:6198/14336 +step:24792/57344 train_time:14333600ms step_avg:578.15ms +step:24793/57344 train_time:14333617ms step_avg:578.13ms +step:24794/57344 train_time:14333863ms step_avg:578.12ms +step:24795/57344 train_time:14334407ms step_avg:578.12ms +grad accum step:6199/14336 +step:24796/57344 train_time:14335709ms step_avg:578.15ms +step:24797/57344 train_time:14335726ms step_avg:578.12ms +step:24798/57344 train_time:14335972ms step_avg:578.11ms +step:24799/57344 train_time:14336521ms step_avg:578.11ms +grad accum step:6200/14336 +step:24800/57344 train_time:14337849ms step_avg:578.14ms +step:24801/57344 train_time:14337866ms step_avg:578.12ms +step:24802/57344 train_time:14338117ms step_avg:578.10ms +step:24803/57344 train_time:14338673ms step_avg:578.10ms +grad accum step:6201/14336 +step:24804/57344 train_time:14340007ms step_avg:578.13ms +step:24805/57344 train_time:14340024ms step_avg:578.11ms +step:24806/57344 train_time:14340274ms step_avg:578.10ms +step:24807/57344 train_time:14340838ms step_avg:578.10ms +grad accum step:6202/14336 +step:24808/57344 train_time:14342162ms step_avg:578.13ms +step:24809/57344 train_time:14342179ms step_avg:578.10ms +step:24810/57344 train_time:14342431ms step_avg:578.09ms +step:24811/57344 train_time:14342977ms step_avg:578.09ms +grad accum step:6203/14336 +step:24812/57344 train_time:14344254ms step_avg:578.12ms +step:24813/57344 train_time:14344271ms step_avg:578.09ms +step:24814/57344 train_time:14344527ms step_avg:578.08ms +step:24815/57344 train_time:14345098ms step_avg:578.08ms +grad accum step:6204/14336 +step:24816/57344 train_time:14346419ms step_avg:578.11ms +step:24817/57344 train_time:14346436ms step_avg:578.09ms +step:24818/57344 train_time:14346689ms step_avg:578.08ms +step:24819/57344 train_time:14347254ms step_avg:578.08ms +grad accum step:6205/14336 +step:24820/57344 train_time:14348567ms step_avg:578.11ms +step:24821/57344 train_time:14348584ms step_avg:578.08ms +step:24822/57344 train_time:14348833ms step_avg:578.07ms +step:24823/57344 train_time:14349388ms step_avg:578.07ms +grad accum step:6206/14336 +step:24824/57344 train_time:14350673ms step_avg:578.10ms +step:24825/57344 train_time:14350691ms step_avg:578.07ms +step:24826/57344 train_time:14350942ms step_avg:578.06ms +step:24827/57344 train_time:14351499ms step_avg:578.06ms +grad accum step:6207/14336 +step:24828/57344 train_time:14352802ms step_avg:578.09ms +step:24829/57344 train_time:14352819ms step_avg:578.07ms +step:24830/57344 train_time:14353064ms step_avg:578.05ms +step:24831/57344 train_time:14353613ms step_avg:578.05ms +grad accum step:6208/14336 +step:24832/57344 train_time:14354928ms step_avg:578.08ms +step:24832/57344 val_loss:6.946264 train_time:14354929ms step_avg:578.08ms +step:24833/57344 train_time:14354941ms step_avg:578.06ms +step:24834/57344 train_time:14355164ms step_avg:578.04ms +step:24835/57344 train_time:14355708ms step_avg:578.04ms +grad accum step:6209/14336 +step:24836/57344 train_time:14357008ms step_avg:578.07ms +step:24837/57344 train_time:14357025ms step_avg:578.05ms +step:24838/57344 train_time:14357278ms step_avg:578.04ms +step:24839/57344 train_time:14357843ms step_avg:578.04ms +grad accum step:6210/14336 +step:24840/57344 train_time:14359126ms step_avg:578.06ms +step:24841/57344 train_time:14359143ms step_avg:578.04ms +step:24842/57344 train_time:14359390ms step_avg:578.03ms +step:24843/57344 train_time:14359938ms step_avg:578.03ms +grad accum step:6211/14336 +step:24844/57344 train_time:14361239ms step_avg:578.06ms +step:24845/57344 train_time:14361256ms step_avg:578.03ms +step:24846/57344 train_time:14361502ms step_avg:578.02ms +step:24847/57344 train_time:14362041ms step_avg:578.02ms +grad accum step:6212/14336 +step:24848/57344 train_time:14363320ms step_avg:578.05ms +step:24849/57344 train_time:14363337ms step_avg:578.02ms +step:24850/57344 train_time:14363587ms step_avg:578.01ms +step:24851/57344 train_time:14364144ms step_avg:578.01ms +grad accum step:6213/14336 +step:24852/57344 train_time:14365447ms step_avg:578.04ms +step:24853/57344 train_time:14365464ms step_avg:578.02ms +step:24854/57344 train_time:14365716ms step_avg:578.00ms +step:24855/57344 train_time:14366271ms step_avg:578.00ms +grad accum step:6214/14336 +step:24856/57344 train_time:14367612ms step_avg:578.03ms +step:24857/57344 train_time:14367629ms step_avg:578.01ms +step:24858/57344 train_time:14367878ms step_avg:578.00ms +step:24859/57344 train_time:14368438ms step_avg:578.00ms +grad accum step:6215/14336 +step:24860/57344 train_time:14369733ms step_avg:578.03ms +step:24861/57344 train_time:14369750ms step_avg:578.00ms +step:24862/57344 train_time:14369998ms step_avg:577.99ms +step:24863/57344 train_time:14370544ms step_avg:577.99ms +grad accum step:6216/14336 +step:24864/57344 train_time:14371817ms step_avg:578.02ms +step:24865/57344 train_time:14371833ms step_avg:577.99ms +step:24866/57344 train_time:14372085ms step_avg:577.98ms +step:24867/57344 train_time:14372644ms step_avg:577.98ms +grad accum step:6217/14336 +step:24868/57344 train_time:14373947ms step_avg:578.01ms +step:24869/57344 train_time:14373964ms step_avg:577.99ms +step:24870/57344 train_time:14374215ms step_avg:577.97ms +step:24871/57344 train_time:14374777ms step_avg:577.97ms +grad accum step:6218/14336 +step:24872/57344 train_time:14376072ms step_avg:578.00ms +step:24873/57344 train_time:14376089ms step_avg:577.98ms +step:24874/57344 train_time:14376338ms step_avg:577.97ms +step:24875/57344 train_time:14376888ms step_avg:577.97ms +grad accum step:6219/14336 +step:24876/57344 train_time:14378198ms step_avg:577.99ms +step:24877/57344 train_time:14378215ms step_avg:577.97ms +step:24878/57344 train_time:14378464ms step_avg:577.96ms +step:24879/57344 train_time:14379009ms step_avg:577.96ms +grad accum step:6220/14336 +step:24880/57344 train_time:14380307ms step_avg:577.99ms +step:24881/57344 train_time:14380323ms step_avg:577.96ms +step:24882/57344 train_time:14380568ms step_avg:577.95ms +step:24883/57344 train_time:14381108ms step_avg:577.95ms +grad accum step:6221/14336 +step:24884/57344 train_time:14382390ms step_avg:577.98ms +step:24885/57344 train_time:14382407ms step_avg:577.95ms +step:24886/57344 train_time:14382657ms step_avg:577.94ms +step:24887/57344 train_time:14383207ms step_avg:577.94ms +grad accum step:6222/14336 +step:24888/57344 train_time:14384497ms step_avg:577.97ms +step:24889/57344 train_time:14384513ms step_avg:577.95ms +step:24890/57344 train_time:14384761ms step_avg:577.93ms +step:24891/57344 train_time:14385309ms step_avg:577.93ms +grad accum step:6223/14336 +step:24892/57344 train_time:14386622ms step_avg:577.96ms +step:24893/57344 train_time:14386638ms step_avg:577.94ms +step:24894/57344 train_time:14386893ms step_avg:577.93ms +step:24895/57344 train_time:14387465ms step_avg:577.93ms +grad accum step:6224/14336 +step:24896/57344 train_time:14388775ms step_avg:577.96ms +step:24896/57344 val_loss:6.943892 train_time:14388776ms step_avg:577.96ms +step:24897/57344 train_time:14388946ms step_avg:577.94ms +step:24898/57344 train_time:14389106ms step_avg:577.92ms +step:24899/57344 train_time:14389644ms step_avg:577.92ms +grad accum step:6225/14336 +step:24900/57344 train_time:14391080ms step_avg:577.96ms +step:24901/57344 train_time:14391093ms step_avg:577.93ms +step:24902/57344 train_time:14391318ms step_avg:577.92ms +step:24903/57344 train_time:14391871ms step_avg:577.92ms +grad accum step:6226/14336 +step:24904/57344 train_time:14393167ms step_avg:577.95ms +step:24905/57344 train_time:14393184ms step_avg:577.92ms +step:24906/57344 train_time:14393431ms step_avg:577.91ms +step:24907/57344 train_time:14393975ms step_avg:577.91ms +grad accum step:6227/14336 +step:24908/57344 train_time:14395289ms step_avg:577.94ms +step:24909/57344 train_time:14395306ms step_avg:577.92ms +step:24910/57344 train_time:14395561ms step_avg:577.90ms +step:24911/57344 train_time:14396124ms step_avg:577.90ms +grad accum step:6228/14336 +step:24912/57344 train_time:14397465ms step_avg:577.93ms +step:24913/57344 train_time:14397482ms step_avg:577.91ms +step:24914/57344 train_time:14397729ms step_avg:577.90ms +step:24915/57344 train_time:14398276ms step_avg:577.90ms +grad accum step:6229/14336 +step:24916/57344 train_time:14399625ms step_avg:577.93ms +step:24917/57344 train_time:14399641ms step_avg:577.90ms +step:24918/57344 train_time:14399906ms step_avg:577.89ms +step:24919/57344 train_time:14400495ms step_avg:577.89ms +grad accum step:6230/14336 +step:24920/57344 train_time:14401814ms step_avg:577.92ms +step:24921/57344 train_time:14401831ms step_avg:577.90ms +step:24922/57344 train_time:14402077ms step_avg:577.89ms +step:24923/57344 train_time:14402620ms step_avg:577.88ms +grad accum step:6231/14336 +step:24924/57344 train_time:14403922ms step_avg:577.91ms +step:24925/57344 train_time:14403938ms step_avg:577.89ms +step:24926/57344 train_time:14404193ms step_avg:577.88ms +step:24927/57344 train_time:14404749ms step_avg:577.88ms +grad accum step:6232/14336 +step:24928/57344 train_time:14406062ms step_avg:577.91ms +step:24929/57344 train_time:14406078ms step_avg:577.88ms +step:24930/57344 train_time:14406326ms step_avg:577.87ms +step:24931/57344 train_time:14406881ms step_avg:577.87ms +grad accum step:6233/14336 +step:24932/57344 train_time:14408221ms step_avg:577.90ms +step:24933/57344 train_time:14408238ms step_avg:577.88ms +step:24934/57344 train_time:14408488ms step_avg:577.87ms +step:24935/57344 train_time:14409043ms step_avg:577.86ms +grad accum step:6234/14336 +step:24936/57344 train_time:14410318ms step_avg:577.89ms +step:24937/57344 train_time:14410335ms step_avg:577.87ms +step:24938/57344 train_time:14410584ms step_avg:577.86ms +step:24939/57344 train_time:14411130ms step_avg:577.86ms +grad accum step:6235/14336 +step:24940/57344 train_time:14412441ms step_avg:577.88ms +step:24941/57344 train_time:14412459ms step_avg:577.86ms +step:24942/57344 train_time:14412704ms step_avg:577.85ms +step:24943/57344 train_time:14413254ms step_avg:577.85ms +grad accum step:6236/14336 +step:24944/57344 train_time:14414564ms step_avg:577.88ms +step:24945/57344 train_time:14414581ms step_avg:577.85ms +step:24946/57344 train_time:14414830ms step_avg:577.84ms +step:24947/57344 train_time:14415383ms step_avg:577.84ms +grad accum step:6237/14336 +step:24948/57344 train_time:14416712ms step_avg:577.87ms +step:24949/57344 train_time:14416729ms step_avg:577.85ms +step:24950/57344 train_time:14416975ms step_avg:577.83ms +step:24951/57344 train_time:14417531ms step_avg:577.83ms +grad accum step:6238/14336 +step:24952/57344 train_time:14418837ms step_avg:577.86ms +step:24953/57344 train_time:14418853ms step_avg:577.84ms +step:24954/57344 train_time:14419098ms step_avg:577.83ms +step:24955/57344 train_time:14419640ms step_avg:577.83ms +grad accum step:6239/14336 +step:24956/57344 train_time:14420919ms step_avg:577.85ms +step:24957/57344 train_time:14420936ms step_avg:577.83ms +step:24958/57344 train_time:14421186ms step_avg:577.82ms +step:24959/57344 train_time:14421732ms step_avg:577.82ms +grad accum step:6240/14336 +step:24960/57344 train_time:14423011ms step_avg:577.84ms +step:24960/57344 val_loss:6.948223 train_time:14423012ms step_avg:577.85ms +step:24961/57344 train_time:14423136ms step_avg:577.83ms +step:24962/57344 train_time:14423360ms step_avg:577.81ms +step:24963/57344 train_time:14423944ms step_avg:577.81ms +grad accum step:6241/14336 +step:24964/57344 train_time:14425260ms step_avg:577.84ms +step:24965/57344 train_time:14425277ms step_avg:577.82ms +step:24966/57344 train_time:14425523ms step_avg:577.81ms +step:24967/57344 train_time:14426067ms step_avg:577.81ms +grad accum step:6242/14336 +step:24968/57344 train_time:14427375ms step_avg:577.83ms +step:24969/57344 train_time:14427392ms step_avg:577.81ms +step:24970/57344 train_time:14427642ms step_avg:577.80ms +step:24971/57344 train_time:14428196ms step_avg:577.80ms +grad accum step:6243/14336 +step:24972/57344 train_time:14429508ms step_avg:577.83ms +step:24973/57344 train_time:14429525ms step_avg:577.81ms +step:24974/57344 train_time:14429777ms step_avg:577.79ms +step:24975/57344 train_time:14430344ms step_avg:577.79ms +grad accum step:6244/14336 +step:24976/57344 train_time:14431699ms step_avg:577.82ms +step:24977/57344 train_time:14431715ms step_avg:577.80ms +step:24978/57344 train_time:14431965ms step_avg:577.79ms +step:24979/57344 train_time:14432518ms step_avg:577.79ms +grad accum step:6245/14336 +step:24980/57344 train_time:14433825ms step_avg:577.82ms +step:24981/57344 train_time:14433842ms step_avg:577.79ms +step:24982/57344 train_time:14434088ms step_avg:577.78ms +step:24983/57344 train_time:14434632ms step_avg:577.78ms +grad accum step:6246/14336 +step:24984/57344 train_time:14435913ms step_avg:577.81ms +step:24985/57344 train_time:14435930ms step_avg:577.78ms +step:24986/57344 train_time:14436175ms step_avg:577.77ms +step:24987/57344 train_time:14436721ms step_avg:577.77ms +grad accum step:6247/14336 +step:24988/57344 train_time:14438016ms step_avg:577.80ms +step:24989/57344 train_time:14438033ms step_avg:577.78ms +step:24990/57344 train_time:14438279ms step_avg:577.76ms +step:24991/57344 train_time:14438822ms step_avg:577.76ms +grad accum step:6248/14336 +step:24992/57344 train_time:14440117ms step_avg:577.79ms +step:24993/57344 train_time:14440134ms step_avg:577.77ms +step:24994/57344 train_time:14440379ms step_avg:577.75ms +step:24995/57344 train_time:14440930ms step_avg:577.75ms +grad accum step:6249/14336 +step:24996/57344 train_time:14442227ms step_avg:577.78ms +step:24997/57344 train_time:14442244ms step_avg:577.76ms +step:24998/57344 train_time:14442494ms step_avg:577.75ms +step:24999/57344 train_time:14443046ms step_avg:577.74ms +grad accum step:6250/14336 +step:25000/57344 train_time:14444372ms step_avg:577.77ms +step:25001/57344 train_time:14444389ms step_avg:577.75ms +step:25002/57344 train_time:14444636ms step_avg:577.74ms +step:25003/57344 train_time:14445190ms step_avg:577.74ms +grad accum step:6251/14336 +step:25004/57344 train_time:14446503ms step_avg:577.77ms +step:25005/57344 train_time:14446520ms step_avg:577.75ms +step:25006/57344 train_time:14446767ms step_avg:577.73ms +step:25007/57344 train_time:14447318ms step_avg:577.73ms +grad accum step:6252/14336 +step:25008/57344 train_time:14448663ms step_avg:577.76ms +step:25009/57344 train_time:14448680ms step_avg:577.74ms +step:25010/57344 train_time:14448926ms step_avg:577.73ms +step:25011/57344 train_time:14449468ms step_avg:577.72ms +grad accum step:6253/14336 +step:25012/57344 train_time:14450768ms step_avg:577.75ms +step:25013/57344 train_time:14450785ms step_avg:577.73ms +step:25014/57344 train_time:14451034ms step_avg:577.72ms +step:25015/57344 train_time:14451579ms step_avg:577.72ms +grad accum step:6254/14336 +step:25016/57344 train_time:14452878ms step_avg:577.75ms +step:25017/57344 train_time:14452895ms step_avg:577.72ms +step:25018/57344 train_time:14453146ms step_avg:577.71ms +step:25019/57344 train_time:14453719ms step_avg:577.71ms +grad accum step:6255/14336 +step:25020/57344 train_time:14455089ms step_avg:577.74ms +step:25021/57344 train_time:14455106ms step_avg:577.72ms +step:25022/57344 train_time:14455359ms step_avg:577.71ms +step:25023/57344 train_time:14455922ms step_avg:577.71ms +grad accum step:6256/14336 +step:25024/57344 train_time:14457219ms step_avg:577.73ms +step:25024/57344 val_loss:6.962749 train_time:14457220ms step_avg:577.73ms +step:25025/57344 train_time:14457232ms step_avg:577.71ms +step:25026/57344 train_time:14457452ms step_avg:577.70ms +step:25027/57344 train_time:14457994ms step_avg:577.70ms +grad accum step:6257/14336 +step:25028/57344 train_time:14459325ms step_avg:577.73ms +step:25029/57344 train_time:14459342ms step_avg:577.70ms +step:25030/57344 train_time:14459591ms step_avg:577.69ms +step:25031/57344 train_time:14460145ms step_avg:577.69ms +grad accum step:6258/14336 +step:25032/57344 train_time:14461482ms step_avg:577.72ms +step:25033/57344 train_time:14461499ms step_avg:577.70ms +step:25034/57344 train_time:14461749ms step_avg:577.68ms +step:25035/57344 train_time:14462299ms step_avg:577.68ms +grad accum step:6259/14336 +step:25036/57344 train_time:14463670ms step_avg:577.71ms +step:25037/57344 train_time:14463687ms step_avg:577.69ms +step:25038/57344 train_time:14463951ms step_avg:577.68ms +step:25039/57344 train_time:14464547ms step_avg:577.68ms +grad accum step:6260/14336 +step:25040/57344 train_time:14465853ms step_avg:577.71ms +step:25041/57344 train_time:14465870ms step_avg:577.69ms +step:25042/57344 train_time:14466120ms step_avg:577.67ms +step:25043/57344 train_time:14466673ms step_avg:577.67ms +grad accum step:6261/14336 +step:25044/57344 train_time:14467962ms step_avg:577.70ms +step:25045/57344 train_time:14467979ms step_avg:577.68ms +step:25046/57344 train_time:14468226ms step_avg:577.67ms +step:25047/57344 train_time:14468775ms step_avg:577.66ms +grad accum step:6262/14336 +step:25048/57344 train_time:14470131ms step_avg:577.70ms +step:25049/57344 train_time:14470148ms step_avg:577.67ms +step:25050/57344 train_time:14470415ms step_avg:577.66ms +step:25051/57344 train_time:14471027ms step_avg:577.66ms +grad accum step:6263/14336 +step:25052/57344 train_time:14472379ms step_avg:577.69ms +step:25053/57344 train_time:14472395ms step_avg:577.67ms +step:25054/57344 train_time:14472650ms step_avg:577.66ms +step:25055/57344 train_time:14473215ms step_avg:577.66ms +grad accum step:6264/14336 +step:25056/57344 train_time:14474565ms step_avg:577.69ms +step:25057/57344 train_time:14474582ms step_avg:577.67ms +step:25058/57344 train_time:14474832ms step_avg:577.65ms +step:25059/57344 train_time:14475391ms step_avg:577.65ms +grad accum step:6265/14336 +step:25060/57344 train_time:14476704ms step_avg:577.68ms +step:25061/57344 train_time:14476721ms step_avg:577.66ms +step:25062/57344 train_time:14476970ms step_avg:577.65ms +step:25063/57344 train_time:14477522ms step_avg:577.65ms +grad accum step:6266/14336 +step:25064/57344 train_time:14478824ms step_avg:577.67ms +step:25065/57344 train_time:14478841ms step_avg:577.65ms +step:25066/57344 train_time:14479086ms step_avg:577.64ms +step:25067/57344 train_time:14479629ms step_avg:577.64ms +grad accum step:6267/14336 +step:25068/57344 train_time:14480912ms step_avg:577.67ms +step:25069/57344 train_time:14480930ms step_avg:577.64ms +step:25070/57344 train_time:14481176ms step_avg:577.63ms +step:25071/57344 train_time:14481723ms step_avg:577.63ms +grad accum step:6268/14336 +step:25072/57344 train_time:14483031ms step_avg:577.66ms +step:25073/57344 train_time:14483048ms step_avg:577.64ms +step:25074/57344 train_time:14483290ms step_avg:577.62ms +step:25075/57344 train_time:14483832ms step_avg:577.62ms +grad accum step:6269/14336 +step:25076/57344 train_time:14485113ms step_avg:577.65ms +step:25077/57344 train_time:14485130ms step_avg:577.63ms +step:25078/57344 train_time:14485381ms step_avg:577.61ms +step:25079/57344 train_time:14485933ms step_avg:577.61ms +grad accum step:6270/14336 +step:25080/57344 train_time:14487275ms step_avg:577.64ms +step:25081/57344 train_time:14487292ms step_avg:577.62ms +step:25082/57344 train_time:14487541ms step_avg:577.61ms +step:25083/57344 train_time:14488095ms step_avg:577.61ms +grad accum step:6271/14336 +step:25084/57344 train_time:14489406ms step_avg:577.64ms +step:25085/57344 train_time:14489423ms step_avg:577.61ms +step:25086/57344 train_time:14489679ms step_avg:577.60ms +step:25087/57344 train_time:14490245ms step_avg:577.60ms +grad accum step:6272/14336 +step:25088/57344 train_time:14491558ms step_avg:577.63ms +step:25088/57344 val_loss:6.961961 train_time:14491559ms step_avg:577.63ms +step:25089/57344 train_time:14491571ms step_avg:577.61ms +step:25090/57344 train_time:14491841ms step_avg:577.59ms +step:25091/57344 train_time:14492380ms step_avg:577.59ms +grad accum step:6273/14336 +step:25092/57344 train_time:14493686ms step_avg:577.62ms +step:25093/57344 train_time:14493703ms step_avg:577.60ms +step:25094/57344 train_time:14493950ms step_avg:577.59ms +step:25095/57344 train_time:14494499ms step_avg:577.59ms +grad accum step:6274/14336 +step:25096/57344 train_time:14495801ms step_avg:577.61ms +step:25097/57344 train_time:14495818ms step_avg:577.59ms +step:25098/57344 train_time:14496068ms step_avg:577.58ms +step:25099/57344 train_time:14496622ms step_avg:577.58ms +grad accum step:6275/14336 +step:25100/57344 train_time:14497937ms step_avg:577.61ms +step:25101/57344 train_time:14497953ms step_avg:577.58ms +step:25102/57344 train_time:14498202ms step_avg:577.57ms +step:25103/57344 train_time:14498756ms step_avg:577.57ms +grad accum step:6276/14336 +step:25104/57344 train_time:14500053ms step_avg:577.60ms +step:25105/57344 train_time:14500070ms step_avg:577.58ms +step:25106/57344 train_time:14500325ms step_avg:577.56ms +step:25107/57344 train_time:14500893ms step_avg:577.56ms +grad accum step:6277/14336 +step:25108/57344 train_time:14502184ms step_avg:577.59ms +step:25109/57344 train_time:14502201ms step_avg:577.57ms +step:25110/57344 train_time:14502445ms step_avg:577.56ms +step:25111/57344 train_time:14502992ms step_avg:577.56ms +grad accum step:6278/14336 +step:25112/57344 train_time:14504299ms step_avg:577.58ms +step:25113/57344 train_time:14504315ms step_avg:577.56ms +step:25114/57344 train_time:14504561ms step_avg:577.55ms +step:25115/57344 train_time:14505106ms step_avg:577.55ms +grad accum step:6279/14336 +step:25116/57344 train_time:14506424ms step_avg:577.58ms +step:25117/57344 train_time:14506441ms step_avg:577.55ms +step:25118/57344 train_time:14506689ms step_avg:577.54ms +step:25119/57344 train_time:14507238ms step_avg:577.54ms +grad accum step:6280/14336 +step:25120/57344 train_time:14508502ms step_avg:577.57ms +step:25121/57344 train_time:14508519ms step_avg:577.55ms +step:25122/57344 train_time:14508767ms step_avg:577.53ms +step:25123/57344 train_time:14509316ms step_avg:577.53ms +grad accum step:6281/14336 +step:25124/57344 train_time:14510653ms step_avg:577.56ms +step:25125/57344 train_time:14510669ms step_avg:577.54ms +step:25126/57344 train_time:14510914ms step_avg:577.53ms +step:25127/57344 train_time:14511458ms step_avg:577.52ms +grad accum step:6282/14336 +step:25128/57344 train_time:14512731ms step_avg:577.55ms +step:25129/57344 train_time:14512747ms step_avg:577.53ms +step:25130/57344 train_time:14512997ms step_avg:577.52ms +step:25131/57344 train_time:14513551ms step_avg:577.52ms +grad accum step:6283/14336 +step:25132/57344 train_time:14514853ms step_avg:577.54ms +step:25133/57344 train_time:14514870ms step_avg:577.52ms +step:25134/57344 train_time:14515117ms step_avg:577.51ms +step:25135/57344 train_time:14515670ms step_avg:577.51ms +grad accum step:6284/14336 +step:25136/57344 train_time:14516969ms step_avg:577.54ms +step:25137/57344 train_time:14516986ms step_avg:577.51ms +step:25138/57344 train_time:14517233ms step_avg:577.50ms +step:25139/57344 train_time:14517777ms step_avg:577.50ms +grad accum step:6285/14336 +step:25140/57344 train_time:14519080ms step_avg:577.53ms +step:25141/57344 train_time:14519097ms step_avg:577.51ms +step:25142/57344 train_time:14519347ms step_avg:577.49ms +step:25143/57344 train_time:14519907ms step_avg:577.49ms +grad accum step:6286/14336 +step:25144/57344 train_time:14521226ms step_avg:577.52ms +step:25145/57344 train_time:14521242ms step_avg:577.50ms +step:25146/57344 train_time:14521487ms step_avg:577.49ms +step:25147/57344 train_time:14522041ms step_avg:577.49ms +grad accum step:6287/14336 +step:25148/57344 train_time:14523371ms step_avg:577.52ms +step:25149/57344 train_time:14523388ms step_avg:577.49ms +step:25150/57344 train_time:14523636ms step_avg:577.48ms +step:25151/57344 train_time:14524187ms step_avg:577.48ms +grad accum step:6288/14336 +step:25152/57344 train_time:14525473ms step_avg:577.51ms +step:25152/57344 val_loss:6.960940 train_time:14525474ms step_avg:577.51ms +step:25153/57344 train_time:14525486ms step_avg:577.49ms +step:25154/57344 train_time:14525714ms step_avg:577.47ms +step:25155/57344 train_time:14526275ms step_avg:577.47ms +grad accum step:6289/14336 +step:25156/57344 train_time:14527621ms step_avg:577.50ms +step:25157/57344 train_time:14527638ms step_avg:577.48ms +step:25158/57344 train_time:14527880ms step_avg:577.47ms +step:25159/57344 train_time:14528412ms step_avg:577.46ms +grad accum step:6290/14336 +step:25160/57344 train_time:14529727ms step_avg:577.49ms +step:25161/57344 train_time:14529744ms step_avg:577.47ms +step:25162/57344 train_time:14529990ms step_avg:577.46ms +step:25163/57344 train_time:14530534ms step_avg:577.46ms +grad accum step:6291/14336 +step:25164/57344 train_time:14531832ms step_avg:577.48ms +step:25165/57344 train_time:14531849ms step_avg:577.46ms +step:25166/57344 train_time:14532094ms step_avg:577.45ms +step:25167/57344 train_time:14532638ms step_avg:577.45ms +grad accum step:6292/14336 +step:25168/57344 train_time:14533935ms step_avg:577.48ms +step:25169/57344 train_time:14533952ms step_avg:577.45ms +step:25170/57344 train_time:14534203ms step_avg:577.44ms +step:25171/57344 train_time:14534764ms step_avg:577.44ms +grad accum step:6293/14336 +step:25172/57344 train_time:14536098ms step_avg:577.47ms +step:25173/57344 train_time:14536114ms step_avg:577.45ms +step:25174/57344 train_time:14536360ms step_avg:577.44ms +step:25175/57344 train_time:14536910ms step_avg:577.43ms +grad accum step:6294/14336 +step:25176/57344 train_time:14538198ms step_avg:577.46ms +step:25177/57344 train_time:14538215ms step_avg:577.44ms +step:25178/57344 train_time:14538462ms step_avg:577.43ms +step:25179/57344 train_time:14539012ms step_avg:577.43ms +grad accum step:6295/14336 +step:25180/57344 train_time:14540334ms step_avg:577.46ms +step:25181/57344 train_time:14540350ms step_avg:577.43ms +step:25182/57344 train_time:14540592ms step_avg:577.42ms +step:25183/57344 train_time:14541134ms step_avg:577.42ms +grad accum step:6296/14336 +step:25184/57344 train_time:14542442ms step_avg:577.45ms +step:25185/57344 train_time:14542459ms step_avg:577.43ms +step:25186/57344 train_time:14542704ms step_avg:577.41ms +step:25187/57344 train_time:14543242ms step_avg:577.41ms +grad accum step:6297/14336 +step:25188/57344 train_time:14544555ms step_avg:577.44ms +step:25189/57344 train_time:14544572ms step_avg:577.42ms +step:25190/57344 train_time:14544816ms step_avg:577.40ms +step:25191/57344 train_time:14545357ms step_avg:577.40ms +grad accum step:6298/14336 +step:25192/57344 train_time:14546665ms step_avg:577.43ms +step:25193/57344 train_time:14546682ms step_avg:577.41ms +step:25194/57344 train_time:14546928ms step_avg:577.40ms +step:25195/57344 train_time:14547464ms step_avg:577.39ms +grad accum step:6299/14336 +step:25196/57344 train_time:14548760ms step_avg:577.42ms +step:25197/57344 train_time:14548777ms step_avg:577.40ms +step:25198/57344 train_time:14549020ms step_avg:577.39ms +step:25199/57344 train_time:14549569ms step_avg:577.39ms +grad accum step:6300/14336 +step:25200/57344 train_time:14550968ms step_avg:577.42ms +step:25201/57344 train_time:14550984ms step_avg:577.40ms +step:25202/57344 train_time:14551230ms step_avg:577.38ms +step:25203/57344 train_time:14551768ms step_avg:577.38ms +grad accum step:6301/14336 +step:25204/57344 train_time:14553046ms step_avg:577.41ms +step:25205/57344 train_time:14553063ms step_avg:577.39ms +step:25206/57344 train_time:14553313ms step_avg:577.37ms +step:25207/57344 train_time:14553869ms step_avg:577.37ms +grad accum step:6302/14336 +step:25208/57344 train_time:14555150ms step_avg:577.40ms +step:25209/57344 train_time:14555168ms step_avg:577.38ms +step:25210/57344 train_time:14555409ms step_avg:577.37ms +step:25211/57344 train_time:14555953ms step_avg:577.37ms +grad accum step:6303/14336 +step:25212/57344 train_time:14557304ms step_avg:577.40ms +step:25213/57344 train_time:14557317ms step_avg:577.37ms +step:25214/57344 train_time:14557549ms step_avg:577.36ms +step:25215/57344 train_time:14558106ms step_avg:577.36ms +grad accum step:6304/14336 +step:25216/57344 train_time:14559411ms step_avg:577.39ms +step:25216/57344 val_loss:6.965730 train_time:14559412ms step_avg:577.39ms +step:25217/57344 train_time:14559424ms step_avg:577.37ms +step:25218/57344 train_time:14559646ms step_avg:577.35ms +step:25219/57344 train_time:14560193ms step_avg:577.35ms +grad accum step:6305/14336 +step:25220/57344 train_time:14561501ms step_avg:577.38ms +step:25221/57344 train_time:14561518ms step_avg:577.36ms +step:25222/57344 train_time:14561764ms step_avg:577.34ms +step:25223/57344 train_time:14562303ms step_avg:577.34ms +grad accum step:6306/14336 +step:25224/57344 train_time:14563582ms step_avg:577.37ms +step:25225/57344 train_time:14563599ms step_avg:577.35ms +step:25226/57344 train_time:14563846ms step_avg:577.33ms +step:25227/57344 train_time:14564395ms step_avg:577.33ms +grad accum step:6307/14336 +step:25228/57344 train_time:14565711ms step_avg:577.36ms +step:25229/57344 train_time:14565728ms step_avg:577.34ms +step:25230/57344 train_time:14565972ms step_avg:577.33ms +step:25231/57344 train_time:14566517ms step_avg:577.33ms +grad accum step:6308/14336 +step:25232/57344 train_time:14567826ms step_avg:577.36ms +step:25233/57344 train_time:14567843ms step_avg:577.33ms +step:25234/57344 train_time:14568089ms step_avg:577.32ms +step:25235/57344 train_time:14568639ms step_avg:577.32ms +grad accum step:6309/14336 +step:25236/57344 train_time:14569911ms step_avg:577.35ms +step:25237/57344 train_time:14569929ms step_avg:577.32ms +step:25238/57344 train_time:14570180ms step_avg:577.31ms +step:25239/57344 train_time:14570740ms step_avg:577.31ms +grad accum step:6310/14336 +step:25240/57344 train_time:14572044ms step_avg:577.34ms +step:25241/57344 train_time:14572061ms step_avg:577.32ms +step:25242/57344 train_time:14572306ms step_avg:577.30ms +step:25243/57344 train_time:14572847ms step_avg:577.30ms +grad accum step:6311/14336 +step:25244/57344 train_time:14574135ms step_avg:577.33ms +step:25245/57344 train_time:14574152ms step_avg:577.31ms +step:25246/57344 train_time:14574395ms step_avg:577.30ms +step:25247/57344 train_time:14574932ms step_avg:577.29ms +grad accum step:6312/14336 +step:25248/57344 train_time:14576227ms step_avg:577.32ms +step:25249/57344 train_time:14576244ms step_avg:577.30ms +step:25250/57344 train_time:14576490ms step_avg:577.29ms +step:25251/57344 train_time:14577033ms step_avg:577.29ms +grad accum step:6313/14336 +step:25252/57344 train_time:14578325ms step_avg:577.31ms +step:25253/57344 train_time:14578343ms step_avg:577.29ms +step:25254/57344 train_time:14578595ms step_avg:577.28ms +step:25255/57344 train_time:14579155ms step_avg:577.28ms +grad accum step:6314/14336 +step:25256/57344 train_time:14580475ms step_avg:577.31ms +step:25257/57344 train_time:14580492ms step_avg:577.29ms +step:25258/57344 train_time:14580742ms step_avg:577.27ms +step:25259/57344 train_time:14581317ms step_avg:577.27ms +grad accum step:6315/14336 +step:25260/57344 train_time:14582700ms step_avg:577.30ms +step:25261/57344 train_time:14582717ms step_avg:577.28ms +step:25262/57344 train_time:14582970ms step_avg:577.27ms +step:25263/57344 train_time:14583528ms step_avg:577.27ms +grad accum step:6316/14336 +step:25264/57344 train_time:14584821ms step_avg:577.30ms +step:25265/57344 train_time:14584838ms step_avg:577.27ms +step:25266/57344 train_time:14585084ms step_avg:577.26ms +step:25267/57344 train_time:14585632ms step_avg:577.26ms +grad accum step:6317/14336 +step:25268/57344 train_time:14586926ms step_avg:577.29ms +step:25269/57344 train_time:14586943ms step_avg:577.27ms +step:25270/57344 train_time:14587191ms step_avg:577.25ms +step:25271/57344 train_time:14587742ms step_avg:577.25ms +grad accum step:6318/14336 +step:25272/57344 train_time:14589026ms step_avg:577.28ms +step:25273/57344 train_time:14589043ms step_avg:577.26ms +step:25274/57344 train_time:14589289ms step_avg:577.24ms +step:25275/57344 train_time:14589834ms step_avg:577.24ms +grad accum step:6319/14336 +step:25276/57344 train_time:14591154ms step_avg:577.27ms +step:25277/57344 train_time:14591171ms step_avg:577.25ms +step:25278/57344 train_time:14591416ms step_avg:577.24ms +step:25279/57344 train_time:14591958ms step_avg:577.24ms +grad accum step:6320/14336 +step:25280/57344 train_time:14593239ms step_avg:577.26ms +step:25280/57344 val_loss:6.967648 train_time:14593239ms step_avg:577.26ms +step:25281/57344 train_time:14593252ms step_avg:577.24ms +step:25282/57344 train_time:14593476ms step_avg:577.23ms +step:25283/57344 train_time:14594030ms step_avg:577.23ms +grad accum step:6321/14336 +step:25284/57344 train_time:14595352ms step_avg:577.26ms +step:25285/57344 train_time:14595368ms step_avg:577.23ms +step:25286/57344 train_time:14595611ms step_avg:577.22ms +step:25287/57344 train_time:14596151ms step_avg:577.22ms +grad accum step:6322/14336 +step:25288/57344 train_time:14597430ms step_avg:577.25ms +step:25289/57344 train_time:14597446ms step_avg:577.23ms +step:25290/57344 train_time:14597708ms step_avg:577.21ms +step:25291/57344 train_time:14598311ms step_avg:577.21ms +grad accum step:6323/14336 +step:25292/57344 train_time:14599713ms step_avg:577.25ms +step:25293/57344 train_time:14599730ms step_avg:577.22ms +step:25294/57344 train_time:14599991ms step_avg:577.21ms +step:25295/57344 train_time:14600568ms step_avg:577.21ms +grad accum step:6324/14336 +step:25296/57344 train_time:14601836ms step_avg:577.24ms +step:25297/57344 train_time:14601853ms step_avg:577.22ms +step:25298/57344 train_time:14602099ms step_avg:577.20ms +step:25299/57344 train_time:14602650ms step_avg:577.20ms +grad accum step:6325/14336 +step:25300/57344 train_time:14603964ms step_avg:577.23ms +step:25301/57344 train_time:14603981ms step_avg:577.21ms +step:25302/57344 train_time:14604228ms step_avg:577.20ms +step:25303/57344 train_time:14604770ms step_avg:577.20ms +grad accum step:6326/14336 +step:25304/57344 train_time:14606069ms step_avg:577.22ms +step:25305/57344 train_time:14606085ms step_avg:577.20ms +step:25306/57344 train_time:14606333ms step_avg:577.19ms +step:25307/57344 train_time:14606881ms step_avg:577.19ms +grad accum step:6327/14336 +step:25308/57344 train_time:14608191ms step_avg:577.22ms +step:25309/57344 train_time:14608207ms step_avg:577.19ms +step:25310/57344 train_time:14608452ms step_avg:577.18ms +step:25311/57344 train_time:14608996ms step_avg:577.18ms +grad accum step:6328/14336 +step:25312/57344 train_time:14610316ms step_avg:577.21ms +step:25313/57344 train_time:14610333ms step_avg:577.19ms +step:25314/57344 train_time:14610579ms step_avg:577.17ms +step:25315/57344 train_time:14611121ms step_avg:577.17ms +grad accum step:6329/14336 +step:25316/57344 train_time:14612434ms step_avg:577.20ms +step:25317/57344 train_time:14612451ms step_avg:577.18ms +step:25318/57344 train_time:14612699ms step_avg:577.17ms +step:25319/57344 train_time:14613241ms step_avg:577.17ms +grad accum step:6330/14336 +step:25320/57344 train_time:14614538ms step_avg:577.19ms +step:25321/57344 train_time:14614555ms step_avg:577.17ms +step:25322/57344 train_time:14614808ms step_avg:577.16ms +step:25323/57344 train_time:14615366ms step_avg:577.16ms +grad accum step:6331/14336 +step:25324/57344 train_time:14616642ms step_avg:577.19ms +step:25325/57344 train_time:14616659ms step_avg:577.16ms +step:25326/57344 train_time:14616907ms step_avg:577.15ms +step:25327/57344 train_time:14617466ms step_avg:577.15ms +grad accum step:6332/14336 +step:25328/57344 train_time:14618810ms step_avg:577.18ms +step:25329/57344 train_time:14618827ms step_avg:577.16ms +step:25330/57344 train_time:14619071ms step_avg:577.14ms +step:25331/57344 train_time:14619617ms step_avg:577.14ms +grad accum step:6333/14336 +step:25332/57344 train_time:14620948ms step_avg:577.17ms +step:25333/57344 train_time:14620965ms step_avg:577.15ms +step:25334/57344 train_time:14621214ms step_avg:577.14ms +step:25335/57344 train_time:14621765ms step_avg:577.14ms +grad accum step:6334/14336 +step:25336/57344 train_time:14623078ms step_avg:577.17ms +step:25337/57344 train_time:14623095ms step_avg:577.14ms +step:25338/57344 train_time:14623344ms step_avg:577.13ms +step:25339/57344 train_time:14623886ms step_avg:577.13ms +grad accum step:6335/14336 +step:25340/57344 train_time:14625202ms step_avg:577.16ms +step:25341/57344 train_time:14625218ms step_avg:577.14ms +step:25342/57344 train_time:14625468ms step_avg:577.12ms +step:25343/57344 train_time:14626023ms step_avg:577.12ms +grad accum step:6336/14336 +step:25344/57344 train_time:14627325ms step_avg:577.15ms +step:25344/57344 val_loss:6.969640 train_time:14627325ms step_avg:577.15ms +step:25345/57344 train_time:14627337ms step_avg:577.13ms +step:25346/57344 train_time:14627562ms step_avg:577.12ms +step:25347/57344 train_time:14628116ms step_avg:577.11ms +grad accum step:6337/14336 +step:25348/57344 train_time:14629435ms step_avg:577.14ms +step:25349/57344 train_time:14629452ms step_avg:577.12ms +step:25350/57344 train_time:14629707ms step_avg:577.11ms +step:25351/57344 train_time:14630282ms step_avg:577.11ms +grad accum step:6338/14336 +step:25352/57344 train_time:14631615ms step_avg:577.14ms +step:25353/57344 train_time:14631632ms step_avg:577.12ms +step:25354/57344 train_time:14631878ms step_avg:577.10ms +step:25355/57344 train_time:14632423ms step_avg:577.10ms +grad accum step:6339/14336 +step:25356/57344 train_time:14633730ms step_avg:577.13ms +step:25357/57344 train_time:14633747ms step_avg:577.11ms +step:25358/57344 train_time:14633996ms step_avg:577.10ms +step:25359/57344 train_time:14634554ms step_avg:577.10ms +grad accum step:6340/14336 +step:25360/57344 train_time:14635901ms step_avg:577.13ms +step:25361/57344 train_time:14635918ms step_avg:577.10ms +step:25362/57344 train_time:14636166ms step_avg:577.09ms +step:25363/57344 train_time:14636711ms step_avg:577.09ms +grad accum step:6341/14336 +step:25364/57344 train_time:14637991ms step_avg:577.12ms +step:25365/57344 train_time:14638008ms step_avg:577.09ms +step:25366/57344 train_time:14638255ms step_avg:577.08ms +step:25367/57344 train_time:14638801ms step_avg:577.08ms +grad accum step:6342/14336 +step:25368/57344 train_time:14640114ms step_avg:577.11ms +step:25369/57344 train_time:14640131ms step_avg:577.09ms +step:25370/57344 train_time:14640380ms step_avg:577.07ms +step:25371/57344 train_time:14640926ms step_avg:577.07ms +grad accum step:6343/14336 +step:25372/57344 train_time:14642264ms step_avg:577.10ms +step:25373/57344 train_time:14642281ms step_avg:577.08ms +step:25374/57344 train_time:14642527ms step_avg:577.07ms +step:25375/57344 train_time:14643066ms step_avg:577.07ms +grad accum step:6344/14336 +step:25376/57344 train_time:14644374ms step_avg:577.10ms +step:25377/57344 train_time:14644391ms step_avg:577.07ms +step:25378/57344 train_time:14644648ms step_avg:577.06ms +step:25379/57344 train_time:14645222ms step_avg:577.06ms +grad accum step:6345/14336 +step:25380/57344 train_time:14646519ms step_avg:577.09ms +step:25381/57344 train_time:14646536ms step_avg:577.07ms +step:25382/57344 train_time:14646785ms step_avg:577.05ms +step:25383/57344 train_time:14647335ms step_avg:577.05ms +grad accum step:6346/14336 +step:25384/57344 train_time:14648663ms step_avg:577.08ms +step:25385/57344 train_time:14648680ms step_avg:577.06ms +step:25386/57344 train_time:14648923ms step_avg:577.05ms +step:25387/57344 train_time:14649456ms step_avg:577.05ms +grad accum step:6347/14336 +step:25388/57344 train_time:14650753ms step_avg:577.07ms +step:25389/57344 train_time:14650770ms step_avg:577.05ms +step:25390/57344 train_time:14651017ms step_avg:577.04ms +step:25391/57344 train_time:14651560ms step_avg:577.04ms +grad accum step:6348/14336 +step:25392/57344 train_time:14652857ms step_avg:577.07ms +step:25393/57344 train_time:14652874ms step_avg:577.04ms +step:25394/57344 train_time:14653126ms step_avg:577.03ms +step:25395/57344 train_time:14653682ms step_avg:577.03ms +grad accum step:6349/14336 +step:25396/57344 train_time:14654996ms step_avg:577.06ms +step:25397/57344 train_time:14655014ms step_avg:577.04ms +step:25398/57344 train_time:14655256ms step_avg:577.02ms +step:25399/57344 train_time:14655794ms step_avg:577.02ms +grad accum step:6350/14336 +step:25400/57344 train_time:14657103ms step_avg:577.05ms +step:25401/57344 train_time:14657120ms step_avg:577.03ms +step:25402/57344 train_time:14657367ms step_avg:577.02ms +step:25403/57344 train_time:14657916ms step_avg:577.02ms +grad accum step:6351/14336 +step:25404/57344 train_time:14659221ms step_avg:577.04ms +step:25405/57344 train_time:14659237ms step_avg:577.02ms +step:25406/57344 train_time:14659489ms step_avg:577.01ms +step:25407/57344 train_time:14660050ms step_avg:577.01ms +grad accum step:6352/14336 +step:25408/57344 train_time:14661340ms step_avg:577.04ms +step:25408/57344 val_loss:6.972083 train_time:14661340ms step_avg:577.04ms +step:25409/57344 train_time:14661353ms step_avg:577.01ms +step:25410/57344 train_time:14661576ms step_avg:577.00ms +step:25411/57344 train_time:14662112ms step_avg:577.00ms +grad accum step:6353/14336 +step:25412/57344 train_time:14663400ms step_avg:577.03ms +step:25413/57344 train_time:14663417ms step_avg:577.00ms +step:25414/57344 train_time:14663661ms step_avg:576.99ms +step:25415/57344 train_time:14664195ms step_avg:576.99ms +grad accum step:6354/14336 +step:25416/57344 train_time:14665529ms step_avg:577.02ms +step:25417/57344 train_time:14665546ms step_avg:577.00ms +step:25418/57344 train_time:14665792ms step_avg:576.98ms +step:25419/57344 train_time:14666349ms step_avg:576.98ms +grad accum step:6355/14336 +step:25420/57344 train_time:14667689ms step_avg:577.01ms +step:25421/57344 train_time:14667706ms step_avg:576.99ms +step:25422/57344 train_time:14667951ms step_avg:576.98ms +step:25423/57344 train_time:14668504ms step_avg:576.98ms +grad accum step:6356/14336 +step:25424/57344 train_time:14669816ms step_avg:577.01ms +step:25425/57344 train_time:14669833ms step_avg:576.98ms +step:25426/57344 train_time:14670076ms step_avg:576.97ms +step:25427/57344 train_time:14670625ms step_avg:576.97ms +grad accum step:6357/14336 +step:25428/57344 train_time:14671935ms step_avg:577.00ms +step:25429/57344 train_time:14671952ms step_avg:576.98ms +step:25430/57344 train_time:14672202ms step_avg:576.96ms +step:25431/57344 train_time:14672759ms step_avg:576.96ms +grad accum step:6358/14336 +step:25432/57344 train_time:14674090ms step_avg:576.99ms +step:25433/57344 train_time:14674107ms step_avg:576.97ms +step:25434/57344 train_time:14674355ms step_avg:576.96ms +step:25435/57344 train_time:14674921ms step_avg:576.96ms +grad accum step:6359/14336 +step:25436/57344 train_time:14676278ms step_avg:576.99ms +step:25437/57344 train_time:14676295ms step_avg:576.97ms +step:25438/57344 train_time:14676538ms step_avg:576.95ms +step:25439/57344 train_time:14677082ms step_avg:576.95ms +grad accum step:6360/14336 +step:25440/57344 train_time:14678393ms step_avg:576.98ms +step:25441/57344 train_time:14678410ms step_avg:576.96ms +step:25442/57344 train_time:14678655ms step_avg:576.95ms +step:25443/57344 train_time:14679203ms step_avg:576.94ms +grad accum step:6361/14336 +step:25444/57344 train_time:14680478ms step_avg:576.97ms +step:25445/57344 train_time:14680495ms step_avg:576.95ms +step:25446/57344 train_time:14680739ms step_avg:576.94ms +step:25447/57344 train_time:14681282ms step_avg:576.94ms +grad accum step:6362/14336 +step:25448/57344 train_time:14682582ms step_avg:576.96ms +step:25449/57344 train_time:14682599ms step_avg:576.94ms +step:25450/57344 train_time:14682848ms step_avg:576.93ms +step:25451/57344 train_time:14683397ms step_avg:576.93ms +grad accum step:6363/14336 +step:25452/57344 train_time:14684720ms step_avg:576.96ms +step:25453/57344 train_time:14684737ms step_avg:576.94ms +step:25454/57344 train_time:14684986ms step_avg:576.92ms +step:25455/57344 train_time:14685543ms step_avg:576.92ms +grad accum step:6364/14336 +step:25456/57344 train_time:14686849ms step_avg:576.95ms +step:25457/57344 train_time:14686866ms step_avg:576.93ms +step:25458/57344 train_time:14687111ms step_avg:576.92ms +step:25459/57344 train_time:14687654ms step_avg:576.91ms +grad accum step:6365/14336 +step:25460/57344 train_time:14688991ms step_avg:576.94ms +step:25461/57344 train_time:14689008ms step_avg:576.92ms +step:25462/57344 train_time:14689259ms step_avg:576.91ms +step:25463/57344 train_time:14689813ms step_avg:576.91ms +grad accum step:6366/14336 +step:25464/57344 train_time:14691109ms step_avg:576.94ms +step:25465/57344 train_time:14691126ms step_avg:576.91ms +step:25466/57344 train_time:14691374ms step_avg:576.90ms +step:25467/57344 train_time:14691929ms step_avg:576.90ms +grad accum step:6367/14336 +step:25468/57344 train_time:14693229ms step_avg:576.93ms +step:25469/57344 train_time:14693246ms step_avg:576.91ms +step:25470/57344 train_time:14693492ms step_avg:576.89ms +step:25471/57344 train_time:14694043ms step_avg:576.89ms +grad accum step:6368/14336 +step:25472/57344 train_time:14695376ms step_avg:576.92ms +step:25472/57344 val_loss:6.974039 train_time:14695376ms step_avg:576.92ms +step:25473/57344 train_time:14695389ms step_avg:576.90ms +step:25474/57344 train_time:14695613ms step_avg:576.89ms +step:25475/57344 train_time:14696160ms step_avg:576.89ms +grad accum step:6369/14336 +step:25476/57344 train_time:14697486ms step_avg:576.91ms +step:25477/57344 train_time:14697504ms step_avg:576.89ms +step:25478/57344 train_time:14697753ms step_avg:576.88ms +step:25479/57344 train_time:14698300ms step_avg:576.88ms +grad accum step:6370/14336 +step:25480/57344 train_time:14699590ms step_avg:576.91ms +step:25481/57344 train_time:14699607ms step_avg:576.89ms +step:25482/57344 train_time:14699854ms step_avg:576.87ms +step:25483/57344 train_time:14700396ms step_avg:576.87ms +grad accum step:6371/14336 +step:25484/57344 train_time:14701685ms step_avg:576.90ms +step:25485/57344 train_time:14701702ms step_avg:576.88ms +step:25486/57344 train_time:14701949ms step_avg:576.86ms +step:25487/57344 train_time:14702499ms step_avg:576.86ms +grad accum step:6372/14336 +step:25488/57344 train_time:14703776ms step_avg:576.89ms +step:25489/57344 train_time:14703793ms step_avg:576.87ms +step:25490/57344 train_time:14704035ms step_avg:576.86ms +step:25491/57344 train_time:14704576ms step_avg:576.85ms +grad accum step:6373/14336 +step:25492/57344 train_time:14705862ms step_avg:576.88ms +step:25493/57344 train_time:14705879ms step_avg:576.86ms +step:25494/57344 train_time:14706123ms step_avg:576.85ms +step:25495/57344 train_time:14706662ms step_avg:576.84ms +grad accum step:6374/14336 +step:25496/57344 train_time:14708006ms step_avg:576.88ms +step:25497/57344 train_time:14708022ms step_avg:576.85ms +step:25498/57344 train_time:14708269ms step_avg:576.84ms +step:25499/57344 train_time:14708817ms step_avg:576.84ms +grad accum step:6375/14336 +step:25500/57344 train_time:14710127ms step_avg:576.87ms +step:25501/57344 train_time:14710143ms step_avg:576.85ms +step:25502/57344 train_time:14710392ms step_avg:576.83ms +step:25503/57344 train_time:14710933ms step_avg:576.83ms +grad accum step:6376/14336 +step:25504/57344 train_time:14712210ms step_avg:576.86ms +step:25505/57344 train_time:14712227ms step_avg:576.84ms +step:25506/57344 train_time:14712472ms step_avg:576.82ms +step:25507/57344 train_time:14713014ms step_avg:576.82ms +grad accum step:6377/14336 +step:25508/57344 train_time:14714295ms step_avg:576.85ms +step:25509/57344 train_time:14714312ms step_avg:576.83ms +step:25510/57344 train_time:14714562ms step_avg:576.82ms +step:25511/57344 train_time:14715124ms step_avg:576.81ms +grad accum step:6378/14336 +step:25512/57344 train_time:14716427ms step_avg:576.84ms +step:25513/57344 train_time:14716444ms step_avg:576.82ms +step:25514/57344 train_time:14716694ms step_avg:576.81ms +step:25515/57344 train_time:14717247ms step_avg:576.81ms +grad accum step:6379/14336 +step:25516/57344 train_time:14718574ms step_avg:576.84ms +step:25517/57344 train_time:14718590ms step_avg:576.82ms +step:25518/57344 train_time:14718839ms step_avg:576.80ms +step:25519/57344 train_time:14719392ms step_avg:576.80ms +grad accum step:6380/14336 +step:25520/57344 train_time:14720689ms step_avg:576.83ms +step:25521/57344 train_time:14720706ms step_avg:576.81ms +step:25522/57344 train_time:14720953ms step_avg:576.79ms +step:25523/57344 train_time:14721497ms step_avg:576.79ms +grad accum step:6381/14336 +step:25524/57344 train_time:14722803ms step_avg:576.82ms +step:25525/57344 train_time:14722819ms step_avg:576.80ms +step:25526/57344 train_time:14723067ms step_avg:576.79ms +step:25527/57344 train_time:14723611ms step_avg:576.79ms +grad accum step:6382/14336 +step:25528/57344 train_time:14724925ms step_avg:576.81ms +step:25529/57344 train_time:14724941ms step_avg:576.79ms +step:25530/57344 train_time:14725190ms step_avg:576.78ms +step:25531/57344 train_time:14725753ms step_avg:576.78ms +grad accum step:6383/14336 +step:25532/57344 train_time:14727083ms step_avg:576.81ms +step:25533/57344 train_time:14727100ms step_avg:576.79ms +step:25534/57344 train_time:14727348ms step_avg:576.77ms +step:25535/57344 train_time:14727901ms step_avg:576.77ms +grad accum step:6384/14336 +step:25536/57344 train_time:14729222ms step_avg:576.80ms +step:25536/57344 val_loss:6.971818 train_time:14729223ms step_avg:576.80ms +step:25537/57344 train_time:14729235ms step_avg:576.78ms +step:25538/57344 train_time:14729471ms step_avg:576.77ms +step:25539/57344 train_time:14730053ms step_avg:576.77ms +grad accum step:6385/14336 +step:25540/57344 train_time:14731388ms step_avg:576.80ms +step:25541/57344 train_time:14731404ms step_avg:576.77ms +step:25542/57344 train_time:14731650ms step_avg:576.76ms +step:25543/57344 train_time:14732193ms step_avg:576.76ms +grad accum step:6386/14336 +step:25544/57344 train_time:14733516ms step_avg:576.79ms +step:25545/57344 train_time:14733533ms step_avg:576.77ms +step:25546/57344 train_time:14733781ms step_avg:576.75ms +step:25547/57344 train_time:14734330ms step_avg:576.75ms +grad accum step:6387/14336 +step:25548/57344 train_time:14735650ms step_avg:576.78ms +step:25549/57344 train_time:14735667ms step_avg:576.76ms +step:25550/57344 train_time:14735918ms step_avg:576.75ms +step:25551/57344 train_time:14736468ms step_avg:576.75ms +grad accum step:6388/14336 +step:25552/57344 train_time:14737779ms step_avg:576.78ms +step:25553/57344 train_time:14737795ms step_avg:576.75ms +step:25554/57344 train_time:14738041ms step_avg:576.74ms +step:25555/57344 train_time:14738589ms step_avg:576.74ms +grad accum step:6389/14336 +step:25556/57344 train_time:14739910ms step_avg:576.77ms +step:25557/57344 train_time:14739927ms step_avg:576.75ms +step:25558/57344 train_time:14740172ms step_avg:576.73ms +step:25559/57344 train_time:14740713ms step_avg:576.73ms +grad accum step:6390/14336 +step:25560/57344 train_time:14742014ms step_avg:576.76ms +step:25561/57344 train_time:14742031ms step_avg:576.74ms +step:25562/57344 train_time:14742278ms step_avg:576.73ms +step:25563/57344 train_time:14742831ms step_avg:576.73ms +grad accum step:6391/14336 +step:25564/57344 train_time:14744111ms step_avg:576.75ms +step:25565/57344 train_time:14744127ms step_avg:576.73ms +step:25566/57344 train_time:14744373ms step_avg:576.72ms +step:25567/57344 train_time:14744920ms step_avg:576.72ms +grad accum step:6392/14336 +step:25568/57344 train_time:14746237ms step_avg:576.75ms +step:25569/57344 train_time:14746254ms step_avg:576.72ms +step:25570/57344 train_time:14746500ms step_avg:576.71ms +step:25571/57344 train_time:14747043ms step_avg:576.71ms +grad accum step:6393/14336 +step:25572/57344 train_time:14748357ms step_avg:576.74ms +step:25573/57344 train_time:14748374ms step_avg:576.72ms +step:25574/57344 train_time:14748628ms step_avg:576.70ms +step:25575/57344 train_time:14749189ms step_avg:576.70ms +grad accum step:6394/14336 +step:25576/57344 train_time:14750486ms step_avg:576.73ms +step:25577/57344 train_time:14750503ms step_avg:576.71ms +step:25578/57344 train_time:14750750ms step_avg:576.70ms +step:25579/57344 train_time:14751305ms step_avg:576.70ms +grad accum step:6395/14336 +step:25580/57344 train_time:14752629ms step_avg:576.73ms +step:25581/57344 train_time:14752646ms step_avg:576.70ms +step:25582/57344 train_time:14752895ms step_avg:576.69ms +step:25583/57344 train_time:14753447ms step_avg:576.69ms +grad accum step:6396/14336 +step:25584/57344 train_time:14754745ms step_avg:576.72ms +step:25585/57344 train_time:14754762ms step_avg:576.70ms +step:25586/57344 train_time:14755011ms step_avg:576.68ms +step:25587/57344 train_time:14755564ms step_avg:576.68ms +grad accum step:6397/14336 +step:25588/57344 train_time:14756866ms step_avg:576.71ms +step:25589/57344 train_time:14756883ms step_avg:576.69ms +step:25590/57344 train_time:14757126ms step_avg:576.68ms +step:25591/57344 train_time:14757662ms step_avg:576.67ms +grad accum step:6398/14336 +step:25592/57344 train_time:14758957ms step_avg:576.70ms +step:25593/57344 train_time:14758973ms step_avg:576.68ms +step:25594/57344 train_time:14759218ms step_avg:576.67ms +step:25595/57344 train_time:14759764ms step_avg:576.67ms +grad accum step:6399/14336 +step:25596/57344 train_time:14761066ms step_avg:576.69ms +step:25597/57344 train_time:14761082ms step_avg:576.67ms +step:25598/57344 train_time:14761333ms step_avg:576.66ms +step:25599/57344 train_time:14761889ms step_avg:576.66ms +grad accum step:6400/14336 +step:25600/57344 train_time:14763174ms step_avg:576.69ms +step:25600/57344 val_loss:6.970633 train_time:14763175ms step_avg:576.69ms +step:25601/57344 train_time:14763187ms step_avg:576.66ms +step:25602/57344 train_time:14763424ms step_avg:576.65ms +step:25603/57344 train_time:14764007ms step_avg:576.65ms +grad accum step:6401/14336 +step:25604/57344 train_time:14765337ms step_avg:576.68ms +step:25605/57344 train_time:14765354ms step_avg:576.66ms +step:25606/57344 train_time:14765600ms step_avg:576.65ms +step:25607/57344 train_time:14766138ms step_avg:576.64ms +grad accum step:6402/14336 +step:25608/57344 train_time:14767412ms step_avg:576.67ms +step:25609/57344 train_time:14767429ms step_avg:576.65ms +step:25610/57344 train_time:14767675ms step_avg:576.64ms +step:25611/57344 train_time:14768219ms step_avg:576.64ms +grad accum step:6403/14336 +step:25612/57344 train_time:14769536ms step_avg:576.66ms +step:25613/57344 train_time:14769553ms step_avg:576.64ms +step:25614/57344 train_time:14769810ms step_avg:576.63ms +step:25615/57344 train_time:14770382ms step_avg:576.63ms +grad accum step:6404/14336 +step:25616/57344 train_time:14771708ms step_avg:576.66ms +step:25617/57344 train_time:14771724ms step_avg:576.64ms +step:25618/57344 train_time:14771975ms step_avg:576.62ms +step:25619/57344 train_time:14772536ms step_avg:576.62ms +grad accum step:6405/14336 +step:25620/57344 train_time:14773826ms step_avg:576.65ms +step:25621/57344 train_time:14773843ms step_avg:576.63ms +step:25622/57344 train_time:14774106ms step_avg:576.62ms +step:25623/57344 train_time:14774696ms step_avg:576.62ms +grad accum step:6406/14336 +step:25624/57344 train_time:14776037ms step_avg:576.65ms +step:25625/57344 train_time:14776054ms step_avg:576.63ms +step:25626/57344 train_time:14776313ms step_avg:576.61ms +step:25627/57344 train_time:14776883ms step_avg:576.61ms +grad accum step:6407/14336 +step:25628/57344 train_time:14778197ms step_avg:576.64ms +step:25629/57344 train_time:14778214ms step_avg:576.62ms +step:25630/57344 train_time:14778461ms step_avg:576.61ms +step:25631/57344 train_time:14779009ms step_avg:576.61ms +grad accum step:6408/14336 +step:25632/57344 train_time:14780365ms step_avg:576.64ms +step:25633/57344 train_time:14780382ms step_avg:576.62ms +step:25634/57344 train_time:14780628ms step_avg:576.60ms +step:25635/57344 train_time:14781168ms step_avg:576.60ms +grad accum step:6409/14336 +step:25636/57344 train_time:14782490ms step_avg:576.63ms +step:25637/57344 train_time:14782507ms step_avg:576.61ms +step:25638/57344 train_time:14782754ms step_avg:576.60ms +step:25639/57344 train_time:14783302ms step_avg:576.59ms +grad accum step:6410/14336 +step:25640/57344 train_time:14784597ms step_avg:576.62ms +step:25641/57344 train_time:14784614ms step_avg:576.60ms +step:25642/57344 train_time:14784859ms step_avg:576.59ms +step:25643/57344 train_time:14785414ms step_avg:576.59ms +grad accum step:6411/14336 +step:25644/57344 train_time:14786737ms step_avg:576.62ms +step:25645/57344 train_time:14786754ms step_avg:576.59ms +step:25646/57344 train_time:14787003ms step_avg:576.58ms +step:25647/57344 train_time:14787553ms step_avg:576.58ms +grad accum step:6412/14336 +step:25648/57344 train_time:14788889ms step_avg:576.61ms +step:25649/57344 train_time:14788906ms step_avg:576.59ms +step:25650/57344 train_time:14789152ms step_avg:576.58ms +step:25651/57344 train_time:14789700ms step_avg:576.57ms +grad accum step:6413/14336 +step:25652/57344 train_time:14790998ms step_avg:576.60ms +step:25653/57344 train_time:14791015ms step_avg:576.58ms +step:25654/57344 train_time:14791263ms step_avg:576.57ms +step:25655/57344 train_time:14791818ms step_avg:576.57ms +grad accum step:6414/14336 +step:25656/57344 train_time:14793125ms step_avg:576.60ms +step:25657/57344 train_time:14793142ms step_avg:576.57ms +step:25658/57344 train_time:14793388ms step_avg:576.56ms +step:25659/57344 train_time:14793931ms step_avg:576.56ms +grad accum step:6415/14336 +step:25660/57344 train_time:14795212ms step_avg:576.59ms +step:25661/57344 train_time:14795228ms step_avg:576.56ms +step:25662/57344 train_time:14795475ms step_avg:576.55ms +step:25663/57344 train_time:14796024ms step_avg:576.55ms +grad accum step:6416/14336 +step:25664/57344 train_time:14797341ms step_avg:576.58ms +step:25664/57344 val_loss:6.974714 train_time:14797341ms step_avg:576.58ms +step:25665/57344 train_time:14797354ms step_avg:576.56ms +step:25666/57344 train_time:14797577ms step_avg:576.54ms +step:25667/57344 train_time:14798131ms step_avg:576.54ms +grad accum step:6417/14336 +step:25668/57344 train_time:14799430ms step_avg:576.57ms +step:25669/57344 train_time:14799447ms step_avg:576.55ms +step:25670/57344 train_time:14799698ms step_avg:576.54ms +step:25671/57344 train_time:14800249ms step_avg:576.54ms +grad accum step:6418/14336 +step:25672/57344 train_time:14801571ms step_avg:576.56ms +step:25673/57344 train_time:14801588ms step_avg:576.54ms +step:25674/57344 train_time:14801835ms step_avg:576.53ms +step:25675/57344 train_time:14802388ms step_avg:576.53ms +grad accum step:6419/14336 +step:25676/57344 train_time:14803692ms step_avg:576.56ms +step:25677/57344 train_time:14803709ms step_avg:576.54ms +step:25678/57344 train_time:14803957ms step_avg:576.52ms +step:25679/57344 train_time:14804508ms step_avg:576.52ms +grad accum step:6420/14336 +step:25680/57344 train_time:14805819ms step_avg:576.55ms +step:25681/57344 train_time:14805836ms step_avg:576.53ms +step:25682/57344 train_time:14806092ms step_avg:576.52ms +step:25683/57344 train_time:14806671ms step_avg:576.52ms +grad accum step:6421/14336 +step:25684/57344 train_time:14807966ms step_avg:576.54ms +step:25685/57344 train_time:14807983ms step_avg:576.52ms +step:25686/57344 train_time:14808236ms step_avg:576.51ms +step:25687/57344 train_time:14808797ms step_avg:576.51ms +grad accum step:6422/14336 +step:25688/57344 train_time:14810094ms step_avg:576.54ms +step:25689/57344 train_time:14810111ms step_avg:576.52ms +step:25690/57344 train_time:14810358ms step_avg:576.50ms +step:25691/57344 train_time:14810908ms step_avg:576.50ms +grad accum step:6423/14336 +step:25692/57344 train_time:14812258ms step_avg:576.53ms +step:25693/57344 train_time:14812275ms step_avg:576.51ms +step:25694/57344 train_time:14812520ms step_avg:576.50ms +step:25695/57344 train_time:14813071ms step_avg:576.50ms +grad accum step:6424/14336 +step:25696/57344 train_time:14814406ms step_avg:576.53ms +step:25697/57344 train_time:14814422ms step_avg:576.50ms +step:25698/57344 train_time:14814670ms step_avg:576.49ms +step:25699/57344 train_time:14815219ms step_avg:576.49ms +grad accum step:6425/14336 +step:25700/57344 train_time:14816532ms step_avg:576.52ms +step:25701/57344 train_time:14816548ms step_avg:576.50ms +step:25702/57344 train_time:14816796ms step_avg:576.48ms +step:25703/57344 train_time:14817345ms step_avg:576.48ms +grad accum step:6426/14336 +step:25704/57344 train_time:14818745ms step_avg:576.52ms +step:25705/57344 train_time:14818761ms step_avg:576.49ms +step:25706/57344 train_time:14819015ms step_avg:576.48ms +step:25707/57344 train_time:14819587ms step_avg:576.48ms +grad accum step:6427/14336 +step:25708/57344 train_time:14820885ms step_avg:576.51ms +step:25709/57344 train_time:14820902ms step_avg:576.49ms +step:25710/57344 train_time:14821147ms step_avg:576.47ms +step:25711/57344 train_time:14821693ms step_avg:576.47ms +grad accum step:6428/14336 +step:25712/57344 train_time:14822985ms step_avg:576.50ms +step:25713/57344 train_time:14823002ms step_avg:576.48ms +step:25714/57344 train_time:14823251ms step_avg:576.47ms +step:25715/57344 train_time:14823799ms step_avg:576.47ms +grad accum step:6429/14336 +step:25716/57344 train_time:14825141ms step_avg:576.49ms +step:25717/57344 train_time:14825158ms step_avg:576.47ms +step:25718/57344 train_time:14825406ms step_avg:576.46ms +step:25719/57344 train_time:14825954ms step_avg:576.46ms +grad accum step:6430/14336 +step:25720/57344 train_time:14827247ms step_avg:576.49ms +step:25721/57344 train_time:14827263ms step_avg:576.47ms +step:25722/57344 train_time:14827511ms step_avg:576.45ms +step:25723/57344 train_time:14828060ms step_avg:576.45ms +grad accum step:6431/14336 +step:25724/57344 train_time:14829399ms step_avg:576.48ms +step:25725/57344 train_time:14829416ms step_avg:576.46ms +step:25726/57344 train_time:14829672ms step_avg:576.45ms +step:25727/57344 train_time:14830237ms step_avg:576.45ms +grad accum step:6432/14336 +step:25728/57344 train_time:14831511ms step_avg:576.47ms +step:25728/57344 val_loss:6.965921 train_time:14831512ms step_avg:576.47ms +step:25729/57344 train_time:14831525ms step_avg:576.45ms +step:25730/57344 train_time:14831754ms step_avg:576.44ms +step:25731/57344 train_time:14832316ms step_avg:576.44ms +grad accum step:6433/14336 +step:25732/57344 train_time:14833634ms step_avg:576.47ms +step:25733/57344 train_time:14833651ms step_avg:576.44ms +step:25734/57344 train_time:14833897ms step_avg:576.43ms +step:25735/57344 train_time:14834445ms step_avg:576.43ms +grad accum step:6434/14336 +step:25736/57344 train_time:14835727ms step_avg:576.46ms +step:25737/57344 train_time:14835743ms step_avg:576.44ms +step:25738/57344 train_time:14835992ms step_avg:576.42ms +step:25739/57344 train_time:14836550ms step_avg:576.42ms +grad accum step:6435/14336 +step:25740/57344 train_time:14837849ms step_avg:576.45ms +step:25741/57344 train_time:14837866ms step_avg:576.43ms +step:25742/57344 train_time:14838113ms step_avg:576.42ms +step:25743/57344 train_time:14838660ms step_avg:576.42ms +grad accum step:6436/14336 +step:25744/57344 train_time:14839951ms step_avg:576.44ms +step:25745/57344 train_time:14839968ms step_avg:576.42ms +step:25746/57344 train_time:14840215ms step_avg:576.41ms +step:25747/57344 train_time:14840772ms step_avg:576.41ms +grad accum step:6437/14336 +step:25748/57344 train_time:14842110ms step_avg:576.44ms +step:25749/57344 train_time:14842127ms step_avg:576.42ms +step:25750/57344 train_time:14842382ms step_avg:576.40ms +step:25751/57344 train_time:14842944ms step_avg:576.40ms +grad accum step:6438/14336 +step:25752/57344 train_time:14844254ms step_avg:576.43ms +step:25753/57344 train_time:14844270ms step_avg:576.41ms +step:25754/57344 train_time:14844516ms step_avg:576.40ms +step:25755/57344 train_time:14845060ms step_avg:576.40ms +grad accum step:6439/14336 +step:25756/57344 train_time:14846356ms step_avg:576.42ms +step:25757/57344 train_time:14846372ms step_avg:576.40ms +step:25758/57344 train_time:14846619ms step_avg:576.39ms +step:25759/57344 train_time:14847165ms step_avg:576.39ms +grad accum step:6440/14336 +step:25760/57344 train_time:14848464ms step_avg:576.42ms +step:25761/57344 train_time:14848481ms step_avg:576.39ms +step:25762/57344 train_time:14848729ms step_avg:576.38ms +step:25763/57344 train_time:14849277ms step_avg:576.38ms +grad accum step:6441/14336 +step:25764/57344 train_time:14850567ms step_avg:576.41ms +step:25765/57344 train_time:14850584ms step_avg:576.39ms +step:25766/57344 train_time:14850828ms step_avg:576.37ms +step:25767/57344 train_time:14851375ms step_avg:576.37ms +grad accum step:6442/14336 +step:25768/57344 train_time:14852673ms step_avg:576.40ms +step:25769/57344 train_time:14852690ms step_avg:576.38ms +step:25770/57344 train_time:14852934ms step_avg:576.37ms +step:25771/57344 train_time:14853483ms step_avg:576.36ms +grad accum step:6443/14336 +step:25772/57344 train_time:14854806ms step_avg:576.39ms +step:25773/57344 train_time:14854823ms step_avg:576.37ms +step:25774/57344 train_time:14855072ms step_avg:576.36ms +step:25775/57344 train_time:14855630ms step_avg:576.36ms +grad accum step:6444/14336 +step:25776/57344 train_time:14856934ms step_avg:576.39ms +step:25777/57344 train_time:14856951ms step_avg:576.36ms +step:25778/57344 train_time:14857199ms step_avg:576.35ms +step:25779/57344 train_time:14857750ms step_avg:576.35ms +grad accum step:6445/14336 +step:25780/57344 train_time:14859069ms step_avg:576.38ms +step:25781/57344 train_time:14859085ms step_avg:576.36ms +step:25782/57344 train_time:14859335ms step_avg:576.35ms +step:25783/57344 train_time:14859886ms step_avg:576.34ms +grad accum step:6446/14336 +step:25784/57344 train_time:14861181ms step_avg:576.37ms +step:25785/57344 train_time:14861198ms step_avg:576.35ms +step:25786/57344 train_time:14861447ms step_avg:576.34ms +step:25787/57344 train_time:14861993ms step_avg:576.34ms +grad accum step:6447/14336 +step:25788/57344 train_time:14863326ms step_avg:576.37ms +step:25789/57344 train_time:14863343ms step_avg:576.34ms +step:25790/57344 train_time:14863610ms step_avg:576.33ms +step:25791/57344 train_time:14864215ms step_avg:576.33ms +grad accum step:6448/14336 +step:25792/57344 train_time:14865522ms step_avg:576.36ms +step:25792/57344 val_loss:6.967148 train_time:14865523ms step_avg:576.36ms +step:25793/57344 train_time:14865536ms step_avg:576.34ms +step:25794/57344 train_time:14865759ms step_avg:576.33ms +step:25795/57344 train_time:14866308ms step_avg:576.33ms +grad accum step:6449/14336 +step:25796/57344 train_time:14867627ms step_avg:576.35ms +step:25797/57344 train_time:14867644ms step_avg:576.33ms +step:25798/57344 train_time:14867891ms step_avg:576.32ms +step:25799/57344 train_time:14868439ms step_avg:576.32ms +grad accum step:6450/14336 +step:25800/57344 train_time:14869759ms step_avg:576.35ms +step:25801/57344 train_time:14869775ms step_avg:576.33ms +step:25802/57344 train_time:14870023ms step_avg:576.31ms +step:25803/57344 train_time:14870578ms step_avg:576.31ms +grad accum step:6451/14336 +step:25804/57344 train_time:14871900ms step_avg:576.34ms +step:25805/57344 train_time:14871917ms step_avg:576.32ms +step:25806/57344 train_time:14872175ms step_avg:576.31ms +step:25807/57344 train_time:14872756ms step_avg:576.31ms +grad accum step:6452/14336 +step:25808/57344 train_time:14874086ms step_avg:576.34ms +step:25809/57344 train_time:14874103ms step_avg:576.31ms +step:25810/57344 train_time:14874350ms step_avg:576.30ms +step:25811/57344 train_time:14874899ms step_avg:576.30ms +grad accum step:6453/14336 +step:25812/57344 train_time:14876238ms step_avg:576.33ms +step:25813/57344 train_time:14876254ms step_avg:576.31ms +step:25814/57344 train_time:14876500ms step_avg:576.30ms +step:25815/57344 train_time:14877039ms step_avg:576.29ms +grad accum step:6454/14336 +step:25816/57344 train_time:14878324ms step_avg:576.32ms +step:25817/57344 train_time:14878341ms step_avg:576.30ms +step:25818/57344 train_time:14878589ms step_avg:576.29ms +step:25819/57344 train_time:14879136ms step_avg:576.29ms +grad accum step:6455/14336 +step:25820/57344 train_time:14880451ms step_avg:576.31ms +step:25821/57344 train_time:14880468ms step_avg:576.29ms +step:25822/57344 train_time:14880733ms step_avg:576.28ms +step:25823/57344 train_time:14881328ms step_avg:576.28ms +grad accum step:6456/14336 +step:25824/57344 train_time:14882649ms step_avg:576.31ms +step:25825/57344 train_time:14882665ms step_avg:576.29ms +step:25826/57344 train_time:14882912ms step_avg:576.28ms +step:25827/57344 train_time:14883462ms step_avg:576.28ms +grad accum step:6457/14336 +step:25828/57344 train_time:14884793ms step_avg:576.30ms +step:25829/57344 train_time:14884809ms step_avg:576.28ms +step:25830/57344 train_time:14885057ms step_avg:576.27ms +step:25831/57344 train_time:14885615ms step_avg:576.27ms +grad accum step:6458/14336 +step:25832/57344 train_time:14886959ms step_avg:576.30ms +step:25833/57344 train_time:14886976ms step_avg:576.28ms +step:25834/57344 train_time:14887235ms step_avg:576.27ms +step:25835/57344 train_time:14887811ms step_avg:576.27ms +grad accum step:6459/14336 +step:25836/57344 train_time:14889134ms step_avg:576.29ms +step:25837/57344 train_time:14889151ms step_avg:576.27ms +step:25838/57344 train_time:14889402ms step_avg:576.26ms +step:25839/57344 train_time:14889954ms step_avg:576.26ms +grad accum step:6460/14336 +step:25840/57344 train_time:14891268ms step_avg:576.29ms +step:25841/57344 train_time:14891285ms step_avg:576.27ms +step:25842/57344 train_time:14891529ms step_avg:576.25ms +step:25843/57344 train_time:14892072ms step_avg:576.25ms +grad accum step:6461/14336 +step:25844/57344 train_time:14893396ms step_avg:576.28ms +step:25845/57344 train_time:14893413ms step_avg:576.26ms +step:25846/57344 train_time:14893662ms step_avg:576.25ms +step:25847/57344 train_time:14894223ms step_avg:576.25ms +grad accum step:6462/14336 +step:25848/57344 train_time:14895557ms step_avg:576.28ms +step:25849/57344 train_time:14895574ms step_avg:576.25ms +step:25850/57344 train_time:14895820ms step_avg:576.24ms +step:25851/57344 train_time:14896363ms step_avg:576.24ms +grad accum step:6463/14336 +step:25852/57344 train_time:14897646ms step_avg:576.27ms +step:25853/57344 train_time:14897663ms step_avg:576.25ms +step:25854/57344 train_time:14897919ms step_avg:576.23ms +step:25855/57344 train_time:14898488ms step_avg:576.23ms +grad accum step:6464/14336 +step:25856/57344 train_time:14899799ms step_avg:576.26ms +step:25856/57344 val_loss:6.922833 train_time:14899799ms step_avg:576.26ms +step:25857/57344 train_time:14899812ms step_avg:576.24ms +step:25858/57344 train_time:14900040ms step_avg:576.23ms +step:25859/57344 train_time:14900594ms step_avg:576.22ms +grad accum step:6465/14336 +step:25860/57344 train_time:14901902ms step_avg:576.25ms +step:25861/57344 train_time:14901919ms step_avg:576.23ms +step:25862/57344 train_time:14902167ms step_avg:576.22ms +step:25863/57344 train_time:14902717ms step_avg:576.22ms +grad accum step:6466/14336 +step:25864/57344 train_time:14904012ms step_avg:576.25ms +step:25865/57344 train_time:14904029ms step_avg:576.22ms +step:25866/57344 train_time:14904275ms step_avg:576.21ms +step:25867/57344 train_time:14904816ms step_avg:576.21ms +grad accum step:6467/14336 +step:25868/57344 train_time:14906096ms step_avg:576.24ms +step:25869/57344 train_time:14906113ms step_avg:576.22ms +step:25870/57344 train_time:14906357ms step_avg:576.20ms +step:25871/57344 train_time:14906902ms step_avg:576.20ms +grad accum step:6468/14336 +step:25872/57344 train_time:14908202ms step_avg:576.23ms +step:25873/57344 train_time:14908219ms step_avg:576.21ms +step:25874/57344 train_time:14908464ms step_avg:576.19ms +step:25875/57344 train_time:14909012ms step_avg:576.19ms +grad accum step:6469/14336 +step:25876/57344 train_time:14910309ms step_avg:576.22ms +step:25877/57344 train_time:14910326ms step_avg:576.20ms +step:25878/57344 train_time:14910576ms step_avg:576.19ms +step:25879/57344 train_time:14911125ms step_avg:576.19ms +grad accum step:6470/14336 +step:25880/57344 train_time:14912443ms step_avg:576.21ms +step:25881/57344 train_time:14912459ms step_avg:576.19ms +step:25882/57344 train_time:14912708ms step_avg:576.18ms +step:25883/57344 train_time:14913255ms step_avg:576.18ms +grad accum step:6471/14336 +step:25884/57344 train_time:14914548ms step_avg:576.21ms +step:25885/57344 train_time:14914565ms step_avg:576.19ms +step:25886/57344 train_time:14914807ms step_avg:576.17ms +step:25887/57344 train_time:14915338ms step_avg:576.17ms +grad accum step:6472/14336 +step:25888/57344 train_time:14916676ms step_avg:576.20ms +step:25889/57344 train_time:14916693ms step_avg:576.18ms +step:25890/57344 train_time:14916953ms step_avg:576.17ms +step:25891/57344 train_time:14917519ms step_avg:576.17ms +grad accum step:6473/14336 +step:25892/57344 train_time:14918860ms step_avg:576.20ms +step:25893/57344 train_time:14918877ms step_avg:576.17ms +step:25894/57344 train_time:14919120ms step_avg:576.16ms +step:25895/57344 train_time:14919659ms step_avg:576.16ms +grad accum step:6474/14336 +step:25896/57344 train_time:14920945ms step_avg:576.19ms +step:25897/57344 train_time:14920961ms step_avg:576.17ms +step:25898/57344 train_time:14921208ms step_avg:576.15ms +step:25899/57344 train_time:14921755ms step_avg:576.15ms +grad accum step:6475/14336 +step:25900/57344 train_time:14923065ms step_avg:576.18ms +step:25901/57344 train_time:14923082ms step_avg:576.16ms +step:25902/57344 train_time:14923331ms step_avg:576.15ms +step:25903/57344 train_time:14923890ms step_avg:576.15ms +grad accum step:6476/14336 +step:25904/57344 train_time:14925211ms step_avg:576.17ms +step:25905/57344 train_time:14925228ms step_avg:576.15ms +step:25906/57344 train_time:14925477ms step_avg:576.14ms +step:25907/57344 train_time:14926026ms step_avg:576.14ms +grad accum step:6477/14336 +step:25908/57344 train_time:14927350ms step_avg:576.17ms +step:25909/57344 train_time:14927367ms step_avg:576.15ms +step:25910/57344 train_time:14927615ms step_avg:576.13ms +step:25911/57344 train_time:14928170ms step_avg:576.13ms +grad accum step:6478/14336 +step:25912/57344 train_time:14929518ms step_avg:576.16ms +step:25913/57344 train_time:14929535ms step_avg:576.14ms +step:25914/57344 train_time:14929777ms step_avg:576.13ms +step:25915/57344 train_time:14930317ms step_avg:576.13ms +grad accum step:6479/14336 +step:25916/57344 train_time:14931622ms step_avg:576.15ms +step:25917/57344 train_time:14931639ms step_avg:576.13ms +step:25918/57344 train_time:14931895ms step_avg:576.12ms +step:25919/57344 train_time:14932464ms step_avg:576.12ms +grad accum step:6480/14336 +step:25920/57344 train_time:14933768ms step_avg:576.15ms +step:25920/57344 val_loss:6.888264 train_time:14933768ms step_avg:576.15ms +step:25921/57344 train_time:14933781ms step_avg:576.13ms +step:25922/57344 train_time:14934007ms step_avg:576.11ms +step:25923/57344 train_time:14934554ms step_avg:576.11ms +grad accum step:6481/14336 +step:25924/57344 train_time:14935848ms step_avg:576.14ms +step:25925/57344 train_time:14935864ms step_avg:576.12ms +step:25926/57344 train_time:14936110ms step_avg:576.11ms +step:25927/57344 train_time:14936656ms step_avg:576.10ms +grad accum step:6482/14336 +step:25928/57344 train_time:14937971ms step_avg:576.13ms +step:25929/57344 train_time:14937988ms step_avg:576.11ms +step:25930/57344 train_time:14938236ms step_avg:576.10ms +step:25931/57344 train_time:14938791ms step_avg:576.10ms +grad accum step:6483/14336 +step:25932/57344 train_time:14940100ms step_avg:576.13ms +step:25933/57344 train_time:14940117ms step_avg:576.10ms +step:25934/57344 train_time:14940363ms step_avg:576.09ms +step:25935/57344 train_time:14940905ms step_avg:576.09ms +grad accum step:6484/14336 +step:25936/57344 train_time:14942184ms step_avg:576.12ms +step:25937/57344 train_time:14942201ms step_avg:576.10ms +step:25938/57344 train_time:14942443ms step_avg:576.08ms +step:25939/57344 train_time:14942988ms step_avg:576.08ms +grad accum step:6485/14336 +step:25940/57344 train_time:14944288ms step_avg:576.11ms +step:25941/57344 train_time:14944305ms step_avg:576.09ms +step:25942/57344 train_time:14944553ms step_avg:576.08ms +step:25943/57344 train_time:14945108ms step_avg:576.07ms +grad accum step:6486/14336 +step:25944/57344 train_time:14946446ms step_avg:576.10ms +step:25945/57344 train_time:14946463ms step_avg:576.08ms +step:25946/57344 train_time:14946711ms step_avg:576.07ms +step:25947/57344 train_time:14947253ms step_avg:576.07ms +grad accum step:6487/14336 +step:25948/57344 train_time:14948554ms step_avg:576.10ms +step:25949/57344 train_time:14948571ms step_avg:576.08ms +step:25950/57344 train_time:14948829ms step_avg:576.06ms +step:25951/57344 train_time:14949409ms step_avg:576.06ms +grad accum step:6488/14336 +step:25952/57344 train_time:14950722ms step_avg:576.09ms +step:25953/57344 train_time:14950739ms step_avg:576.07ms +step:25954/57344 train_time:14950988ms step_avg:576.06ms +step:25955/57344 train_time:14951539ms step_avg:576.06ms +grad accum step:6489/14336 +step:25956/57344 train_time:14952839ms step_avg:576.08ms +step:25957/57344 train_time:14952856ms step_avg:576.06ms +step:25958/57344 train_time:14953103ms step_avg:576.05ms +step:25959/57344 train_time:14953644ms step_avg:576.05ms +grad accum step:6490/14336 +step:25960/57344 train_time:14954958ms step_avg:576.08ms +step:25961/57344 train_time:14954975ms step_avg:576.06ms +step:25962/57344 train_time:14955216ms step_avg:576.04ms +step:25963/57344 train_time:14955764ms step_avg:576.04ms +grad accum step:6491/14336 +step:25964/57344 train_time:14957081ms step_avg:576.07ms +step:25965/57344 train_time:14957098ms step_avg:576.05ms +step:25966/57344 train_time:14957344ms step_avg:576.04ms +step:25967/57344 train_time:14957892ms step_avg:576.03ms +grad accum step:6492/14336 +step:25968/57344 train_time:14959185ms step_avg:576.06ms +step:25969/57344 train_time:14959202ms step_avg:576.04ms +step:25970/57344 train_time:14959455ms step_avg:576.03ms +step:25971/57344 train_time:14960023ms step_avg:576.03ms +grad accum step:6493/14336 +step:25972/57344 train_time:14961330ms step_avg:576.06ms +step:25973/57344 train_time:14961347ms step_avg:576.03ms +step:25974/57344 train_time:14961592ms step_avg:576.02ms +step:25975/57344 train_time:14962137ms step_avg:576.02ms +grad accum step:6494/14336 +step:25976/57344 train_time:14963435ms step_avg:576.05ms +step:25977/57344 train_time:14963451ms step_avg:576.03ms +step:25978/57344 train_time:14963699ms step_avg:576.01ms +step:25979/57344 train_time:14964244ms step_avg:576.01ms +grad accum step:6495/14336 +step:25980/57344 train_time:14965592ms step_avg:576.04ms +step:25981/57344 train_time:14965609ms step_avg:576.02ms +step:25982/57344 train_time:14965856ms step_avg:576.01ms +step:25983/57344 train_time:14966406ms step_avg:576.01ms +grad accum step:6496/14336 +step:25984/57344 train_time:14967713ms step_avg:576.04ms +step:25984/57344 val_loss:6.852927 train_time:14967713ms step_avg:576.04ms +step:25985/57344 train_time:14967726ms step_avg:576.01ms +step:25986/57344 train_time:14967947ms step_avg:576.00ms +step:25987/57344 train_time:14968486ms step_avg:576.00ms +grad accum step:6497/14336 +step:25988/57344 train_time:14969769ms step_avg:576.03ms +step:25989/57344 train_time:14969786ms step_avg:576.00ms +step:25990/57344 train_time:14970041ms step_avg:575.99ms +step:25991/57344 train_time:14970603ms step_avg:575.99ms +grad accum step:6498/14336 +step:25992/57344 train_time:14971904ms step_avg:576.02ms +step:25993/57344 train_time:14971921ms step_avg:576.00ms +step:25994/57344 train_time:14972165ms step_avg:575.99ms +step:25995/57344 train_time:14972714ms step_avg:575.98ms +grad accum step:6499/14336 +step:25996/57344 train_time:14974046ms step_avg:576.01ms +step:25997/57344 train_time:14974063ms step_avg:575.99ms +step:25998/57344 train_time:14974309ms step_avg:575.98ms +step:25999/57344 train_time:14974862ms step_avg:575.98ms +grad accum step:6500/14336 +step:26000/57344 train_time:14976159ms step_avg:576.01ms +step:26001/57344 train_time:14976176ms step_avg:575.98ms +step:26002/57344 train_time:14976420ms step_avg:575.97ms +step:26003/57344 train_time:14976965ms step_avg:575.97ms +grad accum step:6501/14336 +step:26004/57344 train_time:14978272ms step_avg:576.00ms +step:26005/57344 train_time:14978289ms step_avg:575.98ms +step:26006/57344 train_time:14978538ms step_avg:575.96ms +step:26007/57344 train_time:14979089ms step_avg:575.96ms +grad accum step:6502/14336 +step:26008/57344 train_time:14980488ms step_avg:576.00ms +step:26009/57344 train_time:14980504ms step_avg:575.97ms +step:26010/57344 train_time:14980751ms step_avg:575.96ms +step:26011/57344 train_time:14981291ms step_avg:575.96ms +grad accum step:6503/14336 +step:26012/57344 train_time:14982583ms step_avg:575.99ms +step:26013/57344 train_time:14982600ms step_avg:575.97ms +step:26014/57344 train_time:14982846ms step_avg:575.95ms +step:26015/57344 train_time:14983394ms step_avg:575.95ms +grad accum step:6504/14336 +step:26016/57344 train_time:14984669ms step_avg:575.98ms +step:26017/57344 train_time:14984686ms step_avg:575.96ms +step:26018/57344 train_time:14984932ms step_avg:575.94ms +step:26019/57344 train_time:14985477ms step_avg:575.94ms +grad accum step:6505/14336 +step:26020/57344 train_time:14986766ms step_avg:575.97ms +step:26021/57344 train_time:14986783ms step_avg:575.95ms +step:26022/57344 train_time:14987028ms step_avg:575.94ms +step:26023/57344 train_time:14987585ms step_avg:575.94ms +grad accum step:6506/14336 +step:26024/57344 train_time:14988897ms step_avg:575.96ms +step:26025/57344 train_time:14988914ms step_avg:575.94ms +step:26026/57344 train_time:14989160ms step_avg:575.93ms +step:26027/57344 train_time:14989724ms step_avg:575.93ms +grad accum step:6507/14336 +step:26028/57344 train_time:14991083ms step_avg:575.96ms +step:26029/57344 train_time:14991100ms step_avg:575.94ms +step:26030/57344 train_time:14991346ms step_avg:575.93ms +step:26031/57344 train_time:14991892ms step_avg:575.92ms +grad accum step:6508/14336 +step:26032/57344 train_time:14993189ms step_avg:575.95ms +step:26033/57344 train_time:14993206ms step_avg:575.93ms +step:26034/57344 train_time:14993453ms step_avg:575.92ms +step:26035/57344 train_time:14994004ms step_avg:575.92ms +grad accum step:6509/14336 +step:26036/57344 train_time:14995293ms step_avg:575.94ms +step:26037/57344 train_time:14995310ms step_avg:575.92ms +step:26038/57344 train_time:14995560ms step_avg:575.91ms +step:26039/57344 train_time:14996101ms step_avg:575.91ms +grad accum step:6510/14336 +step:26040/57344 train_time:14997396ms step_avg:575.94ms +step:26041/57344 train_time:14997413ms step_avg:575.92ms +step:26042/57344 train_time:14997656ms step_avg:575.90ms +step:26043/57344 train_time:14998191ms step_avg:575.90ms +grad accum step:6511/14336 +step:26044/57344 train_time:14999475ms step_avg:575.93ms +step:26045/57344 train_time:14999492ms step_avg:575.91ms +step:26046/57344 train_time:14999739ms step_avg:575.89ms +step:26047/57344 train_time:15000286ms step_avg:575.89ms +grad accum step:6512/14336 +step:26048/57344 train_time:15001593ms step_avg:575.92ms +step:26048/57344 val_loss:6.810912 train_time:15001594ms step_avg:575.92ms +step:26049/57344 train_time:15001606ms step_avg:575.90ms +step:26050/57344 train_time:15001827ms step_avg:575.89ms +step:26051/57344 train_time:15002364ms step_avg:575.88ms +grad accum step:6513/14336 +step:26052/57344 train_time:15003660ms step_avg:575.91ms +step:26053/57344 train_time:15003677ms step_avg:575.89ms +step:26054/57344 train_time:15003925ms step_avg:575.88ms +step:26055/57344 train_time:15004478ms step_avg:575.88ms +grad accum step:6514/14336 +step:26056/57344 train_time:15005835ms step_avg:575.91ms +step:26057/57344 train_time:15005852ms step_avg:575.89ms +step:26058/57344 train_time:15006099ms step_avg:575.87ms +step:26059/57344 train_time:15006650ms step_avg:575.87ms +grad accum step:6515/14336 +step:26060/57344 train_time:15007942ms step_avg:575.90ms +step:26061/57344 train_time:15007959ms step_avg:575.88ms +step:26062/57344 train_time:15008204ms step_avg:575.87ms +step:26063/57344 train_time:15008753ms step_avg:575.86ms +grad accum step:6516/14336 +step:26064/57344 train_time:15010036ms step_avg:575.89ms +step:26065/57344 train_time:15010053ms step_avg:575.87ms +step:26066/57344 train_time:15010299ms step_avg:575.86ms +step:26067/57344 train_time:15010843ms step_avg:575.86ms +grad accum step:6517/14336 +step:26068/57344 train_time:15012158ms step_avg:575.88ms +step:26069/57344 train_time:15012175ms step_avg:575.86ms +step:26070/57344 train_time:15012429ms step_avg:575.85ms +step:26071/57344 train_time:15012999ms step_avg:575.85ms +grad accum step:6518/14336 +step:26072/57344 train_time:15014310ms step_avg:575.88ms +step:26073/57344 train_time:15014326ms step_avg:575.86ms +step:26074/57344 train_time:15014571ms step_avg:575.84ms +step:26075/57344 train_time:15015115ms step_avg:575.84ms +grad accum step:6519/14336 +step:26076/57344 train_time:15016433ms step_avg:575.87ms +step:26077/57344 train_time:15016450ms step_avg:575.85ms +step:26078/57344 train_time:15016698ms step_avg:575.84ms +step:26079/57344 train_time:15017246ms step_avg:575.84ms +grad accum step:6520/14336 +step:26080/57344 train_time:15018541ms step_avg:575.86ms +step:26081/57344 train_time:15018559ms step_avg:575.84ms +step:26082/57344 train_time:15018805ms step_avg:575.83ms +step:26083/57344 train_time:15019357ms step_avg:575.83ms +grad accum step:6521/14336 +step:26084/57344 train_time:15020700ms step_avg:575.86ms +step:26085/57344 train_time:15020717ms step_avg:575.84ms +step:26086/57344 train_time:15020964ms step_avg:575.82ms +step:26087/57344 train_time:15021504ms step_avg:575.82ms +grad accum step:6522/14336 +step:26088/57344 train_time:15022785ms step_avg:575.85ms +step:26089/57344 train_time:15022802ms step_avg:575.83ms +step:26090/57344 train_time:15023051ms step_avg:575.82ms +step:26091/57344 train_time:15023608ms step_avg:575.82ms +grad accum step:6523/14336 +step:26092/57344 train_time:15024901ms step_avg:575.84ms +step:26093/57344 train_time:15024918ms step_avg:575.82ms +step:26094/57344 train_time:15025170ms step_avg:575.81ms +step:26095/57344 train_time:15025729ms step_avg:575.81ms +grad accum step:6524/14336 +step:26096/57344 train_time:15027069ms step_avg:575.84ms +step:26097/57344 train_time:15027086ms step_avg:575.82ms +step:26098/57344 train_time:15027337ms step_avg:575.80ms +step:26099/57344 train_time:15027896ms step_avg:575.80ms +grad accum step:6525/14336 +step:26100/57344 train_time:15029208ms step_avg:575.83ms +step:26101/57344 train_time:15029225ms step_avg:575.81ms +step:26102/57344 train_time:15029473ms step_avg:575.80ms +step:26103/57344 train_time:15030019ms step_avg:575.80ms +grad accum step:6526/14336 +step:26104/57344 train_time:15031333ms step_avg:575.82ms +step:26105/57344 train_time:15031350ms step_avg:575.80ms +step:26106/57344 train_time:15031597ms step_avg:575.79ms +step:26107/57344 train_time:15032142ms step_avg:575.79ms +grad accum step:6527/14336 +step:26108/57344 train_time:15033432ms step_avg:575.82ms +step:26109/57344 train_time:15033449ms step_avg:575.80ms +step:26110/57344 train_time:15033700ms step_avg:575.78ms +step:26111/57344 train_time:15034258ms step_avg:575.78ms +grad accum step:6528/14336 +step:26112/57344 train_time:15035558ms step_avg:575.81ms +step:26112/57344 val_loss:6.771035 train_time:15035559ms step_avg:575.81ms +step:26113/57344 train_time:15035571ms step_avg:575.79ms +step:26114/57344 train_time:15035795ms step_avg:575.78ms +step:26115/57344 train_time:15036351ms step_avg:575.77ms +grad accum step:6529/14336 +step:26116/57344 train_time:15037680ms step_avg:575.80ms +step:26117/57344 train_time:15037697ms step_avg:575.78ms +step:26118/57344 train_time:15037943ms step_avg:575.77ms +step:26119/57344 train_time:15038486ms step_avg:575.77ms +grad accum step:6530/14336 +step:26120/57344 train_time:15039844ms step_avg:575.80ms +step:26121/57344 train_time:15039861ms step_avg:575.78ms +step:26122/57344 train_time:15040113ms step_avg:575.76ms +step:26123/57344 train_time:15040672ms step_avg:575.76ms +grad accum step:6531/14336 +step:26124/57344 train_time:15041988ms step_avg:575.79ms +step:26125/57344 train_time:15042005ms step_avg:575.77ms +step:26126/57344 train_time:15042249ms step_avg:575.76ms +step:26127/57344 train_time:15042794ms step_avg:575.76ms +grad accum step:6532/14336 +step:26128/57344 train_time:15044090ms step_avg:575.78ms +step:26129/57344 train_time:15044107ms step_avg:575.76ms +step:26130/57344 train_time:15044356ms step_avg:575.75ms +step:26131/57344 train_time:15044911ms step_avg:575.75ms +grad accum step:6533/14336 +step:26132/57344 train_time:15046244ms step_avg:575.78ms +step:26133/57344 train_time:15046261ms step_avg:575.76ms +step:26134/57344 train_time:15046510ms step_avg:575.74ms +step:26135/57344 train_time:15047059ms step_avg:575.74ms +grad accum step:6534/14336 +step:26136/57344 train_time:15048344ms step_avg:575.77ms +step:26137/57344 train_time:15048361ms step_avg:575.75ms +step:26138/57344 train_time:15048609ms step_avg:575.74ms +step:26139/57344 train_time:15049160ms step_avg:575.74ms +grad accum step:6535/14336 +step:26140/57344 train_time:15050478ms step_avg:575.76ms +step:26141/57344 train_time:15050495ms step_avg:575.74ms +step:26142/57344 train_time:15050743ms step_avg:575.73ms +step:26143/57344 train_time:15051296ms step_avg:575.73ms +grad accum step:6536/14336 +step:26144/57344 train_time:15052588ms step_avg:575.76ms +step:26145/57344 train_time:15052605ms step_avg:575.74ms +step:26146/57344 train_time:15052851ms step_avg:575.72ms +step:26147/57344 train_time:15053396ms step_avg:575.72ms +grad accum step:6537/14336 +step:26148/57344 train_time:15054697ms step_avg:575.75ms +step:26149/57344 train_time:15054714ms step_avg:575.73ms +step:26150/57344 train_time:15054956ms step_avg:575.72ms +step:26151/57344 train_time:15055496ms step_avg:575.71ms +grad accum step:6538/14336 +step:26152/57344 train_time:15056804ms step_avg:575.74ms +step:26153/57344 train_time:15056821ms step_avg:575.72ms +step:26154/57344 train_time:15057075ms step_avg:575.71ms +step:26155/57344 train_time:15057646ms step_avg:575.71ms +grad accum step:6539/14336 +step:26156/57344 train_time:15058957ms step_avg:575.74ms +step:26157/57344 train_time:15058974ms step_avg:575.71ms +step:26158/57344 train_time:15059222ms step_avg:575.70ms +step:26159/57344 train_time:15059772ms step_avg:575.70ms +grad accum step:6540/14336 +step:26160/57344 train_time:15061062ms step_avg:575.73ms +step:26161/57344 train_time:15061079ms step_avg:575.71ms +step:26162/57344 train_time:15061325ms step_avg:575.69ms +step:26163/57344 train_time:15061873ms step_avg:575.69ms +grad accum step:6541/14336 +step:26164/57344 train_time:15063165ms step_avg:575.72ms +step:26165/57344 train_time:15063182ms step_avg:575.70ms +step:26166/57344 train_time:15063430ms step_avg:575.69ms +step:26167/57344 train_time:15063974ms step_avg:575.69ms +grad accum step:6542/14336 +step:26168/57344 train_time:15065265ms step_avg:575.71ms +step:26169/57344 train_time:15065282ms step_avg:575.69ms +step:26170/57344 train_time:15065530ms step_avg:575.68ms +step:26171/57344 train_time:15066085ms step_avg:575.68ms +grad accum step:6543/14336 +step:26172/57344 train_time:15067397ms step_avg:575.71ms +step:26173/57344 train_time:15067414ms step_avg:575.69ms +step:26174/57344 train_time:15067662ms step_avg:575.67ms +step:26175/57344 train_time:15068211ms step_avg:575.67ms +grad accum step:6544/14336 +step:26176/57344 train_time:15069494ms step_avg:575.70ms +step:26176/57344 val_loss:6.736585 train_time:15069495ms step_avg:575.70ms +step:26177/57344 train_time:15069507ms step_avg:575.68ms +step:26178/57344 train_time:15069739ms step_avg:575.66ms +step:26179/57344 train_time:15070305ms step_avg:575.66ms +grad accum step:6545/14336 +step:26180/57344 train_time:15071616ms step_avg:575.69ms +step:26181/57344 train_time:15071633ms step_avg:575.67ms +step:26182/57344 train_time:15071878ms step_avg:575.66ms +step:26183/57344 train_time:15072425ms step_avg:575.66ms +grad accum step:6546/14336 +step:26184/57344 train_time:15073704ms step_avg:575.68ms +step:26185/57344 train_time:15073721ms step_avg:575.66ms +step:26186/57344 train_time:15073969ms step_avg:575.65ms +step:26187/57344 train_time:15074514ms step_avg:575.65ms +grad accum step:6547/14336 +step:26188/57344 train_time:15075838ms step_avg:575.68ms +step:26189/57344 train_time:15075855ms step_avg:575.66ms +step:26190/57344 train_time:15076101ms step_avg:575.64ms +step:26191/57344 train_time:15076648ms step_avg:575.64ms +grad accum step:6548/14336 +step:26192/57344 train_time:15077934ms step_avg:575.67ms +step:26193/57344 train_time:15077951ms step_avg:575.65ms +step:26194/57344 train_time:15078193ms step_avg:575.64ms +step:26195/57344 train_time:15078732ms step_avg:575.63ms +grad accum step:6549/14336 +step:26196/57344 train_time:15080012ms step_avg:575.66ms +step:26197/57344 train_time:15080029ms step_avg:575.64ms +step:26198/57344 train_time:15080288ms step_avg:575.63ms +step:26199/57344 train_time:15080867ms step_avg:575.63ms +grad accum step:6550/14336 +step:26200/57344 train_time:15082159ms step_avg:575.65ms +step:26201/57344 train_time:15082176ms step_avg:575.63ms +step:26202/57344 train_time:15082423ms step_avg:575.62ms +step:26203/57344 train_time:15082974ms step_avg:575.62ms +grad accum step:6551/14336 +step:26204/57344 train_time:15084286ms step_avg:575.65ms +step:26205/57344 train_time:15084303ms step_avg:575.63ms +step:26206/57344 train_time:15084553ms step_avg:575.61ms +step:26207/57344 train_time:15085110ms step_avg:575.61ms +grad accum step:6552/14336 +step:26208/57344 train_time:15086427ms step_avg:575.64ms +step:26209/57344 train_time:15086444ms step_avg:575.62ms +step:26210/57344 train_time:15086691ms step_avg:575.61ms +step:26211/57344 train_time:15087239ms step_avg:575.61ms +grad accum step:6553/14336 +step:26212/57344 train_time:15088550ms step_avg:575.64ms +step:26213/57344 train_time:15088567ms step_avg:575.61ms +step:26214/57344 train_time:15088818ms step_avg:575.60ms +step:26215/57344 train_time:15089367ms step_avg:575.60ms +grad accum step:6554/14336 +step:26216/57344 train_time:15090680ms step_avg:575.63ms +step:26217/57344 train_time:15090697ms step_avg:575.61ms +step:26218/57344 train_time:15090948ms step_avg:575.59ms +step:26219/57344 train_time:15091507ms step_avg:575.59ms +grad accum step:6555/14336 +step:26220/57344 train_time:15092797ms step_avg:575.62ms +step:26221/57344 train_time:15092814ms step_avg:575.60ms +step:26222/57344 train_time:15093059ms step_avg:575.59ms +step:26223/57344 train_time:15093606ms step_avg:575.59ms +grad accum step:6556/14336 +step:26224/57344 train_time:15094903ms step_avg:575.61ms +step:26225/57344 train_time:15094920ms step_avg:575.59ms +step:26226/57344 train_time:15095168ms step_avg:575.58ms +step:26227/57344 train_time:15095719ms step_avg:575.58ms +grad accum step:6557/14336 +step:26228/57344 train_time:15097045ms step_avg:575.61ms +step:26229/57344 train_time:15097062ms step_avg:575.59ms +step:26230/57344 train_time:15097312ms step_avg:575.57ms +step:26231/57344 train_time:15097871ms step_avg:575.57ms +grad accum step:6558/14336 +step:26232/57344 train_time:15099204ms step_avg:575.60ms +step:26233/57344 train_time:15099221ms step_avg:575.58ms +step:26234/57344 train_time:15099469ms step_avg:575.57ms +step:26235/57344 train_time:15100031ms step_avg:575.57ms +grad accum step:6559/14336 +step:26236/57344 train_time:15101371ms step_avg:575.60ms +step:26237/57344 train_time:15101388ms step_avg:575.58ms +step:26238/57344 train_time:15101641ms step_avg:575.56ms +step:26239/57344 train_time:15102224ms step_avg:575.56ms +grad accum step:6560/14336 +step:26240/57344 train_time:15103654ms step_avg:575.60ms +step:26240/57344 val_loss:6.730480 train_time:15103654ms step_avg:575.60ms +step:26241/57344 train_time:15103667ms step_avg:575.58ms +step:26242/57344 train_time:15103895ms step_avg:575.56ms +step:26243/57344 train_time:15104448ms step_avg:575.56ms +grad accum step:6561/14336 +step:26244/57344 train_time:15105736ms step_avg:575.59ms +step:26245/57344 train_time:15105753ms step_avg:575.57ms +step:26246/57344 train_time:15106002ms step_avg:575.55ms +step:26247/57344 train_time:15106552ms step_avg:575.55ms +grad accum step:6562/14336 +step:26248/57344 train_time:15107895ms step_avg:575.58ms +step:26249/57344 train_time:15107912ms step_avg:575.56ms +step:26250/57344 train_time:15108159ms step_avg:575.55ms +step:26251/57344 train_time:15108714ms step_avg:575.55ms +grad accum step:6563/14336 +step:26252/57344 train_time:15110033ms step_avg:575.58ms +step:26253/57344 train_time:15110049ms step_avg:575.56ms +step:26254/57344 train_time:15110298ms step_avg:575.54ms +step:26255/57344 train_time:15110845ms step_avg:575.54ms +grad accum step:6564/14336 +step:26256/57344 train_time:15112131ms step_avg:575.57ms +step:26257/57344 train_time:15112148ms step_avg:575.55ms +step:26258/57344 train_time:15112400ms step_avg:575.54ms +step:26259/57344 train_time:15112961ms step_avg:575.53ms +grad accum step:6565/14336 +step:26260/57344 train_time:15114275ms step_avg:575.56ms +step:26261/57344 train_time:15114291ms step_avg:575.54ms +step:26262/57344 train_time:15114542ms step_avg:575.53ms +step:26263/57344 train_time:15115100ms step_avg:575.53ms +grad accum step:6566/14336 +step:26264/57344 train_time:15116414ms step_avg:575.56ms +step:26265/57344 train_time:15116431ms step_avg:575.54ms +step:26266/57344 train_time:15116688ms step_avg:575.52ms +step:26267/57344 train_time:15117257ms step_avg:575.52ms +grad accum step:6567/14336 +step:26268/57344 train_time:15118582ms step_avg:575.55ms +step:26269/57344 train_time:15118599ms step_avg:575.53ms +step:26270/57344 train_time:15118851ms step_avg:575.52ms +step:26271/57344 train_time:15119406ms step_avg:575.52ms +grad accum step:6568/14336 +step:26272/57344 train_time:15120712ms step_avg:575.54ms +step:26273/57344 train_time:15120729ms step_avg:575.52ms +step:26274/57344 train_time:15120981ms step_avg:575.51ms +step:26275/57344 train_time:15121541ms step_avg:575.51ms +grad accum step:6569/14336 +step:26276/57344 train_time:15122858ms step_avg:575.54ms +step:26277/57344 train_time:15122875ms step_avg:575.52ms +step:26278/57344 train_time:15123126ms step_avg:575.51ms +step:26279/57344 train_time:15123683ms step_avg:575.50ms +grad accum step:6570/14336 +step:26280/57344 train_time:15125018ms step_avg:575.53ms +step:26281/57344 train_time:15125035ms step_avg:575.51ms +step:26282/57344 train_time:15125288ms step_avg:575.50ms +step:26283/57344 train_time:15125852ms step_avg:575.50ms +grad accum step:6571/14336 +step:26284/57344 train_time:15127182ms step_avg:575.53ms +step:26285/57344 train_time:15127199ms step_avg:575.51ms +step:26286/57344 train_time:15127447ms step_avg:575.49ms +step:26287/57344 train_time:15127994ms step_avg:575.49ms +grad accum step:6572/14336 +step:26288/57344 train_time:15129304ms step_avg:575.52ms +step:26289/57344 train_time:15129321ms step_avg:575.50ms +step:26290/57344 train_time:15129577ms step_avg:575.49ms +step:26291/57344 train_time:15130153ms step_avg:575.49ms +grad accum step:6573/14336 +step:26292/57344 train_time:15131496ms step_avg:575.52ms +step:26293/57344 train_time:15131513ms step_avg:575.50ms +step:26294/57344 train_time:15131761ms step_avg:575.48ms +step:26295/57344 train_time:15132313ms step_avg:575.48ms +grad accum step:6574/14336 +step:26296/57344 train_time:15133635ms step_avg:575.51ms +step:26297/57344 train_time:15133652ms step_avg:575.49ms +step:26298/57344 train_time:15133900ms step_avg:575.48ms +step:26299/57344 train_time:15134447ms step_avg:575.48ms +grad accum step:6575/14336 +step:26300/57344 train_time:15135749ms step_avg:575.50ms +step:26301/57344 train_time:15135766ms step_avg:575.48ms +step:26302/57344 train_time:15136009ms step_avg:575.47ms +step:26303/57344 train_time:15136552ms step_avg:575.47ms +grad accum step:6576/14336 +step:26304/57344 train_time:15137847ms step_avg:575.50ms +step:26304/57344 val_loss:6.670739 train_time:15137848ms step_avg:575.50ms +step:26305/57344 train_time:15137861ms step_avg:575.47ms +step:26306/57344 train_time:15138090ms step_avg:575.46ms +step:26307/57344 train_time:15138649ms step_avg:575.46ms +grad accum step:6577/14336 +step:26308/57344 train_time:15139958ms step_avg:575.49ms +step:26309/57344 train_time:15139974ms step_avg:575.47ms +step:26310/57344 train_time:15140217ms step_avg:575.45ms +step:26311/57344 train_time:15140765ms step_avg:575.45ms +grad accum step:6578/14336 +step:26312/57344 train_time:15142073ms step_avg:575.48ms +step:26313/57344 train_time:15142090ms step_avg:575.46ms +step:26314/57344 train_time:15142340ms step_avg:575.45ms +step:26315/57344 train_time:15142898ms step_avg:575.45ms +grad accum step:6579/14336 +step:26316/57344 train_time:15144223ms step_avg:575.48ms +step:26317/57344 train_time:15144240ms step_avg:575.45ms +step:26318/57344 train_time:15144484ms step_avg:575.44ms +step:26319/57344 train_time:15145028ms step_avg:575.44ms +grad accum step:6580/14336 +step:26320/57344 train_time:15146320ms step_avg:575.47ms +step:26321/57344 train_time:15146337ms step_avg:575.45ms +step:26322/57344 train_time:15146587ms step_avg:575.43ms +step:26323/57344 train_time:15147138ms step_avg:575.43ms +grad accum step:6581/14336 +step:26324/57344 train_time:15148488ms step_avg:575.46ms +step:26325/57344 train_time:15148505ms step_avg:575.44ms +step:26326/57344 train_time:15148755ms step_avg:575.43ms +step:26327/57344 train_time:15149315ms step_avg:575.43ms +grad accum step:6582/14336 +step:26328/57344 train_time:15150633ms step_avg:575.46ms +step:26329/57344 train_time:15150650ms step_avg:575.44ms +step:26330/57344 train_time:15150903ms step_avg:575.42ms +step:26331/57344 train_time:15151467ms step_avg:575.42ms +grad accum step:6583/14336 +step:26332/57344 train_time:15152780ms step_avg:575.45ms +step:26333/57344 train_time:15152796ms step_avg:575.43ms +step:26334/57344 train_time:15153042ms step_avg:575.42ms +step:26335/57344 train_time:15153587ms step_avg:575.42ms +grad accum step:6584/14336 +step:26336/57344 train_time:15154910ms step_avg:575.44ms +step:26337/57344 train_time:15154926ms step_avg:575.42ms +step:26338/57344 train_time:15155179ms step_avg:575.41ms +step:26339/57344 train_time:15155749ms step_avg:575.41ms +grad accum step:6585/14336 +step:26340/57344 train_time:15157091ms step_avg:575.44ms +step:26341/57344 train_time:15157107ms step_avg:575.42ms +step:26342/57344 train_time:15157355ms step_avg:575.41ms +step:26343/57344 train_time:15157900ms step_avg:575.41ms +grad accum step:6586/14336 +step:26344/57344 train_time:15159232ms step_avg:575.43ms +step:26345/57344 train_time:15159249ms step_avg:575.41ms +step:26346/57344 train_time:15159495ms step_avg:575.40ms +step:26347/57344 train_time:15160039ms step_avg:575.40ms +grad accum step:6587/14336 +step:26348/57344 train_time:15161324ms step_avg:575.43ms +step:26349/57344 train_time:15161341ms step_avg:575.40ms +step:26350/57344 train_time:15161589ms step_avg:575.39ms +step:26351/57344 train_time:15162146ms step_avg:575.39ms +grad accum step:6588/14336 +step:26352/57344 train_time:15163518ms step_avg:575.42ms +step:26353/57344 train_time:15163536ms step_avg:575.40ms +step:26354/57344 train_time:15163784ms step_avg:575.39ms +step:26355/57344 train_time:15164345ms step_avg:575.39ms +grad accum step:6589/14336 +step:26356/57344 train_time:15165691ms step_avg:575.42ms +step:26357/57344 train_time:15165708ms step_avg:575.40ms +step:26358/57344 train_time:15165959ms step_avg:575.38ms +step:26359/57344 train_time:15166517ms step_avg:575.38ms +grad accum step:6590/14336 +step:26360/57344 train_time:15167813ms step_avg:575.41ms +step:26361/57344 train_time:15167830ms step_avg:575.39ms +step:26362/57344 train_time:15168078ms step_avg:575.38ms +step:26363/57344 train_time:15168629ms step_avg:575.38ms +grad accum step:6591/14336 +step:26364/57344 train_time:15169970ms step_avg:575.40ms +step:26365/57344 train_time:15169987ms step_avg:575.38ms +step:26366/57344 train_time:15170232ms step_avg:575.37ms +step:26367/57344 train_time:15170776ms step_avg:575.37ms +grad accum step:6592/14336 +step:26368/57344 train_time:15172142ms step_avg:575.40ms +step:26368/57344 val_loss:6.646334 train_time:15172143ms step_avg:575.40ms +step:26369/57344 train_time:15172155ms step_avg:575.38ms +step:26370/57344 train_time:15172376ms step_avg:575.37ms +step:26371/57344 train_time:15172918ms step_avg:575.36ms +grad accum step:6593/14336 +step:26372/57344 train_time:15174209ms step_avg:575.39ms +step:26373/57344 train_time:15174226ms step_avg:575.37ms +step:26374/57344 train_time:15174475ms step_avg:575.36ms +step:26375/57344 train_time:15175022ms step_avg:575.36ms +grad accum step:6594/14336 +step:26376/57344 train_time:15176297ms step_avg:575.38ms +step:26377/57344 train_time:15176314ms step_avg:575.36ms +step:26378/57344 train_time:15176561ms step_avg:575.35ms +step:26379/57344 train_time:15177106ms step_avg:575.35ms +grad accum step:6595/14336 +step:26380/57344 train_time:15178381ms step_avg:575.37ms +step:26381/57344 train_time:15178398ms step_avg:575.35ms +step:26382/57344 train_time:15178645ms step_avg:575.34ms +step:26383/57344 train_time:15179184ms step_avg:575.34ms +grad accum step:6596/14336 +step:26384/57344 train_time:15180480ms step_avg:575.37ms +step:26385/57344 train_time:15180497ms step_avg:575.35ms +step:26386/57344 train_time:15180743ms step_avg:575.33ms +step:26387/57344 train_time:15181289ms step_avg:575.33ms +grad accum step:6597/14336 +step:26388/57344 train_time:15182571ms step_avg:575.36ms +step:26389/57344 train_time:15182588ms step_avg:575.34ms +step:26390/57344 train_time:15182829ms step_avg:575.33ms +step:26391/57344 train_time:15183367ms step_avg:575.32ms +grad accum step:6598/14336 +step:26392/57344 train_time:15184666ms step_avg:575.35ms +step:26393/57344 train_time:15184683ms step_avg:575.33ms +step:26394/57344 train_time:15184929ms step_avg:575.32ms +step:26395/57344 train_time:15185473ms step_avg:575.32ms +grad accum step:6599/14336 +step:26396/57344 train_time:15186774ms step_avg:575.34ms +step:26397/57344 train_time:15186790ms step_avg:575.32ms +step:26398/57344 train_time:15187039ms step_avg:575.31ms +step:26399/57344 train_time:15187580ms step_avg:575.31ms +grad accum step:6600/14336 +step:26400/57344 train_time:15188857ms step_avg:575.34ms +step:26401/57344 train_time:15188874ms step_avg:575.31ms +step:26402/57344 train_time:15189119ms step_avg:575.30ms +step:26403/57344 train_time:15189665ms step_avg:575.30ms +grad accum step:6601/14336 +step:26404/57344 train_time:15190980ms step_avg:575.33ms +step:26405/57344 train_time:15190997ms step_avg:575.31ms +step:26406/57344 train_time:15191243ms step_avg:575.30ms +step:26407/57344 train_time:15191794ms step_avg:575.29ms +grad accum step:6602/14336 +step:26408/57344 train_time:15193106ms step_avg:575.32ms +step:26409/57344 train_time:15193123ms step_avg:575.30ms +step:26410/57344 train_time:15193374ms step_avg:575.29ms +step:26411/57344 train_time:15193929ms step_avg:575.29ms +grad accum step:6603/14336 +step:26412/57344 train_time:15195213ms step_avg:575.31ms +step:26413/57344 train_time:15195230ms step_avg:575.29ms +step:26414/57344 train_time:15195483ms step_avg:575.28ms +step:26415/57344 train_time:15196050ms step_avg:575.28ms +grad accum step:6604/14336 +step:26416/57344 train_time:15197397ms step_avg:575.31ms +step:26417/57344 train_time:15197413ms step_avg:575.29ms +step:26418/57344 train_time:15197658ms step_avg:575.28ms +step:26419/57344 train_time:15198210ms step_avg:575.28ms +grad accum step:6605/14336 +step:26420/57344 train_time:15199547ms step_avg:575.30ms +step:26421/57344 train_time:15199564ms step_avg:575.28ms +step:26422/57344 train_time:15199809ms step_avg:575.27ms +step:26423/57344 train_time:15200350ms step_avg:575.27ms +grad accum step:6606/14336 +step:26424/57344 train_time:15201661ms step_avg:575.30ms +step:26425/57344 train_time:15201677ms step_avg:575.28ms +step:26426/57344 train_time:15201923ms step_avg:575.26ms +step:26427/57344 train_time:15202457ms step_avg:575.26ms +grad accum step:6607/14336 +step:26428/57344 train_time:15203753ms step_avg:575.29ms +step:26429/57344 train_time:15203770ms step_avg:575.27ms +step:26430/57344 train_time:15204020ms step_avg:575.26ms +step:26431/57344 train_time:15204573ms step_avg:575.26ms +grad accum step:6608/14336 +step:26432/57344 train_time:15205903ms step_avg:575.28ms +step:26432/57344 val_loss:6.608521 train_time:15205903ms step_avg:575.28ms +step:26433/57344 train_time:15205916ms step_avg:575.26ms +step:26434/57344 train_time:15206140ms step_avg:575.25ms +step:26435/57344 train_time:15206685ms step_avg:575.25ms +grad accum step:6609/14336 +step:26436/57344 train_time:15207998ms step_avg:575.28ms +step:26437/57344 train_time:15208015ms step_avg:575.25ms +step:26438/57344 train_time:15208259ms step_avg:575.24ms +step:26439/57344 train_time:15208808ms step_avg:575.24ms +grad accum step:6610/14336 +step:26440/57344 train_time:15210127ms step_avg:575.27ms +step:26441/57344 train_time:15210144ms step_avg:575.25ms +step:26442/57344 train_time:15210395ms step_avg:575.24ms +step:26443/57344 train_time:15210952ms step_avg:575.24ms +grad accum step:6611/14336 +step:26444/57344 train_time:15212237ms step_avg:575.26ms +step:26445/57344 train_time:15212254ms step_avg:575.24ms +step:26446/57344 train_time:15212508ms step_avg:575.23ms +step:26447/57344 train_time:15213071ms step_avg:575.23ms +grad accum step:6612/14336 +step:26448/57344 train_time:15214401ms step_avg:575.26ms +step:26449/57344 train_time:15214418ms step_avg:575.24ms +step:26450/57344 train_time:15214668ms step_avg:575.22ms +step:26451/57344 train_time:15215216ms step_avg:575.22ms +grad accum step:6613/14336 +step:26452/57344 train_time:15216553ms step_avg:575.25ms +step:26453/57344 train_time:15216569ms step_avg:575.23ms +step:26454/57344 train_time:15216815ms step_avg:575.22ms +step:26455/57344 train_time:15217360ms step_avg:575.22ms +grad accum step:6614/14336 +step:26456/57344 train_time:15218637ms step_avg:575.24ms +step:26457/57344 train_time:15218654ms step_avg:575.22ms +step:26458/57344 train_time:15218908ms step_avg:575.21ms +step:26459/57344 train_time:15219473ms step_avg:575.21ms +grad accum step:6615/14336 +step:26460/57344 train_time:15220825ms step_avg:575.24ms +step:26461/57344 train_time:15220842ms step_avg:575.22ms +step:26462/57344 train_time:15221088ms step_avg:575.21ms +step:26463/57344 train_time:15221633ms step_avg:575.20ms +grad accum step:6616/14336 +step:26464/57344 train_time:15222966ms step_avg:575.23ms +step:26465/57344 train_time:15222983ms step_avg:575.21ms +step:26466/57344 train_time:15223229ms step_avg:575.20ms +step:26467/57344 train_time:15223773ms step_avg:575.20ms +grad accum step:6617/14336 +step:26468/57344 train_time:15225050ms step_avg:575.22ms +step:26469/57344 train_time:15225067ms step_avg:575.20ms +step:26470/57344 train_time:15225313ms step_avg:575.19ms +step:26471/57344 train_time:15225860ms step_avg:575.19ms +grad accum step:6618/14336 +step:26472/57344 train_time:15227158ms step_avg:575.22ms +step:26473/57344 train_time:15227175ms step_avg:575.20ms +step:26474/57344 train_time:15227431ms step_avg:575.18ms +step:26475/57344 train_time:15227994ms step_avg:575.18ms +grad accum step:6619/14336 +step:26476/57344 train_time:15229286ms step_avg:575.21ms +step:26477/57344 train_time:15229303ms step_avg:575.19ms +step:26478/57344 train_time:15229552ms step_avg:575.18ms +step:26479/57344 train_time:15230100ms step_avg:575.18ms +grad accum step:6620/14336 +step:26480/57344 train_time:15231403ms step_avg:575.20ms +step:26481/57344 train_time:15231420ms step_avg:575.18ms +step:26482/57344 train_time:15231673ms step_avg:575.17ms +step:26483/57344 train_time:15232235ms step_avg:575.17ms +grad accum step:6621/14336 +step:26484/57344 train_time:15233517ms step_avg:575.20ms +step:26485/57344 train_time:15233534ms step_avg:575.18ms +step:26486/57344 train_time:15233778ms step_avg:575.16ms +step:26487/57344 train_time:15234318ms step_avg:575.16ms +grad accum step:6622/14336 +step:26488/57344 train_time:15235608ms step_avg:575.19ms +step:26489/57344 train_time:15235625ms step_avg:575.17ms +step:26490/57344 train_time:15235875ms step_avg:575.16ms +step:26491/57344 train_time:15236427ms step_avg:575.15ms +grad accum step:6623/14336 +step:26492/57344 train_time:15237711ms step_avg:575.18ms +step:26493/57344 train_time:15237728ms step_avg:575.16ms +step:26494/57344 train_time:15237972ms step_avg:575.15ms +step:26495/57344 train_time:15238522ms step_avg:575.15ms +grad accum step:6624/14336 +step:26496/57344 train_time:15239839ms step_avg:575.18ms +step:26496/57344 val_loss:6.570975 train_time:15239840ms step_avg:575.18ms +step:26497/57344 train_time:15239852ms step_avg:575.15ms +step:26498/57344 train_time:15240075ms step_avg:575.14ms +step:26499/57344 train_time:15240623ms step_avg:575.14ms +grad accum step:6625/14336 +step:26500/57344 train_time:15241902ms step_avg:575.17ms +step:26501/57344 train_time:15241919ms step_avg:575.15ms +step:26502/57344 train_time:15242169ms step_avg:575.13ms +step:26503/57344 train_time:15242726ms step_avg:575.13ms +grad accum step:6626/14336 +step:26504/57344 train_time:15244044ms step_avg:575.16ms +step:26505/57344 train_time:15244062ms step_avg:575.14ms +step:26506/57344 train_time:15244306ms step_avg:575.13ms +step:26507/57344 train_time:15244854ms step_avg:575.13ms +grad accum step:6627/14336 +step:26508/57344 train_time:15246148ms step_avg:575.15ms +step:26509/57344 train_time:15246165ms step_avg:575.13ms +step:26510/57344 train_time:15246411ms step_avg:575.12ms +step:26511/57344 train_time:15246964ms step_avg:575.12ms +grad accum step:6628/14336 +step:26512/57344 train_time:15248292ms step_avg:575.15ms +step:26513/57344 train_time:15248309ms step_avg:575.13ms +step:26514/57344 train_time:15248562ms step_avg:575.11ms +step:26515/57344 train_time:15249123ms step_avg:575.11ms +grad accum step:6629/14336 +step:26516/57344 train_time:15250429ms step_avg:575.14ms +step:26517/57344 train_time:15250446ms step_avg:575.12ms +step:26518/57344 train_time:15250697ms step_avg:575.11ms +step:26519/57344 train_time:15251249ms step_avg:575.11ms +grad accum step:6630/14336 +step:26520/57344 train_time:15252557ms step_avg:575.13ms +step:26521/57344 train_time:15252575ms step_avg:575.11ms +step:26522/57344 train_time:15252822ms step_avg:575.10ms +step:26523/57344 train_time:15253369ms step_avg:575.10ms +grad accum step:6631/14336 +step:26524/57344 train_time:15254664ms step_avg:575.13ms +step:26525/57344 train_time:15254681ms step_avg:575.11ms +step:26526/57344 train_time:15254932ms step_avg:575.09ms +step:26527/57344 train_time:15255496ms step_avg:575.09ms +grad accum step:6632/14336 +step:26528/57344 train_time:15256801ms step_avg:575.12ms +step:26529/57344 train_time:15256818ms step_avg:575.10ms +step:26530/57344 train_time:15257068ms step_avg:575.09ms +step:26531/57344 train_time:15257624ms step_avg:575.09ms +grad accum step:6633/14336 +step:26532/57344 train_time:15258918ms step_avg:575.11ms +step:26533/57344 train_time:15258935ms step_avg:575.09ms +step:26534/57344 train_time:15259187ms step_avg:575.08ms +step:26535/57344 train_time:15259750ms step_avg:575.08ms +grad accum step:6634/14336 +step:26536/57344 train_time:15261070ms step_avg:575.11ms +step:26537/57344 train_time:15261087ms step_avg:575.09ms +step:26538/57344 train_time:15261333ms step_avg:575.07ms +step:26539/57344 train_time:15261878ms step_avg:575.07ms +grad accum step:6635/14336 +step:26540/57344 train_time:15263177ms step_avg:575.10ms +step:26541/57344 train_time:15263194ms step_avg:575.08ms +step:26542/57344 train_time:15263443ms step_avg:575.07ms +step:26543/57344 train_time:15263989ms step_avg:575.07ms +grad accum step:6636/14336 +step:26544/57344 train_time:15265282ms step_avg:575.09ms +step:26545/57344 train_time:15265298ms step_avg:575.07ms +step:26546/57344 train_time:15265543ms step_avg:575.06ms +step:26547/57344 train_time:15266086ms step_avg:575.06ms +grad accum step:6637/14336 +step:26548/57344 train_time:15267367ms step_avg:575.09ms +step:26549/57344 train_time:15267384ms step_avg:575.06ms +step:26550/57344 train_time:15267638ms step_avg:575.05ms +step:26551/57344 train_time:15268206ms step_avg:575.05ms +grad accum step:6638/14336 +step:26552/57344 train_time:15269493ms step_avg:575.08ms +step:26553/57344 train_time:15269510ms step_avg:575.06ms +step:26554/57344 train_time:15269756ms step_avg:575.05ms +step:26555/57344 train_time:15270303ms step_avg:575.04ms +grad accum step:6639/14336 +step:26556/57344 train_time:15271604ms step_avg:575.07ms +step:26557/57344 train_time:15271621ms step_avg:575.05ms +step:26558/57344 train_time:15271868ms step_avg:575.04ms +step:26559/57344 train_time:15272408ms step_avg:575.04ms +grad accum step:6640/14336 +step:26560/57344 train_time:15273686ms step_avg:575.06ms +step:26560/57344 val_loss:6.545394 train_time:15273686ms step_avg:575.06ms +step:26561/57344 train_time:15273699ms step_avg:575.04ms +step:26562/57344 train_time:15273924ms step_avg:575.03ms +step:26563/57344 train_time:15274484ms step_avg:575.03ms +grad accum step:6641/14336 +step:26564/57344 train_time:15275815ms step_avg:575.06ms +step:26565/57344 train_time:15275832ms step_avg:575.04ms +step:26566/57344 train_time:15276080ms step_avg:575.02ms +step:26567/57344 train_time:15276630ms step_avg:575.02ms +grad accum step:6642/14336 +step:26568/57344 train_time:15277949ms step_avg:575.05ms +step:26569/57344 train_time:15277966ms step_avg:575.03ms +step:26570/57344 train_time:15278217ms step_avg:575.02ms +step:26571/57344 train_time:15278777ms step_avg:575.02ms +grad accum step:6643/14336 +step:26572/57344 train_time:15280087ms step_avg:575.04ms +step:26573/57344 train_time:15280104ms step_avg:575.02ms +step:26574/57344 train_time:15280349ms step_avg:575.01ms +step:26575/57344 train_time:15280888ms step_avg:575.01ms +grad accum step:6644/14336 +step:26576/57344 train_time:15282185ms step_avg:575.04ms +step:26577/57344 train_time:15282202ms step_avg:575.02ms +step:26578/57344 train_time:15282452ms step_avg:575.00ms +step:26579/57344 train_time:15283008ms step_avg:575.00ms +grad accum step:6645/14336 +step:26580/57344 train_time:15284329ms step_avg:575.03ms +step:26581/57344 train_time:15284346ms step_avg:575.01ms +step:26582/57344 train_time:15284594ms step_avg:575.00ms +step:26583/57344 train_time:15285148ms step_avg:575.00ms +grad accum step:6646/14336 +step:26584/57344 train_time:15286545ms step_avg:575.03ms +step:26585/57344 train_time:15286561ms step_avg:575.01ms +step:26586/57344 train_time:15286814ms step_avg:574.99ms +step:26587/57344 train_time:15287379ms step_avg:574.99ms +grad accum step:6647/14336 +step:26588/57344 train_time:15288681ms step_avg:575.02ms +step:26589/57344 train_time:15288698ms step_avg:575.00ms +step:26590/57344 train_time:15288947ms step_avg:574.99ms +step:26591/57344 train_time:15289498ms step_avg:574.99ms +grad accum step:6648/14336 +step:26592/57344 train_time:15290785ms step_avg:575.01ms +step:26593/57344 train_time:15290802ms step_avg:574.99ms +step:26594/57344 train_time:15291047ms step_avg:574.98ms +step:26595/57344 train_time:15291594ms step_avg:574.98ms +grad accum step:6649/14336 +step:26596/57344 train_time:15292888ms step_avg:575.01ms +step:26597/57344 train_time:15292905ms step_avg:574.99ms +step:26598/57344 train_time:15293152ms step_avg:574.97ms +step:26599/57344 train_time:15293698ms step_avg:574.97ms +grad accum step:6650/14336 +step:26600/57344 train_time:15294996ms step_avg:575.00ms +step:26601/57344 train_time:15295013ms step_avg:574.98ms +step:26602/57344 train_time:15295256ms step_avg:574.97ms +step:26603/57344 train_time:15295800ms step_avg:574.97ms +grad accum step:6651/14336 +step:26604/57344 train_time:15297097ms step_avg:574.99ms +step:26605/57344 train_time:15297114ms step_avg:574.97ms +step:26606/57344 train_time:15297364ms step_avg:574.96ms +step:26607/57344 train_time:15297928ms step_avg:574.96ms +grad accum step:6652/14336 +step:26608/57344 train_time:15299250ms step_avg:574.99ms +step:26609/57344 train_time:15299267ms step_avg:574.97ms +step:26610/57344 train_time:15299512ms step_avg:574.95ms +step:26611/57344 train_time:15300057ms step_avg:574.95ms +grad accum step:6653/14336 +step:26612/57344 train_time:15301384ms step_avg:574.98ms +step:26613/57344 train_time:15301400ms step_avg:574.96ms +step:26614/57344 train_time:15301644ms step_avg:574.95ms +step:26615/57344 train_time:15302190ms step_avg:574.95ms +grad accum step:6654/14336 +step:26616/57344 train_time:15303476ms step_avg:574.97ms +step:26617/57344 train_time:15303493ms step_avg:574.95ms +step:26618/57344 train_time:15303742ms step_avg:574.94ms +step:26619/57344 train_time:15304296ms step_avg:574.94ms +grad accum step:6655/14336 +step:26620/57344 train_time:15305615ms step_avg:574.97ms +step:26621/57344 train_time:15305632ms step_avg:574.95ms +step:26622/57344 train_time:15305877ms step_avg:574.93ms +step:26623/57344 train_time:15306421ms step_avg:574.93ms +grad accum step:6656/14336 +step:26624/57344 train_time:15307713ms step_avg:574.96ms +step:26624/57344 val_loss:6.524185 train_time:15307714ms step_avg:574.96ms +step:26625/57344 train_time:15307725ms step_avg:574.94ms +step:26626/57344 train_time:15307948ms step_avg:574.92ms +step:26627/57344 train_time:15308498ms step_avg:574.92ms +grad accum step:6657/14336 +step:26628/57344 train_time:15309797ms step_avg:574.95ms +step:26629/57344 train_time:15309814ms step_avg:574.93ms +step:26630/57344 train_time:15310062ms step_avg:574.92ms +step:26631/57344 train_time:15310615ms step_avg:574.92ms +grad accum step:6658/14336 +step:26632/57344 train_time:15311942ms step_avg:574.95ms +step:26633/57344 train_time:15311959ms step_avg:574.92ms +step:26634/57344 train_time:15312211ms step_avg:574.91ms +step:26635/57344 train_time:15312779ms step_avg:574.91ms +grad accum step:6659/14336 +step:26636/57344 train_time:15314097ms step_avg:574.94ms +step:26637/57344 train_time:15314115ms step_avg:574.92ms +step:26638/57344 train_time:15314359ms step_avg:574.91ms +step:26639/57344 train_time:15314906ms step_avg:574.91ms +grad accum step:6660/14336 +step:26640/57344 train_time:15316213ms step_avg:574.93ms +step:26641/57344 train_time:15316230ms step_avg:574.91ms +step:26642/57344 train_time:15316474ms step_avg:574.90ms +step:26643/57344 train_time:15317019ms step_avg:574.90ms +grad accum step:6661/14336 +step:26644/57344 train_time:15318358ms step_avg:574.93ms +step:26645/57344 train_time:15318375ms step_avg:574.91ms +step:26646/57344 train_time:15318628ms step_avg:574.89ms +step:26647/57344 train_time:15319187ms step_avg:574.89ms +grad accum step:6662/14336 +step:26648/57344 train_time:15320483ms step_avg:574.92ms +step:26649/57344 train_time:15320500ms step_avg:574.90ms +step:26650/57344 train_time:15320747ms step_avg:574.89ms +step:26651/57344 train_time:15321294ms step_avg:574.89ms +grad accum step:6663/14336 +step:26652/57344 train_time:15322611ms step_avg:574.91ms +step:26653/57344 train_time:15322627ms step_avg:574.89ms +step:26654/57344 train_time:15322877ms step_avg:574.88ms +step:26655/57344 train_time:15323417ms step_avg:574.88ms +grad accum step:6664/14336 +step:26656/57344 train_time:15324734ms step_avg:574.91ms +step:26657/57344 train_time:15324751ms step_avg:574.89ms +step:26658/57344 train_time:15325002ms step_avg:574.87ms +step:26659/57344 train_time:15325566ms step_avg:574.87ms +grad accum step:6665/14336 +step:26660/57344 train_time:15326882ms step_avg:574.90ms +step:26661/57344 train_time:15326899ms step_avg:574.88ms +step:26662/57344 train_time:15327145ms step_avg:574.87ms +step:26663/57344 train_time:15327690ms step_avg:574.87ms +grad accum step:6666/14336 +step:26664/57344 train_time:15328970ms step_avg:574.89ms +step:26665/57344 train_time:15328987ms step_avg:574.87ms +step:26666/57344 train_time:15329239ms step_avg:574.86ms +step:26667/57344 train_time:15329803ms step_avg:574.86ms +grad accum step:6667/14336 +step:26668/57344 train_time:15331161ms step_avg:574.89ms +step:26669/57344 train_time:15331178ms step_avg:574.87ms +step:26670/57344 train_time:15331424ms step_avg:574.86ms +step:26671/57344 train_time:15331966ms step_avg:574.86ms +grad accum step:6668/14336 +step:26672/57344 train_time:15333268ms step_avg:574.88ms +step:26673/57344 train_time:15333285ms step_avg:574.86ms +step:26674/57344 train_time:15333529ms step_avg:574.85ms +step:26675/57344 train_time:15334077ms step_avg:574.85ms +grad accum step:6669/14336 +step:26676/57344 train_time:15335383ms step_avg:574.88ms +step:26677/57344 train_time:15335400ms step_avg:574.85ms +step:26678/57344 train_time:15335648ms step_avg:574.84ms +step:26679/57344 train_time:15336190ms step_avg:574.84ms +grad accum step:6670/14336 +step:26680/57344 train_time:15337510ms step_avg:574.87ms +step:26681/57344 train_time:15337527ms step_avg:574.85ms +step:26682/57344 train_time:15337778ms step_avg:574.84ms +step:26683/57344 train_time:15338340ms step_avg:574.84ms +grad accum step:6671/14336 +step:26684/57344 train_time:15339733ms step_avg:574.87ms +step:26685/57344 train_time:15339750ms step_avg:574.85ms +step:26686/57344 train_time:15340005ms step_avg:574.83ms +step:26687/57344 train_time:15340578ms step_avg:574.83ms +grad accum step:6672/14336 +step:26688/57344 train_time:15341910ms step_avg:574.86ms +step:26688/57344 val_loss:6.516870 train_time:15341911ms step_avg:574.86ms +step:26689/57344 train_time:15341923ms step_avg:574.84ms +step:26690/57344 train_time:15342146ms step_avg:574.83ms +step:26691/57344 train_time:15342696ms step_avg:574.83ms +grad accum step:6673/14336 +step:26692/57344 train_time:15344009ms step_avg:574.85ms +step:26693/57344 train_time:15344026ms step_avg:574.83ms +step:26694/57344 train_time:15344269ms step_avg:574.82ms +step:26695/57344 train_time:15344817ms step_avg:574.82ms +grad accum step:6674/14336 +step:26696/57344 train_time:15346120ms step_avg:574.85ms +step:26697/57344 train_time:15346137ms step_avg:574.83ms +step:26698/57344 train_time:15346384ms step_avg:574.81ms +step:26699/57344 train_time:15346941ms step_avg:574.81ms +grad accum step:6675/14336 +step:26700/57344 train_time:15348264ms step_avg:574.84ms +step:26701/57344 train_time:15348281ms step_avg:574.82ms +step:26702/57344 train_time:15348525ms step_avg:574.81ms +step:26703/57344 train_time:15349072ms step_avg:574.81ms +grad accum step:6676/14336 +step:26704/57344 train_time:15350376ms step_avg:574.83ms +step:26705/57344 train_time:15350393ms step_avg:574.81ms +step:26706/57344 train_time:15350635ms step_avg:574.80ms +step:26707/57344 train_time:15351176ms step_avg:574.80ms +grad accum step:6677/14336 +step:26708/57344 train_time:15352480ms step_avg:574.83ms +step:26709/57344 train_time:15352496ms step_avg:574.81ms +step:26710/57344 train_time:15352742ms step_avg:574.79ms +step:26711/57344 train_time:15353284ms step_avg:574.79ms +grad accum step:6678/14336 +step:26712/57344 train_time:15354578ms step_avg:574.82ms +step:26713/57344 train_time:15354594ms step_avg:574.80ms +step:26714/57344 train_time:15354847ms step_avg:574.79ms +step:26715/57344 train_time:15355409ms step_avg:574.79ms +grad accum step:6679/14336 +step:26716/57344 train_time:15356705ms step_avg:574.81ms +step:26717/57344 train_time:15356722ms step_avg:574.79ms +step:26718/57344 train_time:15356968ms step_avg:574.78ms +step:26719/57344 train_time:15357516ms step_avg:574.78ms +grad accum step:6680/14336 +step:26720/57344 train_time:15358814ms step_avg:574.81ms +step:26721/57344 train_time:15358831ms step_avg:574.79ms +step:26722/57344 train_time:15359075ms step_avg:574.77ms +step:26723/57344 train_time:15359624ms step_avg:574.77ms +grad accum step:6681/14336 +step:26724/57344 train_time:15360925ms step_avg:574.80ms +step:26725/57344 train_time:15360942ms step_avg:574.78ms +step:26726/57344 train_time:15361190ms step_avg:574.77ms +step:26727/57344 train_time:15361746ms step_avg:574.77ms +grad accum step:6682/14336 +step:26728/57344 train_time:15363049ms step_avg:574.79ms +step:26729/57344 train_time:15363067ms step_avg:574.77ms +step:26730/57344 train_time:15363313ms step_avg:574.76ms +step:26731/57344 train_time:15363867ms step_avg:574.76ms +grad accum step:6683/14336 +step:26732/57344 train_time:15365182ms step_avg:574.79ms +step:26733/57344 train_time:15365199ms step_avg:574.77ms +step:26734/57344 train_time:15365449ms step_avg:574.75ms +step:26735/57344 train_time:15366003ms step_avg:574.75ms +grad accum step:6684/14336 +step:26736/57344 train_time:15367311ms step_avg:574.78ms +step:26737/57344 train_time:15367328ms step_avg:574.76ms +step:26738/57344 train_time:15367577ms step_avg:574.75ms +step:26739/57344 train_time:15368124ms step_avg:574.75ms +grad accum step:6685/14336 +step:26740/57344 train_time:15369429ms step_avg:574.77ms +step:26741/57344 train_time:15369446ms step_avg:574.75ms +step:26742/57344 train_time:15369703ms step_avg:574.74ms +step:26743/57344 train_time:15370274ms step_avg:574.74ms +grad accum step:6686/14336 +step:26744/57344 train_time:15371552ms step_avg:574.77ms +step:26745/57344 train_time:15371569ms step_avg:574.75ms +step:26746/57344 train_time:15371817ms step_avg:574.73ms +step:26747/57344 train_time:15372368ms step_avg:574.73ms +grad accum step:6687/14336 +step:26748/57344 train_time:15373659ms step_avg:574.76ms +step:26749/57344 train_time:15373676ms step_avg:574.74ms +step:26750/57344 train_time:15373924ms step_avg:574.73ms +step:26751/57344 train_time:15374471ms step_avg:574.73ms +grad accum step:6688/14336 +step:26752/57344 train_time:15375757ms step_avg:574.75ms +step:26752/57344 val_loss:6.475085 train_time:15375757ms step_avg:574.75ms +step:26753/57344 train_time:15375770ms step_avg:574.73ms +step:26754/57344 train_time:15376000ms step_avg:574.72ms +step:26755/57344 train_time:15376569ms step_avg:574.72ms +grad accum step:6689/14336 +step:26756/57344 train_time:15377870ms step_avg:574.74ms +step:26757/57344 train_time:15377887ms step_avg:574.72ms +step:26758/57344 train_time:15378134ms step_avg:574.71ms +step:26759/57344 train_time:15378673ms step_avg:574.71ms +grad accum step:6690/14336 +step:26760/57344 train_time:15380001ms step_avg:574.74ms +step:26761/57344 train_time:15380018ms step_avg:574.72ms +step:26762/57344 train_time:15380275ms step_avg:574.71ms +step:26763/57344 train_time:15380847ms step_avg:574.71ms +grad accum step:6691/14336 +step:26764/57344 train_time:15382142ms step_avg:574.73ms +step:26765/57344 train_time:15382158ms step_avg:574.71ms +step:26766/57344 train_time:15382405ms step_avg:574.70ms +step:26767/57344 train_time:15382947ms step_avg:574.70ms +grad accum step:6692/14336 +step:26768/57344 train_time:15384243ms step_avg:574.73ms +step:26769/57344 train_time:15384260ms step_avg:574.70ms +step:26770/57344 train_time:15384507ms step_avg:574.69ms +step:26771/57344 train_time:15385050ms step_avg:574.69ms +grad accum step:6693/14336 +step:26772/57344 train_time:15386336ms step_avg:574.72ms +step:26773/57344 train_time:15386353ms step_avg:574.70ms +step:26774/57344 train_time:15386603ms step_avg:574.68ms +step:26775/57344 train_time:15387154ms step_avg:574.68ms +grad accum step:6694/14336 +step:26776/57344 train_time:15388524ms step_avg:574.71ms +step:26777/57344 train_time:15388540ms step_avg:574.69ms +step:26778/57344 train_time:15388789ms step_avg:574.68ms +step:26779/57344 train_time:15389338ms step_avg:574.68ms +grad accum step:6695/14336 +step:26780/57344 train_time:15390639ms step_avg:574.71ms +step:26781/57344 train_time:15390656ms step_avg:574.69ms +step:26782/57344 train_time:15390905ms step_avg:574.67ms +step:26783/57344 train_time:15391456ms step_avg:574.67ms +grad accum step:6696/14336 +step:26784/57344 train_time:15392744ms step_avg:574.70ms +step:26785/57344 train_time:15392760ms step_avg:574.68ms +step:26786/57344 train_time:15393008ms step_avg:574.67ms +step:26787/57344 train_time:15393555ms step_avg:574.67ms +grad accum step:6697/14336 +step:26788/57344 train_time:15394873ms step_avg:574.69ms +step:26789/57344 train_time:15394890ms step_avg:574.67ms +step:26790/57344 train_time:15395140ms step_avg:574.66ms +step:26791/57344 train_time:15395689ms step_avg:574.66ms +grad accum step:6698/14336 +step:26792/57344 train_time:15396981ms step_avg:574.69ms +step:26793/57344 train_time:15396998ms step_avg:574.66ms +step:26794/57344 train_time:15397248ms step_avg:574.65ms +step:26795/57344 train_time:15397797ms step_avg:574.65ms +grad accum step:6699/14336 +step:26796/57344 train_time:15399072ms step_avg:574.68ms +step:26797/57344 train_time:15399089ms step_avg:574.66ms +step:26798/57344 train_time:15399339ms step_avg:574.65ms +step:26799/57344 train_time:15399896ms step_avg:574.64ms +grad accum step:6700/14336 +step:26800/57344 train_time:15401195ms step_avg:574.67ms +step:26801/57344 train_time:15401212ms step_avg:574.65ms +step:26802/57344 train_time:15401463ms step_avg:574.64ms +step:26803/57344 train_time:15402022ms step_avg:574.64ms +grad accum step:6701/14336 +step:26804/57344 train_time:15403361ms step_avg:574.67ms +step:26805/57344 train_time:15403378ms step_avg:574.65ms +step:26806/57344 train_time:15403625ms step_avg:574.63ms +step:26807/57344 train_time:15404173ms step_avg:574.63ms +grad accum step:6702/14336 +step:26808/57344 train_time:15405467ms step_avg:574.66ms +step:26809/57344 train_time:15405483ms step_avg:574.64ms +step:26810/57344 train_time:15405741ms step_avg:574.63ms +step:26811/57344 train_time:15406315ms step_avg:574.63ms +grad accum step:6703/14336 +step:26812/57344 train_time:15407609ms step_avg:574.65ms +step:26813/57344 train_time:15407626ms step_avg:574.63ms +step:26814/57344 train_time:15407877ms step_avg:574.62ms +step:26815/57344 train_time:15408437ms step_avg:574.62ms +grad accum step:6704/14336 +step:26816/57344 train_time:15409750ms step_avg:574.65ms +step:26816/57344 val_loss:6.453985 train_time:15409750ms step_avg:574.65ms +step:26817/57344 train_time:15409763ms step_avg:574.63ms +step:26818/57344 train_time:15409985ms step_avg:574.61ms +step:26819/57344 train_time:15410520ms step_avg:574.61ms +grad accum step:6705/14336 +step:26820/57344 train_time:15411816ms step_avg:574.64ms +step:26821/57344 train_time:15411833ms step_avg:574.62ms +step:26822/57344 train_time:15412077ms step_avg:574.61ms +step:26823/57344 train_time:15412628ms step_avg:574.60ms +grad accum step:6706/14336 +step:26824/57344 train_time:15413962ms step_avg:574.63ms +step:26825/57344 train_time:15413979ms step_avg:574.61ms +step:26826/57344 train_time:15414231ms step_avg:574.60ms +step:26827/57344 train_time:15414789ms step_avg:574.60ms +grad accum step:6707/14336 +step:26828/57344 train_time:15416090ms step_avg:574.63ms +step:26829/57344 train_time:15416107ms step_avg:574.61ms +step:26830/57344 train_time:15416353ms step_avg:574.59ms +step:26831/57344 train_time:15416902ms step_avg:574.59ms +grad accum step:6708/14336 +step:26832/57344 train_time:15418196ms step_avg:574.62ms +step:26833/57344 train_time:15418213ms step_avg:574.60ms +step:26834/57344 train_time:15418467ms step_avg:574.59ms +step:26835/57344 train_time:15419029ms step_avg:574.59ms +grad accum step:6709/14336 +step:26836/57344 train_time:15420348ms step_avg:574.61ms +step:26837/57344 train_time:15420365ms step_avg:574.59ms +step:26838/57344 train_time:15420612ms step_avg:574.58ms +step:26839/57344 train_time:15421159ms step_avg:574.58ms +grad accum step:6710/14336 +step:26840/57344 train_time:15422474ms step_avg:574.61ms +step:26841/57344 train_time:15422491ms step_avg:574.59ms +step:26842/57344 train_time:15422735ms step_avg:574.57ms +step:26843/57344 train_time:15423280ms step_avg:574.57ms +grad accum step:6711/14336 +step:26844/57344 train_time:15424565ms step_avg:574.60ms +step:26845/57344 train_time:15424582ms step_avg:574.58ms +step:26846/57344 train_time:15424829ms step_avg:574.57ms +step:26847/57344 train_time:15425373ms step_avg:574.57ms +grad accum step:6712/14336 +step:26848/57344 train_time:15426654ms step_avg:574.59ms +step:26849/57344 train_time:15426671ms step_avg:574.57ms +step:26850/57344 train_time:15426917ms step_avg:574.56ms +step:26851/57344 train_time:15427466ms step_avg:574.56ms +grad accum step:6713/14336 +step:26852/57344 train_time:15428765ms step_avg:574.59ms +step:26853/57344 train_time:15428781ms step_avg:574.56ms +step:26854/57344 train_time:15429033ms step_avg:574.55ms +step:26855/57344 train_time:15429591ms step_avg:574.55ms +grad accum step:6714/14336 +step:26856/57344 train_time:15430890ms step_avg:574.58ms +step:26857/57344 train_time:15430907ms step_avg:574.56ms +step:26858/57344 train_time:15431149ms step_avg:574.55ms +step:26859/57344 train_time:15431690ms step_avg:574.54ms +grad accum step:6715/14336 +step:26860/57344 train_time:15432997ms step_avg:574.57ms +step:26861/57344 train_time:15433014ms step_avg:574.55ms +step:26862/57344 train_time:15433260ms step_avg:574.54ms +step:26863/57344 train_time:15433802ms step_avg:574.54ms +grad accum step:6716/14336 +step:26864/57344 train_time:15435114ms step_avg:574.57ms +step:26865/57344 train_time:15435133ms step_avg:574.54ms +step:26866/57344 train_time:15435379ms step_avg:574.53ms +step:26867/57344 train_time:15435928ms step_avg:574.53ms +grad accum step:6717/14336 +step:26868/57344 train_time:15437198ms step_avg:574.56ms +step:26869/57344 train_time:15437215ms step_avg:574.54ms +step:26870/57344 train_time:15437461ms step_avg:574.52ms +step:26871/57344 train_time:15438010ms step_avg:574.52ms +grad accum step:6718/14336 +step:26872/57344 train_time:15439356ms step_avg:574.55ms +step:26873/57344 train_time:15439373ms step_avg:574.53ms +step:26874/57344 train_time:15439617ms step_avg:574.52ms +step:26875/57344 train_time:15440156ms step_avg:574.52ms +grad accum step:6719/14336 +step:26876/57344 train_time:15441455ms step_avg:574.54ms +step:26877/57344 train_time:15441472ms step_avg:574.52ms +step:26878/57344 train_time:15441722ms step_avg:574.51ms +step:26879/57344 train_time:15442277ms step_avg:574.51ms +grad accum step:6720/14336 +step:26880/57344 train_time:15443580ms step_avg:574.54ms +step:26880/57344 val_loss:6.431835 train_time:15443581ms step_avg:574.54ms +step:26881/57344 train_time:15443594ms step_avg:574.52ms +step:26882/57344 train_time:15443818ms step_avg:574.50ms +step:26883/57344 train_time:15444370ms step_avg:574.50ms +grad accum step:6721/14336 +step:26884/57344 train_time:15445673ms step_avg:574.53ms +step:26885/57344 train_time:15445689ms step_avg:574.51ms +step:26886/57344 train_time:15445933ms step_avg:574.50ms +step:26887/57344 train_time:15446474ms step_avg:574.50ms +grad accum step:6722/14336 +step:26888/57344 train_time:15447781ms step_avg:574.52ms +step:26889/57344 train_time:15447797ms step_avg:574.50ms +step:26890/57344 train_time:15448048ms step_avg:574.49ms +step:26891/57344 train_time:15448600ms step_avg:574.49ms +grad accum step:6723/14336 +step:26892/57344 train_time:15449905ms step_avg:574.52ms +step:26893/57344 train_time:15449922ms step_avg:574.50ms +step:26894/57344 train_time:15450166ms step_avg:574.48ms +step:26895/57344 train_time:15450718ms step_avg:574.48ms +grad accum step:6724/14336 +step:26896/57344 train_time:15452029ms step_avg:574.51ms +step:26897/57344 train_time:15452046ms step_avg:574.49ms +step:26898/57344 train_time:15452303ms step_avg:574.48ms +step:26899/57344 train_time:15452878ms step_avg:574.48ms +grad accum step:6725/14336 +step:26900/57344 train_time:15454165ms step_avg:574.50ms +step:26901/57344 train_time:15454182ms step_avg:574.48ms +step:26902/57344 train_time:15454426ms step_avg:574.47ms +step:26903/57344 train_time:15454979ms step_avg:574.47ms +grad accum step:6726/14336 +step:26904/57344 train_time:15456294ms step_avg:574.50ms +step:26905/57344 train_time:15456311ms step_avg:574.48ms +step:26906/57344 train_time:15456559ms step_avg:574.47ms +step:26907/57344 train_time:15457104ms step_avg:574.46ms +grad accum step:6727/14336 +step:26908/57344 train_time:15458427ms step_avg:574.49ms +step:26909/57344 train_time:15458444ms step_avg:574.47ms +step:26910/57344 train_time:15458689ms step_avg:574.46ms +step:26911/57344 train_time:15459231ms step_avg:574.46ms +grad accum step:6728/14336 +step:26912/57344 train_time:15460513ms step_avg:574.48ms +step:26913/57344 train_time:15460530ms step_avg:574.46ms +step:26914/57344 train_time:15460777ms step_avg:574.45ms +step:26915/57344 train_time:15461338ms step_avg:574.45ms +grad accum step:6729/14336 +step:26916/57344 train_time:15462659ms step_avg:574.48ms +step:26917/57344 train_time:15462676ms step_avg:574.46ms +step:26918/57344 train_time:15462921ms step_avg:574.45ms +step:26919/57344 train_time:15463465ms step_avg:574.44ms +grad accum step:6730/14336 +step:26920/57344 train_time:15464762ms step_avg:574.47ms +step:26921/57344 train_time:15464780ms step_avg:574.45ms +step:26922/57344 train_time:15465032ms step_avg:574.44ms +step:26923/57344 train_time:15465597ms step_avg:574.44ms +grad accum step:6731/14336 +step:26924/57344 train_time:15466912ms step_avg:574.47ms +step:26925/57344 train_time:15466928ms step_avg:574.44ms +step:26926/57344 train_time:15467178ms step_avg:574.43ms +step:26927/57344 train_time:15467723ms step_avg:574.43ms +grad accum step:6732/14336 +step:26928/57344 train_time:15469031ms step_avg:574.46ms +step:26929/57344 train_time:15469048ms step_avg:574.44ms +step:26930/57344 train_time:15469294ms step_avg:574.43ms +step:26931/57344 train_time:15469844ms step_avg:574.43ms +grad accum step:6733/14336 +step:26932/57344 train_time:15471162ms step_avg:574.45ms +step:26933/57344 train_time:15471179ms step_avg:574.43ms +step:26934/57344 train_time:15471430ms step_avg:574.42ms +step:26935/57344 train_time:15471985ms step_avg:574.42ms +grad accum step:6734/14336 +step:26936/57344 train_time:15473325ms step_avg:574.45ms +step:26937/57344 train_time:15473341ms step_avg:574.43ms +step:26938/57344 train_time:15473586ms step_avg:574.41ms +step:26939/57344 train_time:15474130ms step_avg:574.41ms +grad accum step:6735/14336 +step:26940/57344 train_time:15475506ms step_avg:574.44ms +step:26941/57344 train_time:15475523ms step_avg:574.42ms +step:26942/57344 train_time:15475780ms step_avg:574.41ms +step:26943/57344 train_time:15476357ms step_avg:574.41ms +grad accum step:6736/14336 +step:26944/57344 train_time:15477675ms step_avg:574.44ms +step:26944/57344 val_loss:6.417792 train_time:15477675ms step_avg:574.44ms +step:26945/57344 train_time:15477688ms step_avg:574.42ms +step:26946/57344 train_time:15477915ms step_avg:574.40ms +step:26947/57344 train_time:15478477ms step_avg:574.40ms +grad accum step:6737/14336 +step:26948/57344 train_time:15479827ms step_avg:574.43ms +step:26949/57344 train_time:15479844ms step_avg:574.41ms +step:26950/57344 train_time:15480094ms step_avg:574.40ms +step:26951/57344 train_time:15480659ms step_avg:574.40ms +grad accum step:6738/14336 +step:26952/57344 train_time:15482002ms step_avg:574.43ms +step:26953/57344 train_time:15482019ms step_avg:574.41ms +step:26954/57344 train_time:15482262ms step_avg:574.40ms +step:26955/57344 train_time:15482816ms step_avg:574.39ms +grad accum step:6739/14336 +step:26956/57344 train_time:15484227ms step_avg:574.43ms +step:26957/57344 train_time:15484244ms step_avg:574.41ms +step:26958/57344 train_time:15484490ms step_avg:574.39ms +step:26959/57344 train_time:15485041ms step_avg:574.39ms +grad accum step:6740/14336 +step:26960/57344 train_time:15486356ms step_avg:574.42ms +step:26961/57344 train_time:15486373ms step_avg:574.40ms +step:26962/57344 train_time:15486624ms step_avg:574.39ms +step:26963/57344 train_time:15487176ms step_avg:574.39ms +grad accum step:6741/14336 +step:26964/57344 train_time:15488467ms step_avg:574.41ms +step:26965/57344 train_time:15488484ms step_avg:574.39ms +step:26966/57344 train_time:15488736ms step_avg:574.38ms +step:26967/57344 train_time:15489298ms step_avg:574.38ms +grad accum step:6742/14336 +step:26968/57344 train_time:15490599ms step_avg:574.41ms +step:26969/57344 train_time:15490616ms step_avg:574.39ms +step:26970/57344 train_time:15490863ms step_avg:574.37ms +step:26971/57344 train_time:15491404ms step_avg:574.37ms +grad accum step:6743/14336 +step:26972/57344 train_time:15492694ms step_avg:574.40ms +step:26973/57344 train_time:15492711ms step_avg:574.38ms +step:26974/57344 train_time:15492959ms step_avg:574.37ms +step:26975/57344 train_time:15493507ms step_avg:574.37ms +grad accum step:6744/14336 +step:26976/57344 train_time:15494800ms step_avg:574.39ms +step:26977/57344 train_time:15494817ms step_avg:574.37ms +step:26978/57344 train_time:15495071ms step_avg:574.36ms +step:26979/57344 train_time:15495642ms step_avg:574.36ms +grad accum step:6745/14336 +step:26980/57344 train_time:15498227ms step_avg:574.43ms +step:26981/57344 train_time:15498420ms step_avg:574.42ms +step:26982/57344 train_time:15498624ms step_avg:574.41ms +step:26983/57344 train_time:15499185ms step_avg:574.41ms +grad accum step:6746/14336 +step:26984/57344 train_time:15500547ms step_avg:574.43ms +step:26985/57344 train_time:15500563ms step_avg:574.41ms +step:26986/57344 train_time:15500813ms step_avg:574.40ms +step:26987/57344 train_time:15501377ms step_avg:574.40ms +grad accum step:6747/14336 +step:26988/57344 train_time:15502691ms step_avg:574.43ms +step:26989/57344 train_time:15502708ms step_avg:574.41ms +step:26990/57344 train_time:15502960ms step_avg:574.40ms +step:26991/57344 train_time:15503519ms step_avg:574.40ms +grad accum step:6748/14336 +step:26992/57344 train_time:15504834ms step_avg:574.42ms +step:26993/57344 train_time:15504851ms step_avg:574.40ms +step:26994/57344 train_time:15505100ms step_avg:574.39ms +step:26995/57344 train_time:15505644ms step_avg:574.39ms +grad accum step:6749/14336 +step:26996/57344 train_time:15506920ms step_avg:574.42ms +step:26997/57344 train_time:15506937ms step_avg:574.39ms +step:26998/57344 train_time:15507186ms step_avg:574.38ms +step:26999/57344 train_time:15507729ms step_avg:574.38ms +grad accum step:6750/14336 +step:27000/57344 train_time:15509027ms step_avg:574.41ms +step:27001/57344 train_time:15509044ms step_avg:574.39ms +step:27002/57344 train_time:15509290ms step_avg:574.38ms +step:27003/57344 train_time:15509840ms step_avg:574.37ms +grad accum step:6751/14336 +step:27004/57344 train_time:15511188ms step_avg:574.40ms +step:27005/57344 train_time:15511561ms step_avg:574.40ms +step:27006/57344 train_time:15511803ms step_avg:574.38ms +step:27007/57344 train_time:15512172ms step_avg:574.38ms +grad accum step:6752/14336 +step:27008/57344 train_time:15513482ms step_avg:574.40ms +step:27008/57344 val_loss:6.393958 train_time:15513483ms step_avg:574.40ms +step:27009/57344 train_time:15514090ms step_avg:574.40ms +step:27010/57344 train_time:15514207ms step_avg:574.39ms +step:27011/57344 train_time:15514590ms step_avg:574.38ms +grad accum step:6753/14336 +step:27012/57344 train_time:15515986ms step_avg:574.41ms +step:27013/57344 train_time:15515998ms step_avg:574.39ms +step:27014/57344 train_time:15516218ms step_avg:574.38ms +step:27015/57344 train_time:15516750ms step_avg:574.38ms +grad accum step:6754/14336 +step:27016/57344 train_time:15518051ms step_avg:574.40ms +step:27017/57344 train_time:15518067ms step_avg:574.38ms +step:27018/57344 train_time:15518311ms step_avg:574.37ms +step:27019/57344 train_time:15518855ms step_avg:574.37ms +grad accum step:6755/14336 +step:27020/57344 train_time:15520176ms step_avg:574.40ms +step:27021/57344 train_time:15520193ms step_avg:574.38ms +step:27022/57344 train_time:15520436ms step_avg:574.36ms +step:27023/57344 train_time:15520985ms step_avg:574.36ms +grad accum step:6756/14336 +step:27024/57344 train_time:15522299ms step_avg:574.39ms +step:27025/57344 train_time:15522315ms step_avg:574.37ms +step:27026/57344 train_time:15522559ms step_avg:574.36ms +step:27027/57344 train_time:15523106ms step_avg:574.36ms +grad accum step:6757/14336 +step:27028/57344 train_time:15524396ms step_avg:574.38ms +step:27029/57344 train_time:15524413ms step_avg:574.36ms +step:27030/57344 train_time:15524666ms step_avg:574.35ms +step:27031/57344 train_time:15525226ms step_avg:574.35ms +grad accum step:6758/14336 +step:27032/57344 train_time:15526515ms step_avg:574.38ms +step:27033/57344 train_time:15526531ms step_avg:574.35ms +step:27034/57344 train_time:15526780ms step_avg:574.34ms +step:27035/57344 train_time:15527337ms step_avg:574.34ms +grad accum step:6759/14336 +step:27036/57344 train_time:15528632ms step_avg:574.37ms +step:27037/57344 train_time:15528649ms step_avg:574.35ms +step:27038/57344 train_time:15528895ms step_avg:574.34ms +step:27039/57344 train_time:15529444ms step_avg:574.34ms +grad accum step:6760/14336 +step:27040/57344 train_time:15530784ms step_avg:574.36ms +step:27041/57344 train_time:15530801ms step_avg:574.34ms +step:27042/57344 train_time:15531050ms step_avg:574.33ms +step:27043/57344 train_time:15531608ms step_avg:574.33ms +grad accum step:6761/14336 +step:27044/57344 train_time:15532921ms step_avg:574.36ms +step:27045/57344 train_time:15532938ms step_avg:574.34ms +step:27046/57344 train_time:15533185ms step_avg:574.32ms +step:27047/57344 train_time:15533740ms step_avg:574.32ms +grad accum step:6762/14336 +step:27048/57344 train_time:15535040ms step_avg:574.35ms +step:27049/57344 train_time:15535057ms step_avg:574.33ms +step:27050/57344 train_time:15535303ms step_avg:574.32ms +step:27051/57344 train_time:15535848ms step_avg:574.32ms +grad accum step:6763/14336 +step:27052/57344 train_time:15537142ms step_avg:574.34ms +step:27053/57344 train_time:15537159ms step_avg:574.32ms +step:27054/57344 train_time:15537405ms step_avg:574.31ms +step:27055/57344 train_time:15537953ms step_avg:574.31ms +grad accum step:6764/14336 +step:27056/57344 train_time:15539250ms step_avg:574.34ms +step:27057/57344 train_time:15539266ms step_avg:574.32ms +step:27058/57344 train_time:15539514ms step_avg:574.30ms +step:27059/57344 train_time:15540061ms step_avg:574.30ms +grad accum step:6765/14336 +step:27060/57344 train_time:15541408ms step_avg:574.33ms +step:27061/57344 train_time:15541425ms step_avg:574.31ms +step:27062/57344 train_time:15541674ms step_avg:574.30ms +step:27063/57344 train_time:15542221ms step_avg:574.30ms +grad accum step:6766/14336 +step:27064/57344 train_time:15543505ms step_avg:574.32ms +step:27065/57344 train_time:15543522ms step_avg:574.30ms +step:27066/57344 train_time:15543766ms step_avg:574.29ms +step:27067/57344 train_time:15544302ms step_avg:574.29ms +grad accum step:6767/14336 +step:27068/57344 train_time:15545618ms step_avg:574.32ms +step:27069/57344 train_time:15545634ms step_avg:574.30ms +step:27070/57344 train_time:15545878ms step_avg:574.28ms +step:27071/57344 train_time:15546427ms step_avg:574.28ms +grad accum step:6768/14336 +step:27072/57344 train_time:15547728ms step_avg:574.31ms +step:27072/57344 val_loss:6.372041 train_time:15547728ms step_avg:574.31ms +step:27073/57344 train_time:15547741ms step_avg:574.29ms +step:27074/57344 train_time:15547993ms step_avg:574.28ms +step:27075/57344 train_time:15548530ms step_avg:574.28ms +grad accum step:6769/14336 +step:27076/57344 train_time:15549813ms step_avg:574.30ms +step:27077/57344 train_time:15549830ms step_avg:574.28ms +step:27078/57344 train_time:15550074ms step_avg:574.27ms +step:27079/57344 train_time:15550620ms step_avg:574.27ms +grad accum step:6770/14336 +step:27080/57344 train_time:15551924ms step_avg:574.30ms +step:27081/57344 train_time:15551940ms step_avg:574.27ms +step:27082/57344 train_time:15552183ms step_avg:574.26ms +step:27083/57344 train_time:15552733ms step_avg:574.26ms +grad accum step:6771/14336 +step:27084/57344 train_time:15554045ms step_avg:574.29ms +step:27085/57344 train_time:15554062ms step_avg:574.27ms +step:27086/57344 train_time:15554300ms step_avg:574.26ms +step:27087/57344 train_time:15554833ms step_avg:574.25ms +grad accum step:6772/14336 +step:27088/57344 train_time:15556135ms step_avg:574.28ms +step:27089/57344 train_time:15556152ms step_avg:574.26ms +step:27090/57344 train_time:15556401ms step_avg:574.25ms +step:27091/57344 train_time:15556960ms step_avg:574.25ms +grad accum step:6773/14336 +step:27092/57344 train_time:15558292ms step_avg:574.28ms +step:27093/57344 train_time:15558308ms step_avg:574.26ms +step:27094/57344 train_time:15558560ms step_avg:574.24ms +step:27095/57344 train_time:15559119ms step_avg:574.24ms +grad accum step:6774/14336 +step:27096/57344 train_time:15560394ms step_avg:574.27ms +step:27097/57344 train_time:15560410ms step_avg:574.25ms +step:27098/57344 train_time:15560655ms step_avg:574.24ms +step:27099/57344 train_time:15561199ms step_avg:574.24ms +grad accum step:6775/14336 +step:27100/57344 train_time:15562497ms step_avg:574.26ms +step:27101/57344 train_time:15562514ms step_avg:574.24ms +step:27102/57344 train_time:15562766ms step_avg:574.23ms +step:27103/57344 train_time:15563343ms step_avg:574.23ms +grad accum step:6776/14336 +step:27104/57344 train_time:15564676ms step_avg:574.26ms +step:27105/57344 train_time:15564693ms step_avg:574.24ms +step:27106/57344 train_time:15564940ms step_avg:574.22ms +step:27107/57344 train_time:15565511ms step_avg:574.22ms +grad accum step:6777/14336 +step:27108/57344 train_time:15566883ms step_avg:574.25ms +step:27109/57344 train_time:15566899ms step_avg:574.23ms +step:27110/57344 train_time:15567155ms step_avg:574.22ms +step:27111/57344 train_time:15567720ms step_avg:574.22ms +grad accum step:6778/14336 +step:27112/57344 train_time:15569034ms step_avg:574.25ms +step:27113/57344 train_time:15569051ms step_avg:574.23ms +step:27114/57344 train_time:15569294ms step_avg:574.22ms +step:27115/57344 train_time:15569842ms step_avg:574.22ms +grad accum step:6779/14336 +step:27116/57344 train_time:15571165ms step_avg:574.24ms +step:27117/57344 train_time:15571182ms step_avg:574.22ms +step:27118/57344 train_time:15571433ms step_avg:574.21ms +step:27119/57344 train_time:15571986ms step_avg:574.21ms +grad accum step:6780/14336 +step:27120/57344 train_time:15579728ms step_avg:574.47ms +step:27121/57344 train_time:15579745ms step_avg:574.45ms +step:27122/57344 train_time:15579981ms step_avg:574.44ms +step:27123/57344 train_time:15580523ms step_avg:574.44ms +grad accum step:6781/14336 +step:27124/57344 train_time:15581809ms step_avg:574.47ms +step:27125/57344 train_time:15581826ms step_avg:574.45ms +step:27126/57344 train_time:15582073ms step_avg:574.43ms +step:27127/57344 train_time:15584469ms step_avg:574.50ms +grad accum step:6782/14336 +step:27128/57344 train_time:15585498ms step_avg:574.52ms +step:27129/57344 train_time:15585515ms step_avg:574.50ms +step:27130/57344 train_time:15585766ms step_avg:574.48ms +step:27131/57344 train_time:15586329ms step_avg:574.48ms +grad accum step:6783/14336 +step:27132/57344 train_time:15587626ms step_avg:574.51ms +step:27133/57344 train_time:15587643ms step_avg:574.49ms +step:27134/57344 train_time:15587891ms step_avg:574.48ms +step:27135/57344 train_time:15588442ms step_avg:574.48ms +grad accum step:6784/14336 +step:27136/57344 train_time:15589756ms step_avg:574.50ms +step:27136/57344 val_loss:6.353952 train_time:15589757ms step_avg:574.50ms +step:27137/57344 train_time:15589769ms step_avg:574.48ms +step:27138/57344 train_time:15589995ms step_avg:574.47ms +step:27139/57344 train_time:15590555ms step_avg:574.47ms +grad accum step:6785/14336 +step:27140/57344 train_time:15591869ms step_avg:574.50ms +step:27141/57344 train_time:15591886ms step_avg:574.48ms +step:27142/57344 train_time:15592139ms step_avg:574.47ms +step:27143/57344 train_time:15592700ms step_avg:574.46ms +grad accum step:6786/14336 +step:27144/57344 train_time:15593997ms step_avg:574.49ms +step:27145/57344 train_time:15594013ms step_avg:574.47ms +step:27146/57344 train_time:15594260ms step_avg:574.46ms +step:27147/57344 train_time:15594813ms step_avg:574.46ms +grad accum step:6787/14336 +step:27148/57344 train_time:15596133ms step_avg:574.49ms +step:27149/57344 train_time:15596150ms step_avg:574.46ms +step:27150/57344 train_time:15596397ms step_avg:574.45ms +step:27151/57344 train_time:15596951ms step_avg:574.45ms +grad accum step:6788/14336 +step:27152/57344 train_time:15598284ms step_avg:574.48ms +step:27153/57344 train_time:15598301ms step_avg:574.46ms +step:27154/57344 train_time:15598549ms step_avg:574.45ms +step:27155/57344 train_time:15599095ms step_avg:574.45ms +grad accum step:6789/14336 +step:27156/57344 train_time:15600415ms step_avg:574.47ms +step:27157/57344 train_time:15600432ms step_avg:574.45ms +step:27158/57344 train_time:15600677ms step_avg:574.44ms +step:27159/57344 train_time:15601218ms step_avg:574.44ms +grad accum step:6790/14336 +step:27160/57344 train_time:15602546ms step_avg:574.47ms +step:27161/57344 train_time:15602563ms step_avg:574.45ms +step:27162/57344 train_time:15602819ms step_avg:574.44ms +step:27163/57344 train_time:15603387ms step_avg:574.44ms +grad accum step:6791/14336 +step:27164/57344 train_time:15604660ms step_avg:574.46ms +step:27165/57344 train_time:15604677ms step_avg:574.44ms +step:27166/57344 train_time:15604922ms step_avg:574.43ms +step:27167/57344 train_time:15605469ms step_avg:574.43ms +grad accum step:6792/14336 +step:27168/57344 train_time:15606749ms step_avg:574.45ms +step:27169/57344 train_time:15606766ms step_avg:574.43ms +step:27170/57344 train_time:15607018ms step_avg:574.42ms +step:27171/57344 train_time:15607578ms step_avg:574.42ms +grad accum step:6793/14336 +step:27172/57344 train_time:15608895ms step_avg:574.45ms +step:27173/57344 train_time:15608912ms step_avg:574.43ms +step:27174/57344 train_time:15609161ms step_avg:574.42ms +step:27175/57344 train_time:15609709ms step_avg:574.41ms +grad accum step:6794/14336 +step:27176/57344 train_time:15611028ms step_avg:574.44ms +step:27177/57344 train_time:15611045ms step_avg:574.42ms +step:27178/57344 train_time:15611295ms step_avg:574.41ms +step:27179/57344 train_time:15611850ms step_avg:574.41ms +grad accum step:6795/14336 +step:27180/57344 train_time:15613153ms step_avg:574.44ms +step:27181/57344 train_time:15613170ms step_avg:574.41ms +step:27182/57344 train_time:15613421ms step_avg:574.40ms +step:27183/57344 train_time:15613983ms step_avg:574.40ms +grad accum step:6796/14336 +step:27184/57344 train_time:15615281ms step_avg:574.43ms +step:27185/57344 train_time:15615297ms step_avg:574.41ms +step:27186/57344 train_time:15615552ms step_avg:574.40ms +step:27187/57344 train_time:15616118ms step_avg:574.40ms +grad accum step:6797/14336 +step:27188/57344 train_time:15617406ms step_avg:574.42ms +step:27189/57344 train_time:15617424ms step_avg:574.40ms +step:27190/57344 train_time:15617666ms step_avg:574.39ms +step:27191/57344 train_time:15618210ms step_avg:574.39ms +grad accum step:6798/14336 +step:27192/57344 train_time:15619589ms step_avg:574.42ms +step:27193/57344 train_time:15619606ms step_avg:574.40ms +step:27194/57344 train_time:15619855ms step_avg:574.39ms +step:27195/57344 train_time:15620417ms step_avg:574.39ms +grad accum step:6799/14336 +step:27196/57344 train_time:15621719ms step_avg:574.41ms +step:27197/57344 train_time:15621736ms step_avg:574.39ms +step:27198/57344 train_time:15621980ms step_avg:574.38ms +step:27199/57344 train_time:15622523ms step_avg:574.38ms +grad accum step:6800/14336 +step:27200/57344 train_time:15623842ms step_avg:574.41ms +step:27200/57344 val_loss:6.338624 train_time:15623843ms step_avg:574.41ms +step:27201/57344 train_time:15623855ms step_avg:574.39ms +step:27202/57344 train_time:15624078ms step_avg:574.37ms +step:27203/57344 train_time:15624616ms step_avg:574.37ms +grad accum step:6801/14336 +step:27204/57344 train_time:15625921ms step_avg:574.40ms +step:27205/57344 train_time:15625938ms step_avg:574.38ms +step:27206/57344 train_time:15626185ms step_avg:574.37ms +step:27207/57344 train_time:15626730ms step_avg:574.36ms +grad accum step:6802/14336 +step:27208/57344 train_time:15628030ms step_avg:574.39ms +step:27209/57344 train_time:15628047ms step_avg:574.37ms +step:27210/57344 train_time:15628292ms step_avg:574.36ms +step:27211/57344 train_time:15628836ms step_avg:574.36ms +grad accum step:6803/14336 +step:27212/57344 train_time:15630141ms step_avg:574.38ms +step:27213/57344 train_time:15630158ms step_avg:574.36ms +step:27214/57344 train_time:15630408ms step_avg:574.35ms +step:27215/57344 train_time:15630977ms step_avg:574.35ms +grad accum step:6804/14336 +step:27216/57344 train_time:15632316ms step_avg:574.38ms +step:27217/57344 train_time:15632333ms step_avg:574.36ms +step:27218/57344 train_time:15632589ms step_avg:574.35ms +step:27219/57344 train_time:15633159ms step_avg:574.35ms +grad accum step:6805/14336 +step:27220/57344 train_time:15634480ms step_avg:574.37ms +step:27221/57344 train_time:15634497ms step_avg:574.35ms +step:27222/57344 train_time:15634744ms step_avg:574.34ms +step:27223/57344 train_time:15635301ms step_avg:574.34ms +grad accum step:6806/14336 +step:27224/57344 train_time:15636609ms step_avg:574.37ms +step:27225/57344 train_time:15636626ms step_avg:574.35ms +step:27226/57344 train_time:15636876ms step_avg:574.34ms +step:27227/57344 train_time:15637435ms step_avg:574.34ms +grad accum step:6807/14336 +step:27228/57344 train_time:15638730ms step_avg:574.36ms +step:27229/57344 train_time:15638747ms step_avg:574.34ms +step:27230/57344 train_time:15638991ms step_avg:574.33ms +step:27231/57344 train_time:15639538ms step_avg:574.33ms +grad accum step:6808/14336 +step:27232/57344 train_time:15640863ms step_avg:574.36ms +step:27233/57344 train_time:15640880ms step_avg:574.34ms +step:27234/57344 train_time:15641131ms step_avg:574.32ms +step:27235/57344 train_time:15641693ms step_avg:574.32ms +grad accum step:6809/14336 +step:27236/57344 train_time:15642997ms step_avg:574.35ms +step:27237/57344 train_time:15643013ms step_avg:574.33ms +step:27238/57344 train_time:15643263ms step_avg:574.32ms +step:27239/57344 train_time:15643818ms step_avg:574.32ms +grad accum step:6810/14336 +step:27240/57344 train_time:15645184ms step_avg:574.35ms +step:27241/57344 train_time:15645201ms step_avg:574.33ms +step:27242/57344 train_time:15645450ms step_avg:574.31ms +step:27243/57344 train_time:15646021ms step_avg:574.31ms +grad accum step:6811/14336 +step:27244/57344 train_time:15647347ms step_avg:574.34ms +step:27245/57344 train_time:15647364ms step_avg:574.32ms +step:27246/57344 train_time:15647613ms step_avg:574.31ms +step:27247/57344 train_time:15648167ms step_avg:574.31ms +grad accum step:6812/14336 +step:27248/57344 train_time:15649497ms step_avg:574.34ms +step:27249/57344 train_time:15649513ms step_avg:574.32ms +step:27250/57344 train_time:15649761ms step_avg:574.30ms +step:27251/57344 train_time:15650307ms step_avg:574.30ms +grad accum step:6813/14336 +step:27252/57344 train_time:15651595ms step_avg:574.33ms +step:27253/57344 train_time:15651612ms step_avg:574.31ms +step:27254/57344 train_time:15651863ms step_avg:574.30ms +step:27255/57344 train_time:15652430ms step_avg:574.30ms +grad accum step:6814/14336 +step:27256/57344 train_time:15653737ms step_avg:574.32ms +step:27257/57344 train_time:15653753ms step_avg:574.30ms +step:27258/57344 train_time:15653998ms step_avg:574.29ms +step:27259/57344 train_time:15654547ms step_avg:574.29ms +grad accum step:6815/14336 +step:27260/57344 train_time:15655868ms step_avg:574.32ms +step:27261/57344 train_time:15655884ms step_avg:574.30ms +step:27262/57344 train_time:15656132ms step_avg:574.28ms +step:27263/57344 train_time:15656675ms step_avg:574.28ms +grad accum step:6816/14336 +step:27264/57344 train_time:15657958ms step_avg:574.31ms +step:27264/57344 val_loss:6.322062 train_time:15657958ms step_avg:574.31ms +step:27265/57344 train_time:15657970ms step_avg:574.29ms +step:27266/57344 train_time:15658194ms step_avg:574.28ms +step:27267/57344 train_time:15658740ms step_avg:574.27ms +grad accum step:6817/14336 +step:27268/57344 train_time:15660032ms step_avg:574.30ms +step:27269/57344 train_time:15660049ms step_avg:574.28ms +step:27270/57344 train_time:15660297ms step_avg:574.27ms +step:27271/57344 train_time:15660855ms step_avg:574.27ms +grad accum step:6818/14336 +step:27272/57344 train_time:15662184ms step_avg:574.30ms +step:27273/57344 train_time:15662201ms step_avg:574.27ms +step:27274/57344 train_time:15662447ms step_avg:574.26ms +step:27275/57344 train_time:15662989ms step_avg:574.26ms +grad accum step:6819/14336 +step:27276/57344 train_time:15664303ms step_avg:574.29ms +step:27277/57344 train_time:15664320ms step_avg:574.27ms +step:27278/57344 train_time:15664566ms step_avg:574.26ms +step:27279/57344 train_time:15665120ms step_avg:574.26ms +grad accum step:6820/14336 +step:27280/57344 train_time:15666411ms step_avg:574.28ms +step:27281/57344 train_time:15666428ms step_avg:574.26ms +step:27282/57344 train_time:15666674ms step_avg:574.25ms +step:27283/57344 train_time:15667225ms step_avg:574.25ms +grad accum step:6821/14336 +step:27284/57344 train_time:15668529ms step_avg:574.28ms +step:27285/57344 train_time:15668546ms step_avg:574.25ms +step:27286/57344 train_time:15668796ms step_avg:574.24ms +step:27287/57344 train_time:15669354ms step_avg:574.24ms +grad accum step:6822/14336 +step:27288/57344 train_time:15670662ms step_avg:574.27ms +step:27289/57344 train_time:15670680ms step_avg:574.25ms +step:27290/57344 train_time:15670927ms step_avg:574.24ms +step:27291/57344 train_time:15671485ms step_avg:574.24ms +grad accum step:6823/14336 +step:27292/57344 train_time:15672869ms step_avg:574.27ms +step:27293/57344 train_time:15672885ms step_avg:574.25ms +step:27294/57344 train_time:15673132ms step_avg:574.23ms +step:27295/57344 train_time:15673680ms step_avg:574.23ms +grad accum step:6824/14336 +step:27296/57344 train_time:15674967ms step_avg:574.26ms +step:27297/57344 train_time:15674984ms step_avg:574.24ms +step:27298/57344 train_time:15675237ms step_avg:574.23ms +step:27299/57344 train_time:15675797ms step_avg:574.23ms +grad accum step:6825/14336 +step:27300/57344 train_time:15677088ms step_avg:574.25ms +step:27301/57344 train_time:15677105ms step_avg:574.23ms +step:27302/57344 train_time:15677360ms step_avg:574.22ms +step:27303/57344 train_time:15677940ms step_avg:574.22ms +grad accum step:6826/14336 +step:27304/57344 train_time:15679259ms step_avg:574.25ms +step:27305/57344 train_time:15679276ms step_avg:574.23ms +step:27306/57344 train_time:15679523ms step_avg:574.22ms +step:27307/57344 train_time:15680071ms step_avg:574.21ms +grad accum step:6827/14336 +step:27308/57344 train_time:15681369ms step_avg:574.24ms +step:27309/57344 train_time:15681386ms step_avg:574.22ms +step:27310/57344 train_time:15681634ms step_avg:574.21ms +step:27311/57344 train_time:15682188ms step_avg:574.21ms +grad accum step:6828/14336 +step:27312/57344 train_time:15683487ms step_avg:574.23ms +step:27313/57344 train_time:15683503ms step_avg:574.21ms +step:27314/57344 train_time:15683755ms step_avg:574.20ms +step:27315/57344 train_time:15684310ms step_avg:574.20ms +grad accum step:6829/14336 +step:27316/57344 train_time:15685618ms step_avg:574.23ms +step:27317/57344 train_time:15685634ms step_avg:574.21ms +step:27318/57344 train_time:15685877ms step_avg:574.20ms +step:27319/57344 train_time:15686417ms step_avg:574.19ms +grad accum step:6830/14336 +step:27320/57344 train_time:15687703ms step_avg:574.22ms +step:27321/57344 train_time:15687720ms step_avg:574.20ms +step:27322/57344 train_time:15687970ms step_avg:574.19ms +step:27323/57344 train_time:15688532ms step_avg:574.19ms +grad accum step:6831/14336 +step:27324/57344 train_time:15689852ms step_avg:574.22ms +step:27325/57344 train_time:15689869ms step_avg:574.19ms +step:27326/57344 train_time:15690112ms step_avg:574.18ms +step:27327/57344 train_time:15690658ms step_avg:574.18ms +grad accum step:6832/14336 +step:27328/57344 train_time:15691976ms step_avg:574.21ms +step:27328/57344 val_loss:6.298830 train_time:15691977ms step_avg:574.21ms +step:27329/57344 train_time:15691989ms step_avg:574.19ms +step:27330/57344 train_time:15692217ms step_avg:574.18ms +step:27331/57344 train_time:15692776ms step_avg:574.17ms +grad accum step:6833/14336 +step:27332/57344 train_time:15694084ms step_avg:574.20ms +step:27333/57344 train_time:15694100ms step_avg:574.18ms +step:27334/57344 train_time:15694355ms step_avg:574.17ms +step:27335/57344 train_time:15694920ms step_avg:574.17ms +grad accum step:6834/14336 +step:27336/57344 train_time:15696242ms step_avg:574.20ms +step:27337/57344 train_time:15696258ms step_avg:574.18ms +step:27338/57344 train_time:15696507ms step_avg:574.16ms +step:27339/57344 train_time:15697057ms step_avg:574.16ms +grad accum step:6835/14336 +step:27340/57344 train_time:15698348ms step_avg:574.19ms +step:27341/57344 train_time:15698365ms step_avg:574.17ms +step:27342/57344 train_time:15698624ms step_avg:574.16ms +step:27343/57344 train_time:15699203ms step_avg:574.16ms +grad accum step:6836/14336 +step:27344/57344 train_time:15700526ms step_avg:574.19ms +step:27345/57344 train_time:15700543ms step_avg:574.17ms +step:27346/57344 train_time:15700790ms step_avg:574.15ms +step:27347/57344 train_time:15701340ms step_avg:574.15ms +grad accum step:6837/14336 +step:27348/57344 train_time:15702702ms step_avg:574.18ms +step:27349/57344 train_time:15702719ms step_avg:574.16ms +step:27350/57344 train_time:15702970ms step_avg:574.15ms +step:27351/57344 train_time:15703525ms step_avg:574.15ms +grad accum step:6838/14336 +step:27352/57344 train_time:15704820ms step_avg:574.17ms +step:27353/57344 train_time:15704836ms step_avg:574.15ms +step:27354/57344 train_time:15705083ms step_avg:574.14ms +step:27355/57344 train_time:15705633ms step_avg:574.14ms +grad accum step:6839/14336 +step:27356/57344 train_time:15706953ms step_avg:574.17ms +step:27357/57344 train_time:15706970ms step_avg:574.15ms +step:27358/57344 train_time:15707216ms step_avg:574.14ms +step:27359/57344 train_time:15707757ms step_avg:574.13ms +grad accum step:6840/14336 +step:27360/57344 train_time:15709031ms step_avg:574.16ms +step:27361/57344 train_time:15709048ms step_avg:574.14ms +step:27362/57344 train_time:15709301ms step_avg:574.13ms +step:27363/57344 train_time:15709862ms step_avg:574.13ms +grad accum step:6841/14336 +step:27364/57344 train_time:15711178ms step_avg:574.16ms +step:27365/57344 train_time:15711195ms step_avg:574.13ms +step:27366/57344 train_time:15711440ms step_avg:574.12ms +step:27367/57344 train_time:15711987ms step_avg:574.12ms +grad accum step:6842/14336 +step:27368/57344 train_time:15713300ms step_avg:574.15ms +step:27369/57344 train_time:15713317ms step_avg:574.13ms +step:27370/57344 train_time:15713570ms step_avg:574.12ms +step:27371/57344 train_time:15714134ms step_avg:574.12ms +grad accum step:6843/14336 +step:27372/57344 train_time:15715471ms step_avg:574.14ms +step:27373/57344 train_time:15715488ms step_avg:574.12ms +step:27374/57344 train_time:15715743ms step_avg:574.11ms +step:27375/57344 train_time:15716308ms step_avg:574.11ms +grad accum step:6844/14336 +step:27376/57344 train_time:15717624ms step_avg:574.14ms +step:27377/57344 train_time:15717641ms step_avg:574.12ms +step:27378/57344 train_time:15717892ms step_avg:574.11ms +step:27379/57344 train_time:15718449ms step_avg:574.11ms +grad accum step:6845/14336 +step:27380/57344 train_time:15719772ms step_avg:574.13ms +step:27381/57344 train_time:15719789ms step_avg:574.11ms +step:27382/57344 train_time:15720040ms step_avg:574.10ms +step:27383/57344 train_time:15720599ms step_avg:574.10ms +grad accum step:6846/14336 +step:27384/57344 train_time:15721905ms step_avg:574.13ms +step:27385/57344 train_time:15721922ms step_avg:574.11ms +step:27386/57344 train_time:15722169ms step_avg:574.10ms +step:27387/57344 train_time:15722722ms step_avg:574.09ms +grad accum step:6847/14336 +step:27388/57344 train_time:15724037ms step_avg:574.12ms +step:27389/57344 train_time:15724054ms step_avg:574.10ms +step:27390/57344 train_time:15724303ms step_avg:574.09ms +step:27391/57344 train_time:15724851ms step_avg:574.09ms +grad accum step:6848/14336 +step:27392/57344 train_time:15726186ms step_avg:574.12ms +step:27392/57344 val_loss:6.284842 train_time:15726187ms step_avg:574.12ms +step:27393/57344 train_time:15726199ms step_avg:574.10ms +step:27394/57344 train_time:15726503ms step_avg:574.09ms +step:27395/57344 train_time:15727051ms step_avg:574.08ms +grad accum step:6849/14336 +step:27396/57344 train_time:15728336ms step_avg:574.11ms +step:27397/57344 train_time:15728353ms step_avg:574.09ms +step:27398/57344 train_time:15728603ms step_avg:574.08ms +step:27399/57344 train_time:15729159ms step_avg:574.08ms +grad accum step:6850/14336 +step:27400/57344 train_time:15730472ms step_avg:574.10ms +step:27401/57344 train_time:15730488ms step_avg:574.08ms +step:27402/57344 train_time:15730741ms step_avg:574.07ms +step:27403/57344 train_time:15731303ms step_avg:574.07ms +grad accum step:6851/14336 +step:27404/57344 train_time:15732599ms step_avg:574.10ms +step:27405/57344 train_time:15732616ms step_avg:574.08ms +step:27406/57344 train_time:15732866ms step_avg:574.07ms +step:27407/57344 train_time:15733423ms step_avg:574.07ms +grad accum step:6852/14336 +step:27408/57344 train_time:15734754ms step_avg:574.09ms +step:27409/57344 train_time:15734770ms step_avg:574.07ms +step:27410/57344 train_time:15735017ms step_avg:574.06ms +step:27411/57344 train_time:15735561ms step_avg:574.06ms +grad accum step:6853/14336 +step:27412/57344 train_time:15736902ms step_avg:574.09ms +step:27413/57344 train_time:15736919ms step_avg:574.07ms +step:27414/57344 train_time:15737170ms step_avg:574.06ms +step:27415/57344 train_time:15737734ms step_avg:574.06ms +grad accum step:6854/14336 +step:27416/57344 train_time:15739029ms step_avg:574.08ms +step:27417/57344 train_time:15739046ms step_avg:574.06ms +step:27418/57344 train_time:15739292ms step_avg:574.05ms +step:27419/57344 train_time:15739842ms step_avg:574.05ms +grad accum step:6855/14336 +step:27420/57344 train_time:15741121ms step_avg:574.07ms +step:27421/57344 train_time:15741138ms step_avg:574.05ms +step:27422/57344 train_time:15741383ms step_avg:574.04ms +step:27423/57344 train_time:15741932ms step_avg:574.04ms +grad accum step:6856/14336 +step:27424/57344 train_time:15743230ms step_avg:574.07ms +step:27425/57344 train_time:15743247ms step_avg:574.05ms +step:27426/57344 train_time:15743495ms step_avg:574.04ms +step:27427/57344 train_time:15744042ms step_avg:574.03ms +grad accum step:6857/14336 +step:27428/57344 train_time:15745339ms step_avg:574.06ms +step:27429/57344 train_time:15745356ms step_avg:574.04ms +step:27430/57344 train_time:15745604ms step_avg:574.03ms +step:27431/57344 train_time:15746151ms step_avg:574.03ms +grad accum step:6858/14336 +step:27432/57344 train_time:15747444ms step_avg:574.05ms +step:27433/57344 train_time:15747461ms step_avg:574.03ms +step:27434/57344 train_time:15747712ms step_avg:574.02ms +step:27435/57344 train_time:15748268ms step_avg:574.02ms +grad accum step:6859/14336 +step:27436/57344 train_time:15749554ms step_avg:574.05ms +step:27437/57344 train_time:15749571ms step_avg:574.03ms +step:27438/57344 train_time:15749822ms step_avg:574.01ms +step:27439/57344 train_time:15750385ms step_avg:574.01ms +grad accum step:6860/14336 +step:27440/57344 train_time:15751697ms step_avg:574.04ms +step:27441/57344 train_time:15751714ms step_avg:574.02ms +step:27442/57344 train_time:15751961ms step_avg:574.01ms +step:27443/57344 train_time:15752509ms step_avg:574.01ms +grad accum step:6861/14336 +step:27444/57344 train_time:15753842ms step_avg:574.04ms +step:27445/57344 train_time:15753858ms step_avg:574.02ms +step:27446/57344 train_time:15754110ms step_avg:574.00ms +step:27447/57344 train_time:15754671ms step_avg:574.00ms +grad accum step:6862/14336 +step:27448/57344 train_time:15755965ms step_avg:574.03ms +step:27449/57344 train_time:15755982ms step_avg:574.01ms +step:27450/57344 train_time:15756232ms step_avg:574.00ms +step:27451/57344 train_time:15756794ms step_avg:574.00ms +grad accum step:6863/14336 +step:27452/57344 train_time:15765949ms step_avg:574.31ms +step:27453/57344 train_time:15765961ms step_avg:574.29ms +step:27454/57344 train_time:15766230ms step_avg:574.28ms +step:27455/57344 train_time:15766774ms step_avg:574.28ms +grad accum step:6864/14336 +step:27456/57344 train_time:15768088ms step_avg:574.30ms +step:27456/57344 val_loss:6.270062 train_time:15768088ms step_avg:574.30ms +step:27457/57344 train_time:15768100ms step_avg:574.28ms +step:27458/57344 train_time:15768322ms step_avg:574.27ms +step:27459/57344 train_time:15768868ms step_avg:574.27ms +grad accum step:6865/14336 +step:27460/57344 train_time:15770154ms step_avg:574.30ms +step:27461/57344 train_time:15770171ms step_avg:574.28ms +step:27462/57344 train_time:15770417ms step_avg:574.26ms +step:27463/57344 train_time:15770962ms step_avg:574.26ms +grad accum step:6866/14336 +step:27464/57344 train_time:15772298ms step_avg:574.29ms +step:27465/57344 train_time:15772316ms step_avg:574.27ms +step:27466/57344 train_time:15772563ms step_avg:574.26ms +step:27467/57344 train_time:15773110ms step_avg:574.26ms +grad accum step:6867/14336 +step:27468/57344 train_time:15774435ms step_avg:574.28ms +step:27469/57344 train_time:15774451ms step_avg:574.26ms +step:27470/57344 train_time:15774697ms step_avg:574.25ms +step:27471/57344 train_time:15775238ms step_avg:574.25ms +grad accum step:6868/14336 +step:27472/57344 train_time:15776519ms step_avg:574.28ms +step:27473/57344 train_time:15776536ms step_avg:574.26ms +step:27474/57344 train_time:15776786ms step_avg:574.24ms +step:27475/57344 train_time:15777340ms step_avg:574.24ms +grad accum step:6869/14336 +step:27476/57344 train_time:15778625ms step_avg:574.27ms +step:27477/57344 train_time:15778642ms step_avg:574.25ms +step:27478/57344 train_time:15778895ms step_avg:574.24ms +step:27479/57344 train_time:15779462ms step_avg:574.24ms +grad accum step:6870/14336 +step:27480/57344 train_time:15780768ms step_avg:574.26ms +step:27481/57344 train_time:15780784ms step_avg:574.24ms +step:27482/57344 train_time:15781028ms step_avg:574.23ms +step:27483/57344 train_time:15781579ms step_avg:574.23ms +grad accum step:6871/14336 +step:27484/57344 train_time:15782954ms step_avg:574.26ms +step:27485/57344 train_time:15782971ms step_avg:574.24ms +step:27486/57344 train_time:15783226ms step_avg:574.23ms +step:27487/57344 train_time:15783793ms step_avg:574.23ms +grad accum step:6872/14336 +step:27488/57344 train_time:15785115ms step_avg:574.25ms +step:27489/57344 train_time:15785133ms step_avg:574.23ms +step:27490/57344 train_time:15785398ms step_avg:574.22ms +step:27491/57344 train_time:15785999ms step_avg:574.22ms +grad accum step:6873/14336 +step:27492/57344 train_time:15787323ms step_avg:574.25ms +step:27493/57344 train_time:15787340ms step_avg:574.23ms +step:27494/57344 train_time:15787583ms step_avg:574.22ms +step:27495/57344 train_time:15788128ms step_avg:574.22ms +grad accum step:6874/14336 +step:27496/57344 train_time:15789422ms step_avg:574.24ms +step:27497/57344 train_time:15789439ms step_avg:574.22ms +step:27498/57344 train_time:15789681ms step_avg:574.21ms +step:27499/57344 train_time:15790219ms step_avg:574.21ms +grad accum step:6875/14336 +step:27500/57344 train_time:15791511ms step_avg:574.24ms +step:27501/57344 train_time:15791528ms step_avg:574.22ms +step:27502/57344 train_time:15791772ms step_avg:574.20ms +step:27503/57344 train_time:15792309ms step_avg:574.20ms +grad accum step:6876/14336 +step:27504/57344 train_time:15793611ms step_avg:574.23ms +step:27505/57344 train_time:15793628ms step_avg:574.21ms +step:27506/57344 train_time:15793873ms step_avg:574.20ms +step:27507/57344 train_time:15794418ms step_avg:574.20ms +grad accum step:6877/14336 +step:27508/57344 train_time:15795711ms step_avg:574.22ms +step:27509/57344 train_time:15795728ms step_avg:574.20ms +step:27510/57344 train_time:15795972ms step_avg:574.19ms +step:27511/57344 train_time:15796516ms step_avg:574.19ms +grad accum step:6878/14336 +step:27512/57344 train_time:15797793ms step_avg:574.21ms +step:27513/57344 train_time:15797810ms step_avg:574.19ms +step:27514/57344 train_time:15798056ms step_avg:574.18ms +step:27515/57344 train_time:15798598ms step_avg:574.18ms +grad accum step:6879/14336 +step:27516/57344 train_time:15799921ms step_avg:574.21ms +step:27517/57344 train_time:15799938ms step_avg:574.19ms +step:27518/57344 train_time:15800191ms step_avg:574.18ms +step:27519/57344 train_time:15800751ms step_avg:574.18ms +grad accum step:6880/14336 +step:27520/57344 train_time:15802040ms step_avg:574.20ms +step:27520/57344 val_loss:6.253415 train_time:15802041ms step_avg:574.20ms +step:27521/57344 train_time:15802053ms step_avg:574.18ms +step:27522/57344 train_time:15802280ms step_avg:574.17ms +step:27523/57344 train_time:15802832ms step_avg:574.17ms +grad accum step:6881/14336 +step:27524/57344 train_time:15804120ms step_avg:574.19ms +step:27525/57344 train_time:15804137ms step_avg:574.17ms +step:27526/57344 train_time:15804388ms step_avg:574.16ms +step:27527/57344 train_time:15804953ms step_avg:574.16ms +grad accum step:6882/14336 +step:27528/57344 train_time:15806285ms step_avg:574.19ms +step:27529/57344 train_time:15806302ms step_avg:574.17ms +step:27530/57344 train_time:15806548ms step_avg:574.16ms +step:27531/57344 train_time:15807096ms step_avg:574.16ms +grad accum step:6883/14336 +step:27532/57344 train_time:15808410ms step_avg:574.18ms +step:27533/57344 train_time:15808427ms step_avg:574.16ms +step:27534/57344 train_time:15808677ms step_avg:574.15ms +step:27535/57344 train_time:15809226ms step_avg:574.15ms +grad accum step:6884/14336 +step:27536/57344 train_time:15810515ms step_avg:574.18ms +step:27537/57344 train_time:15810532ms step_avg:574.16ms +step:27538/57344 train_time:15810780ms step_avg:574.14ms +step:27539/57344 train_time:15811319ms step_avg:574.14ms +grad accum step:6885/14336 +step:27540/57344 train_time:15812600ms step_avg:574.17ms +step:27541/57344 train_time:15812616ms step_avg:574.15ms +step:27542/57344 train_time:15812863ms step_avg:574.14ms +step:27543/57344 train_time:15813423ms step_avg:574.14ms +grad accum step:6886/14336 +step:27544/57344 train_time:15814795ms step_avg:574.16ms +step:27545/57344 train_time:15814812ms step_avg:574.14ms +step:27546/57344 train_time:15815057ms step_avg:574.13ms +step:27547/57344 train_time:15815603ms step_avg:574.13ms +grad accum step:6887/14336 +step:27548/57344 train_time:15816885ms step_avg:574.16ms +step:27549/57344 train_time:15816902ms step_avg:574.14ms +step:27550/57344 train_time:15817154ms step_avg:574.13ms +step:27551/57344 train_time:15817715ms step_avg:574.12ms +grad accum step:6888/14336 +step:27552/57344 train_time:15819008ms step_avg:574.15ms +step:27553/57344 train_time:15819025ms step_avg:574.13ms +step:27554/57344 train_time:15819271ms step_avg:574.12ms +step:27555/57344 train_time:15819817ms step_avg:574.12ms +grad accum step:6889/14336 +step:27556/57344 train_time:15821131ms step_avg:574.14ms +step:27557/57344 train_time:15821147ms step_avg:574.12ms +step:27558/57344 train_time:15821398ms step_avg:574.11ms +step:27559/57344 train_time:15821951ms step_avg:574.11ms +grad accum step:6890/14336 +step:27560/57344 train_time:15823261ms step_avg:574.14ms +step:27561/57344 train_time:15823278ms step_avg:574.12ms +step:27562/57344 train_time:15823525ms step_avg:574.11ms +step:27563/57344 train_time:15824070ms step_avg:574.11ms +grad accum step:6891/14336 +step:27564/57344 train_time:15825358ms step_avg:574.13ms +step:27565/57344 train_time:15825375ms step_avg:574.11ms +step:27566/57344 train_time:15825619ms step_avg:574.10ms +step:27567/57344 train_time:15826162ms step_avg:574.10ms +grad accum step:6892/14336 +step:27568/57344 train_time:15827457ms step_avg:574.12ms +step:27569/57344 train_time:15827474ms step_avg:574.10ms +step:27570/57344 train_time:15827726ms step_avg:574.09ms +step:27571/57344 train_time:15828288ms step_avg:574.09ms +grad accum step:6893/14336 +step:27572/57344 train_time:15829633ms step_avg:574.12ms +step:27573/57344 train_time:15829650ms step_avg:574.10ms +step:27574/57344 train_time:15829893ms step_avg:574.09ms +step:27575/57344 train_time:15830435ms step_avg:574.09ms +grad accum step:6894/14336 +step:27576/57344 train_time:15831723ms step_avg:574.11ms +step:27577/57344 train_time:15831740ms step_avg:574.09ms +step:27578/57344 train_time:15831988ms step_avg:574.08ms +step:27579/57344 train_time:15832547ms step_avg:574.08ms +grad accum step:6895/14336 +step:27580/57344 train_time:15833855ms step_avg:574.11ms +step:27581/57344 train_time:15833872ms step_avg:574.09ms +step:27582/57344 train_time:15834124ms step_avg:574.07ms +step:27583/57344 train_time:15834679ms step_avg:574.07ms +grad accum step:6896/14336 +step:27584/57344 train_time:15835981ms step_avg:574.10ms +step:27584/57344 val_loss:6.233905 train_time:15835982ms step_avg:574.10ms +step:27585/57344 train_time:15835994ms step_avg:574.08ms +step:27586/57344 train_time:15836219ms step_avg:574.07ms +step:27587/57344 train_time:15836763ms step_avg:574.07ms +grad accum step:6897/14336 +step:27588/57344 train_time:15838049ms step_avg:574.09ms +step:27589/57344 train_time:15838066ms step_avg:574.07ms +step:27590/57344 train_time:15838313ms step_avg:574.06ms +step:27591/57344 train_time:15838861ms step_avg:574.06ms +grad accum step:6898/14336 +step:27592/57344 train_time:15840137ms step_avg:574.08ms +step:27593/57344 train_time:15840154ms step_avg:574.06ms +step:27594/57344 train_time:15840402ms step_avg:574.05ms +step:27595/57344 train_time:15840951ms step_avg:574.05ms +grad accum step:6899/14336 +step:27596/57344 train_time:15842263ms step_avg:574.08ms +step:27597/57344 train_time:15842280ms step_avg:574.06ms +step:27598/57344 train_time:15842527ms step_avg:574.05ms +step:27599/57344 train_time:15843074ms step_avg:574.05ms +grad accum step:6900/14336 +step:27600/57344 train_time:15844378ms step_avg:574.07ms +step:27601/57344 train_time:15844395ms step_avg:574.05ms +step:27602/57344 train_time:15844647ms step_avg:574.04ms +step:27603/57344 train_time:15845196ms step_avg:574.04ms +grad accum step:6901/14336 +step:27604/57344 train_time:15846489ms step_avg:574.06ms +step:27605/57344 train_time:15846506ms step_avg:574.04ms +step:27606/57344 train_time:15846755ms step_avg:574.03ms +step:27607/57344 train_time:15847324ms step_avg:574.03ms +grad accum step:6902/14336 +step:27608/57344 train_time:15848630ms step_avg:574.06ms +step:27609/57344 train_time:15848647ms step_avg:574.04ms +step:27610/57344 train_time:15848891ms step_avg:574.03ms +step:27611/57344 train_time:15849435ms step_avg:574.03ms +grad accum step:6903/14336 +step:27612/57344 train_time:15850761ms step_avg:574.05ms +step:27613/57344 train_time:15850777ms step_avg:574.03ms +step:27614/57344 train_time:15851037ms step_avg:574.02ms +step:27615/57344 train_time:15851612ms step_avg:574.02ms +grad accum step:6904/14336 +step:27616/57344 train_time:15852937ms step_avg:574.05ms +step:27617/57344 train_time:15852954ms step_avg:574.03ms +step:27618/57344 train_time:15853206ms step_avg:574.02ms +step:27619/57344 train_time:15853764ms step_avg:574.02ms +grad accum step:6905/14336 +step:27620/57344 train_time:15855064ms step_avg:574.04ms +step:27621/57344 train_time:15855081ms step_avg:574.02ms +step:27622/57344 train_time:15855333ms step_avg:574.01ms +step:27623/57344 train_time:15855894ms step_avg:574.01ms +grad accum step:6906/14336 +step:27624/57344 train_time:15857209ms step_avg:574.04ms +step:27625/57344 train_time:15857226ms step_avg:574.02ms +step:27626/57344 train_time:15857477ms step_avg:574.01ms +step:27627/57344 train_time:15858030ms step_avg:574.00ms +grad accum step:6907/14336 +step:27628/57344 train_time:15859340ms step_avg:574.03ms +step:27629/57344 train_time:15859357ms step_avg:574.01ms +step:27630/57344 train_time:15859604ms step_avg:574.00ms +step:27631/57344 train_time:15860153ms step_avg:574.00ms +grad accum step:6908/14336 +step:27632/57344 train_time:15861483ms step_avg:574.03ms +step:27633/57344 train_time:15861499ms step_avg:574.01ms +step:27634/57344 train_time:15861745ms step_avg:573.99ms +step:27635/57344 train_time:15862289ms step_avg:573.99ms +grad accum step:6909/14336 +step:27636/57344 train_time:15863596ms step_avg:574.02ms +step:27637/57344 train_time:15863613ms step_avg:574.00ms +step:27638/57344 train_time:15863861ms step_avg:573.99ms +step:27639/57344 train_time:15864407ms step_avg:573.99ms +grad accum step:6910/14336 +step:27640/57344 train_time:15865689ms step_avg:574.01ms +step:27641/57344 train_time:15865705ms step_avg:573.99ms +step:27642/57344 train_time:15865950ms step_avg:573.98ms +step:27643/57344 train_time:15866499ms step_avg:573.98ms +grad accum step:6911/14336 +step:27644/57344 train_time:15867791ms step_avg:574.00ms +step:27645/57344 train_time:15867808ms step_avg:573.98ms +step:27646/57344 train_time:15868060ms step_avg:573.97ms +step:27647/57344 train_time:15868617ms step_avg:573.97ms +grad accum step:6912/14336 +step:27648/57344 train_time:15869896ms step_avg:574.00ms +step:27648/57344 val_loss:6.221201 train_time:15869896ms step_avg:574.00ms +step:27649/57344 train_time:15869909ms step_avg:573.98ms +step:27650/57344 train_time:15870127ms step_avg:573.96ms +step:27651/57344 train_time:15870664ms step_avg:573.96ms +grad accum step:6913/14336 +step:27652/57344 train_time:15871960ms step_avg:573.99ms +step:27653/57344 train_time:15871976ms step_avg:573.97ms +step:27654/57344 train_time:15872225ms step_avg:573.96ms +step:27655/57344 train_time:15872774ms step_avg:573.96ms +grad accum step:6914/14336 +step:27656/57344 train_time:15874066ms step_avg:573.98ms +step:27657/57344 train_time:15874083ms step_avg:573.96ms +step:27658/57344 train_time:15874328ms step_avg:573.95ms +step:27659/57344 train_time:15874871ms step_avg:573.95ms +grad accum step:6915/14336 +step:27660/57344 train_time:15876170ms step_avg:573.98ms +step:27661/57344 train_time:15876187ms step_avg:573.96ms +step:27662/57344 train_time:15876434ms step_avg:573.94ms +step:27663/57344 train_time:15876983ms step_avg:573.94ms +grad accum step:6916/14336 +step:27664/57344 train_time:15878290ms step_avg:573.97ms +step:27665/57344 train_time:15878307ms step_avg:573.95ms +step:27666/57344 train_time:15878557ms step_avg:573.94ms +step:27667/57344 train_time:15879111ms step_avg:573.94ms +grad accum step:6917/14336 +step:27668/57344 train_time:15880432ms step_avg:573.96ms +step:27669/57344 train_time:15880449ms step_avg:573.94ms +step:27670/57344 train_time:15880694ms step_avg:573.93ms +step:27671/57344 train_time:15881238ms step_avg:573.93ms +grad accum step:6918/14336 +step:27672/57344 train_time:15882546ms step_avg:573.96ms +step:27673/57344 train_time:15882563ms step_avg:573.94ms +step:27674/57344 train_time:15882819ms step_avg:573.93ms +step:27675/57344 train_time:15883393ms step_avg:573.93ms +grad accum step:6919/14336 +step:27676/57344 train_time:15884680ms step_avg:573.95ms +step:27677/57344 train_time:15884697ms step_avg:573.93ms +step:27678/57344 train_time:15884943ms step_avg:573.92ms +step:27679/57344 train_time:15885491ms step_avg:573.92ms +grad accum step:6920/14336 +step:27680/57344 train_time:15886797ms step_avg:573.94ms +step:27681/57344 train_time:15886814ms step_avg:573.92ms +step:27682/57344 train_time:15887059ms step_avg:573.91ms +step:27683/57344 train_time:15887597ms step_avg:573.91ms +grad accum step:6921/14336 +step:27684/57344 train_time:15888911ms step_avg:573.94ms +step:27685/57344 train_time:15888928ms step_avg:573.92ms +step:27686/57344 train_time:15889171ms step_avg:573.91ms +step:27687/57344 train_time:15889715ms step_avg:573.91ms +grad accum step:6922/14336 +step:27688/57344 train_time:15891020ms step_avg:573.93ms +step:27689/57344 train_time:15891037ms step_avg:573.91ms +step:27690/57344 train_time:15891285ms step_avg:573.90ms +step:27691/57344 train_time:15891842ms step_avg:573.90ms +grad accum step:6923/14336 +step:27692/57344 train_time:15893162ms step_avg:573.93ms +step:27693/57344 train_time:15893178ms step_avg:573.91ms +step:27694/57344 train_time:15893425ms step_avg:573.89ms +step:27695/57344 train_time:15893967ms step_avg:573.89ms +grad accum step:6924/14336 +step:27696/57344 train_time:15895246ms step_avg:573.92ms +step:27697/57344 train_time:15895262ms step_avg:573.90ms +step:27698/57344 train_time:15895513ms step_avg:573.89ms +step:27699/57344 train_time:15896071ms step_avg:573.89ms +grad accum step:6925/14336 +step:27700/57344 train_time:15897389ms step_avg:573.91ms +step:27701/57344 train_time:15897406ms step_avg:573.89ms +step:27702/57344 train_time:15897656ms step_avg:573.88ms +step:27703/57344 train_time:15898216ms step_avg:573.88ms +grad accum step:6926/14336 +step:27704/57344 train_time:15899506ms step_avg:573.91ms +step:27705/57344 train_time:15899523ms step_avg:573.89ms +step:27706/57344 train_time:15899771ms step_avg:573.87ms +step:27707/57344 train_time:15900319ms step_avg:573.87ms +grad accum step:6927/14336 +step:27708/57344 train_time:15901639ms step_avg:573.90ms +step:27709/57344 train_time:15901656ms step_avg:573.88ms +step:27710/57344 train_time:15901904ms step_avg:573.87ms +step:27711/57344 train_time:15902449ms step_avg:573.87ms +grad accum step:6928/14336 +step:27712/57344 train_time:15903749ms step_avg:573.89ms +step:27712/57344 val_loss:6.225038 train_time:15903749ms step_avg:573.89ms +step:27713/57344 train_time:15903761ms step_avg:573.87ms +step:27714/57344 train_time:15903982ms step_avg:573.86ms +step:27715/57344 train_time:15904524ms step_avg:573.86ms +grad accum step:6929/14336 +step:27716/57344 train_time:15905826ms step_avg:573.89ms +step:27717/57344 train_time:15905842ms step_avg:573.87ms +step:27718/57344 train_time:15906091ms step_avg:573.85ms +step:27719/57344 train_time:15906635ms step_avg:573.85ms +grad accum step:6930/14336 +step:27720/57344 train_time:15907945ms step_avg:573.88ms +step:27721/57344 train_time:15907962ms step_avg:573.86ms +step:27722/57344 train_time:15908212ms step_avg:573.85ms +step:27723/57344 train_time:15908760ms step_avg:573.85ms +grad accum step:6931/14336 +step:27724/57344 train_time:15910053ms step_avg:573.87ms +step:27725/57344 train_time:15910069ms step_avg:573.85ms +step:27726/57344 train_time:15910314ms step_avg:573.84ms +step:27727/57344 train_time:15910862ms step_avg:573.84ms +grad accum step:6932/14336 +step:27728/57344 train_time:15912176ms step_avg:573.87ms +step:27729/57344 train_time:15912193ms step_avg:573.85ms +step:27730/57344 train_time:15912438ms step_avg:573.83ms +step:27731/57344 train_time:15912984ms step_avg:573.83ms +grad accum step:6933/14336 +step:27732/57344 train_time:15914258ms step_avg:573.86ms +step:27733/57344 train_time:15914276ms step_avg:573.84ms +step:27734/57344 train_time:15914520ms step_avg:573.83ms +step:27735/57344 train_time:15915068ms step_avg:573.83ms +grad accum step:6934/14336 +step:27736/57344 train_time:15916402ms step_avg:573.85ms +step:27737/57344 train_time:15916419ms step_avg:573.83ms +step:27738/57344 train_time:15916665ms step_avg:573.82ms +step:27739/57344 train_time:15917214ms step_avg:573.82ms +grad accum step:6935/14336 +step:27740/57344 train_time:15918528ms step_avg:573.85ms +step:27741/57344 train_time:15918545ms step_avg:573.83ms +step:27742/57344 train_time:15918790ms step_avg:573.82ms +step:27743/57344 train_time:15919343ms step_avg:573.81ms +grad accum step:6936/14336 +step:27744/57344 train_time:15920629ms step_avg:573.84ms +step:27745/57344 train_time:15920646ms step_avg:573.82ms +step:27746/57344 train_time:15920895ms step_avg:573.81ms +step:27747/57344 train_time:15921444ms step_avg:573.81ms +grad accum step:6937/14336 +step:27748/57344 train_time:15922722ms step_avg:573.83ms +step:27749/57344 train_time:15922739ms step_avg:573.81ms +step:27750/57344 train_time:15922989ms step_avg:573.80ms +step:27751/57344 train_time:15923548ms step_avg:573.80ms +grad accum step:6938/14336 +step:27752/57344 train_time:15924860ms step_avg:573.83ms +step:27753/57344 train_time:15924877ms step_avg:573.81ms +step:27754/57344 train_time:15925122ms step_avg:573.80ms +step:27755/57344 train_time:15925666ms step_avg:573.79ms +grad accum step:6939/14336 +step:27756/57344 train_time:15926946ms step_avg:573.82ms +step:27757/57344 train_time:15926963ms step_avg:573.80ms +step:27758/57344 train_time:15927210ms step_avg:573.79ms +step:27759/57344 train_time:15927767ms step_avg:573.79ms +grad accum step:6940/14336 +step:27760/57344 train_time:15929097ms step_avg:573.81ms +step:27761/57344 train_time:15929113ms step_avg:573.79ms +step:27762/57344 train_time:15929367ms step_avg:573.78ms +step:27763/57344 train_time:15929932ms step_avg:573.78ms +grad accum step:6941/14336 +step:27764/57344 train_time:15931255ms step_avg:573.81ms +step:27765/57344 train_time:15931271ms step_avg:573.79ms +step:27766/57344 train_time:15931515ms step_avg:573.78ms +step:27767/57344 train_time:15932051ms step_avg:573.78ms +grad accum step:6942/14336 +step:27768/57344 train_time:15933321ms step_avg:573.80ms +step:27769/57344 train_time:15933338ms step_avg:573.78ms +step:27770/57344 train_time:15933584ms step_avg:573.77ms +step:27771/57344 train_time:15934132ms step_avg:573.77ms +grad accum step:6943/14336 +step:27772/57344 train_time:15935435ms step_avg:573.80ms +step:27773/57344 train_time:15935452ms step_avg:573.77ms +step:27774/57344 train_time:15935705ms step_avg:573.76ms +step:27775/57344 train_time:15936264ms step_avg:573.76ms +grad accum step:6944/14336 +step:27776/57344 train_time:15937581ms step_avg:573.79ms +step:27776/57344 val_loss:6.246658 train_time:15937582ms step_avg:573.79ms +step:27777/57344 train_time:15937594ms step_avg:573.77ms +step:27778/57344 train_time:15937832ms step_avg:573.76ms +step:27779/57344 train_time:15938425ms step_avg:573.76ms +grad accum step:6945/14336 +step:27780/57344 train_time:15939791ms step_avg:573.79ms +step:27781/57344 train_time:15939807ms step_avg:573.77ms +step:27782/57344 train_time:15940053ms step_avg:573.75ms +step:27783/57344 train_time:15940598ms step_avg:573.75ms +grad accum step:6946/14336 +step:27784/57344 train_time:15941912ms step_avg:573.78ms +step:27785/57344 train_time:15941929ms step_avg:573.76ms +step:27786/57344 train_time:15942178ms step_avg:573.75ms +step:27787/57344 train_time:15942728ms step_avg:573.75ms +grad accum step:6947/14336 +step:27788/57344 train_time:15944041ms step_avg:573.77ms +step:27789/57344 train_time:15944058ms step_avg:573.75ms +step:27790/57344 train_time:15944301ms step_avg:573.74ms +step:27791/57344 train_time:15944843ms step_avg:573.74ms +grad accum step:6948/14336 +step:27792/57344 train_time:15946120ms step_avg:573.77ms +step:27793/57344 train_time:15946136ms step_avg:573.75ms +step:27794/57344 train_time:15946386ms step_avg:573.73ms +step:27795/57344 train_time:15946938ms step_avg:573.73ms +grad accum step:6949/14336 +step:27796/57344 train_time:15948244ms step_avg:573.76ms +step:27797/57344 train_time:15948261ms step_avg:573.74ms +step:27798/57344 train_time:15948516ms step_avg:573.73ms +step:27799/57344 train_time:15949082ms step_avg:573.73ms +grad accum step:6950/14336 +step:27800/57344 train_time:15950429ms step_avg:573.76ms +step:27801/57344 train_time:15950445ms step_avg:573.74ms +step:27802/57344 train_time:15950690ms step_avg:573.72ms +step:27803/57344 train_time:15951239ms step_avg:573.72ms +grad accum step:6951/14336 +step:27804/57344 train_time:15952533ms step_avg:573.75ms +step:27805/57344 train_time:15952549ms step_avg:573.73ms +step:27806/57344 train_time:15952792ms step_avg:573.72ms +step:27807/57344 train_time:15953341ms step_avg:573.72ms +grad accum step:6952/14336 +step:27808/57344 train_time:15954653ms step_avg:573.74ms +step:27809/57344 train_time:15954671ms step_avg:573.72ms +step:27810/57344 train_time:15954925ms step_avg:573.71ms +step:27811/57344 train_time:15955489ms step_avg:573.71ms +grad accum step:6953/14336 +step:27812/57344 train_time:15956826ms step_avg:573.74ms +step:27813/57344 train_time:15956843ms step_avg:573.72ms +step:27814/57344 train_time:15957091ms step_avg:573.71ms +step:27815/57344 train_time:15957643ms step_avg:573.71ms +grad accum step:6954/14336 +step:27816/57344 train_time:15958970ms step_avg:573.73ms +step:27817/57344 train_time:15958987ms step_avg:573.71ms +step:27818/57344 train_time:15959234ms step_avg:573.70ms +step:27819/57344 train_time:15959777ms step_avg:573.70ms +grad accum step:6955/14336 +step:27820/57344 train_time:15961076ms step_avg:573.73ms +step:27821/57344 train_time:15961092ms step_avg:573.71ms +step:27822/57344 train_time:15961338ms step_avg:573.69ms +step:27823/57344 train_time:15961876ms step_avg:573.69ms +grad accum step:6956/14336 +step:27824/57344 train_time:15963182ms step_avg:573.72ms +step:27825/57344 train_time:15963199ms step_avg:573.70ms +step:27826/57344 train_time:15963445ms step_avg:573.69ms +step:27827/57344 train_time:15963989ms step_avg:573.69ms +grad accum step:6957/14336 +step:27828/57344 train_time:15965284ms step_avg:573.71ms +step:27829/57344 train_time:15965301ms step_avg:573.69ms +step:27830/57344 train_time:15965546ms step_avg:573.68ms +step:27831/57344 train_time:15966091ms step_avg:573.68ms +grad accum step:6958/14336 +step:27832/57344 train_time:15967390ms step_avg:573.71ms +step:27833/57344 train_time:15967407ms step_avg:573.69ms +step:27834/57344 train_time:15967654ms step_avg:573.67ms +step:27835/57344 train_time:15968200ms step_avg:573.67ms +grad accum step:6959/14336 +step:27836/57344 train_time:15969474ms step_avg:573.70ms +step:27837/57344 train_time:15969491ms step_avg:573.68ms +step:27838/57344 train_time:15969739ms step_avg:573.67ms +step:27839/57344 train_time:15970286ms step_avg:573.67ms +grad accum step:6960/14336 +step:27840/57344 train_time:15971601ms step_avg:573.69ms +step:27840/57344 val_loss:6.280988 train_time:15971602ms step_avg:573.69ms +step:27841/57344 train_time:15971614ms step_avg:573.67ms +step:27842/57344 train_time:15971838ms step_avg:573.66ms +step:27843/57344 train_time:15972375ms step_avg:573.66ms +grad accum step:6961/14336 +step:27844/57344 train_time:15973648ms step_avg:573.68ms +step:27845/57344 train_time:15973665ms step_avg:573.66ms +step:27846/57344 train_time:15973911ms step_avg:573.65ms +step:27847/57344 train_time:15974461ms step_avg:573.65ms +grad accum step:6962/14336 +step:27848/57344 train_time:15975771ms step_avg:573.68ms +step:27849/57344 train_time:15975788ms step_avg:573.66ms +step:27850/57344 train_time:15976035ms step_avg:573.65ms +step:27851/57344 train_time:15976581ms step_avg:573.64ms +grad accum step:6963/14336 +step:27852/57344 train_time:15977878ms step_avg:573.67ms +step:27853/57344 train_time:15977895ms step_avg:573.65ms +step:27854/57344 train_time:15978140ms step_avg:573.64ms +step:27855/57344 train_time:15978686ms step_avg:573.64ms +grad accum step:6964/14336 +step:27856/57344 train_time:15980017ms step_avg:573.67ms +step:27857/57344 train_time:15980034ms step_avg:573.65ms +step:27858/57344 train_time:15980280ms step_avg:573.63ms +step:27859/57344 train_time:15980821ms step_avg:573.63ms +grad accum step:6965/14336 +step:27860/57344 train_time:15982103ms step_avg:573.66ms +step:27861/57344 train_time:15982120ms step_avg:573.64ms +step:27862/57344 train_time:15982376ms step_avg:573.63ms +step:27863/57344 train_time:15982955ms step_avg:573.63ms +grad accum step:6966/14336 +step:27864/57344 train_time:15984276ms step_avg:573.65ms +step:27865/57344 train_time:15984293ms step_avg:573.63ms +step:27866/57344 train_time:15984541ms step_avg:573.62ms +step:27867/57344 train_time:15985089ms step_avg:573.62ms +grad accum step:6967/14336 +step:27868/57344 train_time:15986392ms step_avg:573.65ms +step:27869/57344 train_time:15986409ms step_avg:573.63ms +step:27870/57344 train_time:15986651ms step_avg:573.62ms +step:27871/57344 train_time:15987198ms step_avg:573.61ms +grad accum step:6968/14336 +step:27872/57344 train_time:15988495ms step_avg:573.64ms +step:27873/57344 train_time:15988512ms step_avg:573.62ms +step:27874/57344 train_time:15988754ms step_avg:573.61ms +step:27875/57344 train_time:15989297ms step_avg:573.61ms +grad accum step:6969/14336 +step:27876/57344 train_time:15990577ms step_avg:573.63ms +step:27877/57344 train_time:15990594ms step_avg:573.61ms +step:27878/57344 train_time:15990837ms step_avg:573.60ms +step:27879/57344 train_time:15991379ms step_avg:573.60ms +grad accum step:6970/14336 +step:27880/57344 train_time:15992671ms step_avg:573.63ms +step:27881/57344 train_time:15992688ms step_avg:573.61ms +step:27882/57344 train_time:15992936ms step_avg:573.59ms +step:27883/57344 train_time:15993478ms step_avg:573.59ms +grad accum step:6971/14336 +step:27884/57344 train_time:15994792ms step_avg:573.62ms +step:27885/57344 train_time:15994809ms step_avg:573.60ms +step:27886/57344 train_time:15995058ms step_avg:573.59ms +step:27887/57344 train_time:15995614ms step_avg:573.59ms +grad accum step:6972/14336 +step:27888/57344 train_time:15996922ms step_avg:573.61ms +step:27889/57344 train_time:15996938ms step_avg:573.59ms +step:27890/57344 train_time:15997189ms step_avg:573.58ms +step:27891/57344 train_time:15997751ms step_avg:573.58ms +grad accum step:6973/14336 +step:27892/57344 train_time:15999057ms step_avg:573.61ms +step:27893/57344 train_time:15999073ms step_avg:573.59ms +step:27894/57344 train_time:15999323ms step_avg:573.58ms +step:27895/57344 train_time:15999873ms step_avg:573.57ms +grad accum step:6974/14336 +step:27896/57344 train_time:16001174ms step_avg:573.60ms +step:27897/57344 train_time:16001191ms step_avg:573.58ms +step:27898/57344 train_time:16001436ms step_avg:573.57ms +step:27899/57344 train_time:16001980ms step_avg:573.57ms +grad accum step:6975/14336 +step:27900/57344 train_time:16003279ms step_avg:573.59ms +step:27901/57344 train_time:16003527ms step_avg:573.58ms +step:27902/57344 train_time:16003741ms step_avg:573.57ms +step:27903/57344 train_time:16004287ms step_avg:573.57ms +grad accum step:6976/14336 +step:27904/57344 train_time:16005607ms step_avg:573.60ms +step:27904/57344 val_loss:6.312466 train_time:16005608ms step_avg:573.60ms +step:27905/57344 train_time:16005620ms step_avg:573.58ms +step:27906/57344 train_time:16005844ms step_avg:573.56ms +step:27907/57344 train_time:16006395ms step_avg:573.56ms +grad accum step:6977/14336 +step:27908/57344 train_time:16007728ms step_avg:573.59ms +step:27909/57344 train_time:16007745ms step_avg:573.57ms +step:27910/57344 train_time:16007988ms step_avg:573.56ms +step:27911/57344 train_time:16008522ms step_avg:573.56ms +grad accum step:6978/14336 +step:27912/57344 train_time:16009828ms step_avg:573.58ms +step:27913/57344 train_time:16009845ms step_avg:573.56ms +step:27914/57344 train_time:16010092ms step_avg:573.55ms +step:27915/57344 train_time:16010644ms step_avg:573.55ms +grad accum step:6979/14336 +step:27916/57344 train_time:16011976ms step_avg:573.58ms +step:27917/57344 train_time:16011993ms step_avg:573.56ms +step:27918/57344 train_time:16012244ms step_avg:573.55ms +step:27919/57344 train_time:16012803ms step_avg:573.55ms +grad accum step:6980/14336 +step:27920/57344 train_time:16014124ms step_avg:573.57ms +step:27921/57344 train_time:16014141ms step_avg:573.55ms +step:27922/57344 train_time:16014386ms step_avg:573.54ms +step:27923/57344 train_time:16014929ms step_avg:573.54ms +grad accum step:6981/14336 +step:27924/57344 train_time:16016233ms step_avg:573.57ms +step:27925/57344 train_time:16016250ms step_avg:573.55ms +step:27926/57344 train_time:16016495ms step_avg:573.53ms +step:27927/57344 train_time:16017036ms step_avg:573.53ms +grad accum step:6982/14336 +step:27928/57344 train_time:16018311ms step_avg:573.56ms +step:27929/57344 train_time:16018328ms step_avg:573.54ms +step:27930/57344 train_time:16018570ms step_avg:573.53ms +step:27931/57344 train_time:16019118ms step_avg:573.52ms +grad accum step:6983/14336 +step:27932/57344 train_time:16020453ms step_avg:573.55ms +step:27933/57344 train_time:16020469ms step_avg:573.53ms +step:27934/57344 train_time:16020715ms step_avg:573.52ms +step:27935/57344 train_time:16021263ms step_avg:573.52ms +grad accum step:6984/14336 +step:27936/57344 train_time:16022560ms step_avg:573.55ms +step:27937/57344 train_time:16022578ms step_avg:573.53ms +step:27938/57344 train_time:16022824ms step_avg:573.51ms +step:27939/57344 train_time:16023371ms step_avg:573.51ms +grad accum step:6985/14336 +step:27940/57344 train_time:16024667ms step_avg:573.54ms +step:27941/57344 train_time:16024684ms step_avg:573.52ms +step:27942/57344 train_time:16024928ms step_avg:573.51ms +step:27943/57344 train_time:16025477ms step_avg:573.51ms +grad accum step:6986/14336 +step:27944/57344 train_time:16026846ms step_avg:573.53ms +step:27945/57344 train_time:16026863ms step_avg:573.51ms +step:27946/57344 train_time:16027126ms step_avg:573.50ms +step:27947/57344 train_time:16027707ms step_avg:573.50ms +grad accum step:6987/14336 +step:27948/57344 train_time:16029000ms step_avg:573.53ms +step:27949/57344 train_time:16029017ms step_avg:573.51ms +step:27950/57344 train_time:16029267ms step_avg:573.50ms +step:27951/57344 train_time:16029820ms step_avg:573.50ms +grad accum step:6988/14336 +step:27952/57344 train_time:16031114ms step_avg:573.52ms +step:27953/57344 train_time:16031131ms step_avg:573.50ms +step:27954/57344 train_time:16031377ms step_avg:573.49ms +step:27955/57344 train_time:16031927ms step_avg:573.49ms +grad accum step:6989/14336 +step:27956/57344 train_time:16033235ms step_avg:573.52ms +step:27957/57344 train_time:16033252ms step_avg:573.50ms +step:27958/57344 train_time:16033498ms step_avg:573.49ms +step:27959/57344 train_time:16034049ms step_avg:573.48ms +grad accum step:6990/14336 +step:27960/57344 train_time:16035363ms step_avg:573.51ms +step:27961/57344 train_time:16035380ms step_avg:573.49ms +step:27962/57344 train_time:16035627ms step_avg:573.48ms +step:27963/57344 train_time:16036178ms step_avg:573.48ms +grad accum step:6991/14336 +step:27964/57344 train_time:16037475ms step_avg:573.50ms +step:27965/57344 train_time:16037492ms step_avg:573.48ms +step:27966/57344 train_time:16037738ms step_avg:573.47ms +step:27967/57344 train_time:16038284ms step_avg:573.47ms +grad accum step:6992/14336 +step:27968/57344 train_time:16039605ms step_avg:573.50ms +step:27968/57344 val_loss:6.351825 train_time:16039606ms step_avg:573.50ms +step:27969/57344 train_time:16039618ms step_avg:573.48ms +step:27970/57344 train_time:16039845ms step_avg:573.47ms +step:27971/57344 train_time:16040396ms step_avg:573.47ms +grad accum step:6993/14336 +step:27972/57344 train_time:16041666ms step_avg:573.49ms +step:27973/57344 train_time:16041683ms step_avg:573.47ms +step:27974/57344 train_time:16041928ms step_avg:573.46ms +step:27975/57344 train_time:16042477ms step_avg:573.46ms +grad accum step:6994/14336 +step:27976/57344 train_time:16043798ms step_avg:573.48ms +step:27977/57344 train_time:16043815ms step_avg:573.46ms +step:27978/57344 train_time:16044062ms step_avg:573.45ms +step:27979/57344 train_time:16044613ms step_avg:573.45ms +grad accum step:6995/14336 +step:27980/57344 train_time:16045937ms step_avg:573.48ms +step:27981/57344 train_time:16045954ms step_avg:573.46ms +step:27982/57344 train_time:16046198ms step_avg:573.45ms +step:27983/57344 train_time:16046745ms step_avg:573.45ms +grad accum step:6996/14336 +step:27984/57344 train_time:16048022ms step_avg:573.47ms +step:27985/57344 train_time:16048039ms step_avg:573.45ms +step:27986/57344 train_time:16048283ms step_avg:573.44ms +step:27987/57344 train_time:16048828ms step_avg:573.44ms +grad accum step:6997/14336 +step:27988/57344 train_time:16050195ms step_avg:573.47ms +step:27989/57344 train_time:16050211ms step_avg:573.45ms +step:27990/57344 train_time:16050459ms step_avg:573.44ms +step:27991/57344 train_time:16051011ms step_avg:573.43ms +grad accum step:6998/14336 +step:27992/57344 train_time:16052340ms step_avg:573.46ms +step:27993/57344 train_time:16052356ms step_avg:573.44ms +step:27994/57344 train_time:16052605ms step_avg:573.43ms +step:27995/57344 train_time:16053169ms step_avg:573.43ms +grad accum step:6999/14336 +step:27996/57344 train_time:16054526ms step_avg:573.46ms +step:27997/57344 train_time:16054543ms step_avg:573.44ms +step:27998/57344 train_time:16054789ms step_avg:573.43ms +step:27999/57344 train_time:16055336ms step_avg:573.43ms +grad accum step:7000/14336 +step:28000/57344 train_time:16056675ms step_avg:573.45ms +step:28001/57344 train_time:16056692ms step_avg:573.43ms +step:28002/57344 train_time:16056934ms step_avg:573.42ms +step:28003/57344 train_time:16057480ms step_avg:573.42ms +grad accum step:7001/14336 +step:28004/57344 train_time:16058781ms step_avg:573.45ms +step:28005/57344 train_time:16058797ms step_avg:573.43ms +step:28006/57344 train_time:16059042ms step_avg:573.41ms +step:28007/57344 train_time:16059588ms step_avg:573.41ms +grad accum step:7002/14336 +step:28008/57344 train_time:16060885ms step_avg:573.44ms +step:28009/57344 train_time:16060902ms step_avg:573.42ms +step:28010/57344 train_time:16061154ms step_avg:573.41ms +step:28011/57344 train_time:16061714ms step_avg:573.41ms +grad accum step:7003/14336 +step:28012/57344 train_time:16063020ms step_avg:573.43ms +step:28013/57344 train_time:16063037ms step_avg:573.41ms +step:28014/57344 train_time:16063286ms step_avg:573.40ms +step:28015/57344 train_time:16063841ms step_avg:573.40ms +grad accum step:7004/14336 +step:28016/57344 train_time:16065125ms step_avg:573.43ms +step:28017/57344 train_time:16065141ms step_avg:573.41ms +step:28018/57344 train_time:16065385ms step_avg:573.40ms +step:28019/57344 train_time:16065936ms step_avg:573.39ms +grad accum step:7005/14336 +step:28020/57344 train_time:16067256ms step_avg:573.42ms +step:28021/57344 train_time:16067273ms step_avg:573.40ms +step:28022/57344 train_time:16067518ms step_avg:573.39ms +step:28023/57344 train_time:16068062ms step_avg:573.39ms +grad accum step:7006/14336 +step:28024/57344 train_time:16069363ms step_avg:573.41ms +step:28025/57344 train_time:16069380ms step_avg:573.39ms +step:28026/57344 train_time:16069626ms step_avg:573.38ms +step:28027/57344 train_time:16070174ms step_avg:573.38ms +grad accum step:7007/14336 +step:28028/57344 train_time:16071451ms step_avg:573.41ms +step:28029/57344 train_time:16071468ms step_avg:573.39ms +step:28030/57344 train_time:16071718ms step_avg:573.38ms +step:28031/57344 train_time:16072268ms step_avg:573.37ms +grad accum step:7008/14336 +step:28032/57344 train_time:16093154ms step_avg:574.10ms +step:28032/57344 val_loss:6.371429 train_time:16093155ms step_avg:574.10ms +step:28033/57344 train_time:16093167ms step_avg:574.08ms +step:28034/57344 train_time:16095626ms step_avg:574.15ms +step:28035/57344 train_time:16095939ms step_avg:574.14ms +grad accum step:7009/14336 +step:28036/57344 train_time:16097265ms step_avg:574.16ms +step:28037/57344 train_time:16097282ms step_avg:574.14ms +step:28038/57344 train_time:16097530ms step_avg:574.13ms +step:28039/57344 train_time:16098075ms step_avg:574.13ms +grad accum step:7010/14336 +step:28040/57344 train_time:16099369ms step_avg:574.16ms +step:28041/57344 train_time:16099386ms step_avg:574.14ms +step:28042/57344 train_time:16099633ms step_avg:574.13ms +step:28043/57344 train_time:16100181ms step_avg:574.12ms +grad accum step:7011/14336 +step:28044/57344 train_time:16101481ms step_avg:574.15ms +step:28045/57344 train_time:16101498ms step_avg:574.13ms +step:28046/57344 train_time:16101746ms step_avg:574.12ms +step:28047/57344 train_time:16102303ms step_avg:574.12ms +grad accum step:7012/14336 +step:28048/57344 train_time:16103615ms step_avg:574.14ms +step:28049/57344 train_time:16103631ms step_avg:574.12ms +step:28050/57344 train_time:16103879ms step_avg:574.11ms +step:28051/57344 train_time:16104423ms step_avg:574.11ms +grad accum step:7013/14336 +step:28052/57344 train_time:16105720ms step_avg:574.14ms +step:28053/57344 train_time:16105737ms step_avg:574.12ms +step:28054/57344 train_time:16105982ms step_avg:574.11ms +step:28055/57344 train_time:16106530ms step_avg:574.11ms +grad accum step:7014/14336 +step:28056/57344 train_time:16115970ms step_avg:574.42ms +step:28057/57344 train_time:16115983ms step_avg:574.40ms +step:28058/57344 train_time:16116283ms step_avg:574.39ms +step:28059/57344 train_time:16116821ms step_avg:574.39ms +grad accum step:7015/14336 +step:28060/57344 train_time:16118093ms step_avg:574.42ms +step:28061/57344 train_time:16118109ms step_avg:574.40ms +step:28062/57344 train_time:16118357ms step_avg:574.38ms +step:28063/57344 train_time:16118910ms step_avg:574.38ms +grad accum step:7016/14336 +step:28064/57344 train_time:16120207ms step_avg:574.41ms +step:28065/57344 train_time:16120223ms step_avg:574.39ms +step:28066/57344 train_time:16120465ms step_avg:574.38ms +step:28067/57344 train_time:16121009ms step_avg:574.38ms +grad accum step:7017/14336 +step:28068/57344 train_time:16122310ms step_avg:574.40ms +step:28069/57344 train_time:16122327ms step_avg:574.38ms +step:28070/57344 train_time:16122571ms step_avg:574.37ms +step:28071/57344 train_time:16123114ms step_avg:574.37ms +grad accum step:7018/14336 +step:28072/57344 train_time:16124401ms step_avg:574.39ms +step:28073/57344 train_time:16124418ms step_avg:574.37ms +step:28074/57344 train_time:16124670ms step_avg:574.36ms +step:28075/57344 train_time:16125232ms step_avg:574.36ms +grad accum step:7019/14336 +step:28076/57344 train_time:16126514ms step_avg:574.39ms +step:28077/57344 train_time:16126531ms step_avg:574.37ms +step:28078/57344 train_time:16126776ms step_avg:574.36ms +step:28079/57344 train_time:16127323ms step_avg:574.36ms +grad accum step:7020/14336 +step:28080/57344 train_time:16128601ms step_avg:574.38ms +step:28081/57344 train_time:16128618ms step_avg:574.36ms +step:28082/57344 train_time:16128864ms step_avg:574.35ms +step:28083/57344 train_time:16129408ms step_avg:574.35ms +grad accum step:7021/14336 +step:28084/57344 train_time:16130797ms step_avg:574.38ms +step:28085/57344 train_time:16130814ms step_avg:574.36ms +step:28086/57344 train_time:16131064ms step_avg:574.35ms +step:28087/57344 train_time:16131620ms step_avg:574.34ms +grad accum step:7022/14336 +step:28088/57344 train_time:16132907ms step_avg:574.37ms +step:28089/57344 train_time:16139262ms step_avg:574.58ms +step:28090/57344 train_time:16139543ms step_avg:574.57ms +step:28091/57344 train_time:16140075ms step_avg:574.56ms +grad accum step:7023/14336 +step:28092/57344 train_time:16141391ms step_avg:574.59ms +step:28093/57344 train_time:16141408ms step_avg:574.57ms +step:28094/57344 train_time:16141658ms step_avg:574.56ms +step:28095/57344 train_time:16142207ms step_avg:574.56ms +grad accum step:7024/14336 +step:28096/57344 train_time:16155347ms step_avg:575.01ms +step:28096/57344 val_loss:6.407415 train_time:16158594ms step_avg:575.12ms +step:28097/57344 train_time:16158958ms step_avg:575.11ms +step:28098/57344 train_time:16159248ms step_avg:575.10ms +step:28099/57344 train_time:16159785ms step_avg:575.10ms +grad accum step:7025/14336 +step:28100/57344 train_time:16161078ms step_avg:575.13ms +step:28101/57344 train_time:16161090ms step_avg:575.11ms +step:28102/57344 train_time:16161329ms step_avg:575.10ms +step:28103/57344 train_time:16161882ms step_avg:575.09ms +grad accum step:7026/14336 +step:28104/57344 train_time:16163177ms step_avg:575.12ms +step:28105/57344 train_time:16163193ms step_avg:575.10ms +step:28106/57344 train_time:16163439ms step_avg:575.09ms +step:28107/57344 train_time:16163981ms step_avg:575.09ms +grad accum step:7027/14336 +step:28108/57344 train_time:16165261ms step_avg:575.11ms +step:28109/57344 train_time:16165278ms step_avg:575.09ms +step:28110/57344 train_time:16165520ms step_avg:575.08ms +step:28111/57344 train_time:16166071ms step_avg:575.08ms +grad accum step:7028/14336 +step:28112/57344 train_time:16167397ms step_avg:575.11ms +step:28113/57344 train_time:16167414ms step_avg:575.09ms +step:28114/57344 train_time:16167655ms step_avg:575.07ms +step:28115/57344 train_time:16168191ms step_avg:575.07ms +grad accum step:7029/14336 +step:28116/57344 train_time:16169479ms step_avg:575.10ms +step:28117/57344 train_time:16169495ms step_avg:575.08ms +step:28118/57344 train_time:16169742ms step_avg:575.07ms +step:28119/57344 train_time:16170282ms step_avg:575.07ms +grad accum step:7030/14336 +step:28120/57344 train_time:16171560ms step_avg:575.09ms +step:28121/57344 train_time:16171577ms step_avg:575.07ms +step:28122/57344 train_time:16171820ms step_avg:575.06ms +step:28123/57344 train_time:16172359ms step_avg:575.06ms +grad accum step:7031/14336 +step:28124/57344 train_time:16173645ms step_avg:575.08ms +step:28125/57344 train_time:16173661ms step_avg:575.06ms +step:28126/57344 train_time:16173907ms step_avg:575.05ms +step:28127/57344 train_time:16174455ms step_avg:575.05ms +grad accum step:7032/14336 +step:28128/57344 train_time:16175735ms step_avg:575.08ms +step:28129/57344 train_time:16175751ms step_avg:575.06ms +step:28130/57344 train_time:16175998ms step_avg:575.04ms +step:28131/57344 train_time:16176541ms step_avg:575.04ms +grad accum step:7033/14336 +step:28132/57344 train_time:16177843ms step_avg:575.07ms +step:28133/57344 train_time:16177860ms step_avg:575.05ms +step:28134/57344 train_time:16178102ms step_avg:575.04ms +step:28135/57344 train_time:16178651ms step_avg:575.04ms +grad accum step:7034/14336 +step:28136/57344 train_time:16179972ms step_avg:575.06ms +step:28137/57344 train_time:16179988ms step_avg:575.04ms +step:28138/57344 train_time:16180230ms step_avg:575.03ms +step:28139/57344 train_time:16180772ms step_avg:575.03ms +grad accum step:7035/14336 +step:28140/57344 train_time:16182054ms step_avg:575.06ms +step:28141/57344 train_time:16182071ms step_avg:575.04ms +step:28142/57344 train_time:16182316ms step_avg:575.02ms +step:28143/57344 train_time:16182867ms step_avg:575.02ms +grad accum step:7036/14336 +step:28144/57344 train_time:16184166ms step_avg:575.05ms +step:28145/57344 train_time:16184182ms step_avg:575.03ms +step:28146/57344 train_time:16184435ms step_avg:575.02ms +step:28147/57344 train_time:16184997ms step_avg:575.02ms +grad accum step:7037/14336 +step:28148/57344 train_time:16186298ms step_avg:575.04ms +step:28149/57344 train_time:16186315ms step_avg:575.02ms +step:28150/57344 train_time:16186564ms step_avg:575.01ms +step:28151/57344 train_time:16187113ms step_avg:575.01ms +grad accum step:7038/14336 +step:28152/57344 train_time:16188401ms step_avg:575.04ms +step:28153/57344 train_time:16188418ms step_avg:575.02ms +step:28154/57344 train_time:16188662ms step_avg:575.00ms +step:28155/57344 train_time:16189207ms step_avg:575.00ms +grad accum step:7039/14336 +step:28156/57344 train_time:16190512ms step_avg:575.03ms +step:28157/57344 train_time:16190529ms step_avg:575.01ms +step:28158/57344 train_time:16190771ms step_avg:575.00ms +step:28159/57344 train_time:16191319ms step_avg:575.00ms +grad accum step:7040/14336 +step:28160/57344 train_time:16192635ms step_avg:575.02ms +step:28160/57344 val_loss:6.420421 train_time:16192635ms step_avg:575.02ms +step:28161/57344 train_time:16192648ms step_avg:575.00ms +step:28162/57344 train_time:16192871ms step_avg:574.99ms +step:28163/57344 train_time:16193418ms step_avg:574.99ms +grad accum step:7041/14336 +step:28164/57344 train_time:16194712ms step_avg:575.01ms +step:28165/57344 train_time:16194729ms step_avg:574.99ms +step:28166/57344 train_time:16194976ms step_avg:574.98ms +step:28167/57344 train_time:16195524ms step_avg:574.98ms +grad accum step:7042/14336 +step:28168/57344 train_time:16196825ms step_avg:575.01ms +step:28169/57344 train_time:16196842ms step_avg:574.99ms +step:28170/57344 train_time:16197084ms step_avg:574.98ms +step:28171/57344 train_time:16197626ms step_avg:574.98ms +grad accum step:7043/14336 +step:28172/57344 train_time:16198919ms step_avg:575.00ms +step:28173/57344 train_time:16198937ms step_avg:574.98ms +step:28174/57344 train_time:16199186ms step_avg:574.97ms +step:28175/57344 train_time:16199740ms step_avg:574.97ms +grad accum step:7044/14336 +step:28176/57344 train_time:16201032ms step_avg:574.99ms +step:28177/57344 train_time:16201049ms step_avg:574.97ms +step:28178/57344 train_time:16201298ms step_avg:574.96ms +step:28179/57344 train_time:16201848ms step_avg:574.96ms +grad accum step:7045/14336 +step:28180/57344 train_time:16203136ms step_avg:574.99ms +step:28181/57344 train_time:16203153ms step_avg:574.97ms +step:28182/57344 train_time:16203400ms step_avg:574.96ms +step:28183/57344 train_time:16203947ms step_avg:574.95ms +grad accum step:7046/14336 +step:28184/57344 train_time:16205315ms step_avg:574.98ms +step:28185/57344 train_time:16205331ms step_avg:574.96ms +step:28186/57344 train_time:16205574ms step_avg:574.95ms +step:28187/57344 train_time:16206111ms step_avg:574.95ms +grad accum step:7047/14336 +step:28188/57344 train_time:16207401ms step_avg:574.98ms +step:28189/57344 train_time:16207418ms step_avg:574.96ms +step:28190/57344 train_time:16207660ms step_avg:574.94ms +step:28191/57344 train_time:16208195ms step_avg:574.94ms +grad accum step:7048/14336 +step:28192/57344 train_time:16209490ms step_avg:574.97ms +step:28193/57344 train_time:16209507ms step_avg:574.95ms +step:28194/57344 train_time:16209759ms step_avg:574.94ms +step:28195/57344 train_time:16210324ms step_avg:574.94ms +grad accum step:7049/14336 +step:28196/57344 train_time:16211686ms step_avg:574.96ms +step:28197/57344 train_time:16211703ms step_avg:574.94ms +step:28198/57344 train_time:16211955ms step_avg:574.93ms +step:28199/57344 train_time:16212517ms step_avg:574.93ms +grad accum step:7050/14336 +step:28200/57344 train_time:16213799ms step_avg:574.96ms +step:28201/57344 train_time:16213816ms step_avg:574.94ms +step:28202/57344 train_time:16214064ms step_avg:574.93ms +step:28203/57344 train_time:16214606ms step_avg:574.92ms +grad accum step:7051/14336 +step:28204/57344 train_time:16215904ms step_avg:574.95ms +step:28205/57344 train_time:16215921ms step_avg:574.93ms +step:28206/57344 train_time:16216168ms step_avg:574.92ms +step:28207/57344 train_time:16216714ms step_avg:574.92ms +grad accum step:7052/14336 +step:28208/57344 train_time:16217995ms step_avg:574.94ms +step:28209/57344 train_time:16218012ms step_avg:574.92ms +step:28210/57344 train_time:16218260ms step_avg:574.91ms +step:28211/57344 train_time:16218810ms step_avg:574.91ms +grad accum step:7053/14336 +step:28212/57344 train_time:16220178ms step_avg:574.94ms +step:28213/57344 train_time:16220195ms step_avg:574.92ms +step:28214/57344 train_time:16220437ms step_avg:574.91ms +step:28215/57344 train_time:16220972ms step_avg:574.91ms +grad accum step:7054/14336 +step:28216/57344 train_time:16222275ms step_avg:574.93ms +step:28217/57344 train_time:16222292ms step_avg:574.91ms +step:28218/57344 train_time:16222540ms step_avg:574.90ms +step:28219/57344 train_time:16223090ms step_avg:574.90ms +grad accum step:7055/14336 +step:28220/57344 train_time:16224400ms step_avg:574.93ms +step:28221/57344 train_time:16224417ms step_avg:574.91ms +step:28222/57344 train_time:16224658ms step_avg:574.89ms +step:28223/57344 train_time:16225207ms step_avg:574.89ms +grad accum step:7056/14336 +step:28224/57344 train_time:16226520ms step_avg:574.92ms +step:28224/57344 val_loss:6.439218 train_time:16226520ms step_avg:574.92ms +step:28225/57344 train_time:16226533ms step_avg:574.90ms +step:28226/57344 train_time:16226758ms step_avg:574.89ms +step:28227/57344 train_time:16227305ms step_avg:574.89ms +grad accum step:7057/14336 +step:28228/57344 train_time:16228610ms step_avg:574.91ms +step:28229/57344 train_time:16228627ms step_avg:574.89ms +step:28230/57344 train_time:16228871ms step_avg:574.88ms +step:28231/57344 train_time:16229420ms step_avg:574.88ms +grad accum step:7058/14336 +step:28232/57344 train_time:16230781ms step_avg:574.91ms +step:28233/57344 train_time:16230798ms step_avg:574.89ms +step:28234/57344 train_time:16231045ms step_avg:574.88ms +step:28235/57344 train_time:16231593ms step_avg:574.87ms +grad accum step:7059/14336 +step:28236/57344 train_time:16232877ms step_avg:574.90ms +step:28237/57344 train_time:16232895ms step_avg:574.88ms +step:28238/57344 train_time:16233151ms step_avg:574.87ms +step:28239/57344 train_time:16233718ms step_avg:574.87ms +grad accum step:7060/14336 +step:28240/57344 train_time:16235034ms step_avg:574.89ms +step:28241/57344 train_time:16235051ms step_avg:574.88ms +step:28242/57344 train_time:16235300ms step_avg:574.86ms +step:28243/57344 train_time:16235845ms step_avg:574.86ms +grad accum step:7061/14336 +step:28244/57344 train_time:16237213ms step_avg:574.89ms +step:28245/57344 train_time:16237229ms step_avg:574.87ms +step:28246/57344 train_time:16237474ms step_avg:574.86ms +step:28247/57344 train_time:16238010ms step_avg:574.86ms +grad accum step:7062/14336 +step:28248/57344 train_time:16239308ms step_avg:574.88ms +step:28249/57344 train_time:16239324ms step_avg:574.86ms +step:28250/57344 train_time:16239572ms step_avg:574.85ms +step:28251/57344 train_time:16240120ms step_avg:574.85ms +grad accum step:7063/14336 +step:28252/57344 train_time:16241447ms step_avg:574.88ms +step:28253/57344 train_time:16241463ms step_avg:574.86ms +step:28254/57344 train_time:16241710ms step_avg:574.85ms +step:28255/57344 train_time:16242260ms step_avg:574.85ms +grad accum step:7064/14336 +step:28256/57344 train_time:16243551ms step_avg:574.87ms +step:28257/57344 train_time:16243567ms step_avg:574.85ms +step:28258/57344 train_time:16243814ms step_avg:574.84ms +step:28259/57344 train_time:16244359ms step_avg:574.84ms +grad accum step:7065/14336 +step:28260/57344 train_time:16245711ms step_avg:574.87ms +step:28261/57344 train_time:16245728ms step_avg:574.85ms +step:28262/57344 train_time:16245975ms step_avg:574.83ms +step:28263/57344 train_time:16246522ms step_avg:574.83ms +grad accum step:7066/14336 +step:28264/57344 train_time:16247827ms step_avg:574.86ms +step:28265/57344 train_time:16247843ms step_avg:574.84ms +step:28266/57344 train_time:16248091ms step_avg:574.83ms +step:28267/57344 train_time:16248634ms step_avg:574.83ms +grad accum step:7067/14336 +step:28268/57344 train_time:16249932ms step_avg:574.85ms +step:28269/57344 train_time:16249949ms step_avg:574.83ms +step:28270/57344 train_time:16250196ms step_avg:574.82ms +step:28271/57344 train_time:16250749ms step_avg:574.82ms +grad accum step:7068/14336 +step:28272/57344 train_time:16252058ms step_avg:574.85ms +step:28273/57344 train_time:16252075ms step_avg:574.83ms +step:28274/57344 train_time:16252323ms step_avg:574.82ms +step:28275/57344 train_time:16252873ms step_avg:574.81ms +grad accum step:7069/14336 +step:28276/57344 train_time:16254174ms step_avg:574.84ms +step:28277/57344 train_time:16254190ms step_avg:574.82ms +step:28278/57344 train_time:16254440ms step_avg:574.81ms +step:28279/57344 train_time:16254997ms step_avg:574.81ms +grad accum step:7070/14336 +step:28280/57344 train_time:16256309ms step_avg:574.83ms +step:28281/57344 train_time:16256326ms step_avg:574.81ms +step:28282/57344 train_time:16256573ms step_avg:574.80ms +step:28283/57344 train_time:16257126ms step_avg:574.80ms +grad accum step:7071/14336 +step:28284/57344 train_time:16258487ms step_avg:574.83ms +step:28285/57344 train_time:16258504ms step_avg:574.81ms +step:28286/57344 train_time:16258748ms step_avg:574.80ms +step:28287/57344 train_time:16259285ms step_avg:574.80ms +grad accum step:7072/14336 +step:28288/57344 train_time:16260580ms step_avg:574.82ms +step:28288/57344 val_loss:6.458371 train_time:16260581ms step_avg:574.82ms +step:28289/57344 train_time:16260593ms step_avg:574.80ms +step:28290/57344 train_time:16260813ms step_avg:574.79ms +step:28291/57344 train_time:16261349ms step_avg:574.79ms +grad accum step:7073/14336 +step:28292/57344 train_time:16262663ms step_avg:574.81ms +step:28293/57344 train_time:16262680ms step_avg:574.80ms +step:28294/57344 train_time:16262927ms step_avg:574.78ms +step:28295/57344 train_time:16263497ms step_avg:574.78ms +grad accum step:7074/14336 +step:28296/57344 train_time:16264810ms step_avg:574.81ms +step:28297/57344 train_time:16264828ms step_avg:574.79ms +step:28298/57344 train_time:16265073ms step_avg:574.78ms +step:28299/57344 train_time:16265612ms step_avg:574.78ms +grad accum step:7075/14336 +step:28300/57344 train_time:16266927ms step_avg:574.80ms +step:28301/57344 train_time:16266943ms step_avg:574.78ms +step:28302/57344 train_time:16267187ms step_avg:574.77ms +step:28303/57344 train_time:16267735ms step_avg:574.77ms +grad accum step:7076/14336 +step:28304/57344 train_time:16269054ms step_avg:574.80ms +step:28305/57344 train_time:16269071ms step_avg:574.78ms +step:28306/57344 train_time:16269318ms step_avg:574.77ms +step:28307/57344 train_time:16269866ms step_avg:574.76ms +grad accum step:7077/14336 +step:28308/57344 train_time:16271188ms step_avg:574.79ms +step:28309/57344 train_time:16271205ms step_avg:574.77ms +step:28310/57344 train_time:16271459ms step_avg:574.76ms +step:28311/57344 train_time:16272028ms step_avg:574.76ms +grad accum step:7078/14336 +step:28312/57344 train_time:16273349ms step_avg:574.79ms +step:28313/57344 train_time:16273366ms step_avg:574.77ms +step:28314/57344 train_time:16273616ms step_avg:574.76ms +step:28315/57344 train_time:16274193ms step_avg:574.76ms +grad accum step:7079/14336 +step:28316/57344 train_time:16275585ms step_avg:574.78ms +step:28317/57344 train_time:16275602ms step_avg:574.76ms +step:28318/57344 train_time:16275855ms step_avg:574.75ms +step:28319/57344 train_time:16276417ms step_avg:574.75ms +grad accum step:7080/14336 +step:28320/57344 train_time:16277703ms step_avg:574.78ms +step:28321/57344 train_time:16277719ms step_avg:574.76ms +step:28322/57344 train_time:16277963ms step_avg:574.75ms +step:28323/57344 train_time:16278506ms step_avg:574.75ms +grad accum step:7081/14336 +step:28324/57344 train_time:16279853ms step_avg:574.77ms +step:28325/57344 train_time:16279869ms step_avg:574.75ms +step:28326/57344 train_time:16280117ms step_avg:574.74ms +step:28327/57344 train_time:16280675ms step_avg:574.74ms +grad accum step:7082/14336 +step:28328/57344 train_time:16281987ms step_avg:574.77ms +step:28329/57344 train_time:16282004ms step_avg:574.75ms +step:28330/57344 train_time:16282251ms step_avg:574.74ms +step:28331/57344 train_time:16282800ms step_avg:574.73ms +grad accum step:7083/14336 +step:28332/57344 train_time:16284090ms step_avg:574.76ms +step:28333/57344 train_time:16284107ms step_avg:574.74ms +step:28334/57344 train_time:16284358ms step_avg:574.73ms +step:28335/57344 train_time:16284912ms step_avg:574.73ms +grad accum step:7084/14336 +step:28336/57344 train_time:16286200ms step_avg:574.75ms +step:28337/57344 train_time:16286216ms step_avg:574.73ms +step:28338/57344 train_time:16286466ms step_avg:574.72ms +step:28339/57344 train_time:16287007ms step_avg:574.72ms +grad accum step:7085/14336 +step:28340/57344 train_time:16288327ms step_avg:574.75ms +step:28341/57344 train_time:16288344ms step_avg:574.73ms +step:28342/57344 train_time:16288592ms step_avg:574.72ms +step:28343/57344 train_time:16289149ms step_avg:574.72ms +grad accum step:7086/14336 +step:28344/57344 train_time:16290448ms step_avg:574.74ms +step:28345/57344 train_time:16290465ms step_avg:574.72ms +step:28346/57344 train_time:16290719ms step_avg:574.71ms +step:28347/57344 train_time:16291286ms step_avg:574.71ms +grad accum step:7087/14336 +step:28348/57344 train_time:16292612ms step_avg:574.74ms +step:28349/57344 train_time:16292629ms step_avg:574.72ms +step:28350/57344 train_time:16292876ms step_avg:574.70ms +step:28351/57344 train_time:16293422ms step_avg:574.70ms +grad accum step:7088/14336 +step:28352/57344 train_time:16294695ms step_avg:574.73ms +step:28352/57344 val_loss:6.471800 train_time:16294696ms step_avg:574.73ms +step:28353/57344 train_time:16294708ms step_avg:574.71ms +step:28354/57344 train_time:16294936ms step_avg:574.70ms +step:28355/57344 train_time:16295493ms step_avg:574.70ms +grad accum step:7089/14336 +step:28356/57344 train_time:16296822ms step_avg:574.72ms +step:28357/57344 train_time:16296839ms step_avg:574.70ms +step:28358/57344 train_time:16297089ms step_avg:574.69ms +step:28359/57344 train_time:16297637ms step_avg:574.69ms +grad accum step:7090/14336 +step:28360/57344 train_time:16298937ms step_avg:574.72ms +step:28361/57344 train_time:16298954ms step_avg:574.70ms +step:28362/57344 train_time:16299204ms step_avg:574.68ms +step:28363/57344 train_time:16299768ms step_avg:574.68ms +grad accum step:7091/14336 +step:28364/57344 train_time:16301196ms step_avg:574.71ms +step:28365/57344 train_time:16301213ms step_avg:574.69ms +step:28366/57344 train_time:16301461ms step_avg:574.68ms +step:28367/57344 train_time:16302012ms step_avg:574.68ms +grad accum step:7092/14336 +step:28368/57344 train_time:16303317ms step_avg:574.71ms +step:28369/57344 train_time:16303334ms step_avg:574.69ms +step:28370/57344 train_time:16303586ms step_avg:574.68ms +step:28371/57344 train_time:16304148ms step_avg:574.68ms +grad accum step:7093/14336 +step:28372/57344 train_time:16305462ms step_avg:574.70ms +step:28373/57344 train_time:16305479ms step_avg:574.68ms +step:28374/57344 train_time:16305734ms step_avg:574.67ms +step:28375/57344 train_time:16306302ms step_avg:574.67ms +grad accum step:7094/14336 +step:28376/57344 train_time:16307589ms step_avg:574.70ms +step:28377/57344 train_time:16307606ms step_avg:574.68ms +step:28378/57344 train_time:16307851ms step_avg:574.67ms +step:28379/57344 train_time:16308414ms step_avg:574.66ms +grad accum step:7095/14336 +step:28380/57344 train_time:16309782ms step_avg:574.69ms +step:28381/57344 train_time:16309798ms step_avg:574.67ms +step:28382/57344 train_time:16310046ms step_avg:574.66ms +step:28383/57344 train_time:16310598ms step_avg:574.66ms +grad accum step:7096/14336 +step:28384/57344 train_time:16311884ms step_avg:574.69ms +step:28385/57344 train_time:16311902ms step_avg:574.67ms +step:28386/57344 train_time:16312146ms step_avg:574.65ms +step:28387/57344 train_time:16312692ms step_avg:574.65ms +grad accum step:7097/14336 +step:28388/57344 train_time:16313970ms step_avg:574.68ms +step:28389/57344 train_time:16313987ms step_avg:574.66ms +step:28390/57344 train_time:16314232ms step_avg:574.65ms +step:28391/57344 train_time:16314781ms step_avg:574.65ms +grad accum step:7098/14336 +step:28392/57344 train_time:16316113ms step_avg:574.67ms +step:28393/57344 train_time:16316129ms step_avg:574.65ms +step:28394/57344 train_time:16316373ms step_avg:574.64ms +step:28395/57344 train_time:16316918ms step_avg:574.64ms +grad accum step:7099/14336 +step:28396/57344 train_time:16318191ms step_avg:574.67ms +step:28397/57344 train_time:16318208ms step_avg:574.65ms +step:28398/57344 train_time:16318461ms step_avg:574.63ms +step:28399/57344 train_time:16319020ms step_avg:574.63ms +grad accum step:7100/14336 +step:28400/57344 train_time:16320309ms step_avg:574.66ms +step:28401/57344 train_time:16320326ms step_avg:574.64ms +step:28402/57344 train_time:16320575ms step_avg:574.63ms +step:28403/57344 train_time:16321130ms step_avg:574.63ms +grad accum step:7101/14336 +step:28404/57344 train_time:16322440ms step_avg:574.65ms +step:28405/57344 train_time:16322456ms step_avg:574.63ms +step:28406/57344 train_time:16322704ms step_avg:574.62ms +step:28407/57344 train_time:16323264ms step_avg:574.62ms +grad accum step:7102/14336 +step:28408/57344 train_time:16324611ms step_avg:574.65ms +step:28409/57344 train_time:16324628ms step_avg:574.63ms +step:28410/57344 train_time:16324876ms step_avg:574.62ms +step:28411/57344 train_time:16325748ms step_avg:574.63ms +grad accum step:7103/14336 +step:28412/57344 train_time:16326777ms step_avg:574.64ms +step:28413/57344 train_time:16326794ms step_avg:574.62ms +step:28414/57344 train_time:16327041ms step_avg:574.61ms +step:28415/57344 train_time:16327581ms step_avg:574.61ms +grad accum step:7104/14336 +step:28416/57344 train_time:16328879ms step_avg:574.64ms +step:28416/57344 val_loss:6.487670 train_time:16328880ms step_avg:574.64ms +step:28417/57344 train_time:16328892ms step_avg:574.62ms +step:28418/57344 train_time:16329112ms step_avg:574.60ms +step:28419/57344 train_time:16329665ms step_avg:574.60ms +grad accum step:7105/14336 +step:28420/57344 train_time:16330988ms step_avg:574.63ms +step:28421/57344 train_time:16331004ms step_avg:574.61ms +step:28422/57344 train_time:16331253ms step_avg:574.60ms +step:28423/57344 train_time:16331805ms step_avg:574.60ms +grad accum step:7106/14336 +step:28424/57344 train_time:16333141ms step_avg:574.62ms +step:28425/57344 train_time:16333158ms step_avg:574.61ms +step:28426/57344 train_time:16333403ms step_avg:574.59ms +step:28427/57344 train_time:16333950ms step_avg:574.59ms +grad accum step:7107/14336 +step:28428/57344 train_time:16335277ms step_avg:574.62ms +step:28429/57344 train_time:16335294ms step_avg:574.60ms +step:28430/57344 train_time:16335544ms step_avg:574.59ms +step:28431/57344 train_time:16336103ms step_avg:574.59ms +grad accum step:7108/14336 +step:28432/57344 train_time:16337393ms step_avg:574.61ms +step:28433/57344 train_time:16337410ms step_avg:574.59ms +step:28434/57344 train_time:16337658ms step_avg:574.58ms +step:28435/57344 train_time:16338209ms step_avg:574.58ms +grad accum step:7109/14336 +step:28436/57344 train_time:16339577ms step_avg:574.61ms +step:28437/57344 train_time:16339594ms step_avg:574.59ms +step:28438/57344 train_time:16339841ms step_avg:574.58ms +step:28439/57344 train_time:16340387ms step_avg:574.58ms +grad accum step:7110/14336 +step:28440/57344 train_time:16341714ms step_avg:574.60ms +step:28441/57344 train_time:16341731ms step_avg:574.58ms +step:28442/57344 train_time:16341980ms step_avg:574.57ms +step:28443/57344 train_time:16342536ms step_avg:574.57ms +grad accum step:7111/14336 +step:28444/57344 train_time:16343865ms step_avg:574.60ms +step:28445/57344 train_time:16343882ms step_avg:574.58ms +step:28446/57344 train_time:16344130ms step_avg:574.57ms +step:28447/57344 train_time:16344680ms step_avg:574.57ms +grad accum step:7112/14336 +step:28448/57344 train_time:16345984ms step_avg:574.59ms +step:28449/57344 train_time:16346001ms step_avg:574.57ms +step:28450/57344 train_time:16346254ms step_avg:574.56ms +step:28451/57344 train_time:16346812ms step_avg:574.56ms +grad accum step:7113/14336 +step:28452/57344 train_time:16348121ms step_avg:574.59ms +step:28453/57344 train_time:16348138ms step_avg:574.57ms +step:28454/57344 train_time:16348390ms step_avg:574.56ms +step:28455/57344 train_time:16348950ms step_avg:574.55ms +grad accum step:7114/14336 +step:28456/57344 train_time:16350261ms step_avg:574.58ms +step:28457/57344 train_time:16350277ms step_avg:574.56ms +step:28458/57344 train_time:16350524ms step_avg:574.55ms +step:28459/57344 train_time:16351074ms step_avg:574.55ms +grad accum step:7115/14336 +step:28460/57344 train_time:16352397ms step_avg:574.57ms +step:28461/57344 train_time:16352414ms step_avg:574.56ms +step:28462/57344 train_time:16352671ms step_avg:574.54ms +step:28463/57344 train_time:16353250ms step_avg:574.54ms +grad accum step:7116/14336 +step:28464/57344 train_time:16354549ms step_avg:574.57ms +step:28465/57344 train_time:16354566ms step_avg:574.55ms +step:28466/57344 train_time:16354818ms step_avg:574.54ms +step:28467/57344 train_time:16355378ms step_avg:574.54ms +grad accum step:7117/14336 +step:28468/57344 train_time:16356688ms step_avg:574.56ms +step:28469/57344 train_time:16356705ms step_avg:574.54ms +step:28470/57344 train_time:16356963ms step_avg:574.53ms +step:28471/57344 train_time:16357529ms step_avg:574.53ms +grad accum step:7118/14336 +step:28472/57344 train_time:16358845ms step_avg:574.56ms +step:28473/57344 train_time:16358861ms step_avg:574.54ms +step:28474/57344 train_time:16359109ms step_avg:574.53ms +step:28475/57344 train_time:16359647ms step_avg:574.53ms +grad accum step:7119/14336 +step:28476/57344 train_time:16360958ms step_avg:574.55ms +step:28477/57344 train_time:16360975ms step_avg:574.53ms +step:28478/57344 train_time:16361221ms step_avg:574.52ms +step:28479/57344 train_time:16361777ms step_avg:574.52ms +grad accum step:7120/14336 +step:28480/57344 train_time:16363109ms step_avg:574.55ms +step:28480/57344 val_loss:6.491364 train_time:16363110ms step_avg:574.55ms +step:28481/57344 train_time:16363122ms step_avg:574.53ms +step:28482/57344 train_time:16363348ms step_avg:574.52ms +step:28483/57344 train_time:16363892ms step_avg:574.51ms +grad accum step:7121/14336 +step:28484/57344 train_time:16365202ms step_avg:574.54ms +step:28485/57344 train_time:16365219ms step_avg:574.52ms +step:28486/57344 train_time:16365469ms step_avg:574.51ms +step:28487/57344 train_time:16366029ms step_avg:574.51ms +grad accum step:7122/14336 +step:28488/57344 train_time:16367328ms step_avg:574.53ms +step:28489/57344 train_time:16367345ms step_avg:574.51ms +step:28490/57344 train_time:16367612ms step_avg:574.50ms +step:28491/57344 train_time:16368232ms step_avg:574.51ms +grad accum step:7123/14336 +step:28492/57344 train_time:16369577ms step_avg:574.53ms +step:28493/57344 train_time:16369594ms step_avg:574.51ms +step:28494/57344 train_time:16369843ms step_avg:574.50ms +step:28495/57344 train_time:16370397ms step_avg:574.50ms +grad accum step:7124/14336 +step:28496/57344 train_time:16371708ms step_avg:574.53ms +step:28497/57344 train_time:16371725ms step_avg:574.51ms +step:28498/57344 train_time:16371968ms step_avg:574.50ms +step:28499/57344 train_time:16372509ms step_avg:574.49ms +grad accum step:7125/14336 +step:28500/57344 train_time:16373810ms step_avg:574.52ms +step:28501/57344 train_time:16373827ms step_avg:574.50ms +step:28502/57344 train_time:16374078ms step_avg:574.49ms +step:28503/57344 train_time:16374633ms step_avg:574.49ms +grad accum step:7126/14336 +step:28504/57344 train_time:16375927ms step_avg:574.51ms +step:28505/57344 train_time:16375944ms step_avg:574.49ms +step:28506/57344 train_time:16376196ms step_avg:574.48ms +step:28507/57344 train_time:16376755ms step_avg:574.48ms +grad accum step:7127/14336 +step:28508/57344 train_time:16378067ms step_avg:574.51ms +step:28509/57344 train_time:16378084ms step_avg:574.49ms +step:28510/57344 train_time:16378330ms step_avg:574.48ms +step:28511/57344 train_time:16378887ms step_avg:574.48ms +grad accum step:7128/14336 +step:28512/57344 train_time:16380217ms step_avg:574.50ms +step:28513/57344 train_time:16380234ms step_avg:574.48ms +step:28514/57344 train_time:16380480ms step_avg:574.47ms +step:28515/57344 train_time:16381023ms step_avg:574.47ms +grad accum step:7129/14336 +step:28516/57344 train_time:16382321ms step_avg:574.50ms +step:28517/57344 train_time:16382338ms step_avg:574.48ms +step:28518/57344 train_time:16382585ms step_avg:574.46ms +step:28519/57344 train_time:16383130ms step_avg:574.46ms +grad accum step:7130/14336 +step:28520/57344 train_time:16384445ms step_avg:574.49ms +step:28521/57344 train_time:16384462ms step_avg:574.47ms +step:28522/57344 train_time:16384709ms step_avg:574.46ms +step:28523/57344 train_time:16385260ms step_avg:574.46ms +grad accum step:7131/14336 +step:28524/57344 train_time:16386571ms step_avg:574.48ms +step:28525/57344 train_time:16386588ms step_avg:574.46ms +step:28526/57344 train_time:16386833ms step_avg:574.45ms +step:28527/57344 train_time:16387381ms step_avg:574.45ms +grad accum step:7132/14336 +step:28528/57344 train_time:16388692ms step_avg:574.48ms +step:28529/57344 train_time:16388709ms step_avg:574.46ms +step:28530/57344 train_time:16388954ms step_avg:574.45ms +step:28531/57344 train_time:16389498ms step_avg:574.45ms +grad accum step:7133/14336 +step:28532/57344 train_time:16390815ms step_avg:574.47ms +step:28533/57344 train_time:16390832ms step_avg:574.45ms +step:28534/57344 train_time:16391077ms step_avg:574.44ms +step:28535/57344 train_time:16391624ms step_avg:574.44ms +grad accum step:7134/14336 +step:28536/57344 train_time:16392901ms step_avg:574.46ms +step:28537/57344 train_time:16392918ms step_avg:574.44ms +step:28538/57344 train_time:16393169ms step_avg:574.43ms +step:28539/57344 train_time:16393728ms step_avg:574.43ms +grad accum step:7135/14336 +step:28540/57344 train_time:16395039ms step_avg:574.46ms +step:28541/57344 train_time:16395056ms step_avg:574.44ms +step:28542/57344 train_time:16395309ms step_avg:574.43ms +step:28543/57344 train_time:16395872ms step_avg:574.43ms +grad accum step:7136/14336 +step:28544/57344 train_time:16397166ms step_avg:574.45ms +step:28544/57344 val_loss:6.495669 train_time:16397167ms step_avg:574.45ms +step:28545/57344 train_time:16397179ms step_avg:574.43ms +step:28546/57344 train_time:16397399ms step_avg:574.42ms +step:28547/57344 train_time:16397932ms step_avg:574.42ms +grad accum step:7137/14336 +step:28548/57344 train_time:16399232ms step_avg:574.44ms +step:28549/57344 train_time:16399249ms step_avg:574.42ms +step:28550/57344 train_time:16399501ms step_avg:574.41ms +step:28551/57344 train_time:16400059ms step_avg:574.41ms +grad accum step:7138/14336 +step:28552/57344 train_time:16401344ms step_avg:574.44ms +step:28553/57344 train_time:16401360ms step_avg:574.42ms +step:28554/57344 train_time:16401606ms step_avg:574.41ms +step:28555/57344 train_time:16402146ms step_avg:574.41ms +grad accum step:7139/14336 +step:28556/57344 train_time:16403440ms step_avg:574.43ms +step:28557/57344 train_time:16403457ms step_avg:574.41ms +step:28558/57344 train_time:16403702ms step_avg:574.40ms +step:28559/57344 train_time:16404258ms step_avg:574.40ms +grad accum step:7140/14336 +step:28560/57344 train_time:16405608ms step_avg:574.43ms +step:28561/57344 train_time:16405624ms step_avg:574.41ms +step:28562/57344 train_time:16405876ms step_avg:574.40ms +step:28563/57344 train_time:16406435ms step_avg:574.39ms +grad accum step:7141/14336 +step:28564/57344 train_time:16407748ms step_avg:574.42ms +step:28565/57344 train_time:16407765ms step_avg:574.40ms +step:28566/57344 train_time:16408010ms step_avg:574.39ms +step:28567/57344 train_time:16408555ms step_avg:574.39ms +grad accum step:7142/14336 +step:28568/57344 train_time:16409868ms step_avg:574.41ms +step:28569/57344 train_time:16409885ms step_avg:574.39ms +step:28570/57344 train_time:16410140ms step_avg:574.38ms +step:28571/57344 train_time:16410709ms step_avg:574.38ms +grad accum step:7143/14336 +step:28572/57344 train_time:16412019ms step_avg:574.41ms +step:28573/57344 train_time:16412036ms step_avg:574.39ms +step:28574/57344 train_time:16412284ms step_avg:574.38ms +step:28575/57344 train_time:16412842ms step_avg:574.38ms +grad accum step:7144/14336 +step:28576/57344 train_time:16414166ms step_avg:574.40ms +step:28577/57344 train_time:16414182ms step_avg:574.38ms +step:28578/57344 train_time:16414432ms step_avg:574.37ms +step:28579/57344 train_time:16414975ms step_avg:574.37ms +grad accum step:7145/14336 +step:28580/57344 train_time:16416289ms step_avg:574.40ms +step:28581/57344 train_time:16416306ms step_avg:574.38ms +step:28582/57344 train_time:16416560ms step_avg:574.37ms +step:28583/57344 train_time:16417128ms step_avg:574.37ms +grad accum step:7146/14336 +step:28584/57344 train_time:16418408ms step_avg:574.39ms +step:28585/57344 train_time:16418426ms step_avg:574.37ms +step:28586/57344 train_time:16418675ms step_avg:574.36ms +step:28587/57344 train_time:16419220ms step_avg:574.36ms +grad accum step:7147/14336 +step:28588/57344 train_time:16420507ms step_avg:574.38ms +step:28589/57344 train_time:16420524ms step_avg:574.37ms +step:28590/57344 train_time:16420770ms step_avg:574.35ms +step:28591/57344 train_time:16421325ms step_avg:574.35ms +grad accum step:7148/14336 +step:28592/57344 train_time:16422658ms step_avg:574.38ms +step:28593/57344 train_time:16422675ms step_avg:574.36ms +step:28594/57344 train_time:16422919ms step_avg:574.35ms +step:28595/57344 train_time:16423475ms step_avg:574.35ms +grad accum step:7149/14336 +step:28596/57344 train_time:16424824ms step_avg:574.37ms +step:28597/57344 train_time:16424841ms step_avg:574.36ms +step:28598/57344 train_time:16425087ms step_avg:574.34ms +step:28599/57344 train_time:16425636ms step_avg:574.34ms +grad accum step:7150/14336 +step:28600/57344 train_time:16426927ms step_avg:574.37ms +step:28601/57344 train_time:16426944ms step_avg:574.35ms +step:28602/57344 train_time:16427194ms step_avg:574.34ms +step:28603/57344 train_time:16427742ms step_avg:574.34ms +grad accum step:7151/14336 +step:28604/57344 train_time:16429053ms step_avg:574.36ms +step:28605/57344 train_time:16429069ms step_avg:574.34ms +step:28606/57344 train_time:16429316ms step_avg:574.33ms +step:28607/57344 train_time:16429858ms step_avg:574.33ms +grad accum step:7152/14336 +step:28608/57344 train_time:16431164ms step_avg:574.36ms +step:28608/57344 val_loss:6.496770 train_time:16431164ms step_avg:574.36ms +step:28609/57344 train_time:16431176ms step_avg:574.34ms +step:28610/57344 train_time:16431400ms step_avg:574.32ms +step:28611/57344 train_time:16431949ms step_avg:574.32ms +grad accum step:7153/14336 +step:28612/57344 train_time:16433223ms step_avg:574.35ms +step:28613/57344 train_time:16433240ms step_avg:574.33ms +step:28614/57344 train_time:16433490ms step_avg:574.32ms +step:28615/57344 train_time:16434046ms step_avg:574.32ms +grad accum step:7154/14336 +step:28616/57344 train_time:16435350ms step_avg:574.34ms +step:28617/57344 train_time:16435367ms step_avg:574.32ms +step:28618/57344 train_time:16435625ms step_avg:574.31ms +step:28619/57344 train_time:16436198ms step_avg:574.31ms +grad accum step:7155/14336 +step:28620/57344 train_time:16437526ms step_avg:574.34ms +step:28621/57344 train_time:16437543ms step_avg:574.32ms +step:28622/57344 train_time:16437792ms step_avg:574.31ms +step:28623/57344 train_time:16438344ms step_avg:574.31ms +grad accum step:7156/14336 +step:28624/57344 train_time:16439634ms step_avg:574.33ms +step:28625/57344 train_time:16439651ms step_avg:574.31ms +step:28626/57344 train_time:16439902ms step_avg:574.30ms +step:28627/57344 train_time:16440466ms step_avg:574.30ms +grad accum step:7157/14336 +step:28628/57344 train_time:16441781ms step_avg:574.33ms +step:28629/57344 train_time:16441798ms step_avg:574.31ms +step:28630/57344 train_time:16442041ms step_avg:574.29ms +step:28631/57344 train_time:16442587ms step_avg:574.29ms +grad accum step:7158/14336 +step:28632/57344 train_time:16443909ms step_avg:574.32ms +step:28633/57344 train_time:16443925ms step_avg:574.30ms +step:28634/57344 train_time:16444174ms step_avg:574.29ms +step:28635/57344 train_time:16444724ms step_avg:574.29ms +grad accum step:7159/14336 +step:28636/57344 train_time:16446049ms step_avg:574.31ms +step:28637/57344 train_time:16446066ms step_avg:574.29ms +step:28638/57344 train_time:16446326ms step_avg:574.28ms +step:28639/57344 train_time:16446911ms step_avg:574.28ms +grad accum step:7160/14336 +step:28640/57344 train_time:16448209ms step_avg:574.31ms +step:28641/57344 train_time:16448226ms step_avg:574.29ms +step:28642/57344 train_time:16448472ms step_avg:574.28ms +step:28643/57344 train_time:16449020ms step_avg:574.28ms +grad accum step:7161/14336 +step:28644/57344 train_time:16450319ms step_avg:574.30ms +step:28645/57344 train_time:16450336ms step_avg:574.28ms +step:28646/57344 train_time:16450585ms step_avg:574.27ms +step:28647/57344 train_time:16451141ms step_avg:574.27ms +grad accum step:7162/14336 +step:28648/57344 train_time:16452453ms step_avg:574.30ms +step:28649/57344 train_time:16452470ms step_avg:574.28ms +step:28650/57344 train_time:16452721ms step_avg:574.27ms +step:28651/57344 train_time:16453276ms step_avg:574.27ms +grad accum step:7163/14336 +step:28652/57344 train_time:16454604ms step_avg:574.29ms +step:28653/57344 train_time:16454621ms step_avg:574.27ms +step:28654/57344 train_time:16454866ms step_avg:574.26ms +step:28655/57344 train_time:16455421ms step_avg:574.26ms +grad accum step:7164/14336 +step:28656/57344 train_time:16456748ms step_avg:574.29ms +step:28657/57344 train_time:16456765ms step_avg:574.27ms +step:28658/57344 train_time:16457014ms step_avg:574.26ms +step:28659/57344 train_time:16457559ms step_avg:574.25ms +grad accum step:7165/14336 +step:28660/57344 train_time:16458864ms step_avg:574.28ms +step:28661/57344 train_time:16458881ms step_avg:574.26ms +step:28662/57344 train_time:16459127ms step_avg:574.25ms +step:28663/57344 train_time:16459674ms step_avg:574.25ms +grad accum step:7166/14336 +step:28664/57344 train_time:16460996ms step_avg:574.27ms +step:28665/57344 train_time:16461013ms step_avg:574.25ms +step:28666/57344 train_time:16461259ms step_avg:574.24ms +step:28667/57344 train_time:16461807ms step_avg:574.24ms +grad accum step:7167/14336 +step:28668/57344 train_time:16463099ms step_avg:574.27ms +step:28669/57344 train_time:16463117ms step_avg:574.25ms +step:28670/57344 train_time:16463364ms step_avg:574.24ms +step:28671/57344 train_time:16463915ms step_avg:574.24ms +grad accum step:7168/14336 +step:28672/57344 train_time:16465240ms step_avg:574.26ms +step:28672/57344 val_loss:6.499097 train_time:16465240ms step_avg:574.26ms +step:28673/57344 train_time:16465252ms step_avg:574.24ms +step:28674/57344 train_time:16465476ms step_avg:574.23ms +step:28675/57344 train_time:16466018ms step_avg:574.23ms +grad accum step:7169/14336 +step:28676/57344 train_time:16467402ms step_avg:574.26ms +step:28677/57344 train_time:16467419ms step_avg:574.24ms +step:28678/57344 train_time:16467668ms step_avg:574.23ms +step:28679/57344 train_time:16468225ms step_avg:574.23ms +grad accum step:7170/14336 +step:28680/57344 train_time:16469560ms step_avg:574.25ms +step:28681/57344 train_time:16469577ms step_avg:574.23ms +step:28682/57344 train_time:16469827ms step_avg:574.22ms +step:28683/57344 train_time:16470386ms step_avg:574.22ms +grad accum step:7171/14336 +step:28684/57344 train_time:16471743ms step_avg:574.25ms +step:28685/57344 train_time:16471759ms step_avg:574.23ms +step:28686/57344 train_time:16472015ms step_avg:574.22ms +step:28687/57344 train_time:16472585ms step_avg:574.22ms +grad accum step:7172/14336 +step:28688/57344 train_time:16473887ms step_avg:574.24ms +step:28689/57344 train_time:16473904ms step_avg:574.22ms +step:28690/57344 train_time:16474154ms step_avg:574.21ms +step:28691/57344 train_time:16474707ms step_avg:574.21ms +grad accum step:7173/14336 +step:28692/57344 train_time:16476033ms step_avg:574.24ms +step:28693/57344 train_time:16476050ms step_avg:574.22ms +step:28694/57344 train_time:16476294ms step_avg:574.21ms +step:28695/57344 train_time:16476844ms step_avg:574.21ms +grad accum step:7174/14336 +step:28696/57344 train_time:16478236ms step_avg:574.23ms +step:28697/57344 train_time:16478253ms step_avg:574.22ms +step:28698/57344 train_time:16478509ms step_avg:574.20ms +step:28699/57344 train_time:16479079ms step_avg:574.20ms +grad accum step:7175/14336 +step:28700/57344 train_time:16480379ms step_avg:574.23ms +step:28701/57344 train_time:16480396ms step_avg:574.21ms +step:28702/57344 train_time:16480643ms step_avg:574.20ms +step:28703/57344 train_time:16481186ms step_avg:574.20ms +grad accum step:7176/14336 +step:28704/57344 train_time:16482483ms step_avg:574.22ms +step:28705/57344 train_time:16482500ms step_avg:574.20ms +step:28706/57344 train_time:16482749ms step_avg:574.19ms +step:28707/57344 train_time:16483306ms step_avg:574.19ms +grad accum step:7177/14336 +step:28708/57344 train_time:16484627ms step_avg:574.22ms +step:28709/57344 train_time:16484644ms step_avg:574.20ms +step:28710/57344 train_time:16484893ms step_avg:574.19ms +step:28711/57344 train_time:16485447ms step_avg:574.19ms +grad accum step:7178/14336 +step:28712/57344 train_time:16486786ms step_avg:574.21ms +step:28713/57344 train_time:16486803ms step_avg:574.19ms +step:28714/57344 train_time:16487049ms step_avg:574.18ms +step:28715/57344 train_time:16487596ms step_avg:574.18ms +grad accum step:7179/14336 +step:28716/57344 train_time:16488913ms step_avg:574.21ms +step:28717/57344 train_time:16488930ms step_avg:574.19ms +step:28718/57344 train_time:16489176ms step_avg:574.18ms +step:28719/57344 train_time:16489719ms step_avg:574.17ms +grad accum step:7180/14336 +step:28720/57344 train_time:16491002ms step_avg:574.20ms +step:28721/57344 train_time:16491019ms step_avg:574.18ms +step:28722/57344 train_time:16491278ms step_avg:574.17ms +step:28723/57344 train_time:16491860ms step_avg:574.17ms +grad accum step:7181/14336 +step:28724/57344 train_time:16493181ms step_avg:574.20ms +step:28725/57344 train_time:16493198ms step_avg:574.18ms +step:28726/57344 train_time:16493448ms step_avg:574.16ms +step:28727/57344 train_time:16494000ms step_avg:574.16ms +grad accum step:7182/14336 +step:28728/57344 train_time:16495327ms step_avg:574.19ms +step:28729/57344 train_time:16495343ms step_avg:574.17ms +step:28730/57344 train_time:16495592ms step_avg:574.16ms +step:28731/57344 train_time:16496154ms step_avg:574.16ms +grad accum step:7183/14336 +step:28732/57344 train_time:16497452ms step_avg:574.18ms +step:28733/57344 train_time:16497469ms step_avg:574.16ms +step:28734/57344 train_time:16497718ms step_avg:574.15ms +step:28735/57344 train_time:16498266ms step_avg:574.15ms +grad accum step:7184/14336 +step:28736/57344 train_time:16499564ms step_avg:574.18ms +step:28736/57344 val_loss:6.498493 train_time:16499564ms step_avg:574.18ms +step:28737/57344 train_time:16499576ms step_avg:574.16ms +step:28738/57344 train_time:16499808ms step_avg:574.15ms +step:28739/57344 train_time:16500372ms step_avg:574.15ms +grad accum step:7185/14336 +step:28740/57344 train_time:16501645ms step_avg:574.17ms +step:28741/57344 train_time:16501662ms step_avg:574.15ms +step:28742/57344 train_time:16501909ms step_avg:574.14ms +step:28743/57344 train_time:16502458ms step_avg:574.14ms +grad accum step:7186/14336 +step:28744/57344 train_time:16503765ms step_avg:574.16ms +step:28745/57344 train_time:16503782ms step_avg:574.14ms +step:28746/57344 train_time:16504029ms step_avg:574.13ms +step:28747/57344 train_time:16504575ms step_avg:574.13ms +grad accum step:7187/14336 +step:28748/57344 train_time:16505917ms step_avg:574.16ms +step:28749/57344 train_time:16505934ms step_avg:574.14ms +step:28750/57344 train_time:16506187ms step_avg:574.13ms +step:28751/57344 train_time:16506747ms step_avg:574.13ms +grad accum step:7188/14336 +step:28752/57344 train_time:16508058ms step_avg:574.15ms +step:28753/57344 train_time:16508074ms step_avg:574.13ms +step:28754/57344 train_time:16508322ms step_avg:574.12ms +step:28755/57344 train_time:16508866ms step_avg:574.12ms +grad accum step:7189/14336 +step:28756/57344 train_time:16510190ms step_avg:574.15ms +step:28757/57344 train_time:16510207ms step_avg:574.13ms +step:28758/57344 train_time:16510456ms step_avg:574.12ms +step:28759/57344 train_time:16511020ms step_avg:574.12ms +grad accum step:7190/14336 +step:28760/57344 train_time:16512348ms step_avg:574.14ms +step:28761/57344 train_time:16512364ms step_avg:574.12ms +step:28762/57344 train_time:16512614ms step_avg:574.11ms +step:28763/57344 train_time:16513172ms step_avg:574.11ms +grad accum step:7191/14336 +step:28764/57344 train_time:16514473ms step_avg:574.14ms +step:28765/57344 train_time:16514490ms step_avg:574.12ms +step:28766/57344 train_time:16514736ms step_avg:574.11ms +step:28767/57344 train_time:16515279ms step_avg:574.11ms +grad accum step:7192/14336 +step:28768/57344 train_time:16516567ms step_avg:574.13ms +step:28769/57344 train_time:16516585ms step_avg:574.11ms +step:28770/57344 train_time:16516830ms step_avg:574.10ms +step:28771/57344 train_time:16517378ms step_avg:574.10ms +grad accum step:7193/14336 +step:28772/57344 train_time:16518703ms step_avg:574.12ms +step:28773/57344 train_time:16518720ms step_avg:574.10ms +step:28774/57344 train_time:16518964ms step_avg:574.09ms +step:28775/57344 train_time:16519503ms step_avg:574.09ms +grad accum step:7194/14336 +step:28776/57344 train_time:16520809ms step_avg:574.12ms +step:28777/57344 train_time:16520827ms step_avg:574.10ms +step:28778/57344 train_time:16521077ms step_avg:574.09ms +step:28779/57344 train_time:16521631ms step_avg:574.09ms +grad accum step:7195/14336 +step:28780/57344 train_time:16522921ms step_avg:574.11ms +step:28781/57344 train_time:16522938ms step_avg:574.09ms +step:28782/57344 train_time:16523182ms step_avg:574.08ms +step:28783/57344 train_time:16523733ms step_avg:574.08ms +grad accum step:7196/14336 +step:28784/57344 train_time:16525032ms step_avg:574.10ms +step:28785/57344 train_time:16525049ms step_avg:574.09ms +step:28786/57344 train_time:16525305ms step_avg:574.07ms +step:28787/57344 train_time:16525881ms step_avg:574.07ms +grad accum step:7197/14336 +step:28788/57344 train_time:16527193ms step_avg:574.10ms +step:28789/57344 train_time:16527210ms step_avg:574.08ms +step:28790/57344 train_time:16527460ms step_avg:574.07ms +step:28791/57344 train_time:16528016ms step_avg:574.07ms +grad accum step:7198/14336 +step:28792/57344 train_time:16529320ms step_avg:574.09ms +step:28793/57344 train_time:16529338ms step_avg:574.07ms +step:28794/57344 train_time:16529588ms step_avg:574.06ms +step:28795/57344 train_time:16530144ms step_avg:574.06ms +grad accum step:7199/14336 +step:28796/57344 train_time:16531498ms step_avg:574.09ms +step:28797/57344 train_time:16531515ms step_avg:574.07ms +step:28798/57344 train_time:16531771ms step_avg:574.06ms +step:28799/57344 train_time:16532340ms step_avg:574.06ms +grad accum step:7200/14336 +step:28800/57344 train_time:16533630ms step_avg:574.08ms +step:28800/57344 val_loss:6.493935 train_time:16533631ms step_avg:574.08ms +step:28801/57344 train_time:16533643ms step_avg:574.06ms +step:28802/57344 train_time:16533872ms step_avg:574.05ms +step:28803/57344 train_time:16534430ms step_avg:574.05ms +grad accum step:7201/14336 +step:28804/57344 train_time:16535739ms step_avg:574.08ms +step:28805/57344 train_time:16535756ms step_avg:574.06ms +step:28806/57344 train_time:16536001ms step_avg:574.05ms +step:28807/57344 train_time:16536552ms step_avg:574.05ms +grad accum step:7202/14336 +step:28808/57344 train_time:16537885ms step_avg:574.07ms +step:28809/57344 train_time:16537902ms step_avg:574.05ms +step:28810/57344 train_time:16538169ms step_avg:574.04ms +step:28811/57344 train_time:16538782ms step_avg:574.04ms +grad accum step:7203/14336 +step:28812/57344 train_time:16540112ms step_avg:574.07ms +step:28813/57344 train_time:16540129ms step_avg:574.05ms +step:28814/57344 train_time:16540380ms step_avg:574.04ms +step:28815/57344 train_time:16540948ms step_avg:574.04ms +grad accum step:7204/14336 +step:28816/57344 train_time:16542271ms step_avg:574.07ms +step:28817/57344 train_time:16542288ms step_avg:574.05ms +step:28818/57344 train_time:16542536ms step_avg:574.03ms +step:28819/57344 train_time:16543084ms step_avg:574.03ms +grad accum step:7205/14336 +step:28820/57344 train_time:16544416ms step_avg:574.06ms +step:28821/57344 train_time:16544434ms step_avg:574.04ms +step:28822/57344 train_time:16544686ms step_avg:574.03ms +step:28823/57344 train_time:16545246ms step_avg:574.03ms +grad accum step:7206/14336 +step:28824/57344 train_time:16546570ms step_avg:574.06ms +step:28825/57344 train_time:16546587ms step_avg:574.04ms +step:28826/57344 train_time:16546845ms step_avg:574.03ms +step:28827/57344 train_time:16547413ms step_avg:574.02ms +grad accum step:7207/14336 +step:28828/57344 train_time:16548727ms step_avg:574.05ms +step:28829/57344 train_time:16548744ms step_avg:574.03ms +step:28830/57344 train_time:16548992ms step_avg:574.02ms +step:28831/57344 train_time:16549537ms step_avg:574.02ms +grad accum step:7208/14336 +step:28832/57344 train_time:16550811ms step_avg:574.04ms +step:28833/57344 train_time:16550827ms step_avg:574.02ms +step:28834/57344 train_time:16551075ms step_avg:574.01ms +step:28835/57344 train_time:16551622ms step_avg:574.01ms +grad accum step:7209/14336 +step:28836/57344 train_time:16552915ms step_avg:574.04ms +step:28837/57344 train_time:16552932ms step_avg:574.02ms +step:28838/57344 train_time:16553180ms step_avg:574.01ms +step:28839/57344 train_time:16553732ms step_avg:574.01ms +grad accum step:7210/14336 +step:28840/57344 train_time:16555028ms step_avg:574.03ms +step:28841/57344 train_time:16555044ms step_avg:574.01ms +step:28842/57344 train_time:16555298ms step_avg:574.00ms +step:28843/57344 train_time:16555855ms step_avg:574.00ms +grad accum step:7211/14336 +step:28844/57344 train_time:16557149ms step_avg:574.02ms +step:28845/57344 train_time:16557166ms step_avg:574.00ms +step:28846/57344 train_time:16557413ms step_avg:573.99ms +step:28847/57344 train_time:16557963ms step_avg:573.99ms +grad accum step:7212/14336 +step:28848/57344 train_time:16559280ms step_avg:574.02ms +step:28849/57344 train_time:16559297ms step_avg:574.00ms +step:28850/57344 train_time:16559546ms step_avg:573.99ms +step:28851/57344 train_time:16560111ms step_avg:573.99ms +grad accum step:7213/14336 +step:28852/57344 train_time:16561449ms step_avg:574.01ms +step:28853/57344 train_time:16561466ms step_avg:573.99ms +step:28854/57344 train_time:16561713ms step_avg:573.98ms +step:28855/57344 train_time:16562259ms step_avg:573.98ms +grad accum step:7214/14336 +step:28856/57344 train_time:16563571ms step_avg:574.01ms +step:28857/57344 train_time:16563589ms step_avg:573.99ms +step:28858/57344 train_time:16563834ms step_avg:573.98ms +step:28859/57344 train_time:16564386ms step_avg:573.98ms +grad accum step:7215/14336 +step:28860/57344 train_time:16565679ms step_avg:574.00ms +step:28861/57344 train_time:16565696ms step_avg:573.98ms +step:28862/57344 train_time:16565943ms step_avg:573.97ms +step:28863/57344 train_time:16566489ms step_avg:573.97ms +grad accum step:7216/14336 +step:28864/57344 train_time:16567804ms step_avg:574.00ms +step:28864/57344 val_loss:6.495569 train_time:16567805ms step_avg:574.00ms +step:28865/57344 train_time:16567817ms step_avg:573.98ms +step:28866/57344 train_time:16568038ms step_avg:573.96ms +step:28867/57344 train_time:16568575ms step_avg:573.96ms +grad accum step:7217/14336 +step:28868/57344 train_time:16569897ms step_avg:573.99ms +step:28869/57344 train_time:16569914ms step_avg:573.97ms +step:28870/57344 train_time:16570161ms step_avg:573.96ms +step:28871/57344 train_time:16570704ms step_avg:573.96ms +grad accum step:7218/14336 +step:28872/57344 train_time:16572013ms step_avg:573.98ms +step:28873/57344 train_time:16572030ms step_avg:573.96ms +step:28874/57344 train_time:16572277ms step_avg:573.95ms +step:28875/57344 train_time:16572824ms step_avg:573.95ms +grad accum step:7219/14336 +step:28876/57344 train_time:16574125ms step_avg:573.98ms +step:28877/57344 train_time:16574142ms step_avg:573.96ms +step:28878/57344 train_time:16574393ms step_avg:573.95ms +step:28879/57344 train_time:16574946ms step_avg:573.94ms +grad accum step:7220/14336 +step:28880/57344 train_time:16576225ms step_avg:573.97ms +step:28881/57344 train_time:16576242ms step_avg:573.95ms +step:28882/57344 train_time:16576489ms step_avg:573.94ms +step:28883/57344 train_time:16577032ms step_avg:573.94ms +grad accum step:7221/14336 +step:28884/57344 train_time:16578339ms step_avg:573.96ms +step:28885/57344 train_time:16578355ms step_avg:573.94ms +step:28886/57344 train_time:16578607ms step_avg:573.93ms +step:28887/57344 train_time:16579165ms step_avg:573.93ms +grad accum step:7222/14336 +step:28888/57344 train_time:16580501ms step_avg:573.96ms +step:28889/57344 train_time:16580518ms step_avg:573.94ms +step:28890/57344 train_time:16580771ms step_avg:573.93ms +step:28891/57344 train_time:16581341ms step_avg:573.93ms +grad accum step:7223/14336 +step:28892/57344 train_time:16582684ms step_avg:573.95ms +step:28893/57344 train_time:16582701ms step_avg:573.93ms +step:28894/57344 train_time:16582964ms step_avg:573.92ms +step:28895/57344 train_time:16583560ms step_avg:573.92ms +grad accum step:7224/14336 +step:28896/57344 train_time:16584887ms step_avg:573.95ms +step:28897/57344 train_time:16584904ms step_avg:573.93ms +step:28898/57344 train_time:16585154ms step_avg:573.92ms +step:28899/57344 train_time:16585713ms step_avg:573.92ms +grad accum step:7225/14336 +step:28900/57344 train_time:16587017ms step_avg:573.95ms +step:28901/57344 train_time:16587033ms step_avg:573.93ms +step:28902/57344 train_time:16587278ms step_avg:573.91ms +step:28903/57344 train_time:16587822ms step_avg:573.91ms +grad accum step:7226/14336 +step:28904/57344 train_time:16589119ms step_avg:573.94ms +step:28905/57344 train_time:16589137ms step_avg:573.92ms +step:28906/57344 train_time:16589385ms step_avg:573.91ms +step:28907/57344 train_time:16589930ms step_avg:573.91ms +grad accum step:7227/14336 +step:28908/57344 train_time:16591250ms step_avg:573.93ms +step:28909/57344 train_time:16591266ms step_avg:573.91ms +step:28910/57344 train_time:16591511ms step_avg:573.90ms +step:28911/57344 train_time:16592063ms step_avg:573.90ms +grad accum step:7228/14336 +step:28912/57344 train_time:16593388ms step_avg:573.93ms +step:28913/57344 train_time:16593405ms step_avg:573.91ms +step:28914/57344 train_time:16593651ms step_avg:573.90ms +step:28915/57344 train_time:16594208ms step_avg:573.90ms +grad accum step:7229/14336 +step:28916/57344 train_time:16595557ms step_avg:573.92ms +step:28917/57344 train_time:16595574ms step_avg:573.90ms +step:28918/57344 train_time:16595831ms step_avg:573.89ms +step:28919/57344 train_time:16596392ms step_avg:573.89ms +grad accum step:7230/14336 +step:28920/57344 train_time:16597667ms step_avg:573.92ms +step:28921/57344 train_time:16597684ms step_avg:573.90ms +step:28922/57344 train_time:16597929ms step_avg:573.89ms +step:28923/57344 train_time:16598476ms step_avg:573.89ms +grad accum step:7231/14336 +step:28924/57344 train_time:16599757ms step_avg:573.91ms +step:28925/57344 train_time:16599774ms step_avg:573.89ms +step:28926/57344 train_time:16600021ms step_avg:573.88ms +step:28927/57344 train_time:16600570ms step_avg:573.88ms +grad accum step:7232/14336 +step:28928/57344 train_time:16601893ms step_avg:573.90ms +step:28928/57344 val_loss:6.491815 train_time:16601894ms step_avg:573.90ms +step:28929/57344 train_time:16601906ms step_avg:573.88ms +step:28930/57344 train_time:16602145ms step_avg:573.87ms +step:28931/57344 train_time:16602735ms step_avg:573.87ms +grad accum step:7233/14336 +step:28932/57344 train_time:16604149ms step_avg:573.90ms +step:28933/57344 train_time:16604165ms step_avg:573.88ms +step:28934/57344 train_time:16604412ms step_avg:573.87ms +step:28935/57344 train_time:16604960ms step_avg:573.87ms +grad accum step:7234/14336 +step:28936/57344 train_time:16606287ms step_avg:573.90ms +step:28937/57344 train_time:16606304ms step_avg:573.88ms +step:28938/57344 train_time:16606548ms step_avg:573.87ms +step:28939/57344 train_time:16607098ms step_avg:573.87ms +grad accum step:7235/14336 +step:28940/57344 train_time:16608427ms step_avg:573.89ms +step:28941/57344 train_time:16608444ms step_avg:573.87ms +step:28942/57344 train_time:16608694ms step_avg:573.86ms +step:28943/57344 train_time:16609257ms step_avg:573.86ms +grad accum step:7236/14336 +step:28944/57344 train_time:16610593ms step_avg:573.89ms +step:28945/57344 train_time:16610610ms step_avg:573.87ms +step:28946/57344 train_time:16610857ms step_avg:573.86ms +step:28947/57344 train_time:16611408ms step_avg:573.86ms +grad accum step:7237/14336 +step:28948/57344 train_time:16612734ms step_avg:573.88ms +step:28949/57344 train_time:16612751ms step_avg:573.86ms +step:28950/57344 train_time:16612994ms step_avg:573.85ms +step:28951/57344 train_time:16613530ms step_avg:573.85ms +grad accum step:7238/14336 +step:28952/57344 train_time:16614852ms step_avg:573.88ms +step:28953/57344 train_time:16614869ms step_avg:573.86ms +step:28954/57344 train_time:16615119ms step_avg:573.85ms +step:28955/57344 train_time:16615668ms step_avg:573.84ms +grad accum step:7239/14336 +step:28956/57344 train_time:16617032ms step_avg:573.87ms +step:28957/57344 train_time:16617049ms step_avg:573.85ms +step:28958/57344 train_time:16617302ms step_avg:573.84ms +step:28959/57344 train_time:16617866ms step_avg:573.84ms +grad accum step:7240/14336 +step:28960/57344 train_time:16619164ms step_avg:573.87ms +step:28961/57344 train_time:16619181ms step_avg:573.85ms +step:28962/57344 train_time:16619433ms step_avg:573.84ms +step:28963/57344 train_time:16619988ms step_avg:573.84ms +grad accum step:7241/14336 +step:28964/57344 train_time:16621315ms step_avg:573.86ms +step:28965/57344 train_time:16621332ms step_avg:573.84ms +step:28966/57344 train_time:16621583ms step_avg:573.83ms +step:28967/57344 train_time:16622135ms step_avg:573.83ms +grad accum step:7242/14336 +step:28968/57344 train_time:16623448ms step_avg:573.86ms +step:28969/57344 train_time:16623462ms step_avg:573.84ms +step:28970/57344 train_time:16623712ms step_avg:573.83ms +step:28971/57344 train_time:16624272ms step_avg:573.82ms +grad accum step:7243/14336 +step:28972/57344 train_time:16625613ms step_avg:573.85ms +step:28973/57344 train_time:16625629ms step_avg:573.83ms +step:28974/57344 train_time:16625881ms step_avg:573.82ms +step:28975/57344 train_time:16626436ms step_avg:573.82ms +grad accum step:7244/14336 +step:28976/57344 train_time:16627734ms step_avg:573.85ms +step:28977/57344 train_time:16627751ms step_avg:573.83ms +step:28978/57344 train_time:16627995ms step_avg:573.81ms +step:28979/57344 train_time:16628546ms step_avg:573.81ms +grad accum step:7245/14336 +step:28980/57344 train_time:16629847ms step_avg:573.84ms +step:28981/57344 train_time:16629863ms step_avg:573.82ms +step:28982/57344 train_time:16630116ms step_avg:573.81ms +step:28983/57344 train_time:16630683ms step_avg:573.81ms +grad accum step:7246/14336 +step:28984/57344 train_time:16631999ms step_avg:573.83ms +step:28985/57344 train_time:16632015ms step_avg:573.81ms +step:28986/57344 train_time:16632264ms step_avg:573.80ms +step:28987/57344 train_time:16632807ms step_avg:573.80ms +grad accum step:7247/14336 +step:28988/57344 train_time:16634109ms step_avg:573.83ms +step:28989/57344 train_time:16634125ms step_avg:573.81ms +step:28990/57344 train_time:16634383ms step_avg:573.80ms +step:28991/57344 train_time:16634957ms step_avg:573.80ms +grad accum step:7248/14336 +step:28992/57344 train_time:16636259ms step_avg:573.82ms +step:28992/57344 val_loss:6.487421 train_time:16636259ms step_avg:573.82ms +step:28993/57344 train_time:16636271ms step_avg:573.80ms +step:28994/57344 train_time:16636494ms step_avg:573.79ms +step:28995/57344 train_time:16637031ms step_avg:573.79ms +grad accum step:7249/14336 +step:28996/57344 train_time:16638305ms step_avg:573.81ms +step:28997/57344 train_time:16638322ms step_avg:573.79ms +step:28998/57344 train_time:16638571ms step_avg:573.78ms +step:28999/57344 train_time:16639121ms step_avg:573.78ms +grad accum step:7250/14336 +step:29000/57344 train_time:16640412ms step_avg:573.81ms +step:29001/57344 train_time:16640429ms step_avg:573.79ms +step:29002/57344 train_time:16640673ms step_avg:573.78ms +step:29003/57344 train_time:16641219ms step_avg:573.78ms +grad accum step:7251/14336 +step:29004/57344 train_time:16642515ms step_avg:573.80ms +step:29005/57344 train_time:16642532ms step_avg:573.78ms +step:29006/57344 train_time:16642785ms step_avg:573.77ms +step:29007/57344 train_time:16643351ms step_avg:573.77ms +grad accum step:7252/14336 +step:29008/57344 train_time:16644668ms step_avg:573.80ms +step:29009/57344 train_time:16644684ms step_avg:573.78ms +step:29010/57344 train_time:16644939ms step_avg:573.77ms +step:29011/57344 train_time:16645502ms step_avg:573.77ms +grad accum step:7253/14336 +step:29012/57344 train_time:16646793ms step_avg:573.79ms +step:29013/57344 train_time:16646811ms step_avg:573.77ms +step:29014/57344 train_time:16647061ms step_avg:573.76ms +step:29015/57344 train_time:16647616ms step_avg:573.76ms +grad accum step:7254/14336 +step:29016/57344 train_time:16648928ms step_avg:573.78ms +step:29017/57344 train_time:16648945ms step_avg:573.77ms +step:29018/57344 train_time:16649191ms step_avg:573.75ms +step:29019/57344 train_time:16649733ms step_avg:573.75ms +grad accum step:7255/14336 +step:29020/57344 train_time:16651053ms step_avg:573.78ms +step:29021/57344 train_time:16651070ms step_avg:573.76ms +step:29022/57344 train_time:16651318ms step_avg:573.75ms +step:29023/57344 train_time:16651866ms step_avg:573.75ms +grad accum step:7256/14336 +step:29024/57344 train_time:16653203ms step_avg:573.77ms +step:29025/57344 train_time:16653220ms step_avg:573.75ms +step:29026/57344 train_time:16653466ms step_avg:573.74ms +step:29027/57344 train_time:16654003ms step_avg:573.74ms +grad accum step:7257/14336 +step:29028/57344 train_time:16655283ms step_avg:573.77ms +step:29029/57344 train_time:16655300ms step_avg:573.75ms +step:29030/57344 train_time:16655548ms step_avg:573.74ms +step:29031/57344 train_time:16656091ms step_avg:573.73ms +grad accum step:7258/14336 +step:29032/57344 train_time:16657384ms step_avg:573.76ms +step:29033/57344 train_time:16657402ms step_avg:573.74ms +step:29034/57344 train_time:16657648ms step_avg:573.73ms +step:29035/57344 train_time:16658204ms step_avg:573.73ms +grad accum step:7259/14336 +step:29036/57344 train_time:16659526ms step_avg:573.75ms +step:29037/57344 train_time:16659543ms step_avg:573.73ms +step:29038/57344 train_time:16659795ms step_avg:573.72ms +step:29039/57344 train_time:16660358ms step_avg:573.72ms +grad accum step:7260/14336 +step:29040/57344 train_time:16661689ms step_avg:573.75ms +step:29041/57344 train_time:16661706ms step_avg:573.73ms +step:29042/57344 train_time:16661955ms step_avg:573.72ms +step:29043/57344 train_time:16662515ms step_avg:573.72ms +grad accum step:7261/14336 +step:29044/57344 train_time:16663844ms step_avg:573.74ms +step:29045/57344 train_time:16663861ms step_avg:573.73ms +step:29046/57344 train_time:16664111ms step_avg:573.71ms +step:29047/57344 train_time:16664653ms step_avg:573.71ms +grad accum step:7262/14336 +step:29048/57344 train_time:16665934ms step_avg:573.74ms +step:29049/57344 train_time:16665951ms step_avg:573.72ms +step:29050/57344 train_time:16666197ms step_avg:573.71ms +step:29051/57344 train_time:16666738ms step_avg:573.71ms +grad accum step:7263/14336 +step:29052/57344 train_time:16668042ms step_avg:573.73ms +step:29053/57344 train_time:16668058ms step_avg:573.71ms +step:29054/57344 train_time:16668307ms step_avg:573.70ms +step:29055/57344 train_time:16668856ms step_avg:573.70ms +grad accum step:7264/14336 +step:29056/57344 train_time:16672426ms step_avg:573.80ms +step:29056/57344 val_loss:6.480526 train_time:16672426ms step_avg:573.80ms +step:29057/57344 train_time:16672438ms step_avg:573.78ms +step:29058/57344 train_time:16672662ms step_avg:573.77ms +step:29059/57344 train_time:16673209ms step_avg:573.77ms +grad accum step:7265/14336 +step:29060/57344 train_time:16674525ms step_avg:573.80ms +step:29061/57344 train_time:16674542ms step_avg:573.78ms +step:29062/57344 train_time:16674793ms step_avg:573.77ms +step:29063/57344 train_time:16675341ms step_avg:573.77ms +grad accum step:7266/14336 +step:29064/57344 train_time:16676651ms step_avg:573.79ms +step:29065/57344 train_time:16676668ms step_avg:573.77ms +step:29066/57344 train_time:16676917ms step_avg:573.76ms +step:29067/57344 train_time:16677467ms step_avg:573.76ms +grad accum step:7267/14336 +step:29068/57344 train_time:16678770ms step_avg:573.78ms +step:29069/57344 train_time:16678787ms step_avg:573.77ms +step:29070/57344 train_time:16679034ms step_avg:573.75ms +step:29071/57344 train_time:16679584ms step_avg:573.75ms +grad accum step:7268/14336 +step:29072/57344 train_time:16680863ms step_avg:573.78ms +step:29073/57344 train_time:16680880ms step_avg:573.76ms +step:29074/57344 train_time:16681125ms step_avg:573.75ms +step:29075/57344 train_time:16681673ms step_avg:573.75ms +grad accum step:7269/14336 +step:29076/57344 train_time:16683031ms step_avg:573.77ms +step:29077/57344 train_time:16683048ms step_avg:573.75ms +step:29078/57344 train_time:16683297ms step_avg:573.74ms +step:29079/57344 train_time:16683850ms step_avg:573.74ms +grad accum step:7270/14336 +step:29080/57344 train_time:16685153ms step_avg:573.77ms +step:29081/57344 train_time:16685170ms step_avg:573.75ms +step:29082/57344 train_time:16685418ms step_avg:573.74ms +step:29083/57344 train_time:16685965ms step_avg:573.74ms +grad accum step:7271/14336 +step:29084/57344 train_time:16687270ms step_avg:573.76ms +step:29085/57344 train_time:16687287ms step_avg:573.74ms +step:29086/57344 train_time:16687539ms step_avg:573.73ms +step:29087/57344 train_time:16688097ms step_avg:573.73ms +grad accum step:7272/14336 +step:29088/57344 train_time:16689412ms step_avg:573.76ms +step:29089/57344 train_time:16689429ms step_avg:573.74ms +step:29090/57344 train_time:16689679ms step_avg:573.73ms +step:29091/57344 train_time:16690236ms step_avg:573.73ms +grad accum step:7273/14336 +step:29092/57344 train_time:16691535ms step_avg:573.75ms +step:29093/57344 train_time:16691552ms step_avg:573.73ms +step:29094/57344 train_time:16691803ms step_avg:573.72ms +step:29095/57344 train_time:16692369ms step_avg:573.72ms +grad accum step:7274/14336 +step:29096/57344 train_time:16693687ms step_avg:573.75ms +step:29097/57344 train_time:16693704ms step_avg:573.73ms +step:29098/57344 train_time:16693949ms step_avg:573.71ms +step:29099/57344 train_time:16694487ms step_avg:573.71ms +grad accum step:7275/14336 +step:29100/57344 train_time:16695784ms step_avg:573.74ms +step:29101/57344 train_time:16695801ms step_avg:573.72ms +step:29102/57344 train_time:16696051ms step_avg:573.71ms +step:29103/57344 train_time:16696605ms step_avg:573.71ms +grad accum step:7276/14336 +step:29104/57344 train_time:16697896ms step_avg:573.73ms +step:29105/57344 train_time:16697913ms step_avg:573.71ms +step:29106/57344 train_time:16698160ms step_avg:573.70ms +step:29107/57344 train_time:16698710ms step_avg:573.70ms +grad accum step:7277/14336 +step:29108/57344 train_time:16700001ms step_avg:573.73ms +step:29109/57344 train_time:16700018ms step_avg:573.71ms +step:29110/57344 train_time:16700268ms step_avg:573.70ms +step:29111/57344 train_time:16700820ms step_avg:573.69ms +grad accum step:7278/14336 +step:29112/57344 train_time:16702108ms step_avg:573.72ms +step:29113/57344 train_time:16702125ms step_avg:573.70ms +step:29114/57344 train_time:16702373ms step_avg:573.69ms +step:29115/57344 train_time:16702922ms step_avg:573.69ms +grad accum step:7279/14336 +step:29116/57344 train_time:16704236ms step_avg:573.71ms +step:29117/57344 train_time:16704253ms step_avg:573.69ms +step:29118/57344 train_time:16704500ms step_avg:573.68ms +step:29119/57344 train_time:16705045ms step_avg:573.68ms +grad accum step:7280/14336 +step:29120/57344 train_time:16706354ms step_avg:573.71ms +step:29120/57344 val_loss:6.472998 train_time:16706354ms step_avg:573.71ms +step:29121/57344 train_time:16706380ms step_avg:573.69ms +step:29122/57344 train_time:16706590ms step_avg:573.68ms +step:29123/57344 train_time:16707138ms step_avg:573.68ms +grad accum step:7281/14336 +step:29124/57344 train_time:16708449ms step_avg:573.70ms +step:29125/57344 train_time:16708466ms step_avg:573.68ms +step:29126/57344 train_time:16708714ms step_avg:573.67ms +step:29127/57344 train_time:16709257ms step_avg:573.67ms +grad accum step:7282/14336 +step:29128/57344 train_time:16710582ms step_avg:573.69ms +step:29129/57344 train_time:16710599ms step_avg:573.68ms +step:29130/57344 train_time:16710854ms step_avg:573.66ms +step:29131/57344 train_time:16711429ms step_avg:573.66ms +grad accum step:7283/14336 +step:29132/57344 train_time:16712721ms step_avg:573.69ms +step:29133/57344 train_time:16712738ms step_avg:573.67ms +step:29134/57344 train_time:16712985ms step_avg:573.66ms +step:29135/57344 train_time:16713536ms step_avg:573.66ms +grad accum step:7284/14336 +step:29136/57344 train_time:16714843ms step_avg:573.68ms +step:29137/57344 train_time:16714860ms step_avg:573.66ms +step:29138/57344 train_time:16715107ms step_avg:573.65ms +step:29139/57344 train_time:16715654ms step_avg:573.65ms +grad accum step:7285/14336 +step:29140/57344 train_time:16716965ms step_avg:573.68ms +step:29141/57344 train_time:16716982ms step_avg:573.66ms +step:29142/57344 train_time:16717231ms step_avg:573.65ms +step:29143/57344 train_time:16717787ms step_avg:573.65ms +grad accum step:7286/14336 +step:29144/57344 train_time:16719100ms step_avg:573.67ms +step:29145/57344 train_time:16719117ms step_avg:573.65ms +step:29146/57344 train_time:16719365ms step_avg:573.64ms +step:29147/57344 train_time:16719916ms step_avg:573.64ms +grad accum step:7287/14336 +step:29148/57344 train_time:16721238ms step_avg:573.67ms +step:29149/57344 train_time:16721254ms step_avg:573.65ms +step:29150/57344 train_time:16721505ms step_avg:573.64ms +step:29151/57344 train_time:16722065ms step_avg:573.64ms +grad accum step:7288/14336 +step:29152/57344 train_time:16723388ms step_avg:573.66ms +step:29153/57344 train_time:16723405ms step_avg:573.64ms +step:29154/57344 train_time:16723652ms step_avg:573.63ms +step:29155/57344 train_time:16724200ms step_avg:573.63ms +grad accum step:7289/14336 +step:29156/57344 train_time:16725529ms step_avg:573.66ms +step:29157/57344 train_time:16725546ms step_avg:573.64ms +step:29158/57344 train_time:16725793ms step_avg:573.63ms +step:29159/57344 train_time:16726344ms step_avg:573.63ms +grad accum step:7290/14336 +step:29160/57344 train_time:16727626ms step_avg:573.65ms +step:29161/57344 train_time:16727643ms step_avg:573.63ms +step:29162/57344 train_time:16727891ms step_avg:573.62ms +step:29163/57344 train_time:16728438ms step_avg:573.62ms +grad accum step:7291/14336 +step:29164/57344 train_time:16729732ms step_avg:573.64ms +step:29165/57344 train_time:16729748ms step_avg:573.62ms +step:29166/57344 train_time:16729996ms step_avg:573.61ms +step:29167/57344 train_time:16730540ms step_avg:573.61ms +grad accum step:7292/14336 +step:29168/57344 train_time:16731815ms step_avg:573.64ms +step:29169/57344 train_time:16731832ms step_avg:573.62ms +step:29170/57344 train_time:16732079ms step_avg:573.61ms +step:29171/57344 train_time:16732624ms step_avg:573.60ms +grad accum step:7293/14336 +step:29172/57344 train_time:16733901ms step_avg:573.63ms +step:29173/57344 train_time:16733918ms step_avg:573.61ms +step:29174/57344 train_time:16734163ms step_avg:573.60ms +step:29175/57344 train_time:16734710ms step_avg:573.60ms +grad accum step:7294/14336 +step:29176/57344 train_time:16736022ms step_avg:573.62ms +step:29177/57344 train_time:16736039ms step_avg:573.60ms +step:29178/57344 train_time:16736288ms step_avg:573.59ms +step:29179/57344 train_time:16736842ms step_avg:573.59ms +grad accum step:7295/14336 +step:29180/57344 train_time:16738152ms step_avg:573.62ms +step:29181/57344 train_time:16738169ms step_avg:573.60ms +step:29182/57344 train_time:16738421ms step_avg:573.59ms +step:29183/57344 train_time:16738974ms step_avg:573.59ms +grad accum step:7296/14336 +step:29184/57344 train_time:16740296ms step_avg:573.61ms +step:29184/57344 val_loss:6.470836 train_time:16740297ms step_avg:573.61ms +step:29185/57344 train_time:16740309ms step_avg:573.59ms +step:29186/57344 train_time:16740527ms step_avg:573.58ms +step:29187/57344 train_time:16741068ms step_avg:573.58ms +grad accum step:7297/14336 +step:29188/57344 train_time:16742365ms step_avg:573.60ms +step:29189/57344 train_time:16742382ms step_avg:573.59ms +step:29190/57344 train_time:16742631ms step_avg:573.57ms +step:29191/57344 train_time:16743178ms step_avg:573.57ms +grad accum step:7298/14336 +step:29192/57344 train_time:16744478ms step_avg:573.60ms +step:29193/57344 train_time:16744494ms step_avg:573.58ms +step:29194/57344 train_time:16744743ms step_avg:573.57ms +step:29195/57344 train_time:16745296ms step_avg:573.57ms +grad accum step:7299/14336 +step:29196/57344 train_time:16746632ms step_avg:573.59ms +step:29197/57344 train_time:16746649ms step_avg:573.57ms +step:29198/57344 train_time:16746895ms step_avg:573.56ms +step:29199/57344 train_time:16747448ms step_avg:573.56ms +grad accum step:7300/14336 +step:29200/57344 train_time:16748785ms step_avg:573.59ms +step:29201/57344 train_time:16748802ms step_avg:573.57ms +step:29202/57344 train_time:16749050ms step_avg:573.56ms +step:29203/57344 train_time:16749601ms step_avg:573.56ms +grad accum step:7301/14336 +step:29204/57344 train_time:16750907ms step_avg:573.58ms +step:29205/57344 train_time:16750924ms step_avg:573.56ms +step:29206/57344 train_time:16751170ms step_avg:573.55ms +step:29207/57344 train_time:16751715ms step_avg:573.55ms +grad accum step:7302/14336 +step:29208/57344 train_time:16753022ms step_avg:573.58ms +step:29209/57344 train_time:16753039ms step_avg:573.56ms +step:29210/57344 train_time:16753283ms step_avg:573.55ms +step:29211/57344 train_time:16753835ms step_avg:573.55ms +grad accum step:7303/14336 +step:29212/57344 train_time:16755171ms step_avg:573.57ms +step:29213/57344 train_time:16755187ms step_avg:573.55ms +step:29214/57344 train_time:16755435ms step_avg:573.54ms +step:29215/57344 train_time:16755979ms step_avg:573.54ms +grad accum step:7304/14336 +step:29216/57344 train_time:16757254ms step_avg:573.56ms +step:29217/57344 train_time:16757271ms step_avg:573.55ms +step:29218/57344 train_time:16757515ms step_avg:573.53ms +step:29219/57344 train_time:16758066ms step_avg:573.53ms +grad accum step:7305/14336 +step:29220/57344 train_time:16759377ms step_avg:573.56ms +step:29221/57344 train_time:16759394ms step_avg:573.54ms +step:29222/57344 train_time:16759639ms step_avg:573.53ms +step:29223/57344 train_time:16760186ms step_avg:573.53ms +grad accum step:7306/14336 +step:29224/57344 train_time:16761489ms step_avg:573.55ms +step:29225/57344 train_time:16761506ms step_avg:573.53ms +step:29226/57344 train_time:16761752ms step_avg:573.52ms +step:29227/57344 train_time:16762294ms step_avg:573.52ms +grad accum step:7307/14336 +step:29228/57344 train_time:16763573ms step_avg:573.54ms +step:29229/57344 train_time:16763590ms step_avg:573.53ms +step:29230/57344 train_time:16763839ms step_avg:573.51ms +step:29231/57344 train_time:16764387ms step_avg:573.51ms +grad accum step:7308/14336 +step:29232/57344 train_time:16765693ms step_avg:573.54ms +step:29233/57344 train_time:16765710ms step_avg:573.52ms +step:29234/57344 train_time:16765957ms step_avg:573.51ms +step:29235/57344 train_time:16766503ms step_avg:573.51ms +grad accum step:7309/14336 +step:29236/57344 train_time:16767794ms step_avg:573.53ms +step:29237/57344 train_time:16767811ms step_avg:573.51ms +step:29238/57344 train_time:16768056ms step_avg:573.50ms +step:29239/57344 train_time:16768600ms step_avg:573.50ms +grad accum step:7310/14336 +step:29240/57344 train_time:16769885ms step_avg:573.53ms +step:29241/57344 train_time:16769901ms step_avg:573.51ms +step:29242/57344 train_time:16770154ms step_avg:573.50ms +step:29243/57344 train_time:16770714ms step_avg:573.49ms +grad accum step:7311/14336 +step:29244/57344 train_time:16772025ms step_avg:573.52ms +step:29245/57344 train_time:16772042ms step_avg:573.50ms +step:29246/57344 train_time:16772292ms step_avg:573.49ms +step:29247/57344 train_time:16772851ms step_avg:573.49ms +grad accum step:7312/14336 +step:29248/57344 train_time:16774149ms step_avg:573.51ms +step:29248/57344 val_loss:6.461094 train_time:16774150ms step_avg:573.51ms +step:29249/57344 train_time:16774162ms step_avg:573.50ms +step:29250/57344 train_time:16774383ms step_avg:573.48ms +step:29251/57344 train_time:16774921ms step_avg:573.48ms +grad accum step:7313/14336 +step:29252/57344 train_time:16776222ms step_avg:573.51ms +step:29253/57344 train_time:16776239ms step_avg:573.49ms +step:29254/57344 train_time:16776491ms step_avg:573.48ms +step:29255/57344 train_time:16777050ms step_avg:573.48ms +grad accum step:7314/14336 +step:29256/57344 train_time:16778326ms step_avg:573.50ms +step:29257/57344 train_time:16778343ms step_avg:573.48ms +step:29258/57344 train_time:16778587ms step_avg:573.47ms +step:29259/57344 train_time:16779128ms step_avg:573.47ms +grad accum step:7315/14336 +step:29260/57344 train_time:16780420ms step_avg:573.49ms +step:29261/57344 train_time:16780437ms step_avg:573.47ms +step:29262/57344 train_time:16780688ms step_avg:573.46ms +step:29263/57344 train_time:16781238ms step_avg:573.46ms +grad accum step:7316/14336 +step:29264/57344 train_time:16782534ms step_avg:573.49ms +step:29265/57344 train_time:16782549ms step_avg:573.47ms +step:29266/57344 train_time:16782799ms step_avg:573.46ms +step:29267/57344 train_time:16783359ms step_avg:573.46ms +grad accum step:7317/14336 +step:29268/57344 train_time:16784652ms step_avg:573.48ms +step:29269/57344 train_time:16784669ms step_avg:573.46ms +step:29270/57344 train_time:16784922ms step_avg:573.45ms +step:29271/57344 train_time:16785489ms step_avg:573.45ms +grad accum step:7318/14336 +step:29272/57344 train_time:16786785ms step_avg:573.48ms +step:29273/57344 train_time:16786802ms step_avg:573.46ms +step:29274/57344 train_time:16787046ms step_avg:573.45ms +step:29275/57344 train_time:16787591ms step_avg:573.44ms +grad accum step:7319/14336 +step:29276/57344 train_time:16788916ms step_avg:573.47ms +step:29277/57344 train_time:16788932ms step_avg:573.45ms +step:29278/57344 train_time:16789183ms step_avg:573.44ms +step:29279/57344 train_time:16789740ms step_avg:573.44ms +grad accum step:7320/14336 +step:29280/57344 train_time:16791072ms step_avg:573.47ms +step:29281/57344 train_time:16791089ms step_avg:573.45ms +step:29282/57344 train_time:16791335ms step_avg:573.44ms +step:29283/57344 train_time:16791884ms step_avg:573.43ms +grad accum step:7321/14336 +step:29284/57344 train_time:16793183ms step_avg:573.46ms +step:29285/57344 train_time:16793200ms step_avg:573.44ms +step:29286/57344 train_time:16793446ms step_avg:573.43ms +step:29287/57344 train_time:16793989ms step_avg:573.43ms +grad accum step:7322/14336 +step:29288/57344 train_time:16795313ms step_avg:573.45ms +step:29289/57344 train_time:16795330ms step_avg:573.43ms +step:29290/57344 train_time:16795571ms step_avg:573.42ms +step:29291/57344 train_time:16796107ms step_avg:573.42ms +grad accum step:7323/14336 +step:29292/57344 train_time:16799586ms step_avg:573.52ms +step:29293/57344 train_time:16799603ms step_avg:573.50ms +step:29294/57344 train_time:16799847ms step_avg:573.49ms +step:29295/57344 train_time:16800392ms step_avg:573.49ms +grad accum step:7324/14336 +step:29296/57344 train_time:16801676ms step_avg:573.51ms +step:29297/57344 train_time:16801692ms step_avg:573.50ms +step:29298/57344 train_time:16801952ms step_avg:573.48ms +step:29299/57344 train_time:16802526ms step_avg:573.48ms +grad accum step:7325/14336 +step:29300/57344 train_time:16803837ms step_avg:573.51ms +step:29301/57344 train_time:16803854ms step_avg:573.49ms +step:29302/57344 train_time:16804097ms step_avg:573.48ms +step:29303/57344 train_time:16804639ms step_avg:573.48ms +grad accum step:7326/14336 +step:29304/57344 train_time:16805977ms step_avg:573.50ms +step:29305/57344 train_time:16805994ms step_avg:573.49ms +step:29306/57344 train_time:16806247ms step_avg:573.47ms +step:29307/57344 train_time:16806807ms step_avg:573.47ms +grad accum step:7327/14336 +step:29308/57344 train_time:16808103ms step_avg:573.50ms +step:29309/57344 train_time:16808119ms step_avg:573.48ms +step:29310/57344 train_time:16808367ms step_avg:573.47ms +step:29311/57344 train_time:16808912ms step_avg:573.47ms +grad accum step:7328/14336 +step:29312/57344 train_time:16810211ms step_avg:573.49ms +step:29312/57344 val_loss:6.463924 train_time:16810211ms step_avg:573.49ms +step:29313/57344 train_time:16810223ms step_avg:573.47ms +step:29314/57344 train_time:16810446ms step_avg:573.46ms +step:29315/57344 train_time:16810980ms step_avg:573.46ms +grad accum step:7329/14336 +step:29316/57344 train_time:16812331ms step_avg:573.49ms +step:29317/57344 train_time:16812348ms step_avg:573.47ms +step:29318/57344 train_time:16812595ms step_avg:573.46ms +step:29319/57344 train_time:16813150ms step_avg:573.46ms +grad accum step:7330/14336 +step:29320/57344 train_time:16814442ms step_avg:573.48ms +step:29321/57344 train_time:16814459ms step_avg:573.46ms +step:29322/57344 train_time:16814704ms step_avg:573.45ms +step:29323/57344 train_time:16815242ms step_avg:573.45ms +grad accum step:7331/14336 +step:29324/57344 train_time:16816557ms step_avg:573.47ms +step:29325/57344 train_time:16816574ms step_avg:573.46ms +step:29326/57344 train_time:16816822ms step_avg:573.44ms +step:29327/57344 train_time:16817369ms step_avg:573.44ms +grad accum step:7332/14336 +step:29328/57344 train_time:16818659ms step_avg:573.47ms +step:29329/57344 train_time:16818676ms step_avg:573.45ms +step:29330/57344 train_time:16818924ms step_avg:573.44ms +step:29331/57344 train_time:16819470ms step_avg:573.44ms +grad accum step:7333/14336 +step:29332/57344 train_time:16820751ms step_avg:573.46ms +step:29333/57344 train_time:16820767ms step_avg:573.44ms +step:29334/57344 train_time:16821014ms step_avg:573.43ms +step:29335/57344 train_time:16821563ms step_avg:573.43ms +grad accum step:7334/14336 +step:29336/57344 train_time:16822956ms step_avg:573.46ms +step:29337/57344 train_time:16822972ms step_avg:573.44ms +step:29338/57344 train_time:16823221ms step_avg:573.43ms +step:29339/57344 train_time:16823764ms step_avg:573.43ms +grad accum step:7335/14336 +step:29340/57344 train_time:16825066ms step_avg:573.45ms +step:29341/57344 train_time:16825084ms step_avg:573.43ms +step:29342/57344 train_time:16825334ms step_avg:573.42ms +step:29343/57344 train_time:16825883ms step_avg:573.42ms +grad accum step:7336/14336 +step:29344/57344 train_time:16827198ms step_avg:573.45ms +step:29345/57344 train_time:16827214ms step_avg:573.43ms +step:29346/57344 train_time:16827463ms step_avg:573.42ms +step:29347/57344 train_time:16828030ms step_avg:573.42ms +grad accum step:7337/14336 +step:29348/57344 train_time:16829394ms step_avg:573.44ms +step:29349/57344 train_time:16829411ms step_avg:573.42ms +step:29350/57344 train_time:16829660ms step_avg:573.41ms +step:29351/57344 train_time:16830218ms step_avg:573.41ms +grad accum step:7338/14336 +step:29352/57344 train_time:16831525ms step_avg:573.44ms +step:29353/57344 train_time:16831542ms step_avg:573.42ms +step:29354/57344 train_time:16831790ms step_avg:573.41ms +step:29355/57344 train_time:16832340ms step_avg:573.41ms +grad accum step:7339/14336 +step:29356/57344 train_time:16833662ms step_avg:573.43ms +step:29357/57344 train_time:16833679ms step_avg:573.41ms +step:29358/57344 train_time:16833925ms step_avg:573.40ms +step:29359/57344 train_time:16834474ms step_avg:573.40ms +grad accum step:7340/14336 +step:29360/57344 train_time:16835796ms step_avg:573.43ms +step:29361/57344 train_time:16835813ms step_avg:573.41ms +step:29362/57344 train_time:16836059ms step_avg:573.40ms +step:29363/57344 train_time:16836599ms step_avg:573.40ms +grad accum step:7341/14336 +step:29364/57344 train_time:16837887ms step_avg:573.42ms +step:29365/57344 train_time:16837904ms step_avg:573.40ms +step:29366/57344 train_time:16838147ms step_avg:573.39ms +step:29367/57344 train_time:16838680ms step_avg:573.39ms +grad accum step:7342/14336 +step:29368/57344 train_time:16840014ms step_avg:573.41ms +step:29369/57344 train_time:16840030ms step_avg:573.39ms +step:29370/57344 train_time:16840264ms step_avg:573.38ms +step:29371/57344 train_time:16840812ms step_avg:573.38ms +grad accum step:7343/14336 +step:29372/57344 train_time:16842140ms step_avg:573.41ms +step:29373/57344 train_time:16842157ms step_avg:573.39ms +step:29374/57344 train_time:16842406ms step_avg:573.38ms +step:29375/57344 train_time:16842950ms step_avg:573.38ms +grad accum step:7344/14336 +step:29376/57344 train_time:16844222ms step_avg:573.40ms +step:29376/57344 val_loss:6.455650 train_time:16844223ms step_avg:573.40ms +step:29377/57344 train_time:16844235ms step_avg:573.38ms +step:29378/57344 train_time:16844457ms step_avg:573.37ms +step:29379/57344 train_time:16845011ms step_avg:573.37ms +grad accum step:7345/14336 +step:29380/57344 train_time:16846345ms step_avg:573.40ms +step:29381/57344 train_time:16846363ms step_avg:573.38ms +step:29382/57344 train_time:16846605ms step_avg:573.36ms +step:29383/57344 train_time:16847153ms step_avg:573.36ms +grad accum step:7346/14336 +step:29384/57344 train_time:16848475ms step_avg:573.39ms +step:29385/57344 train_time:16848492ms step_avg:573.37ms +step:29386/57344 train_time:16848746ms step_avg:573.36ms +step:29387/57344 train_time:16849315ms step_avg:573.36ms +grad accum step:7347/14336 +step:29388/57344 train_time:16850634ms step_avg:573.38ms +step:29389/57344 train_time:16850651ms step_avg:573.37ms +step:29390/57344 train_time:16850903ms step_avg:573.35ms +step:29391/57344 train_time:16851470ms step_avg:573.35ms +grad accum step:7348/14336 +step:29392/57344 train_time:16852798ms step_avg:573.38ms +step:29393/57344 train_time:16852815ms step_avg:573.36ms +step:29394/57344 train_time:16853060ms step_avg:573.35ms +step:29395/57344 train_time:16853601ms step_avg:573.35ms +grad accum step:7349/14336 +step:29396/57344 train_time:16854895ms step_avg:573.37ms +step:29397/57344 train_time:16854912ms step_avg:573.35ms +step:29398/57344 train_time:16855158ms step_avg:573.34ms +step:29399/57344 train_time:16855707ms step_avg:573.34ms +grad accum step:7350/14336 +step:29400/57344 train_time:16857004ms step_avg:573.37ms +step:29401/57344 train_time:16857021ms step_avg:573.35ms +step:29402/57344 train_time:16857268ms step_avg:573.34ms +step:29403/57344 train_time:16857811ms step_avg:573.34ms +grad accum step:7351/14336 +step:29404/57344 train_time:16859088ms step_avg:573.36ms +step:29405/57344 train_time:16859105ms step_avg:573.34ms +step:29406/57344 train_time:16859353ms step_avg:573.33ms +step:29407/57344 train_time:16859899ms step_avg:573.33ms +grad accum step:7352/14336 +step:29408/57344 train_time:16861209ms step_avg:573.35ms +step:29409/57344 train_time:16861225ms step_avg:573.34ms +step:29410/57344 train_time:16861479ms step_avg:573.32ms +step:29411/57344 train_time:16862051ms step_avg:573.32ms +grad accum step:7353/14336 +step:29412/57344 train_time:16863371ms step_avg:573.35ms +step:29413/57344 train_time:16863388ms step_avg:573.33ms +step:29414/57344 train_time:16863633ms step_avg:573.32ms +step:29415/57344 train_time:16864182ms step_avg:573.32ms +grad accum step:7354/14336 +step:29416/57344 train_time:16865494ms step_avg:573.34ms +step:29417/57344 train_time:16865511ms step_avg:573.33ms +step:29418/57344 train_time:16865768ms step_avg:573.31ms +step:29419/57344 train_time:16866351ms step_avg:573.31ms +grad accum step:7355/14336 +step:29420/57344 train_time:16867659ms step_avg:573.34ms +step:29421/57344 train_time:16867676ms step_avg:573.32ms +step:29422/57344 train_time:16867923ms step_avg:573.31ms +step:29423/57344 train_time:16868467ms step_avg:573.31ms +grad accum step:7356/14336 +step:29424/57344 train_time:16869765ms step_avg:573.33ms +step:29425/57344 train_time:16869779ms step_avg:573.31ms +step:29426/57344 train_time:16870029ms step_avg:573.30ms +step:29427/57344 train_time:16870590ms step_avg:573.30ms +grad accum step:7357/14336 +step:29428/57344 train_time:16871906ms step_avg:573.33ms +step:29429/57344 train_time:16871923ms step_avg:573.31ms +step:29430/57344 train_time:16872175ms step_avg:573.30ms +step:29431/57344 train_time:16872747ms step_avg:573.30ms +grad accum step:7358/14336 +step:29432/57344 train_time:16874088ms step_avg:573.32ms +step:29433/57344 train_time:16874105ms step_avg:573.31ms +step:29434/57344 train_time:16874353ms step_avg:573.29ms +step:29435/57344 train_time:16874899ms step_avg:573.29ms +grad accum step:7359/14336 +step:29436/57344 train_time:16876194ms step_avg:573.32ms +step:29437/57344 train_time:16876211ms step_avg:573.30ms +step:29438/57344 train_time:16876455ms step_avg:573.29ms +step:29439/57344 train_time:16876996ms step_avg:573.29ms +grad accum step:7360/14336 +step:29440/57344 train_time:16878323ms step_avg:573.31ms +step:29440/57344 val_loss:6.455267 train_time:16878323ms step_avg:573.31ms +step:29441/57344 train_time:16878335ms step_avg:573.29ms +step:29442/57344 train_time:16878556ms step_avg:573.28ms +step:29443/57344 train_time:16879105ms step_avg:573.28ms +grad accum step:7361/14336 +step:29444/57344 train_time:16880439ms step_avg:573.31ms +step:29445/57344 train_time:16880452ms step_avg:573.29ms +step:29446/57344 train_time:16880687ms step_avg:573.28ms +step:29447/57344 train_time:16881254ms step_avg:573.28ms +grad accum step:7362/14336 +step:29448/57344 train_time:16882586ms step_avg:573.30ms +step:29449/57344 train_time:16882603ms step_avg:573.28ms +step:29450/57344 train_time:16882850ms step_avg:573.27ms +step:29451/57344 train_time:16883394ms step_avg:573.27ms +grad accum step:7363/14336 +step:29452/57344 train_time:16884691ms step_avg:573.30ms +step:29453/57344 train_time:16884705ms step_avg:573.28ms +step:29454/57344 train_time:16884955ms step_avg:573.27ms +step:29455/57344 train_time:16885503ms step_avg:573.26ms +grad accum step:7364/14336 +step:29456/57344 train_time:16886801ms step_avg:573.29ms +step:29457/57344 train_time:16886818ms step_avg:573.27ms +step:29458/57344 train_time:16887070ms step_avg:573.26ms +step:29459/57344 train_time:16887636ms step_avg:573.26ms +grad accum step:7365/14336 +step:29460/57344 train_time:16888940ms step_avg:573.28ms +step:29461/57344 train_time:16888957ms step_avg:573.26ms +step:29462/57344 train_time:16889202ms step_avg:573.25ms +step:29463/57344 train_time:16889740ms step_avg:573.25ms +grad accum step:7366/14336 +step:29464/57344 train_time:16891017ms step_avg:573.28ms +step:29465/57344 train_time:16891033ms step_avg:573.26ms +step:29466/57344 train_time:16891279ms step_avg:573.25ms +step:29467/57344 train_time:16891825ms step_avg:573.25ms +grad accum step:7367/14336 +step:29468/57344 train_time:16893137ms step_avg:573.27ms +step:29469/57344 train_time:16893154ms step_avg:573.25ms +step:29470/57344 train_time:16893414ms step_avg:573.24ms +step:29471/57344 train_time:16893988ms step_avg:573.24ms +grad accum step:7368/14336 +step:29472/57344 train_time:16895274ms step_avg:573.27ms +step:29473/57344 train_time:16895290ms step_avg:573.25ms +step:29474/57344 train_time:16895531ms step_avg:573.24ms +step:29475/57344 train_time:16896080ms step_avg:573.23ms +grad accum step:7369/14336 +step:29476/57344 train_time:16897403ms step_avg:573.26ms +step:29477/57344 train_time:16897418ms step_avg:573.24ms +step:29478/57344 train_time:16897664ms step_avg:573.23ms +step:29479/57344 train_time:16898219ms step_avg:573.23ms +grad accum step:7370/14336 +step:29480/57344 train_time:16899574ms step_avg:573.26ms +step:29481/57344 train_time:16899591ms step_avg:573.24ms +step:29482/57344 train_time:16899844ms step_avg:573.23ms +step:29483/57344 train_time:16900411ms step_avg:573.23ms +grad accum step:7371/14336 +step:29484/57344 train_time:16901750ms step_avg:573.25ms +step:29485/57344 train_time:16901767ms step_avg:573.23ms +step:29486/57344 train_time:16902020ms step_avg:573.22ms +step:29487/57344 train_time:16902586ms step_avg:573.22ms +grad accum step:7372/14336 +step:29488/57344 train_time:16903907ms step_avg:573.25ms +step:29489/57344 train_time:16903924ms step_avg:573.23ms +step:29490/57344 train_time:16904169ms step_avg:573.22ms +step:29491/57344 train_time:16904709ms step_avg:573.22ms +grad accum step:7373/14336 +step:29492/57344 train_time:16906008ms step_avg:573.24ms +step:29493/57344 train_time:16906025ms step_avg:573.22ms +step:29494/57344 train_time:16906274ms step_avg:573.21ms +step:29495/57344 train_time:16906839ms step_avg:573.21ms +grad accum step:7374/14336 +step:29496/57344 train_time:16908178ms step_avg:573.24ms +step:29497/57344 train_time:16908195ms step_avg:573.22ms +step:29498/57344 train_time:16908443ms step_avg:573.21ms +step:29499/57344 train_time:16908983ms step_avg:573.21ms +grad accum step:7375/14336 +step:29500/57344 train_time:16910309ms step_avg:573.23ms +step:29501/57344 train_time:16910326ms step_avg:573.21ms +step:29502/57344 train_time:16910577ms step_avg:573.20ms +step:29503/57344 train_time:16911139ms step_avg:573.20ms +grad accum step:7376/14336 +step:29504/57344 train_time:16912458ms step_avg:573.23ms +step:29504/57344 val_loss:6.452744 train_time:16912458ms step_avg:573.23ms +step:29505/57344 train_time:16912470ms step_avg:573.21ms +step:29506/57344 train_time:16912693ms step_avg:573.20ms +step:29507/57344 train_time:16913242ms step_avg:573.19ms +grad accum step:7377/14336 +step:29508/57344 train_time:16914559ms step_avg:573.22ms +step:29509/57344 train_time:16914576ms step_avg:573.20ms +step:29510/57344 train_time:16914824ms step_avg:573.19ms +step:29511/57344 train_time:16915373ms step_avg:573.19ms +grad accum step:7378/14336 +step:29512/57344 train_time:16916659ms step_avg:573.21ms +step:29513/57344 train_time:16916676ms step_avg:573.19ms +step:29514/57344 train_time:16916923ms step_avg:573.18ms +step:29515/57344 train_time:16917470ms step_avg:573.18ms +grad accum step:7379/14336 +step:29516/57344 train_time:16918772ms step_avg:573.21ms +step:29517/57344 train_time:16918789ms step_avg:573.19ms +step:29518/57344 train_time:16919037ms step_avg:573.18ms +step:29519/57344 train_time:16919594ms step_avg:573.18ms +grad accum step:7380/14336 +step:29520/57344 train_time:16920918ms step_avg:573.20ms +step:29521/57344 train_time:16920936ms step_avg:573.18ms +step:29522/57344 train_time:16921189ms step_avg:573.17ms +step:29523/57344 train_time:16921753ms step_avg:573.17ms +grad accum step:7381/14336 +step:29524/57344 train_time:16923085ms step_avg:573.20ms +step:29525/57344 train_time:16923102ms step_avg:573.18ms +step:29526/57344 train_time:16923347ms step_avg:573.17ms +step:29527/57344 train_time:16923892ms step_avg:573.17ms +grad accum step:7382/14336 +step:29528/57344 train_time:16925173ms step_avg:573.19ms +step:29529/57344 train_time:16925190ms step_avg:573.17ms +step:29530/57344 train_time:16925439ms step_avg:573.16ms +step:29531/57344 train_time:16925988ms step_avg:573.16ms +grad accum step:7383/14336 +step:29532/57344 train_time:16927426ms step_avg:573.19ms +step:29533/57344 train_time:16927443ms step_avg:573.17ms +step:29534/57344 train_time:16927702ms step_avg:573.16ms +step:29535/57344 train_time:16928279ms step_avg:573.16ms +grad accum step:7384/14336 +step:29536/57344 train_time:16929607ms step_avg:573.19ms +step:29537/57344 train_time:16929623ms step_avg:573.17ms +step:29538/57344 train_time:16929872ms step_avg:573.16ms +step:29539/57344 train_time:16930423ms step_avg:573.15ms +grad accum step:7385/14336 +step:29540/57344 train_time:16931711ms step_avg:573.18ms +step:29541/57344 train_time:16931727ms step_avg:573.16ms +step:29542/57344 train_time:16931972ms step_avg:573.15ms +step:29543/57344 train_time:16932515ms step_avg:573.15ms +grad accum step:7386/14336 +step:29544/57344 train_time:16933821ms step_avg:573.17ms +step:29545/57344 train_time:16933838ms step_avg:573.15ms +step:29546/57344 train_time:16934086ms step_avg:573.14ms +step:29547/57344 train_time:16934635ms step_avg:573.14ms +grad accum step:7387/14336 +step:29548/57344 train_time:16935983ms step_avg:573.17ms +step:29549/57344 train_time:16936000ms step_avg:573.15ms +step:29550/57344 train_time:16936249ms step_avg:573.14ms +step:29551/57344 train_time:16936795ms step_avg:573.14ms +grad accum step:7388/14336 +step:29552/57344 train_time:16938116ms step_avg:573.16ms +step:29553/57344 train_time:16938133ms step_avg:573.14ms +step:29554/57344 train_time:16938376ms step_avg:573.13ms +step:29555/57344 train_time:16938917ms step_avg:573.13ms +grad accum step:7389/14336 +step:29556/57344 train_time:16940209ms step_avg:573.16ms +step:29557/57344 train_time:16940226ms step_avg:573.14ms +step:29558/57344 train_time:16940477ms step_avg:573.13ms +step:29559/57344 train_time:16941039ms step_avg:573.13ms +grad accum step:7390/14336 +step:29560/57344 train_time:16942353ms step_avg:573.15ms +step:29561/57344 train_time:16942370ms step_avg:573.13ms +step:29562/57344 train_time:16942620ms step_avg:573.12ms +step:29563/57344 train_time:16943167ms step_avg:573.12ms +grad accum step:7391/14336 +step:29564/57344 train_time:16944465ms step_avg:573.15ms +step:29565/57344 train_time:16944480ms step_avg:573.13ms +step:29566/57344 train_time:16944725ms step_avg:573.12ms +step:29567/57344 train_time:16945271ms step_avg:573.11ms +grad accum step:7392/14336 +step:29568/57344 train_time:16946560ms step_avg:573.14ms +step:29568/57344 val_loss:6.450254 train_time:16946562ms step_avg:573.14ms +step:29569/57344 train_time:16946574ms step_avg:573.12ms +step:29570/57344 train_time:16946799ms step_avg:573.11ms +step:29571/57344 train_time:16947361ms step_avg:573.11ms +grad accum step:7393/14336 +step:29572/57344 train_time:16948667ms step_avg:573.13ms +step:29573/57344 train_time:16948683ms step_avg:573.11ms +step:29574/57344 train_time:16948932ms step_avg:573.10ms +step:29575/57344 train_time:16949485ms step_avg:573.10ms +grad accum step:7394/14336 +step:29576/57344 train_time:16950819ms step_avg:573.13ms +step:29577/57344 train_time:16950833ms step_avg:573.11ms +step:29578/57344 train_time:16951080ms step_avg:573.10ms +step:29579/57344 train_time:16951635ms step_avg:573.10ms +grad accum step:7395/14336 +step:29580/57344 train_time:16952939ms step_avg:573.12ms +step:29581/57344 train_time:16952956ms step_avg:573.10ms +step:29582/57344 train_time:16953198ms step_avg:573.09ms +step:29583/57344 train_time:16953735ms step_avg:573.09ms +grad accum step:7396/14336 +step:29584/57344 train_time:16955036ms step_avg:573.12ms +step:29585/57344 train_time:16955053ms step_avg:573.10ms +step:29586/57344 train_time:16955302ms step_avg:573.09ms +step:29587/57344 train_time:16955854ms step_avg:573.08ms +grad accum step:7397/14336 +step:29588/57344 train_time:16957185ms step_avg:573.11ms +step:29589/57344 train_time:16957202ms step_avg:573.09ms +step:29590/57344 train_time:16957452ms step_avg:573.08ms +step:29591/57344 train_time:16958013ms step_avg:573.08ms +grad accum step:7398/14336 +step:29592/57344 train_time:16959332ms step_avg:573.11ms +step:29593/57344 train_time:16959348ms step_avg:573.09ms +step:29594/57344 train_time:16959604ms step_avg:573.08ms +step:29595/57344 train_time:16960167ms step_avg:573.08ms +grad accum step:7399/14336 +step:29596/57344 train_time:16961476ms step_avg:573.10ms +step:29597/57344 train_time:16961493ms step_avg:573.08ms +step:29598/57344 train_time:16961741ms step_avg:573.07ms +step:29599/57344 train_time:16962297ms step_avg:573.07ms +grad accum step:7400/14336 +step:29600/57344 train_time:16963597ms step_avg:573.09ms +step:29601/57344 train_time:16963614ms step_avg:573.08ms +step:29602/57344 train_time:16963860ms step_avg:573.06ms +step:29603/57344 train_time:16964410ms step_avg:573.06ms +grad accum step:7401/14336 +step:29604/57344 train_time:16965711ms step_avg:573.09ms +step:29605/57344 train_time:16965726ms step_avg:573.07ms +step:29606/57344 train_time:16965972ms step_avg:573.06ms +step:29607/57344 train_time:16966525ms step_avg:573.06ms +grad accum step:7402/14336 +step:29608/57344 train_time:16967852ms step_avg:573.08ms +step:29609/57344 train_time:16967869ms step_avg:573.06ms +step:29610/57344 train_time:16968116ms step_avg:573.05ms +step:29611/57344 train_time:16968664ms step_avg:573.05ms +grad accum step:7403/14336 +step:29612/57344 train_time:16969992ms step_avg:573.08ms +step:29613/57344 train_time:16970009ms step_avg:573.06ms +step:29614/57344 train_time:16970263ms step_avg:573.05ms +step:29615/57344 train_time:16970824ms step_avg:573.05ms +grad accum step:7404/14336 +step:29616/57344 train_time:16972119ms step_avg:573.07ms +step:29617/57344 train_time:16972136ms step_avg:573.05ms +step:29618/57344 train_time:16972385ms step_avg:573.04ms +step:29619/57344 train_time:16972933ms step_avg:573.04ms +grad accum step:7405/14336 +step:29620/57344 train_time:16974270ms step_avg:573.07ms +step:29621/57344 train_time:16974286ms step_avg:573.05ms +step:29622/57344 train_time:16974536ms step_avg:573.04ms +step:29623/57344 train_time:16975087ms step_avg:573.04ms +grad accum step:7406/14336 +step:29624/57344 train_time:16976378ms step_avg:573.06ms +step:29625/57344 train_time:16976395ms step_avg:573.04ms +step:29626/57344 train_time:16976647ms step_avg:573.03ms +step:29627/57344 train_time:16977206ms step_avg:573.03ms +grad accum step:7407/14336 +step:29628/57344 train_time:16978499ms step_avg:573.06ms +step:29629/57344 train_time:16978515ms step_avg:573.04ms +step:29630/57344 train_time:16978764ms step_avg:573.03ms +step:29631/57344 train_time:16979349ms step_avg:573.03ms +grad accum step:7408/14336 +step:29632/57344 train_time:16980671ms step_avg:573.05ms +step:29632/57344 val_loss:6.448990 train_time:16980672ms step_avg:573.05ms +step:29633/57344 train_time:16980684ms step_avg:573.03ms +step:29634/57344 train_time:16980911ms step_avg:573.02ms +step:29635/57344 train_time:16981468ms step_avg:573.02ms +grad accum step:7409/14336 +step:29636/57344 train_time:16982816ms step_avg:573.05ms +step:29637/57344 train_time:16982832ms step_avg:573.03ms +step:29638/57344 train_time:16983083ms step_avg:573.02ms +step:29639/57344 train_time:16983630ms step_avg:573.02ms +grad accum step:7410/14336 +step:29640/57344 train_time:16984941ms step_avg:573.04ms +step:29641/57344 train_time:16984957ms step_avg:573.02ms +step:29642/57344 train_time:16985203ms step_avg:573.01ms +step:29643/57344 train_time:16985750ms step_avg:573.01ms +grad accum step:7411/14336 +step:29644/57344 train_time:16987068ms step_avg:573.04ms +step:29645/57344 train_time:16987085ms step_avg:573.02ms +step:29646/57344 train_time:16987338ms step_avg:573.01ms +step:29647/57344 train_time:16987895ms step_avg:573.01ms +grad accum step:7412/14336 +step:29648/57344 train_time:16989188ms step_avg:573.03ms +step:29649/57344 train_time:16989204ms step_avg:573.01ms +step:29650/57344 train_time:16989455ms step_avg:573.00ms +step:29651/57344 train_time:16990017ms step_avg:573.00ms +grad accum step:7413/14336 +step:29652/57344 train_time:16991330ms step_avg:573.02ms +step:29653/57344 train_time:16991347ms step_avg:573.01ms +step:29654/57344 train_time:16991593ms step_avg:572.99ms +step:29655/57344 train_time:16992144ms step_avg:572.99ms +grad accum step:7414/14336 +step:29656/57344 train_time:16993438ms step_avg:573.02ms +step:29657/57344 train_time:16993455ms step_avg:573.00ms +step:29658/57344 train_time:16993700ms step_avg:572.99ms +step:29659/57344 train_time:16994248ms step_avg:572.99ms +grad accum step:7415/14336 +step:29660/57344 train_time:16995542ms step_avg:573.01ms +step:29661/57344 train_time:16995559ms step_avg:572.99ms +step:29662/57344 train_time:16995816ms step_avg:572.98ms +step:29663/57344 train_time:16996383ms step_avg:572.98ms +grad accum step:7416/14336 +step:29664/57344 train_time:16997718ms step_avg:573.01ms +step:29665/57344 train_time:16997735ms step_avg:572.99ms +step:29666/57344 train_time:16997984ms step_avg:572.98ms +step:29667/57344 train_time:16998536ms step_avg:572.98ms +grad accum step:7417/14336 +step:29668/57344 train_time:16999866ms step_avg:573.00ms +step:29669/57344 train_time:16999882ms step_avg:572.98ms +step:29670/57344 train_time:17000128ms step_avg:572.97ms +step:29671/57344 train_time:17000679ms step_avg:572.97ms +grad accum step:7418/14336 +step:29672/57344 train_time:17002020ms step_avg:573.00ms +step:29673/57344 train_time:17002037ms step_avg:572.98ms +step:29674/57344 train_time:17002281ms step_avg:572.97ms +step:29675/57344 train_time:17002822ms step_avg:572.97ms +grad accum step:7419/14336 +step:29676/57344 train_time:17004146ms step_avg:572.99ms +step:29677/57344 train_time:17004164ms step_avg:572.97ms +step:29678/57344 train_time:17004414ms step_avg:572.96ms +step:29679/57344 train_time:17004973ms step_avg:572.96ms +grad accum step:7420/14336 +step:29680/57344 train_time:17006332ms step_avg:572.99ms +step:29681/57344 train_time:17006349ms step_avg:572.97ms +step:29682/57344 train_time:17006605ms step_avg:572.96ms +step:29683/57344 train_time:17007168ms step_avg:572.96ms +grad accum step:7421/14336 +step:29684/57344 train_time:17008471ms step_avg:572.98ms +step:29685/57344 train_time:17008486ms step_avg:572.97ms +step:29686/57344 train_time:17008734ms step_avg:572.95ms +step:29687/57344 train_time:17009285ms step_avg:572.95ms +grad accum step:7422/14336 +step:29688/57344 train_time:17010632ms step_avg:572.98ms +step:29689/57344 train_time:17010649ms step_avg:572.96ms +step:29690/57344 train_time:17010893ms step_avg:572.95ms +step:29691/57344 train_time:17011439ms step_avg:572.95ms +grad accum step:7423/14336 +step:29692/57344 train_time:17012715ms step_avg:572.97ms +step:29693/57344 train_time:17012731ms step_avg:572.95ms +step:29694/57344 train_time:17012979ms step_avg:572.94ms +step:29695/57344 train_time:17013530ms step_avg:572.94ms +grad accum step:7424/14336 +step:29696/57344 train_time:17014854ms step_avg:572.97ms +step:29696/57344 val_loss:6.447800 train_time:17014854ms step_avg:572.97ms +step:29697/57344 train_time:17014866ms step_avg:572.95ms +step:29698/57344 train_time:17015091ms step_avg:572.94ms +step:29699/57344 train_time:17015645ms step_avg:572.94ms +grad accum step:7425/14336 +step:29700/57344 train_time:17016963ms step_avg:572.96ms +step:29701/57344 train_time:17016980ms step_avg:572.94ms +step:29702/57344 train_time:17017224ms step_avg:572.93ms +step:29703/57344 train_time:17017762ms step_avg:572.93ms +grad accum step:7426/14336 +step:29704/57344 train_time:17019077ms step_avg:572.96ms +step:29705/57344 train_time:17019094ms step_avg:572.94ms +step:29706/57344 train_time:17019346ms step_avg:572.93ms +step:29707/57344 train_time:17019898ms step_avg:572.93ms +grad accum step:7427/14336 +step:29708/57344 train_time:17021173ms step_avg:572.95ms +step:29709/57344 train_time:17021189ms step_avg:572.93ms +step:29710/57344 train_time:17021436ms step_avg:572.92ms +step:29711/57344 train_time:17021979ms step_avg:572.92ms +grad accum step:7428/14336 +step:29712/57344 train_time:17023270ms step_avg:572.94ms +step:29713/57344 train_time:17023287ms step_avg:572.92ms +step:29714/57344 train_time:17023533ms step_avg:572.91ms +step:29715/57344 train_time:17024083ms step_avg:572.91ms +grad accum step:7429/14336 +step:29716/57344 train_time:17025403ms step_avg:572.94ms +step:29717/57344 train_time:17025419ms step_avg:572.92ms +step:29718/57344 train_time:17025668ms step_avg:572.91ms +step:29719/57344 train_time:17026219ms step_avg:572.91ms +grad accum step:7430/14336 +step:29720/57344 train_time:17027530ms step_avg:572.93ms +step:29721/57344 train_time:17027547ms step_avg:572.91ms +step:29722/57344 train_time:17027799ms step_avg:572.90ms +step:29723/57344 train_time:17028358ms step_avg:572.90ms +grad accum step:7431/14336 +step:29724/57344 train_time:17029670ms step_avg:572.93ms +step:29725/57344 train_time:17029687ms step_avg:572.91ms +step:29726/57344 train_time:17029937ms step_avg:572.90ms +step:29727/57344 train_time:17030486ms step_avg:572.90ms +grad accum step:7432/14336 +step:29728/57344 train_time:17031809ms step_avg:572.92ms +step:29729/57344 train_time:17031826ms step_avg:572.90ms +step:29730/57344 train_time:17032072ms step_avg:572.89ms +step:29731/57344 train_time:17032623ms step_avg:572.89ms +grad accum step:7433/14336 +step:29732/57344 train_time:17033919ms step_avg:572.92ms +step:29733/57344 train_time:17033936ms step_avg:572.90ms +step:29734/57344 train_time:17034188ms step_avg:572.89ms +step:29735/57344 train_time:17034749ms step_avg:572.89ms +grad accum step:7434/14336 +step:29736/57344 train_time:17036070ms step_avg:572.91ms +step:29737/57344 train_time:17036087ms step_avg:572.89ms +step:29738/57344 train_time:17036340ms step_avg:572.88ms +step:29739/57344 train_time:17036907ms step_avg:572.88ms +grad accum step:7435/14336 +step:29740/57344 train_time:17038277ms step_avg:572.91ms +step:29741/57344 train_time:17038294ms step_avg:572.89ms +step:29742/57344 train_time:17038541ms step_avg:572.88ms +step:29743/57344 train_time:17039087ms step_avg:572.88ms +grad accum step:7436/14336 +step:29744/57344 train_time:17040409ms step_avg:572.90ms +step:29745/57344 train_time:17040426ms step_avg:572.88ms +step:29746/57344 train_time:17040671ms step_avg:572.87ms +step:29747/57344 train_time:17041215ms step_avg:572.87ms +grad accum step:7437/14336 +step:29748/57344 train_time:17042514ms step_avg:572.90ms +step:29749/57344 train_time:17042531ms step_avg:572.88ms +step:29750/57344 train_time:17042787ms step_avg:572.87ms +step:29751/57344 train_time:17043359ms step_avg:572.87ms +grad accum step:7438/14336 +step:29752/57344 train_time:17044683ms step_avg:572.89ms +step:29753/57344 train_time:17044700ms step_avg:572.87ms +step:29754/57344 train_time:17044949ms step_avg:572.86ms +step:29755/57344 train_time:17045491ms step_avg:572.86ms +grad accum step:7439/14336 +step:29756/57344 train_time:17046794ms step_avg:572.89ms +step:29757/57344 train_time:17046811ms step_avg:572.87ms +step:29758/57344 train_time:17047054ms step_avg:572.86ms +step:29759/57344 train_time:17047602ms step_avg:572.86ms +grad accum step:7440/14336 +step:29760/57344 train_time:17048930ms step_avg:572.88ms +step:29760/57344 val_loss:6.440109 train_time:17048931ms step_avg:572.88ms +step:29761/57344 train_time:17049626ms step_avg:572.88ms +step:29762/57344 train_time:17049689ms step_avg:572.87ms +step:29763/57344 train_time:17050116ms step_avg:572.86ms +grad accum step:7441/14336 +step:29764/57344 train_time:17051661ms step_avg:572.90ms +step:29765/57344 train_time:17051673ms step_avg:572.88ms +step:29766/57344 train_time:17051897ms step_avg:572.86ms +step:29767/57344 train_time:17052446ms step_avg:572.86ms +grad accum step:7442/14336 +step:29768/57344 train_time:17053775ms step_avg:572.89ms +step:29769/57344 train_time:17053791ms step_avg:572.87ms +step:29770/57344 train_time:17054038ms step_avg:572.86ms +step:29771/57344 train_time:17054579ms step_avg:572.86ms +grad accum step:7443/14336 +step:29772/57344 train_time:17055932ms step_avg:572.88ms +step:29773/57344 train_time:17055947ms step_avg:572.87ms +step:29774/57344 train_time:17056197ms step_avg:572.86ms +step:29775/57344 train_time:17056759ms step_avg:572.86ms +grad accum step:7444/14336 +step:29776/57344 train_time:17058086ms step_avg:572.88ms +step:29777/57344 train_time:17058103ms step_avg:572.86ms +step:29778/57344 train_time:17058348ms step_avg:572.85ms +step:29779/57344 train_time:17058886ms step_avg:572.85ms +grad accum step:7445/14336 +step:29780/57344 train_time:17060168ms step_avg:572.87ms +step:29781/57344 train_time:17060185ms step_avg:572.85ms +step:29782/57344 train_time:17060431ms step_avg:572.84ms +step:29783/57344 train_time:17060976ms step_avg:572.84ms +grad accum step:7446/14336 +step:29784/57344 train_time:17062283ms step_avg:572.87ms +step:29785/57344 train_time:17062300ms step_avg:572.85ms +step:29786/57344 train_time:17062546ms step_avg:572.84ms +step:29787/57344 train_time:17063086ms step_avg:572.84ms +grad accum step:7447/14336 +step:29788/57344 train_time:17064396ms step_avg:572.86ms +step:29789/57344 train_time:17064413ms step_avg:572.84ms +step:29790/57344 train_time:17064660ms step_avg:572.83ms +step:29791/57344 train_time:17065212ms step_avg:572.83ms +grad accum step:7448/14336 +step:29792/57344 train_time:17066532ms step_avg:572.86ms +step:29793/57344 train_time:17066548ms step_avg:572.84ms +step:29794/57344 train_time:17066797ms step_avg:572.83ms +step:29795/57344 train_time:17067343ms step_avg:572.83ms +grad accum step:7449/14336 +step:29796/57344 train_time:17068657ms step_avg:572.85ms +step:29797/57344 train_time:17068673ms step_avg:572.83ms +step:29798/57344 train_time:17068919ms step_avg:572.82ms +step:29799/57344 train_time:17069465ms step_avg:572.82ms +grad accum step:7450/14336 +step:29800/57344 train_time:17070740ms step_avg:572.84ms +step:29801/57344 train_time:17070757ms step_avg:572.82ms +step:29802/57344 train_time:17071008ms step_avg:572.81ms +step:29803/57344 train_time:17071558ms step_avg:572.81ms +grad accum step:7451/14336 +step:29804/57344 train_time:17072887ms step_avg:572.84ms +step:29805/57344 train_time:17072900ms step_avg:572.82ms +step:29806/57344 train_time:17073147ms step_avg:572.81ms +step:29807/57344 train_time:17073700ms step_avg:572.81ms +grad accum step:7452/14336 +step:29808/57344 train_time:17075005ms step_avg:572.83ms +step:29809/57344 train_time:17075022ms step_avg:572.81ms +step:29810/57344 train_time:17075270ms step_avg:572.80ms +step:29811/57344 train_time:17075820ms step_avg:572.80ms +grad accum step:7453/14336 +step:29812/57344 train_time:17077141ms step_avg:572.83ms +step:29813/57344 train_time:17077156ms step_avg:572.81ms +step:29814/57344 train_time:17077406ms step_avg:572.80ms +step:29815/57344 train_time:17077964ms step_avg:572.80ms +grad accum step:7454/14336 +step:29816/57344 train_time:17079301ms step_avg:572.82ms +step:29817/57344 train_time:17079317ms step_avg:572.80ms +step:29818/57344 train_time:17079568ms step_avg:572.79ms +step:29819/57344 train_time:17080120ms step_avg:572.79ms +grad accum step:7455/14336 +step:29820/57344 train_time:17081416ms step_avg:572.82ms +step:29821/57344 train_time:17081433ms step_avg:572.80ms +step:29822/57344 train_time:17081681ms step_avg:572.79ms +step:29823/57344 train_time:17082234ms step_avg:572.79ms +grad accum step:7456/14336 +step:29824/57344 train_time:17083545ms step_avg:572.81ms +step:29824/57344 val_loss:6.438864 train_time:17083546ms step_avg:572.81ms +step:29825/57344 train_time:17083558ms step_avg:572.79ms +step:29826/57344 train_time:17083781ms step_avg:572.78ms +step:29827/57344 train_time:17084329ms step_avg:572.78ms +grad accum step:7457/14336 +step:29828/57344 train_time:17085626ms step_avg:572.80ms +step:29829/57344 train_time:17085644ms step_avg:572.79ms +step:29830/57344 train_time:17085894ms step_avg:572.78ms +step:29831/57344 train_time:17086448ms step_avg:572.77ms +grad accum step:7458/14336 +step:29832/57344 train_time:17087758ms step_avg:572.80ms +step:29833/57344 train_time:17087775ms step_avg:572.78ms +step:29834/57344 train_time:17088021ms step_avg:572.77ms +step:29835/57344 train_time:17088568ms step_avg:572.77ms +grad accum step:7459/14336 +step:29836/57344 train_time:17089892ms step_avg:572.79ms +step:29837/57344 train_time:17089909ms step_avg:572.78ms +step:29838/57344 train_time:17090156ms step_avg:572.76ms +step:29839/57344 train_time:17090699ms step_avg:572.76ms +grad accum step:7460/14336 +step:29840/57344 train_time:17092016ms step_avg:572.79ms +step:29841/57344 train_time:17092033ms step_avg:572.77ms +step:29842/57344 train_time:17092281ms step_avg:572.76ms +step:29843/57344 train_time:17092828ms step_avg:572.76ms +grad accum step:7461/14336 +step:29844/57344 train_time:17094121ms step_avg:572.78ms +step:29845/57344 train_time:17094138ms step_avg:572.76ms +step:29846/57344 train_time:17094380ms step_avg:572.75ms +step:29847/57344 train_time:17094921ms step_avg:572.75ms +grad accum step:7462/14336 +step:29848/57344 train_time:17096249ms step_avg:572.78ms +step:29849/57344 train_time:17096267ms step_avg:572.76ms +step:29850/57344 train_time:17096519ms step_avg:572.75ms +step:29851/57344 train_time:17097081ms step_avg:572.75ms +grad accum step:7463/14336 +step:29852/57344 train_time:17098382ms step_avg:572.77ms +step:29853/57344 train_time:17098399ms step_avg:572.75ms +step:29854/57344 train_time:17098642ms step_avg:572.74ms +step:29855/57344 train_time:17099193ms step_avg:572.74ms +grad accum step:7464/14336 +step:29856/57344 train_time:17100508ms step_avg:572.77ms +step:29857/57344 train_time:17100525ms step_avg:572.75ms +step:29858/57344 train_time:17100773ms step_avg:572.74ms +step:29859/57344 train_time:17101321ms step_avg:572.74ms +grad accum step:7465/14336 +step:29860/57344 train_time:17102610ms step_avg:572.76ms +step:29861/57344 train_time:17102627ms step_avg:572.74ms +step:29862/57344 train_time:17102872ms step_avg:572.73ms +step:29863/57344 train_time:17103419ms step_avg:572.73ms +grad accum step:7466/14336 +step:29864/57344 train_time:17104734ms step_avg:572.75ms +step:29865/57344 train_time:17104751ms step_avg:572.74ms +step:29866/57344 train_time:17104999ms step_avg:572.72ms +step:29867/57344 train_time:17105553ms step_avg:572.72ms +grad accum step:7467/14336 +step:29868/57344 train_time:17106854ms step_avg:572.75ms +step:29869/57344 train_time:17106871ms step_avg:572.73ms +step:29870/57344 train_time:17107127ms step_avg:572.72ms +step:29871/57344 train_time:17107700ms step_avg:572.72ms +grad accum step:7468/14336 +step:29872/57344 train_time:17109001ms step_avg:572.74ms +step:29873/57344 train_time:17109018ms step_avg:572.73ms +step:29874/57344 train_time:17109262ms step_avg:572.71ms +step:29875/57344 train_time:17109810ms step_avg:572.71ms +grad accum step:7469/14336 +step:29876/57344 train_time:17111109ms step_avg:572.74ms +step:29877/57344 train_time:17111125ms step_avg:572.72ms +step:29878/57344 train_time:17111375ms step_avg:572.71ms +step:29879/57344 train_time:17111923ms step_avg:572.71ms +grad accum step:7470/14336 +step:29880/57344 train_time:17113238ms step_avg:572.73ms +step:29881/57344 train_time:17113254ms step_avg:572.71ms +step:29882/57344 train_time:17113502ms step_avg:572.70ms +step:29883/57344 train_time:17114058ms step_avg:572.70ms +grad accum step:7471/14336 +step:29884/57344 train_time:17115399ms step_avg:572.73ms +step:29885/57344 train_time:17115415ms step_avg:572.71ms +step:29886/57344 train_time:17115654ms step_avg:572.70ms +step:29887/57344 train_time:17116198ms step_avg:572.70ms +grad accum step:7472/14336 +step:29888/57344 train_time:17117503ms step_avg:572.72ms +step:29888/57344 val_loss:6.435298 train_time:17117505ms step_avg:572.72ms +step:29889/57344 train_time:17117516ms step_avg:572.70ms +step:29890/57344 train_time:17117743ms step_avg:572.69ms +step:29891/57344 train_time:17118311ms step_avg:572.69ms +grad accum step:7473/14336 +step:29892/57344 train_time:17119660ms step_avg:572.72ms +step:29893/57344 train_time:17119677ms step_avg:572.70ms +step:29894/57344 train_time:17119924ms step_avg:572.69ms +step:29895/57344 train_time:17120474ms step_avg:572.69ms +grad accum step:7474/14336 +step:29896/57344 train_time:17121795ms step_avg:572.71ms +step:29897/57344 train_time:17121812ms step_avg:572.69ms +step:29898/57344 train_time:17122058ms step_avg:572.68ms +step:29899/57344 train_time:17122610ms step_avg:572.68ms +grad accum step:7475/14336 +step:29900/57344 train_time:17123907ms step_avg:572.71ms +step:29901/57344 train_time:17123924ms step_avg:572.69ms +step:29902/57344 train_time:17124175ms step_avg:572.68ms +step:29903/57344 train_time:17124738ms step_avg:572.68ms +grad accum step:7476/14336 +step:29904/57344 train_time:17126070ms step_avg:572.70ms +step:29905/57344 train_time:17126087ms step_avg:572.68ms +step:29906/57344 train_time:17126335ms step_avg:572.67ms +step:29907/57344 train_time:17126897ms step_avg:572.67ms +grad accum step:7477/14336 +step:29908/57344 train_time:17128245ms step_avg:572.70ms +step:29909/57344 train_time:17128261ms step_avg:572.68ms +step:29910/57344 train_time:17128510ms step_avg:572.67ms +step:29911/57344 train_time:17129063ms step_avg:572.67ms +grad accum step:7478/14336 +step:29912/57344 train_time:17130370ms step_avg:572.69ms +step:29913/57344 train_time:17130387ms step_avg:572.67ms +step:29914/57344 train_time:17130638ms step_avg:572.66ms +step:29915/57344 train_time:17131200ms step_avg:572.66ms +grad accum step:7479/14336 +step:29916/57344 train_time:17132508ms step_avg:572.69ms +step:29917/57344 train_time:17132525ms step_avg:572.67ms +step:29918/57344 train_time:17132772ms step_avg:572.66ms +step:29919/57344 train_time:17133319ms step_avg:572.66ms +grad accum step:7480/14336 +step:29920/57344 train_time:17134597ms step_avg:572.68ms +step:29921/57344 train_time:17134614ms step_avg:572.66ms +step:29922/57344 train_time:17134862ms step_avg:572.65ms +step:29923/57344 train_time:17135409ms step_avg:572.65ms +grad accum step:7481/14336 +step:29924/57344 train_time:17136700ms step_avg:572.67ms +step:29925/57344 train_time:17136717ms step_avg:572.66ms +step:29926/57344 train_time:17136961ms step_avg:572.64ms +step:29927/57344 train_time:17137505ms step_avg:572.64ms +grad accum step:7482/14336 +step:29928/57344 train_time:17138804ms step_avg:572.67ms +step:29929/57344 train_time:17138821ms step_avg:572.65ms +step:29930/57344 train_time:17139069ms step_avg:572.64ms +step:29931/57344 train_time:17139615ms step_avg:572.64ms +grad accum step:7483/14336 +step:29932/57344 train_time:17140924ms step_avg:572.66ms +step:29933/57344 train_time:17140941ms step_avg:572.64ms +step:29934/57344 train_time:17141192ms step_avg:572.63ms +step:29935/57344 train_time:17141750ms step_avg:572.63ms +grad accum step:7484/14336 +step:29936/57344 train_time:17143071ms step_avg:572.66ms +step:29937/57344 train_time:17143088ms step_avg:572.64ms +step:29938/57344 train_time:17143340ms step_avg:572.63ms +step:29939/57344 train_time:17143898ms step_avg:572.63ms +grad accum step:7485/14336 +step:29940/57344 train_time:17145213ms step_avg:572.65ms +step:29941/57344 train_time:17145230ms step_avg:572.63ms +step:29942/57344 train_time:17145477ms step_avg:572.62ms +step:29943/57344 train_time:17146020ms step_avg:572.62ms +grad accum step:7486/14336 +step:29944/57344 train_time:17147312ms step_avg:572.65ms +step:29945/57344 train_time:17147329ms step_avg:572.63ms +step:29946/57344 train_time:17147572ms step_avg:572.62ms +step:29947/57344 train_time:17148112ms step_avg:572.62ms +grad accum step:7487/14336 +step:29948/57344 train_time:17149451ms step_avg:572.64ms +step:29949/57344 train_time:17149468ms step_avg:572.62ms +step:29950/57344 train_time:17149718ms step_avg:572.61ms +step:29951/57344 train_time:17150275ms step_avg:572.61ms +grad accum step:7488/14336 +step:29952/57344 train_time:17151591ms step_avg:572.64ms +step:29952/57344 val_loss:6.433727 train_time:17151591ms step_avg:572.64ms +step:29953/57344 train_time:17151603ms step_avg:572.62ms +step:29954/57344 train_time:17151834ms step_avg:572.61ms +step:29955/57344 train_time:17152394ms step_avg:572.61ms +grad accum step:7489/14336 +step:29956/57344 train_time:17153699ms step_avg:572.63ms +step:29957/57344 train_time:17153716ms step_avg:572.61ms +step:29958/57344 train_time:17153967ms step_avg:572.60ms +step:29959/57344 train_time:17154522ms step_avg:572.60ms +grad accum step:7490/14336 +step:29960/57344 train_time:17155850ms step_avg:572.63ms +step:29961/57344 train_time:17155867ms step_avg:572.61ms +step:29962/57344 train_time:17156115ms step_avg:572.60ms +step:29963/57344 train_time:17156660ms step_avg:572.59ms +grad accum step:7491/14336 +step:29964/57344 train_time:17157974ms step_avg:572.62ms +step:29965/57344 train_time:17157991ms step_avg:572.60ms +step:29966/57344 train_time:17158240ms step_avg:572.59ms +step:29967/57344 train_time:17158797ms step_avg:572.59ms +grad accum step:7492/14336 +step:29968/57344 train_time:17160118ms step_avg:572.61ms +step:29969/57344 train_time:17160135ms step_avg:572.60ms +step:29970/57344 train_time:17160380ms step_avg:572.59ms +step:29971/57344 train_time:17160923ms step_avg:572.58ms +grad accum step:7493/14336 +step:29972/57344 train_time:17162222ms step_avg:572.61ms +step:29973/57344 train_time:17162239ms step_avg:572.59ms +step:29974/57344 train_time:17162492ms step_avg:572.58ms +step:29975/57344 train_time:17163062ms step_avg:572.58ms +grad accum step:7494/14336 +step:29976/57344 train_time:17164367ms step_avg:572.60ms +step:29977/57344 train_time:17164383ms step_avg:572.59ms +step:29978/57344 train_time:17164628ms step_avg:572.57ms +step:29979/57344 train_time:17165170ms step_avg:572.57ms +grad accum step:7495/14336 +step:29980/57344 train_time:17166461ms step_avg:572.60ms +step:29981/57344 train_time:17166478ms step_avg:572.58ms +step:29982/57344 train_time:17166727ms step_avg:572.57ms +step:29983/57344 train_time:17167280ms step_avg:572.57ms +grad accum step:7496/14336 +step:29984/57344 train_time:17168639ms step_avg:572.59ms +step:29985/57344 train_time:17168656ms step_avg:572.57ms +step:29986/57344 train_time:17168907ms step_avg:572.56ms +step:29987/57344 train_time:17169472ms step_avg:572.56ms +grad accum step:7497/14336 +step:29988/57344 train_time:17170838ms step_avg:572.59ms +step:29989/57344 train_time:17170855ms step_avg:572.57ms +step:29990/57344 train_time:17171102ms step_avg:572.56ms +step:29991/57344 train_time:17171643ms step_avg:572.56ms +grad accum step:7498/14336 +step:29992/57344 train_time:17172929ms step_avg:572.58ms +step:29993/57344 train_time:17172944ms step_avg:572.57ms +step:29994/57344 train_time:17173193ms step_avg:572.55ms +step:29995/57344 train_time:17173754ms step_avg:572.55ms +grad accum step:7499/14336 +step:29996/57344 train_time:17175064ms step_avg:572.58ms +step:29997/57344 train_time:17175081ms step_avg:572.56ms +step:29998/57344 train_time:17175334ms step_avg:572.55ms +step:29999/57344 train_time:17175895ms step_avg:572.55ms +grad accum step:7500/14336 +step:30000/57344 train_time:17177195ms step_avg:572.57ms +step:30001/57344 train_time:17177211ms step_avg:572.55ms +step:30002/57344 train_time:17177461ms step_avg:572.54ms +step:30003/57344 train_time:17178011ms step_avg:572.54ms +grad accum step:7501/14336 +step:30004/57344 train_time:17179316ms step_avg:572.57ms +step:30005/57344 train_time:17179331ms step_avg:572.55ms +step:30006/57344 train_time:17179585ms step_avg:572.54ms +step:30007/57344 train_time:17180147ms step_avg:572.54ms +grad accum step:7502/14336 +step:30008/57344 train_time:17181497ms step_avg:572.56ms +step:30009/57344 train_time:17181511ms step_avg:572.55ms +step:30010/57344 train_time:17181759ms step_avg:572.53ms +step:30011/57344 train_time:17182313ms step_avg:572.53ms +grad accum step:7503/14336 +step:30012/57344 train_time:17183619ms step_avg:572.56ms +step:30013/57344 train_time:17183636ms step_avg:572.54ms +step:30014/57344 train_time:17183883ms step_avg:572.53ms +step:30015/57344 train_time:17184443ms step_avg:572.53ms +grad accum step:7504/14336 +step:30016/57344 train_time:17185884ms step_avg:572.56ms +step:30016/57344 val_loss:6.450425 train_time:17185884ms step_avg:572.56ms +step:30017/57344 train_time:17185896ms step_avg:572.54ms +step:30018/57344 train_time:17186120ms step_avg:572.53ms +step:30019/57344 train_time:17186666ms step_avg:572.53ms +grad accum step:7505/14336 +step:30020/57344 train_time:17187953ms step_avg:572.55ms +step:30021/57344 train_time:17187970ms step_avg:572.53ms +step:30022/57344 train_time:17188218ms step_avg:572.52ms +step:30023/57344 train_time:17188775ms step_avg:572.52ms +grad accum step:7506/14336 +step:30024/57344 train_time:17190088ms step_avg:572.54ms +step:30025/57344 train_time:17190104ms step_avg:572.53ms +step:30026/57344 train_time:17190353ms step_avg:572.52ms +step:30027/57344 train_time:17190909ms step_avg:572.52ms +grad accum step:7507/14336 +step:30028/57344 train_time:17192210ms step_avg:572.54ms +step:30029/57344 train_time:17192227ms step_avg:572.52ms +step:30030/57344 train_time:17192476ms step_avg:572.51ms +step:30031/57344 train_time:17193033ms step_avg:572.51ms +grad accum step:7508/14336 +step:30032/57344 train_time:17194388ms step_avg:572.54ms +step:30033/57344 train_time:17194405ms step_avg:572.52ms +step:30034/57344 train_time:17194650ms step_avg:572.51ms +step:30035/57344 train_time:17195197ms step_avg:572.51ms +grad accum step:7509/14336 +step:30036/57344 train_time:17196495ms step_avg:572.53ms +step:30037/57344 train_time:17196511ms step_avg:572.51ms +step:30038/57344 train_time:17196762ms step_avg:572.50ms +step:30039/57344 train_time:17197322ms step_avg:572.50ms +grad accum step:7510/14336 +step:30040/57344 train_time:17198650ms step_avg:572.52ms +step:30041/57344 train_time:17198665ms step_avg:572.51ms +step:30042/57344 train_time:17198913ms step_avg:572.50ms +step:30043/57344 train_time:17199463ms step_avg:572.49ms +grad accum step:7511/14336 +step:30044/57344 train_time:17200814ms step_avg:572.52ms +step:30045/57344 train_time:17200832ms step_avg:572.50ms +step:30046/57344 train_time:17201073ms step_avg:572.49ms +step:30047/57344 train_time:17201634ms step_avg:572.49ms +grad accum step:7512/14336 +step:30048/57344 train_time:17202968ms step_avg:572.52ms +step:30049/57344 train_time:17202985ms step_avg:572.50ms +step:30050/57344 train_time:17203229ms step_avg:572.49ms +step:30051/57344 train_time:17203765ms step_avg:572.49ms +grad accum step:7513/14336 +step:30052/57344 train_time:17205074ms step_avg:572.51ms +step:30053/57344 train_time:17205091ms step_avg:572.49ms +step:30054/57344 train_time:17205340ms step_avg:572.48ms +step:30055/57344 train_time:17205890ms step_avg:572.48ms +grad accum step:7514/14336 +step:30056/57344 train_time:17207226ms step_avg:572.51ms +step:30057/57344 train_time:17207241ms step_avg:572.49ms +step:30058/57344 train_time:17207488ms step_avg:572.48ms +step:30059/57344 train_time:17208033ms step_avg:572.48ms +grad accum step:7515/14336 +step:30060/57344 train_time:17209327ms step_avg:572.50ms +step:30061/57344 train_time:17209344ms step_avg:572.48ms +step:30062/57344 train_time:17209592ms step_avg:572.47ms +step:30063/57344 train_time:17210136ms step_avg:572.47ms +grad accum step:7516/14336 +step:30064/57344 train_time:17211433ms step_avg:572.49ms +step:30065/57344 train_time:17211449ms step_avg:572.47ms +step:30066/57344 train_time:17211712ms step_avg:572.46ms +step:30067/57344 train_time:17212298ms step_avg:572.46ms +grad accum step:7517/14336 +step:30068/57344 train_time:17213628ms step_avg:572.49ms +step:30069/57344 train_time:17213644ms step_avg:572.47ms +step:30070/57344 train_time:17213891ms step_avg:572.46ms +step:30071/57344 train_time:17214439ms step_avg:572.46ms +grad accum step:7518/14336 +step:30072/57344 train_time:17215812ms step_avg:572.49ms +step:30073/57344 train_time:17215829ms step_avg:572.47ms +step:30074/57344 train_time:17216077ms step_avg:572.46ms +step:30075/57344 train_time:17216630ms step_avg:572.46ms +grad accum step:7519/14336 +step:30076/57344 train_time:17217950ms step_avg:572.48ms +step:30077/57344 train_time:17217967ms step_avg:572.46ms +step:30078/57344 train_time:17218212ms step_avg:572.45ms +step:30079/57344 train_time:17218753ms step_avg:572.45ms +grad accum step:7520/14336 +step:30080/57344 train_time:17220054ms step_avg:572.48ms +step:30080/57344 val_loss:6.421892 train_time:17220054ms step_avg:572.48ms +step:30081/57344 train_time:17220066ms step_avg:572.46ms +step:30082/57344 train_time:17220298ms step_avg:572.45ms +step:30083/57344 train_time:17220867ms step_avg:572.45ms +grad accum step:7521/14336 +step:30084/57344 train_time:17222165ms step_avg:572.47ms +step:30085/57344 train_time:17222182ms step_avg:572.45ms +step:30086/57344 train_time:17222431ms step_avg:572.44ms +step:30087/57344 train_time:17222978ms step_avg:572.44ms +grad accum step:7522/14336 +step:30088/57344 train_time:17224267ms step_avg:572.46ms +step:30089/57344 train_time:17224284ms step_avg:572.44ms +step:30090/57344 train_time:17224531ms step_avg:572.43ms +step:30091/57344 train_time:17225072ms step_avg:572.43ms +grad accum step:7523/14336 +step:30092/57344 train_time:17226394ms step_avg:572.46ms +step:30093/57344 train_time:17226409ms step_avg:572.44ms +step:30094/57344 train_time:17226657ms step_avg:572.43ms +step:30095/57344 train_time:17227201ms step_avg:572.43ms +grad accum step:7524/14336 +step:30096/57344 train_time:17228513ms step_avg:572.45ms +step:30097/57344 train_time:17228526ms step_avg:572.43ms +step:30098/57344 train_time:17228775ms step_avg:572.42ms +step:30099/57344 train_time:17229339ms step_avg:572.42ms +grad accum step:7525/14336 +step:30100/57344 train_time:17230636ms step_avg:572.45ms +step:30101/57344 train_time:17230653ms step_avg:572.43ms +step:30102/57344 train_time:17230900ms step_avg:572.42ms +step:30103/57344 train_time:17231445ms step_avg:572.42ms +grad accum step:7526/14336 +step:30104/57344 train_time:17232744ms step_avg:572.44ms +step:30105/57344 train_time:17232761ms step_avg:572.42ms +step:30106/57344 train_time:17233010ms step_avg:572.41ms +step:30107/57344 train_time:17233562ms step_avg:572.41ms +grad accum step:7527/14336 +step:30108/57344 train_time:17234884ms step_avg:572.44ms +step:30109/57344 train_time:17234901ms step_avg:572.42ms +step:30110/57344 train_time:17235152ms step_avg:572.41ms +step:30111/57344 train_time:17235703ms step_avg:572.41ms +grad accum step:7528/14336 +step:30112/57344 train_time:17237008ms step_avg:572.43ms +step:30113/57344 train_time:17237025ms step_avg:572.41ms +step:30114/57344 train_time:17237277ms step_avg:572.40ms +step:30115/57344 train_time:17239501ms step_avg:572.46ms +grad accum step:7529/14336 +step:30116/57344 train_time:17243872ms step_avg:572.58ms +step:30117/57344 train_time:17243887ms step_avg:572.56ms +step:30118/57344 train_time:17244106ms step_avg:572.55ms +step:30119/57344 train_time:17244655ms step_avg:572.55ms +grad accum step:7530/14336 +step:30120/57344 train_time:17245957ms step_avg:572.57ms +step:30121/57344 train_time:17245974ms step_avg:572.56ms +step:30122/57344 train_time:17246215ms step_avg:572.55ms +step:30123/57344 train_time:17246752ms step_avg:572.54ms +grad accum step:7531/14336 +step:30124/57344 train_time:17248059ms step_avg:572.57ms +step:30125/57344 train_time:17248076ms step_avg:572.55ms +step:30126/57344 train_time:17248324ms step_avg:572.54ms +step:30127/57344 train_time:17248870ms step_avg:572.54ms +grad accum step:7532/14336 +step:30128/57344 train_time:17250184ms step_avg:572.56ms +step:30129/57344 train_time:17250201ms step_avg:572.54ms +step:30130/57344 train_time:17250452ms step_avg:572.53ms +step:30131/57344 train_time:17251014ms step_avg:572.53ms +grad accum step:7533/14336 +step:30132/57344 train_time:17252336ms step_avg:572.56ms +step:30133/57344 train_time:17252353ms step_avg:572.54ms +step:30134/57344 train_time:17252601ms step_avg:572.53ms +step:30135/57344 train_time:17253150ms step_avg:572.53ms +grad accum step:7534/14336 +step:30136/57344 train_time:17254469ms step_avg:572.55ms +step:30137/57344 train_time:17254490ms step_avg:572.54ms +step:30138/57344 train_time:17254747ms step_avg:572.52ms +step:30139/57344 train_time:17255344ms step_avg:572.53ms +grad accum step:7535/14336 +step:30140/57344 train_time:17256740ms step_avg:572.55ms +step:30141/57344 train_time:17256757ms step_avg:572.53ms +step:30142/57344 train_time:17257001ms step_avg:572.52ms +step:30143/57344 train_time:17257543ms step_avg:572.52ms +grad accum step:7536/14336 +step:30144/57344 train_time:17258882ms step_avg:572.55ms +step:30144/57344 val_loss:6.429942 train_time:17258882ms step_avg:572.55ms +step:30145/57344 train_time:17258894ms step_avg:572.53ms +step:30146/57344 train_time:17259141ms step_avg:572.52ms +step:30147/57344 train_time:17259753ms step_avg:572.52ms +grad accum step:7537/14336 +step:30148/57344 train_time:17261149ms step_avg:572.55ms +step:30149/57344 train_time:17261165ms step_avg:572.53ms +step:30150/57344 train_time:17261408ms step_avg:572.52ms +step:30151/57344 train_time:17261948ms step_avg:572.52ms +grad accum step:7538/14336 +step:30152/57344 train_time:17263241ms step_avg:572.54ms +step:30153/57344 train_time:17263257ms step_avg:572.52ms +step:30154/57344 train_time:17263506ms step_avg:572.51ms +step:30155/57344 train_time:17264062ms step_avg:572.51ms +grad accum step:7539/14336 +step:30156/57344 train_time:17265364ms step_avg:572.53ms +step:30157/57344 train_time:17265381ms step_avg:572.52ms +step:30158/57344 train_time:17265633ms step_avg:572.51ms +step:30159/57344 train_time:17266193ms step_avg:572.51ms +grad accum step:7540/14336 +step:30160/57344 train_time:17267504ms step_avg:572.53ms +step:30161/57344 train_time:17267521ms step_avg:572.51ms +step:30162/57344 train_time:17267766ms step_avg:572.50ms +step:30163/57344 train_time:17268310ms step_avg:572.50ms +grad accum step:7541/14336 +step:30164/57344 train_time:17269614ms step_avg:572.52ms +step:30165/57344 train_time:17269631ms step_avg:572.51ms +step:30166/57344 train_time:17269876ms step_avg:572.49ms +step:30167/57344 train_time:17270423ms step_avg:572.49ms +grad accum step:7542/14336 +step:30168/57344 train_time:17271717ms step_avg:572.52ms +step:30169/57344 train_time:17271733ms step_avg:572.50ms +step:30170/57344 train_time:17271983ms step_avg:572.49ms +step:30171/57344 train_time:17272530ms step_avg:572.49ms +grad accum step:7543/14336 +step:30172/57344 train_time:17273824ms step_avg:572.51ms +step:30173/57344 train_time:17273840ms step_avg:572.49ms +step:30174/57344 train_time:17274089ms step_avg:572.48ms +step:30175/57344 train_time:17274645ms step_avg:572.48ms +grad accum step:7544/14336 +step:30176/57344 train_time:17275992ms step_avg:572.51ms +step:30177/57344 train_time:17276007ms step_avg:572.49ms +step:30178/57344 train_time:17276251ms step_avg:572.48ms +step:30179/57344 train_time:17276791ms step_avg:572.48ms +grad accum step:7545/14336 +step:30180/57344 train_time:17278087ms step_avg:572.50ms +step:30181/57344 train_time:17278104ms step_avg:572.48ms +step:30182/57344 train_time:17278351ms step_avg:572.47ms +step:30183/57344 train_time:17278897ms step_avg:572.47ms +grad accum step:7546/14336 +step:30184/57344 train_time:17280170ms step_avg:572.49ms +step:30185/57344 train_time:17280187ms step_avg:572.48ms +step:30186/57344 train_time:17280437ms step_avg:572.47ms +step:30187/57344 train_time:17280990ms step_avg:572.46ms +grad accum step:7547/14336 +step:30188/57344 train_time:17282284ms step_avg:572.49ms +step:30189/57344 train_time:17282304ms step_avg:572.47ms +step:30190/57344 train_time:17282541ms step_avg:572.46ms +step:30191/57344 train_time:17283086ms step_avg:572.46ms +grad accum step:7548/14336 +step:30192/57344 train_time:17284398ms step_avg:572.48ms +step:30193/57344 train_time:17284415ms step_avg:572.46ms +step:30194/57344 train_time:17284660ms step_avg:572.45ms +step:30195/57344 train_time:17285204ms step_avg:572.45ms +grad accum step:7549/14336 +step:30196/57344 train_time:17286517ms step_avg:572.48ms +step:30197/57344 train_time:17286533ms step_avg:572.46ms +step:30198/57344 train_time:17286794ms step_avg:572.45ms +step:30199/57344 train_time:17287374ms step_avg:572.45ms +grad accum step:7550/14336 +step:30200/57344 train_time:17288660ms step_avg:572.47ms +step:30201/57344 train_time:17288676ms step_avg:572.45ms +step:30202/57344 train_time:17288924ms step_avg:572.44ms +step:30203/57344 train_time:17289469ms step_avg:572.44ms +grad accum step:7551/14336 +step:30204/57344 train_time:17290793ms step_avg:572.47ms +step:30205/57344 train_time:17290810ms step_avg:572.45ms +step:30206/57344 train_time:17291056ms step_avg:572.44ms +step:30207/57344 train_time:17291593ms step_avg:572.44ms +grad accum step:7552/14336 +step:30208/57344 train_time:17292891ms step_avg:572.46ms +step:30208/57344 val_loss:6.392270 train_time:17292891ms step_avg:572.46ms +step:30209/57344 train_time:17292903ms step_avg:572.44ms +step:30210/57344 train_time:17293140ms step_avg:572.43ms +step:30211/57344 train_time:17293714ms step_avg:572.43ms +grad accum step:7553/14336 +step:30212/57344 train_time:17295031ms step_avg:572.46ms +step:30213/57344 train_time:17295048ms step_avg:572.44ms +step:30214/57344 train_time:17295298ms step_avg:572.43ms +step:30215/57344 train_time:17295855ms step_avg:572.43ms +grad accum step:7554/14336 +step:30216/57344 train_time:17297158ms step_avg:572.45ms +step:30217/57344 train_time:17297175ms step_avg:572.43ms +step:30218/57344 train_time:17297421ms step_avg:572.42ms +step:30219/57344 train_time:17297965ms step_avg:572.42ms +grad accum step:7555/14336 +step:30220/57344 train_time:17299257ms step_avg:572.44ms +step:30221/57344 train_time:17299274ms step_avg:572.43ms +step:30222/57344 train_time:17299523ms step_avg:572.41ms +step:30223/57344 train_time:17300077ms step_avg:572.41ms +grad accum step:7556/14336 +step:30224/57344 train_time:17301390ms step_avg:572.44ms +step:30225/57344 train_time:17301406ms step_avg:572.42ms +step:30226/57344 train_time:17301733ms step_avg:572.41ms +step:30227/57344 train_time:17302208ms step_avg:572.41ms +grad accum step:7557/14336 +step:30228/57344 train_time:17303516ms step_avg:572.43ms +step:30229/57344 train_time:17303532ms step_avg:572.41ms +step:30230/57344 train_time:17303776ms step_avg:572.40ms +step:30231/57344 train_time:17304315ms step_avg:572.40ms +grad accum step:7558/14336 +step:30232/57344 train_time:17305588ms step_avg:572.43ms +step:30233/57344 train_time:17305605ms step_avg:572.41ms +step:30234/57344 train_time:17305854ms step_avg:572.40ms +step:30235/57344 train_time:17306402ms step_avg:572.40ms +grad accum step:7559/14336 +step:30236/57344 train_time:17307703ms step_avg:572.42ms +step:30237/57344 train_time:17307720ms step_avg:572.40ms +step:30238/57344 train_time:17307971ms step_avg:572.39ms +step:30239/57344 train_time:17308525ms step_avg:572.39ms +grad accum step:7560/14336 +step:30240/57344 train_time:17309817ms step_avg:572.41ms +step:30241/57344 train_time:17309834ms step_avg:572.40ms +step:30242/57344 train_time:17310086ms step_avg:572.39ms +step:30243/57344 train_time:17310641ms step_avg:572.39ms +grad accum step:7561/14336 +step:30244/57344 train_time:17311945ms step_avg:572.41ms +step:30245/57344 train_time:17311961ms step_avg:572.39ms +step:30246/57344 train_time:17312211ms step_avg:572.38ms +step:30247/57344 train_time:17312758ms step_avg:572.38ms +grad accum step:7562/14336 +step:30248/57344 train_time:17314074ms step_avg:572.40ms +step:30249/57344 train_time:17314090ms step_avg:572.39ms +step:30250/57344 train_time:17314337ms step_avg:572.37ms +step:30251/57344 train_time:17314881ms step_avg:572.37ms +grad accum step:7563/14336 +step:30252/57344 train_time:17316167ms step_avg:572.40ms +step:30253/57344 train_time:17316184ms step_avg:572.38ms +step:30254/57344 train_time:17316437ms step_avg:572.37ms +step:30255/57344 train_time:17316997ms step_avg:572.37ms +grad accum step:7564/14336 +step:30256/57344 train_time:17318365ms step_avg:572.39ms +step:30257/57344 train_time:17318382ms step_avg:572.38ms +step:30258/57344 train_time:17318630ms step_avg:572.37ms +step:30259/57344 train_time:17319176ms step_avg:572.36ms +grad accum step:7565/14336 +step:30260/57344 train_time:17320471ms step_avg:572.39ms +step:30261/57344 train_time:17320488ms step_avg:572.37ms +step:30262/57344 train_time:17320740ms step_avg:572.36ms +step:30263/57344 train_time:17321302ms step_avg:572.36ms +grad accum step:7566/14336 +step:30264/57344 train_time:17322662ms step_avg:572.39ms +step:30265/57344 train_time:17322679ms step_avg:572.37ms +step:30266/57344 train_time:17322927ms step_avg:572.36ms +step:30267/57344 train_time:17323466ms step_avg:572.35ms +grad accum step:7567/14336 +step:30268/57344 train_time:17324751ms step_avg:572.38ms +step:30269/57344 train_time:17324768ms step_avg:572.36ms +step:30270/57344 train_time:17325015ms step_avg:572.35ms +step:30271/57344 train_time:17325558ms step_avg:572.35ms +grad accum step:7568/14336 +step:30272/57344 train_time:17326875ms step_avg:572.37ms +step:30272/57344 val_loss:6.373115 train_time:17326876ms step_avg:572.37ms +step:30273/57344 train_time:17326888ms step_avg:572.35ms +step:30274/57344 train_time:17327112ms step_avg:572.34ms +step:30275/57344 train_time:17327666ms step_avg:572.34ms +grad accum step:7569/14336 +step:30276/57344 train_time:17329000ms step_avg:572.37ms +step:30277/57344 train_time:17329017ms step_avg:572.35ms +step:30278/57344 train_time:17329264ms step_avg:572.34ms +step:30279/57344 train_time:17329822ms step_avg:572.34ms +grad accum step:7570/14336 +step:30280/57344 train_time:17331151ms step_avg:572.36ms +step:30281/57344 train_time:17331168ms step_avg:572.34ms +step:30282/57344 train_time:17331417ms step_avg:572.33ms +step:30283/57344 train_time:17331969ms step_avg:572.33ms +grad accum step:7571/14336 +step:30284/57344 train_time:17333256ms step_avg:572.36ms +step:30285/57344 train_time:17333272ms step_avg:572.34ms +step:30286/57344 train_time:17333518ms step_avg:572.33ms +step:30287/57344 train_time:17334061ms step_avg:572.33ms +grad accum step:7572/14336 +step:30288/57344 train_time:17335356ms step_avg:572.35ms +step:30289/57344 train_time:17335373ms step_avg:572.33ms +step:30290/57344 train_time:17335622ms step_avg:572.32ms +step:30291/57344 train_time:17336177ms step_avg:572.32ms +grad accum step:7573/14336 +step:30292/57344 train_time:17337463ms step_avg:572.34ms +step:30293/57344 train_time:17337480ms step_avg:572.33ms +step:30294/57344 train_time:17337728ms step_avg:572.32ms +step:30295/57344 train_time:17338276ms step_avg:572.31ms +grad accum step:7574/14336 +step:30296/57344 train_time:17339583ms step_avg:572.34ms +step:30297/57344 train_time:17339600ms step_avg:572.32ms +step:30298/57344 train_time:17339848ms step_avg:572.31ms +step:30299/57344 train_time:17340392ms step_avg:572.31ms +grad accum step:7575/14336 +step:30300/57344 train_time:17341734ms step_avg:572.33ms +step:30301/57344 train_time:17341751ms step_avg:572.32ms +step:30302/57344 train_time:17341998ms step_avg:572.31ms +step:30303/57344 train_time:17342543ms step_avg:572.30ms +grad accum step:7576/14336 +step:30304/57344 train_time:17343823ms step_avg:572.33ms +step:30305/57344 train_time:17343840ms step_avg:572.31ms +step:30306/57344 train_time:17344086ms step_avg:572.30ms +step:30307/57344 train_time:17344626ms step_avg:572.30ms +grad accum step:7577/14336 +step:30308/57344 train_time:17345926ms step_avg:572.32ms +step:30309/57344 train_time:17345943ms step_avg:572.30ms +step:30310/57344 train_time:17346191ms step_avg:572.29ms +step:30311/57344 train_time:17346741ms step_avg:572.29ms +grad accum step:7578/14336 +step:30312/57344 train_time:17348047ms step_avg:572.32ms +step:30313/57344 train_time:17348064ms step_avg:572.30ms +step:30314/57344 train_time:17348313ms step_avg:572.29ms +step:30315/57344 train_time:17348866ms step_avg:572.29ms +grad accum step:7579/14336 +step:30316/57344 train_time:17350179ms step_avg:572.31ms +step:30317/57344 train_time:17350196ms step_avg:572.29ms +step:30318/57344 train_time:17350446ms step_avg:572.28ms +step:30319/57344 train_time:17350995ms step_avg:572.28ms +grad accum step:7580/14336 +step:30320/57344 train_time:17352310ms step_avg:572.31ms +step:30321/57344 train_time:17352326ms step_avg:572.29ms +step:30322/57344 train_time:17352573ms step_avg:572.28ms +step:30323/57344 train_time:17353122ms step_avg:572.28ms +grad accum step:7581/14336 +step:30324/57344 train_time:17354475ms step_avg:572.30ms +step:30325/57344 train_time:17354492ms step_avg:572.28ms +step:30326/57344 train_time:17354739ms step_avg:572.27ms +step:30327/57344 train_time:17355290ms step_avg:572.27ms +grad accum step:7582/14336 +step:30328/57344 train_time:17356579ms step_avg:572.30ms +step:30329/57344 train_time:17356596ms step_avg:572.28ms +step:30330/57344 train_time:17356842ms step_avg:572.27ms +step:30331/57344 train_time:17357388ms step_avg:572.27ms +grad accum step:7583/14336 +step:30332/57344 train_time:17358717ms step_avg:572.29ms +step:30333/57344 train_time:17358734ms step_avg:572.27ms +step:30334/57344 train_time:17358979ms step_avg:572.26ms +step:30335/57344 train_time:17359522ms step_avg:572.26ms +grad accum step:7584/14336 +step:30336/57344 train_time:17360839ms step_avg:572.29ms +step:30336/57344 val_loss:6.355168 train_time:17360840ms step_avg:572.29ms +step:30337/57344 train_time:17360918ms step_avg:572.27ms +step:30338/57344 train_time:17361093ms step_avg:572.26ms +step:30339/57344 train_time:17361644ms step_avg:572.25ms +grad accum step:7585/14336 +step:30340/57344 train_time:17362986ms step_avg:572.28ms +step:30341/57344 train_time:17362998ms step_avg:572.26ms +step:30342/57344 train_time:17363228ms step_avg:572.25ms +step:30343/57344 train_time:17363787ms step_avg:572.25ms +grad accum step:7586/14336 +step:30344/57344 train_time:17365090ms step_avg:572.27ms +step:30345/57344 train_time:17365106ms step_avg:572.26ms +step:30346/57344 train_time:17365357ms step_avg:572.25ms +step:30347/57344 train_time:17365912ms step_avg:572.24ms +grad accum step:7587/14336 +step:30348/57344 train_time:17367229ms step_avg:572.27ms +step:30349/57344 train_time:17367246ms step_avg:572.25ms +step:30350/57344 train_time:17367491ms step_avg:572.24ms +step:30351/57344 train_time:17368032ms step_avg:572.24ms +grad accum step:7588/14336 +step:30352/57344 train_time:17369334ms step_avg:572.26ms +step:30353/57344 train_time:17369351ms step_avg:572.24ms +step:30354/57344 train_time:17369597ms step_avg:572.23ms +step:30355/57344 train_time:17370144ms step_avg:572.23ms +grad accum step:7589/14336 +step:30356/57344 train_time:17371412ms step_avg:572.26ms +step:30357/57344 train_time:17371429ms step_avg:572.24ms +step:30358/57344 train_time:17371681ms step_avg:572.23ms +step:30359/57344 train_time:17372238ms step_avg:572.23ms +grad accum step:7590/14336 +step:30360/57344 train_time:17373534ms step_avg:572.25ms +step:30361/57344 train_time:17373550ms step_avg:572.23ms +step:30362/57344 train_time:17373807ms step_avg:572.22ms +step:30363/57344 train_time:17374379ms step_avg:572.22ms +grad accum step:7591/14336 +step:30364/57344 train_time:17375652ms step_avg:572.25ms +step:30365/57344 train_time:17375669ms step_avg:572.23ms +step:30366/57344 train_time:17375915ms step_avg:572.22ms +step:30367/57344 train_time:17376468ms step_avg:572.22ms +grad accum step:7592/14336 +step:30368/57344 train_time:17377795ms step_avg:572.24ms +step:30369/57344 train_time:17377812ms step_avg:572.22ms +step:30370/57344 train_time:17378057ms step_avg:572.21ms +step:30371/57344 train_time:17378605ms step_avg:572.21ms +grad accum step:7593/14336 +step:30372/57344 train_time:17379910ms step_avg:572.23ms +step:30373/57344 train_time:17379927ms step_avg:572.22ms +step:30374/57344 train_time:17380174ms step_avg:572.21ms +step:30375/57344 train_time:17380718ms step_avg:572.20ms +grad accum step:7594/14336 +step:30376/57344 train_time:17382014ms step_avg:572.23ms +step:30377/57344 train_time:17382031ms step_avg:572.21ms +step:30378/57344 train_time:17382279ms step_avg:572.20ms +step:30379/57344 train_time:17382830ms step_avg:572.20ms +grad accum step:7595/14336 +step:30380/57344 train_time:17384165ms step_avg:572.22ms +step:30381/57344 train_time:17384178ms step_avg:572.21ms +step:30382/57344 train_time:17384424ms step_avg:572.19ms +step:30383/57344 train_time:17384968ms step_avg:572.19ms +grad accum step:7596/14336 +step:30384/57344 train_time:17386249ms step_avg:572.22ms +step:30385/57344 train_time:17386266ms step_avg:572.20ms +step:30386/57344 train_time:17386512ms step_avg:572.19ms +step:30387/57344 train_time:17387055ms step_avg:572.19ms +grad accum step:7597/14336 +step:30388/57344 train_time:17388354ms step_avg:572.21ms +step:30389/57344 train_time:17388371ms step_avg:572.19ms +step:30390/57344 train_time:17388617ms step_avg:572.18ms +step:30391/57344 train_time:17389165ms step_avg:572.18ms +grad accum step:7598/14336 +step:30392/57344 train_time:17390463ms step_avg:572.21ms +step:30393/57344 train_time:17390480ms step_avg:572.19ms +step:30394/57344 train_time:17390730ms step_avg:572.18ms +step:30395/57344 train_time:17391284ms step_avg:572.18ms +grad accum step:7599/14336 +step:30396/57344 train_time:17392586ms step_avg:572.20ms +step:30397/57344 train_time:17392601ms step_avg:572.18ms +step:30398/57344 train_time:17392851ms step_avg:572.17ms +step:30399/57344 train_time:17393408ms step_avg:572.17ms +grad accum step:7600/14336 +step:30400/57344 train_time:17396293ms step_avg:572.25ms +step:30400/57344 val_loss:6.326638 train_time:17396376ms step_avg:572.25ms +step:30401/57344 train_time:17396697ms step_avg:572.24ms +step:30402/57344 train_time:17396853ms step_avg:572.23ms +step:30403/57344 train_time:17397401ms step_avg:572.23ms +grad accum step:7601/14336 +step:30404/57344 train_time:17398751ms step_avg:572.25ms +step:30405/57344 train_time:17398766ms step_avg:572.23ms +step:30406/57344 train_time:17399011ms step_avg:572.22ms +step:30407/57344 train_time:17399556ms step_avg:572.22ms +grad accum step:7602/14336 +step:30408/57344 train_time:17400839ms step_avg:572.25ms +step:30409/57344 train_time:17400855ms step_avg:572.23ms +step:30410/57344 train_time:17401099ms step_avg:572.22ms +step:30411/57344 train_time:17401644ms step_avg:572.22ms +grad accum step:7603/14336 +step:30412/57344 train_time:17402962ms step_avg:572.24ms +step:30413/57344 train_time:17402979ms step_avg:572.22ms +step:30414/57344 train_time:17403230ms step_avg:572.21ms +step:30415/57344 train_time:17403789ms step_avg:572.21ms +grad accum step:7604/14336 +step:30416/57344 train_time:17405090ms step_avg:572.23ms +step:30417/57344 train_time:17405105ms step_avg:572.22ms +step:30418/57344 train_time:17405351ms step_avg:572.21ms +step:30419/57344 train_time:17405896ms step_avg:572.20ms +grad accum step:7605/14336 +step:30420/57344 train_time:17407177ms step_avg:572.23ms +step:30421/57344 train_time:17407194ms step_avg:572.21ms +step:30422/57344 train_time:17407437ms step_avg:572.20ms +step:30423/57344 train_time:17407983ms step_avg:572.20ms +grad accum step:7606/14336 +step:30424/57344 train_time:17409281ms step_avg:572.22ms +step:30425/57344 train_time:17409298ms step_avg:572.20ms +step:30426/57344 train_time:17409544ms step_avg:572.19ms +step:30427/57344 train_time:17410090ms step_avg:572.19ms +grad accum step:7607/14336 +step:30428/57344 train_time:17411385ms step_avg:572.22ms +step:30429/57344 train_time:17411402ms step_avg:572.20ms +step:30430/57344 train_time:17411647ms step_avg:572.19ms +step:30431/57344 train_time:17412192ms step_avg:572.19ms +grad accum step:7608/14336 +step:30432/57344 train_time:17413492ms step_avg:572.21ms +step:30433/57344 train_time:17413509ms step_avg:572.19ms +step:30434/57344 train_time:17413762ms step_avg:572.18ms +step:30435/57344 train_time:17414326ms step_avg:572.18ms +grad accum step:7609/14336 +step:30436/57344 train_time:17415650ms step_avg:572.21ms +step:30437/57344 train_time:17415665ms step_avg:572.19ms +step:30438/57344 train_time:17415914ms step_avg:572.18ms +step:30439/57344 train_time:17416468ms step_avg:572.18ms +grad accum step:7610/14336 +step:30440/57344 train_time:17417751ms step_avg:572.20ms +step:30441/57344 train_time:17417768ms step_avg:572.18ms +step:30442/57344 train_time:17418011ms step_avg:572.17ms +step:30443/57344 train_time:17418554ms step_avg:572.17ms +grad accum step:7611/14336 +step:30444/57344 train_time:17419849ms step_avg:572.19ms +step:30445/57344 train_time:17419866ms step_avg:572.17ms +step:30446/57344 train_time:17420114ms step_avg:572.16ms +step:30447/57344 train_time:17420665ms step_avg:572.16ms +grad accum step:7612/14336 +step:30448/57344 train_time:17421974ms step_avg:572.19ms +step:30449/57344 train_time:17421990ms step_avg:572.17ms +step:30450/57344 train_time:17422238ms step_avg:572.16ms +step:30451/57344 train_time:17422781ms step_avg:572.16ms +grad accum step:7613/14336 +step:30452/57344 train_time:17424119ms step_avg:572.18ms +step:30453/57344 train_time:17424136ms step_avg:572.16ms +step:30454/57344 train_time:17424389ms step_avg:572.15ms +step:30455/57344 train_time:17424951ms step_avg:572.15ms +grad accum step:7614/14336 +step:30456/57344 train_time:17426272ms step_avg:572.18ms +step:30457/57344 train_time:17426289ms step_avg:572.16ms +step:30458/57344 train_time:17426537ms step_avg:572.15ms +step:30459/57344 train_time:17427081ms step_avg:572.15ms +grad accum step:7615/14336 +step:30460/57344 train_time:17428428ms step_avg:572.17ms +step:30461/57344 train_time:17428445ms step_avg:572.16ms +step:30462/57344 train_time:17428698ms step_avg:572.15ms +step:30463/57344 train_time:17429254ms step_avg:572.15ms +grad accum step:7616/14336 +step:30464/57344 train_time:17430567ms step_avg:572.17ms +step:30464/57344 val_loss:6.320801 train_time:17430568ms step_avg:572.17ms +step:30465/57344 train_time:17430579ms step_avg:572.15ms +step:30466/57344 train_time:17430810ms step_avg:572.14ms +step:30467/57344 train_time:17431369ms step_avg:572.14ms +grad accum step:7617/14336 +step:30468/57344 train_time:17432678ms step_avg:572.16ms +step:30469/57344 train_time:17432695ms step_avg:572.15ms +step:30470/57344 train_time:17432942ms step_avg:572.13ms +step:30471/57344 train_time:17433492ms step_avg:572.13ms +grad accum step:7618/14336 +step:30472/57344 train_time:17434782ms step_avg:572.16ms +step:30473/57344 train_time:17434799ms step_avg:572.14ms +step:30474/57344 train_time:17435044ms step_avg:572.13ms +step:30475/57344 train_time:17435596ms step_avg:572.13ms +grad accum step:7619/14336 +step:30476/57344 train_time:17436911ms step_avg:572.15ms +step:30477/57344 train_time:17436928ms step_avg:572.13ms +step:30478/57344 train_time:17437167ms step_avg:572.12ms +step:30479/57344 train_time:17437707ms step_avg:572.12ms +grad accum step:7620/14336 +step:30480/57344 train_time:17439002ms step_avg:572.15ms +step:30481/57344 train_time:17439018ms step_avg:572.13ms +step:30482/57344 train_time:17439265ms step_avg:572.12ms +step:30483/57344 train_time:17439815ms step_avg:572.12ms +grad accum step:7621/14336 +step:30484/57344 train_time:17441127ms step_avg:572.14ms +step:30485/57344 train_time:17441144ms step_avg:572.12ms +step:30486/57344 train_time:17441393ms step_avg:572.11ms +step:30487/57344 train_time:17441952ms step_avg:572.11ms +grad accum step:7622/14336 +step:30488/57344 train_time:17443296ms step_avg:572.14ms +step:30489/57344 train_time:17443313ms step_avg:572.12ms +step:30490/57344 train_time:17443568ms step_avg:572.11ms +step:30491/57344 train_time:17444129ms step_avg:572.11ms +grad accum step:7623/14336 +step:30492/57344 train_time:17445430ms step_avg:572.13ms +step:30493/57344 train_time:17445447ms step_avg:572.11ms +step:30494/57344 train_time:17445703ms step_avg:572.10ms +step:30495/57344 train_time:17446270ms step_avg:572.10ms +grad accum step:7624/14336 +step:30496/57344 train_time:17447577ms step_avg:572.13ms +step:30497/57344 train_time:17447593ms step_avg:572.11ms +step:30498/57344 train_time:17447847ms step_avg:572.10ms +step:30499/57344 train_time:17448409ms step_avg:572.10ms +grad accum step:7625/14336 +step:30500/57344 train_time:17449830ms step_avg:572.13ms +step:30501/57344 train_time:17449847ms step_avg:572.11ms +step:30502/57344 train_time:17450097ms step_avg:572.10ms +step:30503/57344 train_time:17450653ms step_avg:572.10ms +grad accum step:7626/14336 +step:30504/57344 train_time:17452020ms step_avg:572.12ms +step:30505/57344 train_time:17452037ms step_avg:572.10ms +step:30506/57344 train_time:17452282ms step_avg:572.09ms +step:30507/57344 train_time:17452835ms step_avg:572.09ms +grad accum step:7627/14336 +step:30508/57344 train_time:17454170ms step_avg:572.12ms +step:30509/57344 train_time:17454186ms step_avg:572.10ms +step:30510/57344 train_time:17454429ms step_avg:572.09ms +step:30511/57344 train_time:17454983ms step_avg:572.09ms +grad accum step:7628/14336 +step:30512/57344 train_time:17456349ms step_avg:572.11ms +step:30513/57344 train_time:17456366ms step_avg:572.10ms +step:30514/57344 train_time:17456614ms step_avg:572.09ms +step:30515/57344 train_time:17457172ms step_avg:572.08ms +grad accum step:7629/14336 +step:30516/57344 train_time:17458511ms step_avg:572.11ms +step:30517/57344 train_time:17458528ms step_avg:572.09ms +step:30518/57344 train_time:17458781ms step_avg:572.08ms +step:30519/57344 train_time:17459337ms step_avg:572.08ms +grad accum step:7630/14336 +step:30520/57344 train_time:17460610ms step_avg:572.10ms +step:30521/57344 train_time:17460627ms step_avg:572.09ms +step:30522/57344 train_time:17460881ms step_avg:572.08ms +step:30523/57344 train_time:17461444ms step_avg:572.07ms +grad accum step:7631/14336 +step:30524/57344 train_time:17462780ms step_avg:572.10ms +step:30525/57344 train_time:17462794ms step_avg:572.08ms +step:30526/57344 train_time:17463043ms step_avg:572.07ms +step:30527/57344 train_time:17463600ms step_avg:572.07ms +grad accum step:7632/14336 +step:30528/57344 train_time:17464938ms step_avg:572.10ms +step:30528/57344 val_loss:6.297551 train_time:17464938ms step_avg:572.10ms +step:30529/57344 train_time:17464950ms step_avg:572.08ms +step:30530/57344 train_time:17465173ms step_avg:572.07ms +step:30531/57344 train_time:17465718ms step_avg:572.07ms +grad accum step:7633/14336 +step:30532/57344 train_time:17467031ms step_avg:572.09ms +step:30533/57344 train_time:17467048ms step_avg:572.07ms +step:30534/57344 train_time:17467296ms step_avg:572.06ms +step:30535/57344 train_time:17467850ms step_avg:572.06ms +grad accum step:7634/14336 +step:30536/57344 train_time:17469144ms step_avg:572.08ms +step:30537/57344 train_time:17469160ms step_avg:572.07ms +step:30538/57344 train_time:17469409ms step_avg:572.05ms +step:30539/57344 train_time:17469962ms step_avg:572.05ms +grad accum step:7635/14336 +step:30540/57344 train_time:17471257ms step_avg:572.08ms +step:30541/57344 train_time:17471273ms step_avg:572.06ms +step:30542/57344 train_time:17471529ms step_avg:572.05ms +step:30543/57344 train_time:17472098ms step_avg:572.05ms +grad accum step:7636/14336 +step:30544/57344 train_time:17473413ms step_avg:572.07ms +step:30545/57344 train_time:17473430ms step_avg:572.06ms +step:30546/57344 train_time:17473683ms step_avg:572.04ms +step:30547/57344 train_time:17474257ms step_avg:572.04ms +grad accum step:7637/14336 +step:30548/57344 train_time:17475585ms step_avg:572.07ms +step:30549/57344 train_time:17475602ms step_avg:572.05ms +step:30550/57344 train_time:17475848ms step_avg:572.04ms +step:30551/57344 train_time:17476397ms step_avg:572.04ms +grad accum step:7638/14336 +step:30552/57344 train_time:17477697ms step_avg:572.06ms +step:30553/57344 train_time:17477713ms step_avg:572.05ms +step:30554/57344 train_time:17477958ms step_avg:572.04ms +step:30555/57344 train_time:17478504ms step_avg:572.03ms +grad accum step:7639/14336 +step:30556/57344 train_time:17479807ms step_avg:572.06ms +step:30557/57344 train_time:17479823ms step_avg:572.04ms +step:30558/57344 train_time:17480071ms step_avg:572.03ms +step:30559/57344 train_time:17480613ms step_avg:572.03ms +grad accum step:7640/14336 +step:30560/57344 train_time:17481899ms step_avg:572.05ms +step:30561/57344 train_time:17481915ms step_avg:572.03ms +step:30562/57344 train_time:17482163ms step_avg:572.02ms +step:30563/57344 train_time:17482703ms step_avg:572.02ms +grad accum step:7641/14336 +step:30564/57344 train_time:17483980ms step_avg:572.04ms +step:30565/57344 train_time:17483997ms step_avg:572.03ms +step:30566/57344 train_time:17484256ms step_avg:572.02ms +step:30567/57344 train_time:17484836ms step_avg:572.02ms +grad accum step:7642/14336 +step:30568/57344 train_time:17486170ms step_avg:572.04ms +step:30569/57344 train_time:17486187ms step_avg:572.02ms +step:30570/57344 train_time:17486435ms step_avg:572.01ms +step:30571/57344 train_time:17486983ms step_avg:572.01ms +grad accum step:7643/14336 +step:30572/57344 train_time:17488274ms step_avg:572.04ms +step:30573/57344 train_time:17488291ms step_avg:572.02ms +step:30574/57344 train_time:17488539ms step_avg:572.01ms +step:30575/57344 train_time:17489088ms step_avg:572.01ms +grad accum step:7644/14336 +step:30576/57344 train_time:17490388ms step_avg:572.03ms +step:30577/57344 train_time:17490405ms step_avg:572.01ms +step:30578/57344 train_time:17490654ms step_avg:572.00ms +step:30579/57344 train_time:17491198ms step_avg:572.00ms +grad accum step:7645/14336 +step:30580/57344 train_time:17492489ms step_avg:572.02ms +step:30581/57344 train_time:17492506ms step_avg:572.01ms +step:30582/57344 train_time:17492764ms step_avg:572.00ms +step:30583/57344 train_time:17493348ms step_avg:572.00ms +grad accum step:7646/14336 +step:30584/57344 train_time:17494714ms step_avg:572.02ms +step:30585/57344 train_time:17494731ms step_avg:572.00ms +step:30586/57344 train_time:17494974ms step_avg:571.99ms +step:30587/57344 train_time:17495519ms step_avg:571.99ms +grad accum step:7647/14336 +step:30588/57344 train_time:17496816ms step_avg:572.02ms +step:30589/57344 train_time:17496833ms step_avg:572.00ms +step:30590/57344 train_time:17497086ms step_avg:571.99ms +step:30591/57344 train_time:17497648ms step_avg:571.99ms +grad accum step:7648/14336 +step:30592/57344 train_time:17498998ms step_avg:572.01ms +step:30592/57344 val_loss:6.274022 train_time:17498999ms step_avg:572.01ms +step:30593/57344 train_time:17499011ms step_avg:571.99ms +step:30594/57344 train_time:17499230ms step_avg:571.98ms +step:30595/57344 train_time:17499772ms step_avg:571.98ms +grad accum step:7649/14336 +step:30596/57344 train_time:17501052ms step_avg:572.00ms +step:30597/57344 train_time:17501069ms step_avg:571.99ms +step:30598/57344 train_time:17501317ms step_avg:571.98ms +step:30599/57344 train_time:17501868ms step_avg:571.98ms +grad accum step:7650/14336 +step:30600/57344 train_time:17503182ms step_avg:572.00ms +step:30601/57344 train_time:17503199ms step_avg:571.98ms +step:30602/57344 train_time:17503450ms step_avg:571.97ms +step:30603/57344 train_time:17504015ms step_avg:571.97ms +grad accum step:7651/14336 +step:30604/57344 train_time:17505335ms step_avg:572.00ms +step:30605/57344 train_time:17505353ms step_avg:571.98ms +step:30606/57344 train_time:17505591ms step_avg:571.97ms +step:30607/57344 train_time:17506143ms step_avg:571.97ms +grad accum step:7652/14336 +step:30608/57344 train_time:17507458ms step_avg:571.99ms +step:30609/57344 train_time:17507474ms step_avg:571.97ms +step:30610/57344 train_time:17507720ms step_avg:571.96ms +step:30611/57344 train_time:17508264ms step_avg:571.96ms +grad accum step:7653/14336 +step:30612/57344 train_time:17509544ms step_avg:571.98ms +step:30613/57344 train_time:17509561ms step_avg:571.96ms +step:30614/57344 train_time:17509812ms step_avg:571.95ms +step:30615/57344 train_time:17510371ms step_avg:571.95ms +grad accum step:7654/14336 +step:30616/57344 train_time:17511676ms step_avg:571.98ms +step:30617/57344 train_time:17511693ms step_avg:571.96ms +step:30618/57344 train_time:17511937ms step_avg:571.95ms +step:30619/57344 train_time:17512485ms step_avg:571.95ms +grad accum step:7655/14336 +step:30620/57344 train_time:17513769ms step_avg:571.97ms +step:30621/57344 train_time:17513786ms step_avg:571.95ms +step:30622/57344 train_time:17514037ms step_avg:571.94ms +step:30623/57344 train_time:17514592ms step_avg:571.94ms +grad accum step:7656/14336 +step:30624/57344 train_time:17515893ms step_avg:571.97ms +step:30625/57344 train_time:17515910ms step_avg:571.95ms +step:30626/57344 train_time:17516157ms step_avg:571.94ms +step:30627/57344 train_time:17516708ms step_avg:571.94ms +grad accum step:7657/14336 +step:30628/57344 train_time:17518020ms step_avg:571.96ms +step:30629/57344 train_time:17518037ms step_avg:571.94ms +step:30630/57344 train_time:17518285ms step_avg:571.93ms +step:30631/57344 train_time:17518839ms step_avg:571.93ms +grad accum step:7658/14336 +step:30632/57344 train_time:17520157ms step_avg:571.96ms +step:30633/57344 train_time:17520172ms step_avg:571.94ms +step:30634/57344 train_time:17520417ms step_avg:571.93ms +step:30635/57344 train_time:17520970ms step_avg:571.93ms +grad accum step:7659/14336 +step:30636/57344 train_time:17522297ms step_avg:571.95ms +step:30637/57344 train_time:17522313ms step_avg:571.93ms +step:30638/57344 train_time:17522559ms step_avg:571.92ms +step:30639/57344 train_time:17523104ms step_avg:571.92ms +grad accum step:7660/14336 +step:30640/57344 train_time:17524379ms step_avg:571.94ms +step:30641/57344 train_time:17524396ms step_avg:571.93ms +step:30642/57344 train_time:17524645ms step_avg:571.92ms +step:30643/57344 train_time:17525198ms step_avg:571.92ms +grad accum step:7661/14336 +step:30644/57344 train_time:17526525ms step_avg:571.94ms +step:30645/57344 train_time:17526542ms step_avg:571.92ms +step:30646/57344 train_time:17526789ms step_avg:571.91ms +step:30647/57344 train_time:17527334ms step_avg:571.91ms +grad accum step:7662/14336 +step:30648/57344 train_time:17528632ms step_avg:571.93ms +step:30649/57344 train_time:17528648ms step_avg:571.92ms +step:30650/57344 train_time:17528897ms step_avg:571.91ms +step:30651/57344 train_time:17529438ms step_avg:571.90ms +grad accum step:7663/14336 +step:30652/57344 train_time:17530742ms step_avg:571.93ms +step:30653/57344 train_time:17530758ms step_avg:571.91ms +step:30654/57344 train_time:17531009ms step_avg:571.90ms +step:30655/57344 train_time:17531559ms step_avg:571.90ms +grad accum step:7664/14336 +step:30656/57344 train_time:17532860ms step_avg:571.92ms +step:30656/57344 val_loss:6.243879 train_time:17532860ms step_avg:571.92ms +step:30657/57344 train_time:17532872ms step_avg:571.90ms +step:30658/57344 train_time:17533100ms step_avg:571.89ms +step:30659/57344 train_time:17533653ms step_avg:571.89ms +grad accum step:7665/14336 +step:30660/57344 train_time:17534959ms step_avg:571.92ms +step:30661/57344 train_time:17534973ms step_avg:571.90ms +step:30662/57344 train_time:17535220ms step_avg:571.89ms +step:30663/57344 train_time:17535765ms step_avg:571.89ms +grad accum step:7666/14336 +step:30664/57344 train_time:17537044ms step_avg:571.91ms +step:30665/57344 train_time:17537060ms step_avg:571.89ms +step:30666/57344 train_time:17537306ms step_avg:571.88ms +step:30667/57344 train_time:17537849ms step_avg:571.88ms +grad accum step:7667/14336 +step:30668/57344 train_time:17539209ms step_avg:571.91ms +step:30669/57344 train_time:17539225ms step_avg:571.89ms +step:30670/57344 train_time:17539482ms step_avg:571.88ms +step:30671/57344 train_time:17540066ms step_avg:571.88ms +grad accum step:7668/14336 +step:30672/57344 train_time:17541404ms step_avg:571.90ms +step:30673/57344 train_time:17541421ms step_avg:571.88ms +step:30674/57344 train_time:17541678ms step_avg:571.87ms +step:30675/57344 train_time:17542256ms step_avg:571.87ms +grad accum step:7669/14336 +step:30676/57344 train_time:17543585ms step_avg:571.90ms +step:30677/57344 train_time:17543601ms step_avg:571.88ms +step:30678/57344 train_time:17543846ms step_avg:571.87ms +step:30679/57344 train_time:17544386ms step_avg:571.87ms +grad accum step:7670/14336 +step:30680/57344 train_time:17545682ms step_avg:571.89ms +step:30681/57344 train_time:17545699ms step_avg:571.88ms +step:30682/57344 train_time:17545949ms step_avg:571.86ms +step:30683/57344 train_time:17546512ms step_avg:571.86ms +grad accum step:7671/14336 +step:30684/57344 train_time:17547841ms step_avg:571.89ms +step:30685/57344 train_time:17547858ms step_avg:571.87ms +step:30686/57344 train_time:17548106ms step_avg:571.86ms +step:30687/57344 train_time:17548652ms step_avg:571.86ms +grad accum step:7672/14336 +step:30688/57344 train_time:17549941ms step_avg:571.88ms +step:30689/57344 train_time:17549958ms step_avg:571.86ms +step:30690/57344 train_time:17550225ms step_avg:571.85ms +step:30691/57344 train_time:17550840ms step_avg:571.86ms +grad accum step:7673/14336 +step:30692/57344 train_time:17552186ms step_avg:571.88ms +step:30693/57344 train_time:17552203ms step_avg:571.86ms +step:30694/57344 train_time:17552448ms step_avg:571.85ms +step:30695/57344 train_time:17552992ms step_avg:571.85ms +grad accum step:7674/14336 +step:30696/57344 train_time:17554286ms step_avg:571.88ms +step:30697/57344 train_time:17554303ms step_avg:571.86ms +step:30698/57344 train_time:17554548ms step_avg:571.85ms +step:30699/57344 train_time:17555093ms step_avg:571.85ms +grad accum step:7675/14336 +step:30700/57344 train_time:17556380ms step_avg:571.87ms +step:30701/57344 train_time:17556397ms step_avg:571.85ms +step:30702/57344 train_time:17556649ms step_avg:571.84ms +step:30703/57344 train_time:17557203ms step_avg:571.84ms +grad accum step:7676/14336 +step:30704/57344 train_time:17558492ms step_avg:571.86ms +step:30705/57344 train_time:17558509ms step_avg:571.85ms +step:30706/57344 train_time:17558756ms step_avg:571.83ms +step:30707/57344 train_time:17559304ms step_avg:571.83ms +grad accum step:7677/14336 +step:30708/57344 train_time:17560616ms step_avg:571.86ms +step:30709/57344 train_time:17560633ms step_avg:571.84ms +step:30710/57344 train_time:17560880ms step_avg:571.83ms +step:30711/57344 train_time:17561427ms step_avg:571.83ms +grad accum step:7678/14336 +step:30712/57344 train_time:17562718ms step_avg:571.85ms +step:30713/57344 train_time:17562735ms step_avg:571.83ms +step:30714/57344 train_time:17562980ms step_avg:571.82ms +step:30715/57344 train_time:17563531ms step_avg:571.82ms +grad accum step:7679/14336 +step:30716/57344 train_time:17564816ms step_avg:571.85ms +step:30717/57344 train_time:17564833ms step_avg:571.83ms +step:30718/57344 train_time:17565082ms step_avg:571.82ms +step:30719/57344 train_time:17565633ms step_avg:571.82ms +grad accum step:7680/14336 +step:30720/57344 train_time:17566948ms step_avg:571.84ms +step:30720/57344 val_loss:6.233229 train_time:17566949ms step_avg:571.84ms +step:30721/57344 train_time:17566961ms step_avg:571.82ms +step:30722/57344 train_time:17567192ms step_avg:571.81ms +step:30723/57344 train_time:17567753ms step_avg:571.81ms +grad accum step:7681/14336 +step:30724/57344 train_time:17569042ms step_avg:571.83ms +step:30725/57344 train_time:17569059ms step_avg:571.82ms +step:30726/57344 train_time:17569308ms step_avg:571.81ms +step:30727/57344 train_time:17569859ms step_avg:571.81ms +grad accum step:7682/14336 +step:30728/57344 train_time:17571150ms step_avg:571.83ms +step:30729/57344 train_time:17571167ms step_avg:571.81ms +step:30730/57344 train_time:17571413ms step_avg:571.80ms +step:30731/57344 train_time:17571955ms step_avg:571.80ms +grad accum step:7683/14336 +step:30732/57344 train_time:17573239ms step_avg:571.82ms +step:30733/57344 train_time:17573256ms step_avg:571.80ms +step:30734/57344 train_time:17573503ms step_avg:571.79ms +step:30735/57344 train_time:17574051ms step_avg:571.79ms +grad accum step:7684/14336 +step:30736/57344 train_time:17575372ms step_avg:571.82ms +step:30737/57344 train_time:17575389ms step_avg:571.80ms +step:30738/57344 train_time:17575633ms step_avg:571.79ms +step:30739/57344 train_time:17576170ms step_avg:571.79ms +grad accum step:7685/14336 +step:30740/57344 train_time:17577479ms step_avg:571.81ms +step:30741/57344 train_time:17577496ms step_avg:571.79ms +step:30742/57344 train_time:17577746ms step_avg:571.78ms +step:30743/57344 train_time:17578306ms step_avg:571.78ms +grad accum step:7686/14336 +step:30744/57344 train_time:17579671ms step_avg:571.81ms +step:30745/57344 train_time:17579688ms step_avg:571.79ms +step:30746/57344 train_time:17579943ms step_avg:571.78ms +step:30747/57344 train_time:17580507ms step_avg:571.78ms +grad accum step:7687/14336 +step:30748/57344 train_time:17581813ms step_avg:571.80ms +step:30749/57344 train_time:17581830ms step_avg:571.79ms +step:30750/57344 train_time:17582078ms step_avg:571.77ms +step:30751/57344 train_time:17582626ms step_avg:571.77ms +grad accum step:7688/14336 +step:30752/57344 train_time:17583921ms step_avg:571.80ms +step:30753/57344 train_time:17583938ms step_avg:571.78ms +step:30754/57344 train_time:17584187ms step_avg:571.77ms +step:30755/57344 train_time:17584733ms step_avg:571.77ms +grad accum step:7689/14336 +step:30756/57344 train_time:17586011ms step_avg:571.79ms +step:30757/57344 train_time:17586028ms step_avg:571.77ms +step:30758/57344 train_time:17586274ms step_avg:571.76ms +step:30759/57344 train_time:17586835ms step_avg:571.76ms +grad accum step:7690/14336 +step:30760/57344 train_time:17588195ms step_avg:571.79ms +step:30761/57344 train_time:17588212ms step_avg:571.77ms +step:30762/57344 train_time:17588455ms step_avg:571.76ms +step:30763/57344 train_time:17589000ms step_avg:571.76ms +grad accum step:7691/14336 +step:30764/57344 train_time:17590284ms step_avg:571.78ms +step:30765/57344 train_time:17590301ms step_avg:571.76ms +step:30766/57344 train_time:17590543ms step_avg:571.75ms +step:30767/57344 train_time:17591086ms step_avg:571.75ms +grad accum step:7692/14336 +step:30768/57344 train_time:17592382ms step_avg:571.78ms +step:30769/57344 train_time:17592399ms step_avg:571.76ms +step:30770/57344 train_time:17592646ms step_avg:571.75ms +step:30771/57344 train_time:17593188ms step_avg:571.75ms +grad accum step:7693/14336 +step:30772/57344 train_time:17594463ms step_avg:571.77ms +step:30773/57344 train_time:17594480ms step_avg:571.75ms +step:30774/57344 train_time:17594731ms step_avg:571.74ms +step:30775/57344 train_time:17595288ms step_avg:571.74ms +grad accum step:7694/14336 +step:30776/57344 train_time:17596610ms step_avg:571.76ms +step:30777/57344 train_time:17596627ms step_avg:571.75ms +step:30778/57344 train_time:17596874ms step_avg:571.74ms +step:30779/57344 train_time:17597418ms step_avg:571.73ms +grad accum step:7695/14336 +step:30780/57344 train_time:17598731ms step_avg:571.76ms +step:30781/57344 train_time:17598747ms step_avg:571.74ms +step:30782/57344 train_time:17598994ms step_avg:571.73ms +step:30783/57344 train_time:17599536ms step_avg:571.73ms +grad accum step:7696/14336 +step:30784/57344 train_time:17603382ms step_avg:571.84ms +step:30784/57344 val_loss:6.208236 train_time:17603383ms step_avg:571.84ms +step:30785/57344 train_time:17603395ms step_avg:571.82ms +step:30786/57344 train_time:17603619ms step_avg:571.81ms +step:30787/57344 train_time:17604168ms step_avg:571.81ms +grad accum step:7697/14336 +step:30788/57344 train_time:17605480ms step_avg:571.83ms +step:30789/57344 train_time:17605497ms step_avg:571.81ms +step:30790/57344 train_time:17605742ms step_avg:571.80ms +step:30791/57344 train_time:17606296ms step_avg:571.80ms +grad accum step:7698/14336 +step:30792/57344 train_time:17607629ms step_avg:571.82ms +step:30793/57344 train_time:17607646ms step_avg:571.81ms +step:30794/57344 train_time:17607895ms step_avg:571.80ms +step:30795/57344 train_time:17608455ms step_avg:571.80ms +grad accum step:7699/14336 +step:30796/57344 train_time:17609776ms step_avg:571.82ms +step:30797/57344 train_time:17609793ms step_avg:571.80ms +step:30798/57344 train_time:17610036ms step_avg:571.79ms +step:30799/57344 train_time:17610577ms step_avg:571.79ms +grad accum step:7700/14336 +step:30800/57344 train_time:17611856ms step_avg:571.81ms +step:30801/57344 train_time:17611870ms step_avg:571.80ms +step:30802/57344 train_time:17612117ms step_avg:571.78ms +step:30803/57344 train_time:17612668ms step_avg:571.78ms +grad accum step:7701/14336 +step:30804/57344 train_time:17613972ms step_avg:571.81ms +step:30805/57344 train_time:17613989ms step_avg:571.79ms +step:30806/57344 train_time:17614238ms step_avg:571.78ms +step:30807/57344 train_time:17614805ms step_avg:571.78ms +grad accum step:7702/14336 +step:30808/57344 train_time:17616137ms step_avg:571.80ms +step:30809/57344 train_time:17616154ms step_avg:571.79ms +step:30810/57344 train_time:17616401ms step_avg:571.78ms +step:30811/57344 train_time:17616947ms step_avg:571.77ms +grad accum step:7703/14336 +step:30812/57344 train_time:17618235ms step_avg:571.80ms +step:30813/57344 train_time:17618251ms step_avg:571.78ms +step:30814/57344 train_time:17618494ms step_avg:571.77ms +step:30815/57344 train_time:17619034ms step_avg:571.77ms +grad accum step:7704/14336 +step:30816/57344 train_time:17620356ms step_avg:571.79ms +step:30817/57344 train_time:17620372ms step_avg:571.77ms +step:30818/57344 train_time:17620626ms step_avg:571.76ms +step:30819/57344 train_time:17621193ms step_avg:571.76ms +grad accum step:7705/14336 +step:30820/57344 train_time:17622530ms step_avg:571.79ms +step:30821/57344 train_time:17622547ms step_avg:571.77ms +step:30822/57344 train_time:17622800ms step_avg:571.76ms +step:30823/57344 train_time:17623369ms step_avg:571.76ms +grad accum step:7706/14336 +step:30824/57344 train_time:17624693ms step_avg:571.78ms +step:30825/57344 train_time:17624710ms step_avg:571.77ms +step:30826/57344 train_time:17624962ms step_avg:571.76ms +step:30827/57344 train_time:17625509ms step_avg:571.76ms +grad accum step:7707/14336 +step:30828/57344 train_time:17626827ms step_avg:571.78ms +step:30829/57344 train_time:17626844ms step_avg:571.76ms +step:30830/57344 train_time:17627098ms step_avg:571.75ms +step:30831/57344 train_time:17627674ms step_avg:571.75ms +grad accum step:7708/14336 +step:30832/57344 train_time:17628989ms step_avg:571.78ms +step:30833/57344 train_time:17629005ms step_avg:571.76ms +step:30834/57344 train_time:17629251ms step_avg:571.75ms +step:30835/57344 train_time:17629792ms step_avg:571.75ms +grad accum step:7709/14336 +step:30836/57344 train_time:17631121ms step_avg:571.77ms +step:30837/57344 train_time:17631138ms step_avg:571.75ms +step:30838/57344 train_time:17631384ms step_avg:571.74ms +step:30839/57344 train_time:17631926ms step_avg:571.74ms +grad accum step:7710/14336 +step:30840/57344 train_time:17633346ms step_avg:571.77ms +step:30841/57344 train_time:17633362ms step_avg:571.75ms +step:30842/57344 train_time:17633613ms step_avg:571.74ms +step:30843/57344 train_time:17634166ms step_avg:571.74ms +grad accum step:7711/14336 +step:30844/57344 train_time:17635461ms step_avg:571.76ms +step:30845/57344 train_time:17635478ms step_avg:571.75ms +step:30846/57344 train_time:17635730ms step_avg:571.73ms +step:30847/57344 train_time:17636299ms step_avg:571.73ms +grad accum step:7712/14336 +step:30848/57344 train_time:17637609ms step_avg:571.76ms +step:30848/57344 val_loss:6.192863 train_time:17637615ms step_avg:571.76ms +step:30849/57344 train_time:17637627ms step_avg:571.74ms +step:30850/57344 train_time:17637847ms step_avg:571.73ms +step:30851/57344 train_time:17638392ms step_avg:571.73ms +grad accum step:7713/14336 +step:30852/57344 train_time:17639703ms step_avg:571.75ms +step:30853/57344 train_time:17639720ms step_avg:571.73ms +step:30854/57344 train_time:17639965ms step_avg:571.72ms +step:30855/57344 train_time:17640512ms step_avg:571.72ms +grad accum step:7714/14336 +step:30856/57344 train_time:17641789ms step_avg:571.75ms +step:30857/57344 train_time:17641806ms step_avg:571.73ms +step:30858/57344 train_time:17642054ms step_avg:571.72ms +step:30859/57344 train_time:17642608ms step_avg:571.72ms +grad accum step:7715/14336 +step:30860/57344 train_time:17643892ms step_avg:571.74ms +step:30861/57344 train_time:17643909ms step_avg:571.72ms +step:30862/57344 train_time:17644159ms step_avg:571.71ms +step:30863/57344 train_time:17644714ms step_avg:571.71ms +grad accum step:7716/14336 +step:30864/57344 train_time:17646003ms step_avg:571.73ms +step:30865/57344 train_time:17646020ms step_avg:571.72ms +step:30866/57344 train_time:17646263ms step_avg:571.71ms +step:30867/57344 train_time:17646803ms step_avg:571.70ms +grad accum step:7717/14336 +step:30868/57344 train_time:17648084ms step_avg:571.73ms +step:30869/57344 train_time:17648101ms step_avg:571.71ms +step:30870/57344 train_time:17648349ms step_avg:571.70ms +step:30871/57344 train_time:17648906ms step_avg:571.70ms +grad accum step:7718/14336 +step:30872/57344 train_time:17650205ms step_avg:571.72ms +step:30873/57344 train_time:17650221ms step_avg:571.70ms +step:30874/57344 train_time:17650470ms step_avg:571.69ms +step:30875/57344 train_time:17651022ms step_avg:571.69ms +grad accum step:7719/14336 +step:30876/57344 train_time:17652329ms step_avg:571.72ms +step:30877/57344 train_time:17652346ms step_avg:571.70ms +step:30878/57344 train_time:17652598ms step_avg:571.69ms +step:30879/57344 train_time:17653154ms step_avg:571.69ms +grad accum step:7720/14336 +step:30880/57344 train_time:17654499ms step_avg:571.71ms +step:30881/57344 train_time:17654515ms step_avg:571.70ms +step:30882/57344 train_time:17654768ms step_avg:571.68ms +step:30883/57344 train_time:17655329ms step_avg:571.68ms +grad accum step:7721/14336 +step:30884/57344 train_time:17656638ms step_avg:571.71ms +step:30885/57344 train_time:17656654ms step_avg:571.69ms +step:30886/57344 train_time:17656904ms step_avg:571.68ms +step:30887/57344 train_time:17657458ms step_avg:571.68ms +grad accum step:7722/14336 +step:30888/57344 train_time:17658763ms step_avg:571.70ms +step:30889/57344 train_time:17658780ms step_avg:571.69ms +step:30890/57344 train_time:17659030ms step_avg:571.67ms +step:30891/57344 train_time:17659587ms step_avg:571.67ms +grad accum step:7723/14336 +step:30892/57344 train_time:17660885ms step_avg:571.70ms +step:30893/57344 train_time:17660903ms step_avg:571.68ms +step:30894/57344 train_time:17661156ms step_avg:571.67ms +step:30895/57344 train_time:17661733ms step_avg:571.67ms +grad accum step:7724/14336 +step:30896/57344 train_time:17663088ms step_avg:571.69ms +step:30897/57344 train_time:17663105ms step_avg:571.68ms +step:30898/57344 train_time:17663354ms step_avg:571.67ms +step:30899/57344 train_time:17663907ms step_avg:571.67ms +grad accum step:7725/14336 +step:30900/57344 train_time:17665194ms step_avg:571.69ms +step:30901/57344 train_time:17665210ms step_avg:571.67ms +step:30902/57344 train_time:17665453ms step_avg:571.66ms +step:30903/57344 train_time:17665997ms step_avg:571.66ms +grad accum step:7726/14336 +step:30904/57344 train_time:17667295ms step_avg:571.68ms +step:30905/57344 train_time:17667312ms step_avg:571.67ms +step:30906/57344 train_time:17667561ms step_avg:571.65ms +step:30907/57344 train_time:17668122ms step_avg:571.65ms +grad accum step:7727/14336 +step:30908/57344 train_time:17669427ms step_avg:571.68ms +step:30909/57344 train_time:17669443ms step_avg:571.66ms +step:30910/57344 train_time:17669688ms step_avg:571.65ms +step:30911/57344 train_time:17670228ms step_avg:571.65ms +grad accum step:7728/14336 +step:30912/57344 train_time:17671505ms step_avg:571.67ms +step:30912/57344 val_loss:6.175398 train_time:17671507ms step_avg:571.67ms +step:30913/57344 train_time:17671519ms step_avg:571.65ms +step:30914/57344 train_time:17671739ms step_avg:571.64ms +step:30915/57344 train_time:17672292ms step_avg:571.64ms +grad accum step:7729/14336 +step:30916/57344 train_time:17673613ms step_avg:571.67ms +step:30917/57344 train_time:17673629ms step_avg:571.65ms +step:30918/57344 train_time:17673882ms step_avg:571.64ms +step:30919/57344 train_time:17674447ms step_avg:571.64ms +grad accum step:7730/14336 +step:30920/57344 train_time:17675728ms step_avg:571.66ms +step:30921/57344 train_time:17675745ms step_avg:571.64ms +step:30922/57344 train_time:17675993ms step_avg:571.63ms +step:30923/57344 train_time:17676537ms step_avg:571.63ms +grad accum step:7731/14336 +step:30924/57344 train_time:17677809ms step_avg:571.65ms +step:30925/57344 train_time:17677826ms step_avg:571.64ms +step:30926/57344 train_time:17678070ms step_avg:571.62ms +step:30927/57344 train_time:17678621ms step_avg:571.62ms +grad accum step:7732/14336 +step:30928/57344 train_time:17679927ms step_avg:571.65ms +step:30929/57344 train_time:17679944ms step_avg:571.63ms +step:30930/57344 train_time:17680204ms step_avg:571.62ms +step:30931/57344 train_time:17680775ms step_avg:571.62ms +grad accum step:7733/14336 +step:30932/57344 train_time:17682085ms step_avg:571.64ms +step:30933/57344 train_time:17682102ms step_avg:571.63ms +step:30934/57344 train_time:17682349ms step_avg:571.62ms +step:30935/57344 train_time:17682897ms step_avg:571.61ms +grad accum step:7734/14336 +step:30936/57344 train_time:17684234ms step_avg:571.64ms +step:30937/57344 train_time:17684251ms step_avg:571.62ms +step:30938/57344 train_time:17684499ms step_avg:571.61ms +step:30939/57344 train_time:17685049ms step_avg:571.61ms +grad accum step:7735/14336 +step:30940/57344 train_time:17686351ms step_avg:571.63ms +step:30941/57344 train_time:17686368ms step_avg:571.62ms +step:30942/57344 train_time:17686619ms step_avg:571.61ms +step:30943/57344 train_time:17687177ms step_avg:571.61ms +grad accum step:7736/14336 +step:30944/57344 train_time:17688469ms step_avg:571.63ms +step:30945/57344 train_time:17688486ms step_avg:571.61ms +step:30946/57344 train_time:17688729ms step_avg:571.60ms +step:30947/57344 train_time:17689277ms step_avg:571.60ms +grad accum step:7737/14336 +step:30948/57344 train_time:17690574ms step_avg:571.62ms +step:30949/57344 train_time:17690591ms step_avg:571.60ms +step:30950/57344 train_time:17690847ms step_avg:571.59ms +step:30951/57344 train_time:17691414ms step_avg:571.59ms +grad accum step:7738/14336 +step:30952/57344 train_time:17692715ms step_avg:571.62ms +step:30953/57344 train_time:17692732ms step_avg:571.60ms +step:30954/57344 train_time:17692980ms step_avg:571.59ms +step:30955/57344 train_time:17693527ms step_avg:571.59ms +grad accum step:7739/14336 +step:30956/57344 train_time:17694822ms step_avg:571.61ms +step:30957/57344 train_time:17694838ms step_avg:571.59ms +step:30958/57344 train_time:17695089ms step_avg:571.58ms +step:30959/57344 train_time:17695647ms step_avg:571.58ms +grad accum step:7740/14336 +step:30960/57344 train_time:17696988ms step_avg:571.61ms +step:30961/57344 train_time:17697005ms step_avg:571.59ms +step:30962/57344 train_time:17697258ms step_avg:571.58ms +step:30963/57344 train_time:17697815ms step_avg:571.58ms +grad accum step:7741/14336 +step:30964/57344 train_time:17699141ms step_avg:571.60ms +step:30965/57344 train_time:17699157ms step_avg:571.59ms +step:30966/57344 train_time:17699402ms step_avg:571.58ms +step:30967/57344 train_time:17699944ms step_avg:571.57ms +grad accum step:7742/14336 +step:30968/57344 train_time:17701253ms step_avg:571.60ms +step:30969/57344 train_time:17701269ms step_avg:571.58ms +step:30970/57344 train_time:17701513ms step_avg:571.57ms +step:30971/57344 train_time:17702052ms step_avg:571.57ms +grad accum step:7743/14336 +step:30972/57344 train_time:17703358ms step_avg:571.59ms +step:30973/57344 train_time:17703374ms step_avg:571.57ms +step:30974/57344 train_time:17703619ms step_avg:571.56ms +step:30975/57344 train_time:17704162ms step_avg:571.56ms +grad accum step:7744/14336 +step:30976/57344 train_time:17705483ms step_avg:571.59ms +step:30976/57344 val_loss:6.153916 train_time:17705483ms step_avg:571.59ms +step:30977/57344 train_time:17705495ms step_avg:571.57ms +step:30978/57344 train_time:17705719ms step_avg:571.56ms +step:30979/57344 train_time:17706259ms step_avg:571.56ms +grad accum step:7745/14336 +step:30980/57344 train_time:17707579ms step_avg:571.58ms +step:30981/57344 train_time:17707595ms step_avg:571.56ms +step:30982/57344 train_time:17707838ms step_avg:571.55ms +step:30983/57344 train_time:17708377ms step_avg:571.55ms +grad accum step:7746/14336 +step:30984/57344 train_time:17709666ms step_avg:571.57ms +step:30985/57344 train_time:17709679ms step_avg:571.56ms +step:30986/57344 train_time:17709923ms step_avg:571.55ms +step:30987/57344 train_time:17710474ms step_avg:571.55ms +grad accum step:7747/14336 +step:30988/57344 train_time:17711753ms step_avg:571.57ms +step:30989/57344 train_time:17711770ms step_avg:571.55ms +step:30990/57344 train_time:17712022ms step_avg:571.54ms +step:30991/57344 train_time:17712586ms step_avg:571.54ms +grad accum step:7748/14336 +step:30992/57344 train_time:17713923ms step_avg:571.56ms +step:30993/57344 train_time:17713940ms step_avg:571.55ms +step:30994/57344 train_time:17714184ms step_avg:571.54ms +step:30995/57344 train_time:17714725ms step_avg:571.53ms +grad accum step:7749/14336 +step:30996/57344 train_time:17716045ms step_avg:571.56ms +step:30997/57344 train_time:17716059ms step_avg:571.54ms +step:30998/57344 train_time:17716311ms step_avg:571.53ms +step:30999/57344 train_time:17716871ms step_avg:571.53ms +grad accum step:7750/14336 +step:31000/57344 train_time:17718190ms step_avg:571.55ms +step:31001/57344 train_time:17718205ms step_avg:571.54ms +step:31002/57344 train_time:17718455ms step_avg:571.53ms +step:31003/57344 train_time:17719011ms step_avg:571.53ms +grad accum step:7751/14336 +step:31004/57344 train_time:17720334ms step_avg:571.55ms +step:31005/57344 train_time:17720351ms step_avg:571.53ms +step:31006/57344 train_time:17720596ms step_avg:571.52ms +step:31007/57344 train_time:17721139ms step_avg:571.52ms +grad accum step:7752/14336 +step:31008/57344 train_time:17722450ms step_avg:571.54ms +step:31009/57344 train_time:17722467ms step_avg:571.53ms +step:31010/57344 train_time:17722715ms step_avg:571.52ms +step:31011/57344 train_time:17723271ms step_avg:571.52ms +grad accum step:7753/14336 +step:31012/57344 train_time:17724583ms step_avg:571.54ms +step:31013/57344 train_time:17724597ms step_avg:571.52ms +step:31014/57344 train_time:17724845ms step_avg:571.51ms +step:31015/57344 train_time:17725409ms step_avg:571.51ms +grad accum step:7754/14336 +step:31016/57344 train_time:17726769ms step_avg:571.54ms +step:31017/57344 train_time:17726786ms step_avg:571.52ms +step:31018/57344 train_time:17727034ms step_avg:571.51ms +step:31019/57344 train_time:17727581ms step_avg:571.51ms +grad accum step:7755/14336 +step:31020/57344 train_time:17728893ms step_avg:571.53ms +step:31021/57344 train_time:17728910ms step_avg:571.51ms +step:31022/57344 train_time:17729159ms step_avg:571.50ms +step:31023/57344 train_time:17729706ms step_avg:571.50ms +grad accum step:7756/14336 +step:31024/57344 train_time:17731042ms step_avg:571.53ms +step:31025/57344 train_time:17731059ms step_avg:571.51ms +step:31026/57344 train_time:17731304ms step_avg:571.50ms +step:31027/57344 train_time:17731856ms step_avg:571.50ms +grad accum step:7757/14336 +step:31028/57344 train_time:17733164ms step_avg:571.52ms +step:31029/57344 train_time:17733181ms step_avg:571.50ms +step:31030/57344 train_time:17733433ms step_avg:571.49ms +step:31031/57344 train_time:17733993ms step_avg:571.49ms +grad accum step:7758/14336 +step:31032/57344 train_time:17735298ms step_avg:571.52ms +step:31033/57344 train_time:17735315ms step_avg:571.50ms +step:31034/57344 train_time:17735562ms step_avg:571.49ms +step:31035/57344 train_time:17736105ms step_avg:571.49ms +grad accum step:7759/14336 +step:31036/57344 train_time:17737426ms step_avg:571.51ms +step:31037/57344 train_time:17737443ms step_avg:571.49ms +step:31038/57344 train_time:17737686ms step_avg:571.48ms +step:31039/57344 train_time:17738229ms step_avg:571.48ms +grad accum step:7760/14336 +step:31040/57344 train_time:17739547ms step_avg:571.51ms +step:31040/57344 val_loss:6.139095 train_time:17739547ms step_avg:571.51ms +step:31041/57344 train_time:17739559ms step_avg:571.49ms +step:31042/57344 train_time:17739792ms step_avg:571.48ms +step:31043/57344 train_time:17740358ms step_avg:571.48ms +grad accum step:7761/14336 +step:31044/57344 train_time:17741638ms step_avg:571.50ms +step:31045/57344 train_time:17741655ms step_avg:571.48ms +step:31046/57344 train_time:17741908ms step_avg:571.47ms +step:31047/57344 train_time:17742475ms step_avg:571.47ms +grad accum step:7762/14336 +step:31048/57344 train_time:17743801ms step_avg:571.50ms +step:31049/57344 train_time:17743817ms step_avg:571.48ms +step:31050/57344 train_time:17744062ms step_avg:571.47ms +step:31051/57344 train_time:17744604ms step_avg:571.47ms +grad accum step:7763/14336 +step:31052/57344 train_time:17745948ms step_avg:571.49ms +step:31053/57344 train_time:17745965ms step_avg:571.47ms +step:31054/57344 train_time:17746209ms step_avg:571.46ms +step:31055/57344 train_time:17746746ms step_avg:571.46ms +grad accum step:7764/14336 +step:31056/57344 train_time:17748068ms step_avg:571.49ms +step:31057/57344 train_time:17748084ms step_avg:571.47ms +step:31058/57344 train_time:17748332ms step_avg:571.46ms +step:31059/57344 train_time:17748874ms step_avg:571.46ms +grad accum step:7765/14336 +step:31060/57344 train_time:17750212ms step_avg:571.48ms +step:31061/57344 train_time:17750229ms step_avg:571.46ms +step:31062/57344 train_time:17750476ms step_avg:571.45ms +step:31063/57344 train_time:17751018ms step_avg:571.45ms +grad accum step:7766/14336 +step:31064/57344 train_time:17752313ms step_avg:571.48ms +step:31065/57344 train_time:17752329ms step_avg:571.46ms +step:31066/57344 train_time:17752575ms step_avg:571.45ms +step:31067/57344 train_time:17753118ms step_avg:571.45ms +grad accum step:7767/14336 +step:31068/57344 train_time:17754417ms step_avg:571.47ms +step:31069/57344 train_time:17754433ms step_avg:571.45ms +step:31070/57344 train_time:17754679ms step_avg:571.44ms +step:31071/57344 train_time:17755220ms step_avg:571.44ms +grad accum step:7768/14336 +step:31072/57344 train_time:17756495ms step_avg:571.46ms +step:31073/57344 train_time:17756512ms step_avg:571.45ms +step:31074/57344 train_time:17756759ms step_avg:571.43ms +step:31075/57344 train_time:17757305ms step_avg:571.43ms +grad accum step:7769/14336 +step:31076/57344 train_time:17758603ms step_avg:571.46ms +step:31077/57344 train_time:17758619ms step_avg:571.44ms +step:31078/57344 train_time:17758868ms step_avg:571.43ms +step:31079/57344 train_time:17759422ms step_avg:571.43ms +grad accum step:7770/14336 +step:31080/57344 train_time:17760783ms step_avg:571.45ms +step:31081/57344 train_time:17760800ms step_avg:571.44ms +step:31082/57344 train_time:17761053ms step_avg:571.43ms +step:31083/57344 train_time:17761615ms step_avg:571.43ms +grad accum step:7771/14336 +step:31084/57344 train_time:17762899ms step_avg:571.45ms +step:31085/57344 train_time:17762916ms step_avg:571.43ms +step:31086/57344 train_time:17763167ms step_avg:571.42ms +step:31087/57344 train_time:17763725ms step_avg:571.42ms +grad accum step:7772/14336 +step:31088/57344 train_time:17765047ms step_avg:571.44ms +step:31089/57344 train_time:17765064ms step_avg:571.43ms +step:31090/57344 train_time:17765317ms step_avg:571.42ms +step:31091/57344 train_time:17765882ms step_avg:571.42ms +grad accum step:7773/14336 +step:31092/57344 train_time:17767204ms step_avg:571.44ms +step:31093/57344 train_time:17767221ms step_avg:571.42ms +step:31094/57344 train_time:17767467ms step_avg:571.41ms +step:31095/57344 train_time:17768016ms step_avg:571.41ms +grad accum step:7774/14336 +step:31096/57344 train_time:17769317ms step_avg:571.43ms +step:31097/57344 train_time:17769334ms step_avg:571.42ms +step:31098/57344 train_time:17769579ms step_avg:571.41ms +step:31099/57344 train_time:17770123ms step_avg:571.40ms +grad accum step:7775/14336 +step:31100/57344 train_time:17771403ms step_avg:571.43ms +step:31101/57344 train_time:17771420ms step_avg:571.41ms +step:31102/57344 train_time:17771684ms step_avg:571.40ms +step:31103/57344 train_time:17772277ms step_avg:571.40ms +grad accum step:7776/14336 +step:31104/57344 train_time:17773590ms step_avg:571.42ms +step:31104/57344 val_loss:6.127968 train_time:17773591ms step_avg:571.42ms +step:31105/57344 train_time:17773603ms step_avg:571.41ms +step:31106/57344 train_time:17773843ms step_avg:571.40ms +step:31107/57344 train_time:17774442ms step_avg:571.40ms +grad accum step:7777/14336 +step:31108/57344 train_time:17775776ms step_avg:571.42ms +step:31109/57344 train_time:17775793ms step_avg:571.40ms +step:31110/57344 train_time:17776037ms step_avg:571.39ms +step:31111/57344 train_time:17776575ms step_avg:571.39ms +grad accum step:7778/14336 +step:31112/57344 train_time:17777871ms step_avg:571.42ms +step:31113/57344 train_time:17777888ms step_avg:571.40ms +step:31114/57344 train_time:17778133ms step_avg:571.39ms +step:31115/57344 train_time:17778683ms step_avg:571.39ms +grad accum step:7779/14336 +step:31116/57344 train_time:17779981ms step_avg:571.41ms +step:31117/57344 train_time:17779998ms step_avg:571.39ms +step:31118/57344 train_time:17780246ms step_avg:571.38ms +step:31119/57344 train_time:17780806ms step_avg:571.38ms +grad accum step:7780/14336 +step:31120/57344 train_time:17782125ms step_avg:571.41ms +step:31121/57344 train_time:17782143ms step_avg:571.39ms +step:31122/57344 train_time:17782387ms step_avg:571.38ms +step:31123/57344 train_time:17782937ms step_avg:571.38ms +grad accum step:7781/14336 +step:31124/57344 train_time:17784223ms step_avg:571.40ms +step:31125/57344 train_time:17784240ms step_avg:571.38ms +step:31126/57344 train_time:17784482ms step_avg:571.37ms +step:31127/57344 train_time:17785020ms step_avg:571.37ms +grad accum step:7782/14336 +step:31128/57344 train_time:17786332ms step_avg:571.39ms +step:31129/57344 train_time:17786349ms step_avg:571.38ms +step:31130/57344 train_time:17786593ms step_avg:571.37ms +step:31131/57344 train_time:17787138ms step_avg:571.36ms +grad accum step:7783/14336 +step:31132/57344 train_time:17788431ms step_avg:571.39ms +step:31133/57344 train_time:17788448ms step_avg:571.37ms +step:31134/57344 train_time:17788696ms step_avg:571.36ms +step:31135/57344 train_time:17789250ms step_avg:571.36ms +grad accum step:7784/14336 +step:31136/57344 train_time:17790547ms step_avg:571.38ms +step:31137/57344 train_time:17790563ms step_avg:571.36ms +step:31138/57344 train_time:17790814ms step_avg:571.35ms +step:31139/57344 train_time:17791371ms step_avg:571.35ms +grad accum step:7785/14336 +step:31140/57344 train_time:17792688ms step_avg:571.38ms +step:31141/57344 train_time:17792705ms step_avg:571.36ms +step:31142/57344 train_time:17792957ms step_avg:571.35ms +step:31143/57344 train_time:17793518ms step_avg:571.35ms +grad accum step:7786/14336 +step:31144/57344 train_time:17794847ms step_avg:571.37ms +step:31145/57344 train_time:17794864ms step_avg:571.36ms +step:31146/57344 train_time:17795125ms step_avg:571.35ms +step:31147/57344 train_time:17795706ms step_avg:571.35ms +grad accum step:7787/14336 +step:31148/57344 train_time:17797006ms step_avg:571.37ms +step:31149/57344 train_time:17797023ms step_avg:571.35ms +step:31150/57344 train_time:17797273ms step_avg:571.34ms +step:31151/57344 train_time:17797827ms step_avg:571.34ms +grad accum step:7788/14336 +step:31152/57344 train_time:17799165ms step_avg:571.37ms +step:31153/57344 train_time:17799181ms step_avg:571.35ms +step:31154/57344 train_time:17799429ms step_avg:571.34ms +step:31155/57344 train_time:17799980ms step_avg:571.34ms +grad accum step:7789/14336 +step:31156/57344 train_time:17801278ms step_avg:571.36ms +step:31157/57344 train_time:17801295ms step_avg:571.34ms +step:31158/57344 train_time:17801546ms step_avg:571.33ms +step:31159/57344 train_time:17802107ms step_avg:571.33ms +grad accum step:7790/14336 +step:31160/57344 train_time:17803405ms step_avg:571.35ms +step:31161/57344 train_time:17803422ms step_avg:571.34ms +step:31162/57344 train_time:17803669ms step_avg:571.33ms +step:31163/57344 train_time:17804216ms step_avg:571.33ms +grad accum step:7791/14336 +step:31164/57344 train_time:17805507ms step_avg:571.35ms +step:31165/57344 train_time:17805524ms step_avg:571.33ms +step:31166/57344 train_time:17805770ms step_avg:571.32ms +step:31167/57344 train_time:17806308ms step_avg:571.32ms +grad accum step:7792/14336 +step:31168/57344 train_time:17812373ms step_avg:571.50ms +step:31168/57344 val_loss:6.104289 train_time:17812380ms step_avg:571.50ms +step:31169/57344 train_time:17812392ms step_avg:571.48ms +step:31170/57344 train_time:17812628ms step_avg:571.47ms +step:31171/57344 train_time:17813201ms step_avg:571.47ms +grad accum step:7793/14336 +step:31172/57344 train_time:17814502ms step_avg:571.49ms +step:31173/57344 train_time:17814519ms step_avg:571.47ms +step:31174/57344 train_time:17814767ms step_avg:571.46ms +step:31175/57344 train_time:17815318ms step_avg:571.46ms +grad accum step:7794/14336 +step:31176/57344 train_time:17816589ms step_avg:571.48ms +step:31177/57344 train_time:17816606ms step_avg:571.47ms +step:31178/57344 train_time:17816852ms step_avg:571.46ms +step:31179/57344 train_time:17817404ms step_avg:571.46ms +grad accum step:7795/14336 +step:31180/57344 train_time:17818695ms step_avg:571.48ms +step:31181/57344 train_time:17818712ms step_avg:571.46ms +step:31182/57344 train_time:17818962ms step_avg:571.45ms +step:31183/57344 train_time:17819516ms step_avg:571.45ms +grad accum step:7796/14336 +step:31184/57344 train_time:17820815ms step_avg:571.47ms +step:31185/57344 train_time:17820832ms step_avg:571.46ms +step:31186/57344 train_time:17821078ms step_avg:571.44ms +step:31187/57344 train_time:17821628ms step_avg:571.44ms +grad accum step:7797/14336 +step:31188/57344 train_time:17822925ms step_avg:571.47ms +step:31189/57344 train_time:17822942ms step_avg:571.45ms +step:31190/57344 train_time:17823190ms step_avg:571.44ms +step:31191/57344 train_time:17823747ms step_avg:571.44ms +grad accum step:7798/14336 +step:31192/57344 train_time:17825064ms step_avg:571.46ms +step:31193/57344 train_time:17825081ms step_avg:571.44ms +step:31194/57344 train_time:17825324ms step_avg:571.43ms +step:31195/57344 train_time:17825870ms step_avg:571.43ms +grad accum step:7799/14336 +step:31196/57344 train_time:17827230ms step_avg:571.46ms +step:31197/57344 train_time:17827247ms step_avg:571.44ms +step:31198/57344 train_time:17827494ms step_avg:571.43ms +step:31199/57344 train_time:17828036ms step_avg:571.43ms +grad accum step:7800/14336 +step:31200/57344 train_time:17829306ms step_avg:571.45ms +step:31201/57344 train_time:17829324ms step_avg:571.43ms +step:31202/57344 train_time:17829570ms step_avg:571.42ms +step:31203/57344 train_time:17830118ms step_avg:571.42ms +grad accum step:7801/14336 +step:31204/57344 train_time:17831418ms step_avg:571.45ms +step:31205/57344 train_time:17831435ms step_avg:571.43ms +step:31206/57344 train_time:17831685ms step_avg:571.42ms +step:31207/57344 train_time:17832240ms step_avg:571.42ms +grad accum step:7802/14336 +step:31208/57344 train_time:17833526ms step_avg:571.44ms +step:31209/57344 train_time:17833543ms step_avg:571.42ms +step:31210/57344 train_time:17833787ms step_avg:571.41ms +step:31211/57344 train_time:17834331ms step_avg:571.41ms +grad accum step:7803/14336 +step:31212/57344 train_time:17835631ms step_avg:571.44ms +step:31213/57344 train_time:17835648ms step_avg:571.42ms +step:31214/57344 train_time:17835897ms step_avg:571.41ms +step:31215/57344 train_time:17836442ms step_avg:571.41ms +grad accum step:7804/14336 +step:31216/57344 train_time:17837749ms step_avg:571.43ms +step:31217/57344 train_time:17837766ms step_avg:571.41ms +step:31218/57344 train_time:17838013ms step_avg:571.40ms +step:31219/57344 train_time:17838556ms step_avg:571.40ms +grad accum step:7805/14336 +step:31220/57344 train_time:17839838ms step_avg:571.42ms +step:31221/57344 train_time:17839854ms step_avg:571.41ms +step:31222/57344 train_time:17840102ms step_avg:571.40ms +step:31223/57344 train_time:17840647ms step_avg:571.39ms +grad accum step:7806/14336 +step:31224/57344 train_time:17841991ms step_avg:571.42ms +step:31225/57344 train_time:17842008ms step_avg:571.40ms +step:31226/57344 train_time:17842259ms step_avg:571.39ms +step:31227/57344 train_time:17842812ms step_avg:571.39ms +grad accum step:7807/14336 +step:31228/57344 train_time:17844134ms step_avg:571.41ms +step:31229/57344 train_time:17844150ms step_avg:571.40ms +step:31230/57344 train_time:17844399ms step_avg:571.39ms +step:31231/57344 train_time:17844949ms step_avg:571.39ms +grad accum step:7808/14336 +step:31232/57344 train_time:17846246ms step_avg:571.41ms +step:31232/57344 val_loss:6.088856 train_time:17846246ms step_avg:571.41ms +step:31233/57344 train_time:17846258ms step_avg:571.39ms +step:31234/57344 train_time:17846479ms step_avg:571.38ms +step:31235/57344 train_time:17847029ms step_avg:571.38ms +grad accum step:7809/14336 +step:31236/57344 train_time:17848352ms step_avg:571.40ms +step:31237/57344 train_time:17848369ms step_avg:571.39ms +step:31238/57344 train_time:17848615ms step_avg:571.38ms +step:31239/57344 train_time:17849162ms step_avg:571.37ms +grad accum step:7810/14336 +step:31240/57344 train_time:17850451ms step_avg:571.40ms +step:31241/57344 train_time:17850468ms step_avg:571.38ms +step:31242/57344 train_time:17850711ms step_avg:571.37ms +step:31243/57344 train_time:17851259ms step_avg:571.37ms +grad accum step:7811/14336 +step:31244/57344 train_time:17852578ms step_avg:571.39ms +step:31245/57344 train_time:17852595ms step_avg:571.37ms +step:31246/57344 train_time:17852847ms step_avg:571.36ms +step:31247/57344 train_time:17853410ms step_avg:571.36ms +grad accum step:7812/14336 +step:31248/57344 train_time:17854720ms step_avg:571.39ms +step:31249/57344 train_time:17854737ms step_avg:571.37ms +step:31250/57344 train_time:17854982ms step_avg:571.36ms +step:31251/57344 train_time:17855526ms step_avg:571.36ms +grad accum step:7813/14336 +step:31252/57344 train_time:17856834ms step_avg:571.38ms +step:31253/57344 train_time:17856851ms step_avg:571.36ms +step:31254/57344 train_time:17857098ms step_avg:571.35ms +step:31255/57344 train_time:17857653ms step_avg:571.35ms +grad accum step:7814/14336 +step:31256/57344 train_time:17859005ms step_avg:571.38ms +step:31257/57344 train_time:17859022ms step_avg:571.36ms +step:31258/57344 train_time:17859272ms step_avg:571.35ms +step:31259/57344 train_time:17859819ms step_avg:571.35ms +grad accum step:7815/14336 +step:31260/57344 train_time:17861147ms step_avg:571.37ms +step:31261/57344 train_time:17861164ms step_avg:571.36ms +step:31262/57344 train_time:17861414ms step_avg:571.35ms +step:31263/57344 train_time:17861963ms step_avg:571.35ms +grad accum step:7816/14336 +step:31264/57344 train_time:17863276ms step_avg:571.37ms +step:31265/57344 train_time:17863292ms step_avg:571.35ms +step:31266/57344 train_time:17863541ms step_avg:571.34ms +step:31267/57344 train_time:17864092ms step_avg:571.34ms +grad accum step:7817/14336 +step:31268/57344 train_time:17865452ms step_avg:571.37ms +step:31269/57344 train_time:17865469ms step_avg:571.35ms +step:31270/57344 train_time:17865721ms step_avg:571.34ms +step:31271/57344 train_time:17866276ms step_avg:571.34ms +grad accum step:7818/14336 +step:31272/57344 train_time:17867553ms step_avg:571.36ms +step:31273/57344 train_time:17867570ms step_avg:571.34ms +step:31274/57344 train_time:17867824ms step_avg:571.33ms +step:31275/57344 train_time:17868389ms step_avg:571.33ms +grad accum step:7819/14336 +step:31276/57344 train_time:17869683ms step_avg:571.35ms +step:31277/57344 train_time:17869700ms step_avg:571.34ms +step:31278/57344 train_time:17869949ms step_avg:571.33ms +step:31279/57344 train_time:17870498ms step_avg:571.33ms +grad accum step:7820/14336 +step:31280/57344 train_time:17871792ms step_avg:571.35ms +step:31281/57344 train_time:17871809ms step_avg:571.33ms +step:31282/57344 train_time:17872056ms step_avg:571.32ms +step:31283/57344 train_time:17872604ms step_avg:571.32ms +grad accum step:7821/14336 +step:31284/57344 train_time:17873901ms step_avg:571.34ms +step:31285/57344 train_time:17873913ms step_avg:571.33ms +step:31286/57344 train_time:17874140ms step_avg:571.31ms +step:31287/57344 train_time:17874681ms step_avg:571.31ms +grad accum step:7822/14336 +step:31288/57344 train_time:17875967ms step_avg:571.34ms +step:31289/57344 train_time:17875984ms step_avg:571.32ms +step:31290/57344 train_time:17876230ms step_avg:571.31ms +step:31291/57344 train_time:17876782ms step_avg:571.31ms +grad accum step:7823/14336 +step:31292/57344 train_time:17878100ms step_avg:571.33ms +step:31293/57344 train_time:17878117ms step_avg:571.31ms +step:31294/57344 train_time:17878364ms step_avg:571.30ms +step:31295/57344 train_time:17878914ms step_avg:571.30ms +grad accum step:7824/14336 +step:31296/57344 train_time:17880202ms step_avg:571.33ms +step:31296/57344 val_loss:6.078449 train_time:17880202ms step_avg:571.33ms +step:31297/57344 train_time:17880214ms step_avg:571.31ms +step:31298/57344 train_time:17880440ms step_avg:571.30ms +step:31299/57344 train_time:17880984ms step_avg:571.30ms +grad accum step:7825/14336 +step:31300/57344 train_time:17882311ms step_avg:571.32ms +step:31301/57344 train_time:17882328ms step_avg:571.30ms +step:31302/57344 train_time:17882575ms step_avg:571.29ms +step:31303/57344 train_time:17883133ms step_avg:571.29ms +grad accum step:7826/14336 +step:31304/57344 train_time:17884421ms step_avg:571.31ms +step:31305/57344 train_time:17884437ms step_avg:571.30ms +step:31306/57344 train_time:17884683ms step_avg:571.29ms +step:31307/57344 train_time:17885234ms step_avg:571.29ms +grad accum step:7827/14336 +step:31308/57344 train_time:17886537ms step_avg:571.31ms +step:31309/57344 train_time:17886554ms step_avg:571.29ms +step:31310/57344 train_time:17886804ms step_avg:571.28ms +step:31311/57344 train_time:17887359ms step_avg:571.28ms +grad accum step:7828/14336 +step:31312/57344 train_time:17888660ms step_avg:571.30ms +step:31313/57344 train_time:17888677ms step_avg:571.29ms +step:31314/57344 train_time:17888920ms step_avg:571.28ms +step:31315/57344 train_time:17889462ms step_avg:571.27ms +grad accum step:7829/14336 +step:31316/57344 train_time:17890803ms step_avg:571.30ms +step:31317/57344 train_time:17890819ms step_avg:571.28ms +step:31318/57344 train_time:17891068ms step_avg:571.27ms +step:31319/57344 train_time:17891629ms step_avg:571.27ms +grad accum step:7830/14336 +step:31320/57344 train_time:17892954ms step_avg:571.29ms +step:31321/57344 train_time:17892970ms step_avg:571.28ms +step:31322/57344 train_time:17893216ms step_avg:571.27ms +step:31323/57344 train_time:17893757ms step_avg:571.27ms +grad accum step:7831/14336 +step:31324/57344 train_time:17895045ms step_avg:571.29ms +step:31325/57344 train_time:17895061ms step_avg:571.27ms +step:31326/57344 train_time:17895311ms step_avg:571.26ms +step:31327/57344 train_time:17895866ms step_avg:571.26ms +grad accum step:7832/14336 +step:31328/57344 train_time:17897172ms step_avg:571.28ms +step:31329/57344 train_time:17897189ms step_avg:571.27ms +step:31330/57344 train_time:17897436ms step_avg:571.26ms +step:31331/57344 train_time:17897985ms step_avg:571.25ms +grad accum step:7833/14336 +step:31332/57344 train_time:17899283ms step_avg:571.28ms +step:31333/57344 train_time:17899300ms step_avg:571.26ms +step:31334/57344 train_time:17899557ms step_avg:571.25ms +step:31335/57344 train_time:17900131ms step_avg:571.25ms +grad accum step:7834/14336 +step:31336/57344 train_time:17901402ms step_avg:571.27ms +step:31337/57344 train_time:17901420ms step_avg:571.26ms +step:31338/57344 train_time:17901676ms step_avg:571.25ms +step:31339/57344 train_time:17902244ms step_avg:571.24ms +grad accum step:7835/14336 +step:31340/57344 train_time:17903535ms step_avg:571.27ms +step:31341/57344 train_time:17903553ms step_avg:571.25ms +step:31342/57344 train_time:17903796ms step_avg:571.24ms +step:31343/57344 train_time:17904345ms step_avg:571.24ms +grad accum step:7836/14336 +step:31344/57344 train_time:17905626ms step_avg:571.26ms +step:31345/57344 train_time:17905642ms step_avg:571.24ms +step:31346/57344 train_time:17905889ms step_avg:571.23ms +step:31347/57344 train_time:17906435ms step_avg:571.23ms +grad accum step:7837/14336 +step:31348/57344 train_time:17907732ms step_avg:571.26ms +step:31349/57344 train_time:17907749ms step_avg:571.24ms +step:31350/57344 train_time:17907992ms step_avg:571.23ms +step:31351/57344 train_time:17908539ms step_avg:571.23ms +grad accum step:7838/14336 +step:31352/57344 train_time:17909836ms step_avg:571.25ms +step:31353/57344 train_time:17909853ms step_avg:571.23ms +step:31354/57344 train_time:17910101ms step_avg:571.22ms +step:31355/57344 train_time:17910652ms step_avg:571.22ms +grad accum step:7839/14336 +step:31356/57344 train_time:17911954ms step_avg:571.24ms +step:31357/57344 train_time:17911971ms step_avg:571.23ms +step:31358/57344 train_time:17912225ms step_avg:571.22ms +step:31359/57344 train_time:17912790ms step_avg:571.22ms +grad accum step:7840/14336 +step:31360/57344 train_time:17914089ms step_avg:571.24ms +step:31360/57344 val_loss:6.055421 train_time:17914090ms step_avg:571.24ms +step:31361/57344 train_time:17914102ms step_avg:571.22ms +step:31362/57344 train_time:17914327ms step_avg:571.21ms +step:31363/57344 train_time:17914879ms step_avg:571.21ms +grad accum step:7841/14336 +step:31364/57344 train_time:17916177ms step_avg:571.23ms +step:31365/57344 train_time:17916194ms step_avg:571.22ms +step:31366/57344 train_time:17916439ms step_avg:571.21ms +step:31367/57344 train_time:17916982ms step_avg:571.20ms +grad accum step:7842/14336 +step:31368/57344 train_time:17918262ms step_avg:571.23ms +step:31369/57344 train_time:17918278ms step_avg:571.21ms +step:31370/57344 train_time:17918521ms step_avg:571.20ms +step:31371/57344 train_time:17919061ms step_avg:571.20ms +grad accum step:7843/14336 +step:31372/57344 train_time:17920345ms step_avg:571.22ms +step:31373/57344 train_time:17920362ms step_avg:571.20ms +step:31374/57344 train_time:17920608ms step_avg:571.19ms +step:31375/57344 train_time:17921158ms step_avg:571.19ms +grad accum step:7844/14336 +step:31376/57344 train_time:17922453ms step_avg:571.22ms +step:31377/57344 train_time:17922468ms step_avg:571.20ms +step:31378/57344 train_time:17922719ms step_avg:571.19ms +step:31379/57344 train_time:17923286ms step_avg:571.19ms +grad accum step:7845/14336 +step:31380/57344 train_time:17924624ms step_avg:571.21ms +step:31381/57344 train_time:17924641ms step_avg:571.19ms +step:31382/57344 train_time:17924892ms step_avg:571.18ms +step:31383/57344 train_time:17925434ms step_avg:571.18ms +grad accum step:7846/14336 +step:31384/57344 train_time:17926730ms step_avg:571.21ms +step:31385/57344 train_time:17926747ms step_avg:571.19ms +step:31386/57344 train_time:17927000ms step_avg:571.18ms +step:31387/57344 train_time:17927568ms step_avg:571.18ms +grad accum step:7847/14336 +step:31388/57344 train_time:17928902ms step_avg:571.20ms +step:31389/57344 train_time:17928919ms step_avg:571.18ms +step:31390/57344 train_time:17929168ms step_avg:571.17ms +step:31391/57344 train_time:17929713ms step_avg:571.17ms +grad accum step:7848/14336 +step:31392/57344 train_time:17931012ms step_avg:571.20ms +step:31393/57344 train_time:17931028ms step_avg:571.18ms +step:31394/57344 train_time:17931278ms step_avg:571.17ms +step:31395/57344 train_time:17931831ms step_avg:571.17ms +grad accum step:7849/14336 +step:31396/57344 train_time:17933144ms step_avg:571.19ms +step:31397/57344 train_time:17933161ms step_avg:571.17ms +step:31398/57344 train_time:17933412ms step_avg:571.16ms +step:31399/57344 train_time:17933967ms step_avg:571.16ms +grad accum step:7850/14336 +step:31400/57344 train_time:17935258ms step_avg:571.19ms +step:31401/57344 train_time:17935275ms step_avg:571.17ms +step:31402/57344 train_time:17935520ms step_avg:571.16ms +step:31403/57344 train_time:17936063ms step_avg:571.16ms +grad accum step:7851/14336 +step:31404/57344 train_time:17937350ms step_avg:571.18ms +step:31405/57344 train_time:17937368ms step_avg:571.16ms +step:31406/57344 train_time:17937616ms step_avg:571.15ms +step:31407/57344 train_time:17938166ms step_avg:571.15ms +grad accum step:7852/14336 +step:31408/57344 train_time:17939444ms step_avg:571.17ms +step:31409/57344 train_time:17939460ms step_avg:571.16ms +step:31410/57344 train_time:17939707ms step_avg:571.15ms +step:31411/57344 train_time:17940261ms step_avg:571.15ms +grad accum step:7853/14336 +step:31412/57344 train_time:17941594ms step_avg:571.17ms +step:31413/57344 train_time:17941611ms step_avg:571.15ms +step:31414/57344 train_time:17941855ms step_avg:571.14ms +step:31415/57344 train_time:17942389ms step_avg:571.14ms +grad accum step:7854/14336 +step:31416/57344 train_time:17943668ms step_avg:571.16ms +step:31417/57344 train_time:17943685ms step_avg:571.15ms +step:31418/57344 train_time:17943933ms step_avg:571.14ms +step:31419/57344 train_time:17944486ms step_avg:571.13ms +grad accum step:7855/14336 +step:31420/57344 train_time:17945774ms step_avg:571.16ms +step:31421/57344 train_time:17945791ms step_avg:571.14ms +step:31422/57344 train_time:17946036ms step_avg:571.13ms +step:31423/57344 train_time:17946580ms step_avg:571.13ms +grad accum step:7856/14336 +step:31424/57344 train_time:17947858ms step_avg:571.15ms +step:31424/57344 val_loss:6.046597 train_time:17947859ms step_avg:571.15ms +step:31425/57344 train_time:17947871ms step_avg:571.13ms +step:31426/57344 train_time:17948100ms step_avg:571.12ms +step:31427/57344 train_time:17948661ms step_avg:571.12ms +grad accum step:7857/14336 +step:31428/57344 train_time:17949977ms step_avg:571.15ms +step:31429/57344 train_time:17949994ms step_avg:571.13ms +step:31430/57344 train_time:17950244ms step_avg:571.12ms +step:31431/57344 train_time:17950800ms step_avg:571.12ms +grad accum step:7858/14336 +step:31432/57344 train_time:17952103ms step_avg:571.14ms +step:31433/57344 train_time:17952120ms step_avg:571.12ms +step:31434/57344 train_time:17952376ms step_avg:571.11ms +step:31435/57344 train_time:17952946ms step_avg:571.11ms +grad accum step:7859/14336 +step:31436/57344 train_time:17954246ms step_avg:571.14ms +step:31437/57344 train_time:17954263ms step_avg:571.12ms +step:31438/57344 train_time:17954513ms step_avg:571.11ms +step:31439/57344 train_time:17955071ms step_avg:571.11ms +grad accum step:7860/14336 +step:31440/57344 train_time:17956351ms step_avg:571.13ms +step:31441/57344 train_time:17956368ms step_avg:571.11ms +step:31442/57344 train_time:17956617ms step_avg:571.10ms +step:31443/57344 train_time:17957175ms step_avg:571.10ms +grad accum step:7861/14336 +step:31444/57344 train_time:17958500ms step_avg:571.13ms +step:31445/57344 train_time:17958517ms step_avg:571.11ms +step:31446/57344 train_time:17958756ms step_avg:571.10ms +step:31447/57344 train_time:17959313ms step_avg:571.10ms +grad accum step:7862/14336 +step:31448/57344 train_time:17960620ms step_avg:571.12ms +step:31449/57344 train_time:17960637ms step_avg:571.10ms +step:31450/57344 train_time:17960885ms step_avg:571.09ms +step:31451/57344 train_time:17961436ms step_avg:571.09ms +grad accum step:7863/14336 +step:31452/57344 train_time:17962771ms step_avg:571.12ms +step:31453/57344 train_time:17962788ms step_avg:571.10ms +step:31454/57344 train_time:17963042ms step_avg:571.09ms +step:31455/57344 train_time:17963610ms step_avg:571.09ms +grad accum step:7864/14336 +step:31456/57344 train_time:17964915ms step_avg:571.11ms +step:31457/57344 train_time:17964931ms step_avg:571.09ms +step:31458/57344 train_time:17965173ms step_avg:571.08ms +step:31459/57344 train_time:17965712ms step_avg:571.08ms +grad accum step:7865/14336 +step:31460/57344 train_time:17967025ms step_avg:571.11ms +step:31461/57344 train_time:17967042ms step_avg:571.09ms +step:31462/57344 train_time:17967286ms step_avg:571.08ms +step:31463/57344 train_time:17967824ms step_avg:571.08ms +grad accum step:7866/14336 +step:31464/57344 train_time:17969113ms step_avg:571.10ms +step:31465/57344 train_time:17969130ms step_avg:571.08ms +step:31466/57344 train_time:17969373ms step_avg:571.07ms +step:31467/57344 train_time:17969910ms step_avg:571.07ms +grad accum step:7867/14336 +step:31468/57344 train_time:17971226ms step_avg:571.10ms +step:31469/57344 train_time:17971243ms step_avg:571.08ms +step:31470/57344 train_time:17971499ms step_avg:571.07ms +step:31471/57344 train_time:17972074ms step_avg:571.07ms +grad accum step:7868/14336 +step:31472/57344 train_time:17973394ms step_avg:571.09ms +step:31473/57344 train_time:17973411ms step_avg:571.07ms +step:31474/57344 train_time:17973656ms step_avg:571.06ms +step:31475/57344 train_time:17974203ms step_avg:571.06ms +grad accum step:7869/14336 +step:31476/57344 train_time:17975500ms step_avg:571.09ms +step:31477/57344 train_time:17975517ms step_avg:571.07ms +step:31478/57344 train_time:17975761ms step_avg:571.06ms +step:31479/57344 train_time:17976308ms step_avg:571.06ms +grad accum step:7870/14336 +step:31480/57344 train_time:17977635ms step_avg:571.08ms +step:31481/57344 train_time:17977652ms step_avg:571.06ms +step:31482/57344 train_time:17977897ms step_avg:571.05ms +step:31483/57344 train_time:17978439ms step_avg:571.05ms +grad accum step:7871/14336 +step:31484/57344 train_time:17979732ms step_avg:571.08ms +step:31485/57344 train_time:17979749ms step_avg:571.06ms +step:31486/57344 train_time:17980009ms step_avg:571.05ms +step:31487/57344 train_time:17980579ms step_avg:571.05ms +grad accum step:7872/14336 +step:31488/57344 train_time:17981873ms step_avg:571.07ms +step:31488/57344 val_loss:6.036800 train_time:17981874ms step_avg:571.07ms +step:31489/57344 train_time:17981885ms step_avg:571.05ms +step:31490/57344 train_time:17982109ms step_avg:571.04ms +step:31491/57344 train_time:17982646ms step_avg:571.04ms +grad accum step:7873/14336 +step:31492/57344 train_time:17983933ms step_avg:571.06ms +step:31493/57344 train_time:17983950ms step_avg:571.05ms +step:31494/57344 train_time:17984193ms step_avg:571.04ms +step:31495/57344 train_time:17984738ms step_avg:571.03ms +grad accum step:7874/14336 +step:31496/57344 train_time:17986057ms step_avg:571.06ms +step:31497/57344 train_time:17986074ms step_avg:571.04ms +step:31498/57344 train_time:17986322ms step_avg:571.03ms +step:31499/57344 train_time:17986867ms step_avg:571.03ms +grad accum step:7875/14336 +step:31500/57344 train_time:17988142ms step_avg:571.05ms +step:31501/57344 train_time:17988159ms step_avg:571.03ms +step:31502/57344 train_time:17988403ms step_avg:571.02ms +step:31503/57344 train_time:17988957ms step_avg:571.02ms +grad accum step:7876/14336 +step:31504/57344 train_time:17990307ms step_avg:571.05ms +step:31505/57344 train_time:17990324ms step_avg:571.03ms +step:31506/57344 train_time:17990570ms step_avg:571.02ms +step:31507/57344 train_time:17991112ms step_avg:571.02ms +grad accum step:7877/14336 +step:31508/57344 train_time:17992387ms step_avg:571.04ms +step:31509/57344 train_time:17992404ms step_avg:571.02ms +step:31510/57344 train_time:17992647ms step_avg:571.01ms +step:31511/57344 train_time:17993193ms step_avg:571.01ms +grad accum step:7878/14336 +step:31512/57344 train_time:17994506ms step_avg:571.04ms +step:31513/57344 train_time:17994523ms step_avg:571.02ms +step:31514/57344 train_time:17994770ms step_avg:571.01ms +step:31515/57344 train_time:17995321ms step_avg:571.01ms +grad accum step:7879/14336 +step:31516/57344 train_time:17996638ms step_avg:571.03ms +step:31517/57344 train_time:17996655ms step_avg:571.01ms +step:31518/57344 train_time:17996901ms step_avg:571.00ms +step:31519/57344 train_time:17997457ms step_avg:571.00ms +grad accum step:7880/14336 +step:31520/57344 train_time:17998764ms step_avg:571.03ms +step:31521/57344 train_time:17998781ms step_avg:571.01ms +step:31522/57344 train_time:17999026ms step_avg:571.00ms +step:31523/57344 train_time:17999567ms step_avg:571.00ms +grad accum step:7881/14336 +step:31524/57344 train_time:18000847ms step_avg:571.02ms +step:31525/57344 train_time:18000864ms step_avg:571.00ms +step:31526/57344 train_time:18001110ms step_avg:570.99ms +step:31527/57344 train_time:18001657ms step_avg:570.99ms +grad accum step:7882/14336 +step:31528/57344 train_time:18002975ms step_avg:571.02ms +step:31529/57344 train_time:18002993ms step_avg:571.00ms +step:31530/57344 train_time:18003242ms step_avg:570.99ms +step:31531/57344 train_time:18003791ms step_avg:570.99ms +grad accum step:7883/14336 +step:31532/57344 train_time:18005101ms step_avg:571.01ms +step:31533/57344 train_time:18005118ms step_avg:570.99ms +step:31534/57344 train_time:18005365ms step_avg:570.98ms +step:31535/57344 train_time:18005902ms step_avg:570.98ms +grad accum step:7884/14336 +step:31536/57344 train_time:18007195ms step_avg:571.00ms +step:31537/57344 train_time:18007212ms step_avg:570.99ms +step:31538/57344 train_time:18007461ms step_avg:570.98ms +step:31539/57344 train_time:18008009ms step_avg:570.98ms +grad accum step:7885/14336 +step:31540/57344 train_time:18009302ms step_avg:571.00ms +step:31541/57344 train_time:18009319ms step_avg:570.98ms +step:31542/57344 train_time:18009566ms step_avg:570.97ms +step:31543/57344 train_time:18010112ms step_avg:570.97ms +grad accum step:7886/14336 +step:31544/57344 train_time:18011428ms step_avg:570.99ms +step:31545/57344 train_time:18011445ms step_avg:570.98ms +step:31546/57344 train_time:18011689ms step_avg:570.97ms +step:31547/57344 train_time:18012230ms step_avg:570.96ms +grad accum step:7887/14336 +step:31548/57344 train_time:18013541ms step_avg:570.99ms +step:31549/57344 train_time:18013558ms step_avg:570.97ms +step:31550/57344 train_time:18013809ms step_avg:570.96ms +step:31551/57344 train_time:18014360ms step_avg:570.96ms +grad accum step:7888/14336 +step:31552/57344 train_time:18015676ms step_avg:570.98ms +step:31552/57344 val_loss:6.014101 train_time:18015676ms step_avg:570.98ms +step:31553/57344 train_time:18015688ms step_avg:570.97ms +step:31554/57344 train_time:18015907ms step_avg:570.95ms +step:31555/57344 train_time:18016456ms step_avg:570.95ms +grad accum step:7889/14336 +step:31556/57344 train_time:18017766ms step_avg:570.98ms +step:31557/57344 train_time:18017778ms step_avg:570.96ms +step:31558/57344 train_time:18018022ms step_avg:570.95ms +step:31559/57344 train_time:18018572ms step_avg:570.95ms +grad accum step:7890/14336 +step:31560/57344 train_time:18019868ms step_avg:570.97ms +step:31561/57344 train_time:18019884ms step_avg:570.95ms +step:31562/57344 train_time:18020127ms step_avg:570.94ms +step:31563/57344 train_time:18020673ms step_avg:570.94ms +grad accum step:7891/14336 +step:31564/57344 train_time:18021946ms step_avg:570.97ms +step:31565/57344 train_time:18021963ms step_avg:570.95ms +step:31566/57344 train_time:18022210ms step_avg:570.94ms +step:31567/57344 train_time:18022761ms step_avg:570.94ms +grad accum step:7892/14336 +step:31568/57344 train_time:18024061ms step_avg:570.96ms +step:31569/57344 train_time:18024075ms step_avg:570.94ms +step:31570/57344 train_time:18024320ms step_avg:570.93ms +step:31571/57344 train_time:18024867ms step_avg:570.93ms +grad accum step:7893/14336 +step:31572/57344 train_time:18026146ms step_avg:570.95ms +step:31573/57344 train_time:18026162ms step_avg:570.94ms +step:31574/57344 train_time:18026405ms step_avg:570.93ms +step:31575/57344 train_time:18026948ms step_avg:570.92ms +grad accum step:7894/14336 +step:31576/57344 train_time:18028230ms step_avg:570.95ms +step:31577/57344 train_time:18028245ms step_avg:570.93ms +step:31578/57344 train_time:18028484ms step_avg:570.92ms +step:31579/57344 train_time:18029020ms step_avg:570.92ms +grad accum step:7895/14336 +step:31580/57344 train_time:18030309ms step_avg:570.94ms +step:31581/57344 train_time:18030322ms step_avg:570.92ms +step:31582/57344 train_time:18030569ms step_avg:570.91ms +step:31583/57344 train_time:18031119ms step_avg:570.91ms +grad accum step:7896/14336 +step:31584/57344 train_time:18032434ms step_avg:570.94ms +step:31585/57344 train_time:18032451ms step_avg:570.92ms +step:31586/57344 train_time:18032694ms step_avg:570.91ms +step:31587/57344 train_time:18033239ms step_avg:570.91ms +grad accum step:7897/14336 +step:31588/57344 train_time:18034558ms step_avg:570.93ms +step:31589/57344 train_time:18034575ms step_avg:570.91ms +step:31590/57344 train_time:18034818ms step_avg:570.90ms +step:31591/57344 train_time:18035354ms step_avg:570.90ms +grad accum step:7898/14336 +step:31592/57344 train_time:18036629ms step_avg:570.92ms +step:31593/57344 train_time:18036645ms step_avg:570.91ms +step:31594/57344 train_time:18036904ms step_avg:570.90ms +step:31595/57344 train_time:18037489ms step_avg:570.90ms +grad accum step:7899/14336 +step:31596/57344 train_time:18038821ms step_avg:570.92ms +step:31597/57344 train_time:18038835ms step_avg:570.90ms +step:31598/57344 train_time:18039081ms step_avg:570.89ms +step:31599/57344 train_time:18039630ms step_avg:570.89ms +grad accum step:7900/14336 +step:31600/57344 train_time:18040902ms step_avg:570.91ms +step:31601/57344 train_time:18040919ms step_avg:570.90ms +step:31602/57344 train_time:18041170ms step_avg:570.89ms +step:31603/57344 train_time:18041729ms step_avg:570.89ms +grad accum step:7901/14336 +step:31604/57344 train_time:18043024ms step_avg:570.91ms +step:31605/57344 train_time:18043039ms step_avg:570.89ms +step:31606/57344 train_time:18043262ms step_avg:570.88ms +step:31607/57344 train_time:18043810ms step_avg:570.88ms +grad accum step:7902/14336 +step:31608/57344 train_time:18045098ms step_avg:570.90ms +step:31609/57344 train_time:18045115ms step_avg:570.89ms +step:31610/57344 train_time:18045365ms step_avg:570.88ms +step:31611/57344 train_time:18045916ms step_avg:570.87ms +grad accum step:7903/14336 +step:31612/57344 train_time:18047442ms step_avg:570.90ms +step:31613/57344 train_time:18047454ms step_avg:570.89ms +step:31614/57344 train_time:18047669ms step_avg:570.88ms +step:31615/57344 train_time:18048216ms step_avg:570.88ms +grad accum step:7904/14336 +step:31616/57344 train_time:18049534ms step_avg:570.90ms +step:31616/57344 val_loss:6.005621 train_time:18049535ms step_avg:570.90ms +step:31617/57344 train_time:18049546ms step_avg:570.88ms +step:31618/57344 train_time:18049772ms step_avg:570.87ms +step:31619/57344 train_time:18050330ms step_avg:570.87ms +grad accum step:7905/14336 +step:31620/57344 train_time:18051643ms step_avg:570.89ms +step:31621/57344 train_time:18051660ms step_avg:570.88ms +step:31622/57344 train_time:18051902ms step_avg:570.87ms +step:31623/57344 train_time:18052446ms step_avg:570.86ms +grad accum step:7906/14336 +step:31624/57344 train_time:18053753ms step_avg:570.89ms +step:31625/57344 train_time:18053770ms step_avg:570.87ms +step:31626/57344 train_time:18054016ms step_avg:570.86ms +step:31627/57344 train_time:18054559ms step_avg:570.86ms +grad accum step:7907/14336 +step:31628/57344 train_time:18055869ms step_avg:570.88ms +step:31629/57344 train_time:18055886ms step_avg:570.86ms +step:31630/57344 train_time:18056135ms step_avg:570.85ms +step:31631/57344 train_time:18056686ms step_avg:570.85ms +grad accum step:7908/14336 +step:31632/57344 train_time:18057963ms step_avg:570.88ms +step:31633/57344 train_time:18057980ms step_avg:570.86ms +step:31634/57344 train_time:18058229ms step_avg:570.85ms +step:31635/57344 train_time:18058788ms step_avg:570.85ms +grad accum step:7909/14336 +step:31636/57344 train_time:18060099ms step_avg:570.87ms +step:31637/57344 train_time:18060115ms step_avg:570.85ms +step:31638/57344 train_time:18060369ms step_avg:570.84ms +step:31639/57344 train_time:18060934ms step_avg:570.84ms +grad accum step:7910/14336 +step:31640/57344 train_time:18062251ms step_avg:570.87ms +step:31641/57344 train_time:18062268ms step_avg:570.85ms +step:31642/57344 train_time:18062522ms step_avg:570.84ms +step:31643/57344 train_time:18063090ms step_avg:570.84ms +grad accum step:7911/14336 +step:31644/57344 train_time:18064399ms step_avg:570.86ms +step:31645/57344 train_time:18064416ms step_avg:570.85ms +step:31646/57344 train_time:18064663ms step_avg:570.84ms +step:31647/57344 train_time:18065210ms step_avg:570.83ms +grad accum step:7912/14336 +step:31648/57344 train_time:18066513ms step_avg:570.86ms +step:31649/57344 train_time:18066530ms step_avg:570.84ms +step:31650/57344 train_time:18066777ms step_avg:570.83ms +step:31651/57344 train_time:18067318ms step_avg:570.83ms +grad accum step:7913/14336 +step:31652/57344 train_time:18068627ms step_avg:570.85ms +step:31653/57344 train_time:18068644ms step_avg:570.84ms +step:31654/57344 train_time:18068896ms step_avg:570.83ms +step:31655/57344 train_time:18069456ms step_avg:570.82ms +grad accum step:7914/14336 +step:31656/57344 train_time:18070739ms step_avg:570.85ms +step:31657/57344 train_time:18070755ms step_avg:570.83ms +step:31658/57344 train_time:18071008ms step_avg:570.82ms +step:31659/57344 train_time:18071564ms step_avg:570.82ms +grad accum step:7915/14336 +step:31660/57344 train_time:18072865ms step_avg:570.84ms +step:31661/57344 train_time:18072882ms step_avg:570.82ms +step:31662/57344 train_time:18073137ms step_avg:570.81ms +step:31663/57344 train_time:18073702ms step_avg:570.81ms +grad accum step:7916/14336 +step:31664/57344 train_time:18075007ms step_avg:570.84ms +step:31665/57344 train_time:18075024ms step_avg:570.82ms +step:31666/57344 train_time:18075272ms step_avg:570.81ms +step:31667/57344 train_time:18075817ms step_avg:570.81ms +grad accum step:7917/14336 +step:31668/57344 train_time:18077096ms step_avg:570.83ms +step:31669/57344 train_time:18077113ms step_avg:570.81ms +step:31670/57344 train_time:18077359ms step_avg:570.80ms +step:31671/57344 train_time:18077899ms step_avg:570.80ms +grad accum step:7918/14336 +step:31672/57344 train_time:18079216ms step_avg:570.83ms +step:31673/57344 train_time:18079233ms step_avg:570.81ms +step:31674/57344 train_time:18079489ms step_avg:570.80ms +step:31675/57344 train_time:18080052ms step_avg:570.80ms +grad accum step:7919/14336 +step:31676/57344 train_time:18081396ms step_avg:570.82ms +step:31677/57344 train_time:18081413ms step_avg:570.81ms +step:31678/57344 train_time:18081660ms step_avg:570.80ms +step:31679/57344 train_time:18082214ms step_avg:570.79ms +grad accum step:7920/14336 +step:31680/57344 train_time:18083510ms step_avg:570.82ms +step:31680/57344 val_loss:6.021158 train_time:18083510ms step_avg:570.82ms +step:31681/57344 train_time:18083522ms step_avg:570.80ms +step:31682/57344 train_time:18083744ms step_avg:570.79ms +step:31683/57344 train_time:18084300ms step_avg:570.79ms +grad accum step:7921/14336 +step:31684/57344 train_time:18085628ms step_avg:570.81ms +step:31685/57344 train_time:18085645ms step_avg:570.80ms +step:31686/57344 train_time:18085896ms step_avg:570.79ms +step:31687/57344 train_time:18086452ms step_avg:570.78ms +grad accum step:7922/14336 +step:31688/57344 train_time:18087773ms step_avg:570.81ms +step:31689/57344 train_time:18087790ms step_avg:570.79ms +step:31690/57344 train_time:18088043ms step_avg:570.78ms +step:31691/57344 train_time:18088608ms step_avg:570.78ms +grad accum step:7923/14336 +step:31692/57344 train_time:18089905ms step_avg:570.80ms +step:31693/57344 train_time:18089922ms step_avg:570.79ms +step:31694/57344 train_time:18090165ms step_avg:570.78ms +step:31695/57344 train_time:18090719ms step_avg:570.78ms +grad accum step:7924/14336 +step:31696/57344 train_time:18092074ms step_avg:570.80ms +step:31697/57344 train_time:18092091ms step_avg:570.78ms +step:31698/57344 train_time:18092338ms step_avg:570.77ms +step:31699/57344 train_time:18092877ms step_avg:570.77ms +grad accum step:7925/14336 +step:31700/57344 train_time:18094149ms step_avg:570.79ms +step:31701/57344 train_time:18094166ms step_avg:570.78ms +step:31702/57344 train_time:18094418ms step_avg:570.77ms +step:31703/57344 train_time:18094979ms step_avg:570.77ms +grad accum step:7926/14336 +step:31704/57344 train_time:18096297ms step_avg:570.79ms +step:31705/57344 train_time:18096315ms step_avg:570.77ms +step:31706/57344 train_time:18096568ms step_avg:570.76ms +step:31707/57344 train_time:18097132ms step_avg:570.76ms +grad accum step:7927/14336 +step:31708/57344 train_time:18098471ms step_avg:570.79ms +step:31709/57344 train_time:18098488ms step_avg:570.77ms +step:31710/57344 train_time:18098737ms step_avg:570.76ms +step:31711/57344 train_time:18099275ms step_avg:570.76ms +grad accum step:7928/14336 +step:31712/57344 train_time:18100547ms step_avg:570.78ms +step:31713/57344 train_time:18100564ms step_avg:570.76ms +step:31714/57344 train_time:18100807ms step_avg:570.75ms +step:31715/57344 train_time:18101351ms step_avg:570.75ms +grad accum step:7929/14336 +step:31716/57344 train_time:18102702ms step_avg:570.78ms +step:31717/57344 train_time:18102719ms step_avg:570.76ms +step:31718/57344 train_time:18102964ms step_avg:570.75ms +step:31719/57344 train_time:18103516ms step_avg:570.75ms +grad accum step:7930/14336 +step:31720/57344 train_time:18104860ms step_avg:570.77ms +step:31721/57344 train_time:18104877ms step_avg:570.75ms +step:31722/57344 train_time:18105122ms step_avg:570.74ms +step:31723/57344 train_time:18105667ms step_avg:570.74ms +grad accum step:7931/14336 +step:31724/57344 train_time:18106984ms step_avg:570.77ms +step:31725/57344 train_time:18107001ms step_avg:570.75ms +step:31726/57344 train_time:18107252ms step_avg:570.74ms +step:31727/57344 train_time:18107809ms step_avg:570.74ms +grad accum step:7932/14336 +step:31728/57344 train_time:18109105ms step_avg:570.76ms +step:31729/57344 train_time:18109122ms step_avg:570.74ms +step:31730/57344 train_time:18109370ms step_avg:570.73ms +step:31731/57344 train_time:18109920ms step_avg:570.73ms +grad accum step:7933/14336 +step:31732/57344 train_time:18111214ms step_avg:570.76ms +step:31733/57344 train_time:18111231ms step_avg:570.74ms +step:31734/57344 train_time:18111478ms step_avg:570.73ms +step:31735/57344 train_time:18112024ms step_avg:570.73ms +grad accum step:7934/14336 +step:31736/57344 train_time:18113318ms step_avg:570.75ms +step:31737/57344 train_time:18113335ms step_avg:570.73ms +step:31738/57344 train_time:18113581ms step_avg:570.72ms +step:31739/57344 train_time:18114139ms step_avg:570.72ms +grad accum step:7935/14336 +step:31740/57344 train_time:18115494ms step_avg:570.75ms +step:31741/57344 train_time:18115512ms step_avg:570.73ms +step:31742/57344 train_time:18115758ms step_avg:570.72ms +step:31743/57344 train_time:18116743ms step_avg:570.73ms +grad accum step:7936/14336 +step:31744/57344 train_time:18117652ms step_avg:570.74ms +step:31744/57344 val_loss:6.030588 train_time:18117652ms step_avg:570.74ms +step:31745/57344 train_time:18117664ms step_avg:570.72ms +step:31746/57344 train_time:18117891ms step_avg:570.71ms +step:31747/57344 train_time:18118432ms step_avg:570.71ms +grad accum step:7937/14336 +step:31748/57344 train_time:18119730ms step_avg:570.74ms +step:31749/57344 train_time:18119747ms step_avg:570.72ms +step:31750/57344 train_time:18120001ms step_avg:570.71ms +step:31751/57344 train_time:18120564ms step_avg:570.71ms +grad accum step:7938/14336 +step:31752/57344 train_time:18121879ms step_avg:570.73ms +step:31753/57344 train_time:18121896ms step_avg:570.71ms +step:31754/57344 train_time:18122150ms step_avg:570.70ms +step:31755/57344 train_time:18122718ms step_avg:570.70ms +grad accum step:7939/14336 +step:31756/57344 train_time:18124026ms step_avg:570.73ms +step:31757/57344 train_time:18124043ms step_avg:570.71ms +step:31758/57344 train_time:18124294ms step_avg:570.70ms +step:31759/57344 train_time:18124851ms step_avg:570.70ms +grad accum step:7940/14336 +step:31760/57344 train_time:18126149ms step_avg:570.72ms +step:31761/57344 train_time:18126166ms step_avg:570.71ms +step:31762/57344 train_time:18126411ms step_avg:570.69ms +step:31763/57344 train_time:18126964ms step_avg:570.69ms +grad accum step:7941/14336 +step:31764/57344 train_time:18128301ms step_avg:570.72ms +step:31765/57344 train_time:18128317ms step_avg:570.70ms +step:31766/57344 train_time:18128566ms step_avg:570.69ms +step:31767/57344 train_time:18129113ms step_avg:570.69ms +grad accum step:7942/14336 +step:31768/57344 train_time:18130432ms step_avg:570.71ms +step:31769/57344 train_time:18130449ms step_avg:570.70ms +step:31770/57344 train_time:18130700ms step_avg:570.69ms +step:31771/57344 train_time:18131266ms step_avg:570.69ms +grad accum step:7943/14336 +step:31772/57344 train_time:18132599ms step_avg:570.71ms +step:31773/57344 train_time:18132616ms step_avg:570.69ms +step:31774/57344 train_time:18132865ms step_avg:570.68ms +step:31775/57344 train_time:18133421ms step_avg:570.68ms +grad accum step:7944/14336 +step:31776/57344 train_time:18134733ms step_avg:570.71ms +step:31777/57344 train_time:18134750ms step_avg:570.69ms +step:31778/57344 train_time:18134997ms step_avg:570.68ms +step:31779/57344 train_time:18135544ms step_avg:570.68ms +grad accum step:7945/14336 +step:31780/57344 train_time:18136854ms step_avg:570.70ms +step:31781/57344 train_time:18136870ms step_avg:570.68ms +step:31782/57344 train_time:18137122ms step_avg:570.67ms +step:31783/57344 train_time:18137685ms step_avg:570.67ms +grad accum step:7946/14336 +step:31784/57344 train_time:18139016ms step_avg:570.70ms +step:31785/57344 train_time:18139033ms step_avg:570.68ms +step:31786/57344 train_time:18139281ms step_avg:570.67ms +step:31787/57344 train_time:18139828ms step_avg:570.67ms +grad accum step:7947/14336 +step:31788/57344 train_time:18141103ms step_avg:570.69ms +step:31789/57344 train_time:18141120ms step_avg:570.67ms +step:31790/57344 train_time:18141371ms step_avg:570.66ms +step:31791/57344 train_time:18141925ms step_avg:570.66ms +grad accum step:7948/14336 +step:31792/57344 train_time:18143217ms step_avg:570.68ms +step:31793/57344 train_time:18143234ms step_avg:570.67ms +step:31794/57344 train_time:18143481ms step_avg:570.66ms +step:31795/57344 train_time:18144031ms step_avg:570.66ms +grad accum step:7949/14336 +step:31796/57344 train_time:18145345ms step_avg:570.68ms +step:31797/57344 train_time:18145362ms step_avg:570.66ms +step:31798/57344 train_time:18145619ms step_avg:570.65ms +step:31799/57344 train_time:18146186ms step_avg:570.65ms +grad accum step:7950/14336 +step:31800/57344 train_time:18147512ms step_avg:570.68ms +step:31801/57344 train_time:18147529ms step_avg:570.66ms +step:31802/57344 train_time:18147776ms step_avg:570.65ms +step:31803/57344 train_time:18148326ms step_avg:570.65ms +grad accum step:7951/14336 +step:31804/57344 train_time:18149632ms step_avg:570.67ms +step:31805/57344 train_time:18149649ms step_avg:570.65ms +step:31806/57344 train_time:18149896ms step_avg:570.64ms +step:31807/57344 train_time:18150448ms step_avg:570.64ms +grad accum step:7952/14336 +step:31808/57344 train_time:18151737ms step_avg:570.67ms +step:31808/57344 val_loss:6.041026 train_time:18151738ms step_avg:570.67ms +step:31809/57344 train_time:18151749ms step_avg:570.65ms +step:31810/57344 train_time:18151978ms step_avg:570.64ms +step:31811/57344 train_time:18152543ms step_avg:570.64ms +grad accum step:7953/14336 +step:31812/57344 train_time:18153892ms step_avg:570.66ms +step:31813/57344 train_time:18153909ms step_avg:570.64ms +step:31814/57344 train_time:18154158ms step_avg:570.63ms +step:31815/57344 train_time:18154711ms step_avg:570.63ms +grad accum step:7954/14336 +step:31816/57344 train_time:18156055ms step_avg:570.66ms +step:31817/57344 train_time:18156070ms step_avg:570.64ms +step:31818/57344 train_time:18156317ms step_avg:570.63ms +step:31819/57344 train_time:18156857ms step_avg:570.63ms +grad accum step:7955/14336 +step:31820/57344 train_time:18158147ms step_avg:570.65ms +step:31821/57344 train_time:18158164ms step_avg:570.63ms +step:31822/57344 train_time:18158406ms step_avg:570.62ms +step:31823/57344 train_time:18158951ms step_avg:570.62ms +grad accum step:7956/14336 +step:31824/57344 train_time:18160265ms step_avg:570.65ms +step:31825/57344 train_time:18160282ms step_avg:570.63ms +step:31826/57344 train_time:18160526ms step_avg:570.62ms +step:31827/57344 train_time:18161062ms step_avg:570.62ms +grad accum step:7957/14336 +step:31828/57344 train_time:18162362ms step_avg:570.64ms +step:31829/57344 train_time:18162377ms step_avg:570.62ms +step:31830/57344 train_time:18162626ms step_avg:570.61ms +step:31831/57344 train_time:18163173ms step_avg:570.61ms +grad accum step:7958/14336 +step:31832/57344 train_time:18164477ms step_avg:570.64ms +step:31833/57344 train_time:18164494ms step_avg:570.62ms +step:31834/57344 train_time:18164755ms step_avg:570.61ms +step:31835/57344 train_time:18165351ms step_avg:570.61ms +grad accum step:7959/14336 +step:31836/57344 train_time:18166709ms step_avg:570.63ms +step:31837/57344 train_time:18166725ms step_avg:570.62ms +step:31838/57344 train_time:18166974ms step_avg:570.61ms +step:31839/57344 train_time:18167518ms step_avg:570.61ms +grad accum step:7960/14336 +step:31840/57344 train_time:18168841ms step_avg:570.63ms +step:31841/57344 train_time:18168858ms step_avg:570.61ms +step:31842/57344 train_time:18169102ms step_avg:570.60ms +step:31843/57344 train_time:18169638ms step_avg:570.60ms +grad accum step:7961/14336 +step:31844/57344 train_time:18170972ms step_avg:570.62ms +step:31845/57344 train_time:18170989ms step_avg:570.61ms +step:31846/57344 train_time:18171242ms step_avg:570.60ms +step:31847/57344 train_time:18171807ms step_avg:570.60ms +grad accum step:7962/14336 +step:31848/57344 train_time:18173134ms step_avg:570.62ms +step:31849/57344 train_time:18173151ms step_avg:570.60ms +step:31850/57344 train_time:18173403ms step_avg:570.59ms +step:31851/57344 train_time:18173960ms step_avg:570.59ms +grad accum step:7963/14336 +step:31852/57344 train_time:18175327ms step_avg:570.62ms +step:31853/57344 train_time:18175343ms step_avg:570.60ms +step:31854/57344 train_time:18175593ms step_avg:570.59ms +step:31855/57344 train_time:18176155ms step_avg:570.59ms +grad accum step:7964/14336 +step:31856/57344 train_time:18177525ms step_avg:570.62ms +step:31857/57344 train_time:18177543ms step_avg:570.60ms +step:31858/57344 train_time:18177787ms step_avg:570.59ms +step:31859/57344 train_time:18178329ms step_avg:570.59ms +grad accum step:7965/14336 +step:31860/57344 train_time:18179626ms step_avg:570.61ms +step:31861/57344 train_time:18179643ms step_avg:570.59ms +step:31862/57344 train_time:18179891ms step_avg:570.58ms +step:31863/57344 train_time:18180436ms step_avg:570.58ms +grad accum step:7966/14336 +step:31864/57344 train_time:18181734ms step_avg:570.60ms +step:31865/57344 train_time:18181751ms step_avg:570.59ms +step:31866/57344 train_time:18181997ms step_avg:570.58ms +step:31867/57344 train_time:18182542ms step_avg:570.58ms +grad accum step:7967/14336 +step:31868/57344 train_time:18183885ms step_avg:570.60ms +step:31869/57344 train_time:18183902ms step_avg:570.58ms +step:31870/57344 train_time:18184150ms step_avg:570.57ms +step:31871/57344 train_time:18184699ms step_avg:570.57ms +grad accum step:7968/14336 +step:31872/57344 train_time:18186025ms step_avg:570.60ms +step:31872/57344 val_loss:6.059294 train_time:18186025ms step_avg:570.60ms +step:31873/57344 train_time:18186037ms step_avg:570.58ms +step:31874/57344 train_time:18186262ms step_avg:570.57ms +step:31875/57344 train_time:18186821ms step_avg:570.57ms +grad accum step:7969/14336 +step:31876/57344 train_time:18188135ms step_avg:570.59ms +step:31877/57344 train_time:18188153ms step_avg:570.57ms +step:31878/57344 train_time:18188392ms step_avg:570.56ms +step:31879/57344 train_time:18188938ms step_avg:570.56ms +grad accum step:7970/14336 +step:31880/57344 train_time:18190249ms step_avg:570.58ms +step:31881/57344 train_time:18190266ms step_avg:570.57ms +step:31882/57344 train_time:18190522ms step_avg:570.56ms +step:31883/57344 train_time:18191102ms step_avg:570.56ms +grad accum step:7971/14336 +step:31884/57344 train_time:18192446ms step_avg:570.58ms +step:31885/57344 train_time:18192461ms step_avg:570.56ms +step:31886/57344 train_time:18192700ms step_avg:570.55ms +step:31887/57344 train_time:18193248ms step_avg:570.55ms +grad accum step:7972/14336 +step:31888/57344 train_time:18194555ms step_avg:570.58ms +step:31889/57344 train_time:18194570ms step_avg:570.56ms +step:31890/57344 train_time:18194824ms step_avg:570.55ms +step:31891/57344 train_time:18195396ms step_avg:570.55ms +grad accum step:7973/14336 +step:31892/57344 train_time:18196703ms step_avg:570.57ms +step:31893/57344 train_time:18196720ms step_avg:570.56ms +step:31894/57344 train_time:18196970ms step_avg:570.55ms +step:31895/57344 train_time:18197516ms step_avg:570.54ms +grad accum step:7974/14336 +step:31896/57344 train_time:18198827ms step_avg:570.57ms +step:31897/57344 train_time:18198843ms step_avg:570.55ms +step:31898/57344 train_time:18199089ms step_avg:570.54ms +step:31899/57344 train_time:18199642ms step_avg:570.54ms +grad accum step:7975/14336 +step:31900/57344 train_time:18200973ms step_avg:570.56ms +step:31901/57344 train_time:18200990ms step_avg:570.55ms +step:31902/57344 train_time:18201237ms step_avg:570.54ms +step:31903/57344 train_time:18201781ms step_avg:570.54ms +grad accum step:7976/14336 +step:31904/57344 train_time:18203080ms step_avg:570.56ms +step:31905/57344 train_time:18203097ms step_avg:570.54ms +step:31906/57344 train_time:18203344ms step_avg:570.53ms +step:31907/57344 train_time:18203882ms step_avg:570.53ms +grad accum step:7977/14336 +step:31908/57344 train_time:18205180ms step_avg:570.55ms +step:31909/57344 train_time:18205195ms step_avg:570.53ms +step:31910/57344 train_time:18205445ms step_avg:570.52ms +step:31911/57344 train_time:18205991ms step_avg:570.52ms +grad accum step:7978/14336 +step:31912/57344 train_time:18207308ms step_avg:570.55ms +step:31913/57344 train_time:18207324ms step_avg:570.53ms +step:31914/57344 train_time:18207570ms step_avg:570.52ms +step:31915/57344 train_time:18208110ms step_avg:570.52ms +grad accum step:7979/14336 +step:31916/57344 train_time:18209425ms step_avg:570.54ms +step:31917/57344 train_time:18209441ms step_avg:570.52ms +step:31918/57344 train_time:18209689ms step_avg:570.51ms +step:31919/57344 train_time:18210241ms step_avg:570.51ms +grad accum step:7980/14336 +step:31920/57344 train_time:18211592ms step_avg:570.54ms +step:31921/57344 train_time:18211609ms step_avg:570.52ms +step:31922/57344 train_time:18211856ms step_avg:570.51ms +step:31923/57344 train_time:18212402ms step_avg:570.51ms +grad accum step:7981/14336 +step:31924/57344 train_time:18213716ms step_avg:570.53ms +step:31925/57344 train_time:18213729ms step_avg:570.52ms +step:31926/57344 train_time:18213976ms step_avg:570.51ms +step:31927/57344 train_time:18214519ms step_avg:570.51ms +grad accum step:7982/14336 +step:31928/57344 train_time:18215812ms step_avg:570.53ms +step:31929/57344 train_time:18215831ms step_avg:570.51ms +step:31930/57344 train_time:18216071ms step_avg:570.50ms +step:31931/57344 train_time:18216620ms step_avg:570.50ms +grad accum step:7983/14336 +step:31932/57344 train_time:18217942ms step_avg:570.52ms +step:31933/57344 train_time:18217958ms step_avg:570.51ms +step:31934/57344 train_time:18218214ms step_avg:570.50ms +step:31935/57344 train_time:18218779ms step_avg:570.50ms +grad accum step:7984/14336 +step:31936/57344 train_time:18220068ms step_avg:570.52ms +step:31936/57344 val_loss:6.074324 train_time:18220069ms step_avg:570.52ms +step:31937/57344 train_time:18220080ms step_avg:570.50ms +step:31938/57344 train_time:18220304ms step_avg:570.49ms +step:31939/57344 train_time:18220852ms step_avg:570.49ms +grad accum step:7985/14336 +step:31940/57344 train_time:18222128ms step_avg:570.51ms +step:31941/57344 train_time:18222145ms step_avg:570.49ms +step:31942/57344 train_time:18222390ms step_avg:570.48ms +step:31943/57344 train_time:18222937ms step_avg:570.48ms +grad accum step:7986/14336 +step:31944/57344 train_time:18224241ms step_avg:570.51ms +step:31945/57344 train_time:18224257ms step_avg:570.49ms +step:31946/57344 train_time:18224503ms step_avg:570.48ms +step:31947/57344 train_time:18225046ms step_avg:570.48ms +grad accum step:7987/14336 +step:31948/57344 train_time:18226337ms step_avg:570.50ms +step:31949/57344 train_time:18226354ms step_avg:570.48ms +step:31950/57344 train_time:18226609ms step_avg:570.47ms +step:31951/57344 train_time:18227174ms step_avg:570.47ms +grad accum step:7988/14336 +step:31952/57344 train_time:18228499ms step_avg:570.50ms +step:31953/57344 train_time:18228516ms step_avg:570.48ms +step:31954/57344 train_time:18228764ms step_avg:570.47ms +step:31955/57344 train_time:18229312ms step_avg:570.47ms +grad accum step:7989/14336 +step:31956/57344 train_time:18230614ms step_avg:570.49ms +step:31957/57344 train_time:18230631ms step_avg:570.47ms +step:31958/57344 train_time:18230882ms step_avg:570.46ms +step:31959/57344 train_time:18231432ms step_avg:570.46ms +grad accum step:7990/14336 +step:31960/57344 train_time:18232703ms step_avg:570.49ms +step:31961/57344 train_time:18232720ms step_avg:570.47ms +step:31962/57344 train_time:18232968ms step_avg:570.46ms +step:31963/57344 train_time:18233515ms step_avg:570.46ms +grad accum step:7991/14336 +step:31964/57344 train_time:18234794ms step_avg:570.48ms +step:31965/57344 train_time:18234811ms step_avg:570.46ms +step:31966/57344 train_time:18235062ms step_avg:570.45ms +step:31967/57344 train_time:18235621ms step_avg:570.45ms +grad accum step:7992/14336 +step:31968/57344 train_time:18236932ms step_avg:570.47ms +step:31969/57344 train_time:18236949ms step_avg:570.46ms +step:31970/57344 train_time:18237197ms step_avg:570.45ms +step:31971/57344 train_time:18237745ms step_avg:570.45ms +grad accum step:7993/14336 +step:31972/57344 train_time:18239059ms step_avg:570.47ms +step:31973/57344 train_time:18239076ms step_avg:570.45ms +step:31974/57344 train_time:18239325ms step_avg:570.44ms +step:31975/57344 train_time:18239874ms step_avg:570.44ms +grad accum step:7994/14336 +step:31976/57344 train_time:18241218ms step_avg:570.47ms +step:31977/57344 train_time:18241236ms step_avg:570.45ms +step:31978/57344 train_time:18241486ms step_avg:570.44ms +step:31979/57344 train_time:18242038ms step_avg:570.44ms +grad accum step:7995/14336 +step:31980/57344 train_time:18243350ms step_avg:570.46ms +step:31981/57344 train_time:18243365ms step_avg:570.44ms +step:31982/57344 train_time:18243614ms step_avg:570.43ms +step:31983/57344 train_time:18244161ms step_avg:570.43ms +grad accum step:7996/14336 +step:31984/57344 train_time:18245473ms step_avg:570.46ms +step:31985/57344 train_time:18245489ms step_avg:570.44ms +step:31986/57344 train_time:18245734ms step_avg:570.43ms +step:31987/57344 train_time:18246275ms step_avg:570.43ms +grad accum step:7997/14336 +step:31988/57344 train_time:18247596ms step_avg:570.45ms +step:31989/57344 train_time:18247613ms step_avg:570.43ms +step:31990/57344 train_time:18247862ms step_avg:570.42ms +step:31991/57344 train_time:18248412ms step_avg:570.42ms +grad accum step:7998/14336 +step:31992/57344 train_time:18249722ms step_avg:570.45ms +step:31993/57344 train_time:18249739ms step_avg:570.43ms +step:31994/57344 train_time:18249985ms step_avg:570.42ms +step:31995/57344 train_time:18250528ms step_avg:570.42ms +grad accum step:7999/14336 +step:31996/57344 train_time:18251827ms step_avg:570.44ms +step:31997/57344 train_time:18251844ms step_avg:570.42ms +step:31998/57344 train_time:18252111ms step_avg:570.41ms +step:31999/57344 train_time:18252715ms step_avg:570.42ms +grad accum step:8000/14336 +step:32000/57344 train_time:18254012ms step_avg:570.44ms +step:32000/57344 val_loss:6.097003 train_time:18254013ms step_avg:570.44ms +step:32001/57344 train_time:18254025ms step_avg:570.42ms +step:32002/57344 train_time:18254239ms step_avg:570.41ms +step:32003/57344 train_time:18256076ms step_avg:570.45ms +grad accum step:8001/14336 +step:32004/57344 train_time:18258106ms step_avg:570.49ms +step:32005/57344 train_time:18258118ms step_avg:570.48ms +step:32006/57344 train_time:18258333ms step_avg:570.47ms +step:32007/57344 train_time:18258876ms step_avg:570.47ms +grad accum step:8002/14336 +step:32008/57344 train_time:18260179ms step_avg:570.49ms +step:32009/57344 train_time:18260196ms step_avg:570.47ms +step:32010/57344 train_time:18260445ms step_avg:570.46ms +step:32011/57344 train_time:18260994ms step_avg:570.46ms +grad accum step:8003/14336 +step:32012/57344 train_time:18262325ms step_avg:570.48ms +step:32013/57344 train_time:18262340ms step_avg:570.47ms +step:32014/57344 train_time:18262585ms step_avg:570.46ms +step:32015/57344 train_time:18263130ms step_avg:570.46ms +grad accum step:8004/14336 +step:32016/57344 train_time:18264411ms step_avg:570.48ms +step:32017/57344 train_time:18264425ms step_avg:570.46ms +step:32018/57344 train_time:18264666ms step_avg:570.45ms +step:32019/57344 train_time:18265213ms step_avg:570.45ms +grad accum step:8005/14336 +step:32020/57344 train_time:18266493ms step_avg:570.47ms +step:32021/57344 train_time:18266509ms step_avg:570.45ms +step:32022/57344 train_time:18266757ms step_avg:570.44ms +step:32023/57344 train_time:18267306ms step_avg:570.44ms +grad accum step:8006/14336 +step:32024/57344 train_time:18268624ms step_avg:570.47ms +step:32025/57344 train_time:18268636ms step_avg:570.45ms +step:32026/57344 train_time:18268875ms step_avg:570.44ms +step:32027/57344 train_time:18269425ms step_avg:570.44ms +grad accum step:8007/14336 +step:32028/57344 train_time:18270711ms step_avg:570.46ms +step:32029/57344 train_time:18270728ms step_avg:570.44ms +step:32030/57344 train_time:18270979ms step_avg:570.43ms +step:32031/57344 train_time:18271531ms step_avg:570.43ms +grad accum step:8008/14336 +step:32032/57344 train_time:18272918ms step_avg:570.46ms +step:32033/57344 train_time:18272935ms step_avg:570.44ms +step:32034/57344 train_time:18273184ms step_avg:570.43ms +step:32035/57344 train_time:18273741ms step_avg:570.43ms +grad accum step:8009/14336 +step:32036/57344 train_time:18275098ms step_avg:570.46ms +step:32037/57344 train_time:18275124ms step_avg:570.44ms +step:32038/57344 train_time:18275353ms step_avg:570.43ms +step:32039/57344 train_time:18275899ms step_avg:570.43ms +grad accum step:8010/14336 +step:32040/57344 train_time:18277192ms step_avg:570.45ms +step:32041/57344 train_time:18277208ms step_avg:570.43ms +step:32042/57344 train_time:18277455ms step_avg:570.42ms +step:32043/57344 train_time:18278008ms step_avg:570.42ms +grad accum step:8011/14336 +step:32044/57344 train_time:18279314ms step_avg:570.44ms +step:32045/57344 train_time:18279331ms step_avg:570.43ms +step:32046/57344 train_time:18279576ms step_avg:570.42ms +step:32047/57344 train_time:18280123ms step_avg:570.42ms +grad accum step:8012/14336 +step:32048/57344 train_time:18281442ms step_avg:570.44ms +step:32049/57344 train_time:18281459ms step_avg:570.42ms +step:32050/57344 train_time:18281705ms step_avg:570.41ms +step:32051/57344 train_time:18282254ms step_avg:570.41ms +grad accum step:8013/14336 +step:32052/57344 train_time:18283606ms step_avg:570.44ms +step:32053/57344 train_time:18283622ms step_avg:570.42ms +step:32054/57344 train_time:18283871ms step_avg:570.41ms +step:32055/57344 train_time:18284430ms step_avg:570.41ms +grad accum step:8014/14336 +step:32056/57344 train_time:18285749ms step_avg:570.43ms +step:32057/57344 train_time:18285766ms step_avg:570.41ms +step:32058/57344 train_time:18286012ms step_avg:570.40ms +step:32059/57344 train_time:18286556ms step_avg:570.40ms +grad accum step:8015/14336 +step:32060/57344 train_time:18287863ms step_avg:570.43ms +step:32061/57344 train_time:18287880ms step_avg:570.41ms +step:32062/57344 train_time:18288128ms step_avg:570.40ms +step:32063/57344 train_time:18288673ms step_avg:570.40ms +grad accum step:8016/14336 +step:32064/57344 train_time:18289949ms step_avg:570.42ms +step:32064/57344 val_loss:6.111945 train_time:18289949ms step_avg:570.42ms +step:32065/57344 train_time:18289961ms step_avg:570.40ms +step:32066/57344 train_time:18290188ms step_avg:570.39ms +step:32067/57344 train_time:18290737ms step_avg:570.39ms +grad accum step:8017/14336 +step:32068/57344 train_time:18292022ms step_avg:570.41ms +step:32069/57344 train_time:18292039ms step_avg:570.40ms +step:32070/57344 train_time:18292296ms step_avg:570.39ms +step:32071/57344 train_time:18292863ms step_avg:570.39ms +grad accum step:8018/14336 +step:32072/57344 train_time:18294177ms step_avg:570.41ms +step:32073/57344 train_time:18294194ms step_avg:570.39ms +step:32074/57344 train_time:18294441ms step_avg:570.38ms +step:32075/57344 train_time:18294987ms step_avg:570.38ms +grad accum step:8019/14336 +step:32076/57344 train_time:18296290ms step_avg:570.40ms +step:32077/57344 train_time:18296306ms step_avg:570.39ms +step:32078/57344 train_time:18296555ms step_avg:570.38ms +step:32079/57344 train_time:18297106ms step_avg:570.38ms +grad accum step:8020/14336 +step:32080/57344 train_time:18298410ms step_avg:570.40ms +step:32081/57344 train_time:18298427ms step_avg:570.38ms +step:32082/57344 train_time:18298674ms step_avg:570.37ms +step:32083/57344 train_time:18299221ms step_avg:570.37ms +grad accum step:8021/14336 +step:32084/57344 train_time:18300522ms step_avg:570.39ms +step:32085/57344 train_time:18300539ms step_avg:570.38ms +step:32086/57344 train_time:18300786ms step_avg:570.37ms +step:32087/57344 train_time:18301336ms step_avg:570.37ms +grad accum step:8022/14336 +step:32088/57344 train_time:18302616ms step_avg:570.39ms +step:32089/57344 train_time:18302634ms step_avg:570.37ms +step:32090/57344 train_time:18302878ms step_avg:570.36ms +step:32091/57344 train_time:18303425ms step_avg:570.36ms +grad accum step:8023/14336 +step:32092/57344 train_time:18304745ms step_avg:570.38ms +step:32093/57344 train_time:18304761ms step_avg:570.37ms +step:32094/57344 train_time:18305012ms step_avg:570.36ms +step:32095/57344 train_time:18305564ms step_avg:570.36ms +grad accum step:8024/14336 +step:32096/57344 train_time:18306868ms step_avg:570.38ms +step:32097/57344 train_time:18306885ms step_avg:570.36ms +step:32098/57344 train_time:18307136ms step_avg:570.35ms +step:32099/57344 train_time:18307694ms step_avg:570.35ms +grad accum step:8025/14336 +step:32100/57344 train_time:18309012ms step_avg:570.37ms +step:32101/57344 train_time:18309029ms step_avg:570.36ms +step:32102/57344 train_time:18309280ms step_avg:570.35ms +step:32103/57344 train_time:18309843ms step_avg:570.35ms +grad accum step:8026/14336 +step:32104/57344 train_time:18311189ms step_avg:570.37ms +step:32105/57344 train_time:18311206ms step_avg:570.35ms +step:32106/57344 train_time:18311454ms step_avg:570.34ms +step:32107/57344 train_time:18312006ms step_avg:570.34ms +grad accum step:8027/14336 +step:32108/57344 train_time:18313307ms step_avg:570.37ms +step:32109/57344 train_time:18313324ms step_avg:570.35ms +step:32110/57344 train_time:18313571ms step_avg:570.34ms +step:32111/57344 train_time:18314113ms step_avg:570.34ms +grad accum step:8028/14336 +step:32112/57344 train_time:18315395ms step_avg:570.36ms +step:32113/57344 train_time:18315411ms step_avg:570.34ms +step:32114/57344 train_time:18315660ms step_avg:570.33ms +step:32115/57344 train_time:18316208ms step_avg:570.33ms +grad accum step:8029/14336 +step:32116/57344 train_time:18317500ms step_avg:570.35ms +step:32117/57344 train_time:18317516ms step_avg:570.34ms +step:32118/57344 train_time:18317767ms step_avg:570.33ms +step:32119/57344 train_time:18318326ms step_avg:570.33ms +grad accum step:8030/14336 +step:32120/57344 train_time:18319614ms step_avg:570.35ms +step:32121/57344 train_time:18319631ms step_avg:570.33ms +step:32122/57344 train_time:18319880ms step_avg:570.32ms +step:32123/57344 train_time:18320436ms step_avg:570.32ms +grad accum step:8031/14336 +step:32124/57344 train_time:18321731ms step_avg:570.34ms +step:32125/57344 train_time:18321748ms step_avg:570.33ms +step:32126/57344 train_time:18321994ms step_avg:570.32ms +step:32127/57344 train_time:18322543ms step_avg:570.32ms +grad accum step:8032/14336 +step:32128/57344 train_time:18323885ms step_avg:570.34ms +step:32128/57344 val_loss:6.132194 train_time:18323886ms step_avg:570.34ms +step:32129/57344 train_time:18323898ms step_avg:570.32ms +step:32130/57344 train_time:18324128ms step_avg:570.31ms +step:32131/57344 train_time:18324691ms step_avg:570.31ms +grad accum step:8033/14336 +step:32132/57344 train_time:18326036ms step_avg:570.34ms +step:32133/57344 train_time:18326053ms step_avg:570.32ms +step:32134/57344 train_time:18326299ms step_avg:570.31ms +step:32135/57344 train_time:18326848ms step_avg:570.31ms +grad accum step:8034/14336 +step:32136/57344 train_time:18328182ms step_avg:570.33ms +step:32137/57344 train_time:18328199ms step_avg:570.31ms +step:32138/57344 train_time:18328446ms step_avg:570.30ms +step:32139/57344 train_time:18329009ms step_avg:570.30ms +grad accum step:8035/14336 +step:32140/57344 train_time:18330367ms step_avg:570.33ms +step:32141/57344 train_time:18330384ms step_avg:570.31ms +step:32142/57344 train_time:18330630ms step_avg:570.30ms +step:32143/57344 train_time:18331175ms step_avg:570.30ms +grad accum step:8036/14336 +step:32144/57344 train_time:18332473ms step_avg:570.32ms +step:32145/57344 train_time:18332490ms step_avg:570.31ms +step:32146/57344 train_time:18332742ms step_avg:570.30ms +step:32147/57344 train_time:18333300ms step_avg:570.30ms +grad accum step:8037/14336 +step:32148/57344 train_time:18334621ms step_avg:570.32ms +step:32149/57344 train_time:18334638ms step_avg:570.30ms +step:32150/57344 train_time:18334884ms step_avg:570.29ms +step:32151/57344 train_time:18335426ms step_avg:570.29ms +grad accum step:8038/14336 +step:32152/57344 train_time:18336706ms step_avg:570.31ms +step:32153/57344 train_time:18336722ms step_avg:570.30ms +step:32154/57344 train_time:18336971ms step_avg:570.29ms +step:32155/57344 train_time:18337515ms step_avg:570.29ms +grad accum step:8039/14336 +step:32156/57344 train_time:18338818ms step_avg:570.31ms +step:32157/57344 train_time:18338835ms step_avg:570.29ms +step:32158/57344 train_time:18339078ms step_avg:570.28ms +step:32159/57344 train_time:18339616ms step_avg:570.28ms +grad accum step:8040/14336 +step:32160/57344 train_time:18340889ms step_avg:570.30ms +step:32161/57344 train_time:18340906ms step_avg:570.28ms +step:32162/57344 train_time:18341151ms step_avg:570.27ms +step:32163/57344 train_time:18341700ms step_avg:570.27ms +grad accum step:8041/14336 +step:32164/57344 train_time:18342995ms step_avg:570.30ms +step:32165/57344 train_time:18343012ms step_avg:570.28ms +step:32166/57344 train_time:18343258ms step_avg:570.27ms +step:32167/57344 train_time:18343807ms step_avg:570.27ms +grad accum step:8042/14336 +step:32168/57344 train_time:18345084ms step_avg:570.29ms +step:32169/57344 train_time:18345101ms step_avg:570.27ms +step:32170/57344 train_time:18345346ms step_avg:570.26ms +step:32171/57344 train_time:18345893ms step_avg:570.26ms +grad accum step:8043/14336 +step:32172/57344 train_time:18347194ms step_avg:570.28ms +step:32173/57344 train_time:18347210ms step_avg:570.27ms +step:32174/57344 train_time:18347473ms step_avg:570.26ms +step:32175/57344 train_time:18348067ms step_avg:570.26ms +grad accum step:8044/14336 +step:32176/57344 train_time:18349428ms step_avg:570.28ms +step:32177/57344 train_time:18349445ms step_avg:570.27ms +step:32178/57344 train_time:18349700ms step_avg:570.26ms +step:32179/57344 train_time:18350264ms step_avg:570.26ms +grad accum step:8045/14336 +step:32180/57344 train_time:18351580ms step_avg:570.28ms +step:32181/57344 train_time:18351597ms step_avg:570.26ms +step:32182/57344 train_time:18351850ms step_avg:570.25ms +step:32183/57344 train_time:18352418ms step_avg:570.25ms +grad accum step:8046/14336 +step:32184/57344 train_time:18353704ms step_avg:570.27ms +step:32185/57344 train_time:18353721ms step_avg:570.26ms +step:32186/57344 train_time:18353963ms step_avg:570.25ms +step:32187/57344 train_time:18354501ms step_avg:570.25ms +grad accum step:8047/14336 +step:32188/57344 train_time:18355786ms step_avg:570.27ms +step:32189/57344 train_time:18355803ms step_avg:570.25ms +step:32190/57344 train_time:18356048ms step_avg:570.24ms +step:32191/57344 train_time:18356594ms step_avg:570.24ms +grad accum step:8048/14336 +step:32192/57344 train_time:18357872ms step_avg:570.26ms +step:32192/57344 val_loss:6.154255 train_time:18357872ms step_avg:570.26ms +step:32193/57344 train_time:18357884ms step_avg:570.24ms +step:32194/57344 train_time:18358106ms step_avg:570.23ms +step:32195/57344 train_time:18358641ms step_avg:570.23ms +grad accum step:8049/14336 +step:32196/57344 train_time:18359950ms step_avg:570.26ms +step:32197/57344 train_time:18359967ms step_avg:570.24ms +step:32198/57344 train_time:18360219ms step_avg:570.23ms +step:32199/57344 train_time:18360775ms step_avg:570.23ms +grad accum step:8050/14336 +step:32200/57344 train_time:18362084ms step_avg:570.25ms +step:32201/57344 train_time:18362101ms step_avg:570.23ms +step:32202/57344 train_time:18362348ms step_avg:570.22ms +step:32203/57344 train_time:18362897ms step_avg:570.22ms +grad accum step:8051/14336 +step:32204/57344 train_time:18364221ms step_avg:570.25ms +step:32205/57344 train_time:18364238ms step_avg:570.23ms +step:32206/57344 train_time:18364487ms step_avg:570.22ms +step:32207/57344 train_time:18365034ms step_avg:570.22ms +grad accum step:8052/14336 +step:32208/57344 train_time:18366325ms step_avg:570.24ms +step:32209/57344 train_time:18366342ms step_avg:570.22ms +step:32210/57344 train_time:18366594ms step_avg:570.21ms +step:32211/57344 train_time:18367156ms step_avg:570.21ms +grad accum step:8053/14336 +step:32212/57344 train_time:18368448ms step_avg:570.24ms +step:32213/57344 train_time:18368466ms step_avg:570.22ms +step:32214/57344 train_time:18368716ms step_avg:570.21ms +step:32215/57344 train_time:18369278ms step_avg:570.21ms +grad accum step:8054/14336 +step:32216/57344 train_time:18370591ms step_avg:570.23ms +step:32217/57344 train_time:18370608ms step_avg:570.21ms +step:32218/57344 train_time:18370854ms step_avg:570.20ms +step:32219/57344 train_time:18371394ms step_avg:570.20ms +grad accum step:8055/14336 +step:32220/57344 train_time:18372716ms step_avg:570.23ms +step:32221/57344 train_time:18372732ms step_avg:570.21ms +step:32222/57344 train_time:18372977ms step_avg:570.20ms +step:32223/57344 train_time:18373518ms step_avg:570.20ms +grad accum step:8056/14336 +step:32224/57344 train_time:18374811ms step_avg:570.22ms +step:32225/57344 train_time:18374828ms step_avg:570.20ms +step:32226/57344 train_time:18375080ms step_avg:570.19ms +step:32227/57344 train_time:18375638ms step_avg:570.19ms +grad accum step:8057/14336 +step:32228/57344 train_time:18376966ms step_avg:570.22ms +step:32229/57344 train_time:18376983ms step_avg:570.20ms +step:32230/57344 train_time:18377235ms step_avg:570.19ms +step:32231/57344 train_time:18377798ms step_avg:570.19ms +grad accum step:8058/14336 +step:32232/57344 train_time:18379111ms step_avg:570.21ms +step:32233/57344 train_time:18379128ms step_avg:570.20ms +step:32234/57344 train_time:18379372ms step_avg:570.19ms +step:32235/57344 train_time:18379915ms step_avg:570.19ms +grad accum step:8059/14336 +step:32236/57344 train_time:18381198ms step_avg:570.21ms +step:32237/57344 train_time:18381215ms step_avg:570.19ms +step:32238/57344 train_time:18381460ms step_avg:570.18ms +step:32239/57344 train_time:18382008ms step_avg:570.18ms +grad accum step:8060/14336 +step:32240/57344 train_time:18383319ms step_avg:570.20ms +step:32241/57344 train_time:18383336ms step_avg:570.19ms +step:32242/57344 train_time:18383583ms step_avg:570.18ms +step:32243/57344 train_time:18384130ms step_avg:570.17ms +grad accum step:8061/14336 +step:32244/57344 train_time:18385430ms step_avg:570.20ms +step:32245/57344 train_time:18385447ms step_avg:570.18ms +step:32246/57344 train_time:18385694ms step_avg:570.17ms +step:32247/57344 train_time:18386240ms step_avg:570.17ms +grad accum step:8062/14336 +step:32248/57344 train_time:18387578ms step_avg:570.19ms +step:32249/57344 train_time:18387595ms step_avg:570.18ms +step:32250/57344 train_time:18387848ms step_avg:570.17ms +step:32251/57344 train_time:18388411ms step_avg:570.17ms +grad accum step:8063/14336 +step:32252/57344 train_time:18389702ms step_avg:570.19ms +step:32253/57344 train_time:18389719ms step_avg:570.17ms +step:32254/57344 train_time:18389968ms step_avg:570.16ms +step:32255/57344 train_time:18390527ms step_avg:570.16ms +grad accum step:8064/14336 +step:32256/57344 train_time:18391860ms step_avg:570.18ms +step:32256/57344 val_loss:6.176447 train_time:18391860ms step_avg:570.18ms +step:32257/57344 train_time:18391872ms step_avg:570.17ms +step:32258/57344 train_time:18392097ms step_avg:570.16ms +step:32259/57344 train_time:18392634ms step_avg:570.16ms +grad accum step:8065/14336 +step:32260/57344 train_time:18393949ms step_avg:570.18ms +step:32261/57344 train_time:18393966ms step_avg:570.16ms +step:32262/57344 train_time:18394215ms step_avg:570.15ms +step:32263/57344 train_time:18394783ms step_avg:570.15ms +grad accum step:8066/14336 +step:32264/57344 train_time:18396115ms step_avg:570.17ms +step:32265/57344 train_time:18396132ms step_avg:570.16ms +step:32266/57344 train_time:18396375ms step_avg:570.15ms +step:32267/57344 train_time:18396925ms step_avg:570.15ms +grad accum step:8067/14336 +step:32268/57344 train_time:18398223ms step_avg:570.17ms +step:32269/57344 train_time:18398239ms step_avg:570.15ms +step:32270/57344 train_time:18398487ms step_avg:570.14ms +step:32271/57344 train_time:18399034ms step_avg:570.14ms +grad accum step:8068/14336 +step:32272/57344 train_time:18400333ms step_avg:570.16ms +step:32273/57344 train_time:18400350ms step_avg:570.15ms +step:32274/57344 train_time:18400599ms step_avg:570.14ms +step:32275/57344 train_time:18401143ms step_avg:570.14ms +grad accum step:8069/14336 +step:32276/57344 train_time:18402445ms step_avg:570.16ms +step:32277/57344 train_time:18402462ms step_avg:570.14ms +step:32278/57344 train_time:18402705ms step_avg:570.13ms +step:32279/57344 train_time:18403247ms step_avg:570.13ms +grad accum step:8070/14336 +step:32280/57344 train_time:18404549ms step_avg:570.15ms +step:32281/57344 train_time:18404566ms step_avg:570.14ms +step:32282/57344 train_time:18404817ms step_avg:570.13ms +step:32283/57344 train_time:18405376ms step_avg:570.13ms +grad accum step:8071/14336 +step:32284/57344 train_time:18406664ms step_avg:570.15ms +step:32285/57344 train_time:18406681ms step_avg:570.13ms +step:32286/57344 train_time:18406928ms step_avg:570.12ms +step:32287/57344 train_time:18407479ms step_avg:570.12ms +grad accum step:8072/14336 +step:32288/57344 train_time:18408800ms step_avg:570.14ms +step:32289/57344 train_time:18408817ms step_avg:570.13ms +step:32290/57344 train_time:18409065ms step_avg:570.12ms +step:32291/57344 train_time:18409617ms step_avg:570.12ms +grad accum step:8073/14336 +step:32292/57344 train_time:18410926ms step_avg:570.14ms +step:32293/57344 train_time:18410943ms step_avg:570.12ms +step:32294/57344 train_time:18411197ms step_avg:570.11ms +step:32295/57344 train_time:18411762ms step_avg:570.11ms +grad accum step:8074/14336 +step:32296/57344 train_time:18413104ms step_avg:570.14ms +step:32297/57344 train_time:18413121ms step_avg:570.12ms +step:32298/57344 train_time:18413366ms step_avg:570.11ms +step:32299/57344 train_time:18413907ms step_avg:570.11ms +grad accum step:8075/14336 +step:32300/57344 train_time:18415208ms step_avg:570.13ms +step:32301/57344 train_time:18415225ms step_avg:570.11ms +step:32302/57344 train_time:18415466ms step_avg:570.10ms +step:32303/57344 train_time:18416009ms step_avg:570.10ms +grad accum step:8076/14336 +step:32304/57344 train_time:18417296ms step_avg:570.12ms +step:32305/57344 train_time:18417313ms step_avg:570.11ms +step:32306/57344 train_time:18417561ms step_avg:570.10ms +step:32307/57344 train_time:18418106ms step_avg:570.10ms +grad accum step:8077/14336 +step:32308/57344 train_time:18419401ms step_avg:570.12ms +step:32309/57344 train_time:18419418ms step_avg:570.10ms +step:32310/57344 train_time:18419670ms step_avg:570.09ms +step:32311/57344 train_time:18420235ms step_avg:570.09ms +grad accum step:8078/14336 +step:32312/57344 train_time:18421558ms step_avg:570.12ms +step:32313/57344 train_time:18421575ms step_avg:570.10ms +step:32314/57344 train_time:18421825ms step_avg:570.09ms +step:32315/57344 train_time:18422376ms step_avg:570.09ms +grad accum step:8079/14336 +step:32316/57344 train_time:18423688ms step_avg:570.11ms +step:32317/57344 train_time:18423705ms step_avg:570.09ms +step:32318/57344 train_time:18423951ms step_avg:570.08ms +step:32319/57344 train_time:18424497ms step_avg:570.08ms +grad accum step:8080/14336 +step:32320/57344 train_time:18425770ms step_avg:570.10ms +step:32320/57344 val_loss:6.193738 train_time:18425770ms step_avg:570.10ms +step:32321/57344 train_time:18425782ms step_avg:570.09ms +step:32322/57344 train_time:18426004ms step_avg:570.08ms +step:32323/57344 train_time:18426547ms step_avg:570.08ms +grad accum step:8081/14336 +step:32324/57344 train_time:18427822ms step_avg:570.10ms +step:32325/57344 train_time:18427839ms step_avg:570.08ms +step:32326/57344 train_time:18428086ms step_avg:570.07ms +step:32327/57344 train_time:18428634ms step_avg:570.07ms +grad accum step:8082/14336 +step:32328/57344 train_time:18429951ms step_avg:570.09ms +step:32329/57344 train_time:18429968ms step_avg:570.08ms +step:32330/57344 train_time:18430219ms step_avg:570.07ms +step:32331/57344 train_time:18430780ms step_avg:570.07ms +grad accum step:8083/14336 +step:32332/57344 train_time:18432127ms step_avg:570.09ms +step:32333/57344 train_time:18432144ms step_avg:570.07ms +step:32334/57344 train_time:18432388ms step_avg:570.06ms +step:32335/57344 train_time:18432925ms step_avg:570.06ms +grad accum step:8084/14336 +step:32336/57344 train_time:18434232ms step_avg:570.08ms +step:32337/57344 train_time:18434249ms step_avg:570.07ms +step:32338/57344 train_time:18434496ms step_avg:570.06ms +step:32339/57344 train_time:18435052ms step_avg:570.06ms +grad accum step:8085/14336 +step:32340/57344 train_time:18436360ms step_avg:570.08ms +step:32341/57344 train_time:18436377ms step_avg:570.06ms +step:32342/57344 train_time:18436627ms step_avg:570.05ms +step:32343/57344 train_time:18437182ms step_avg:570.05ms +grad accum step:8086/14336 +step:32344/57344 train_time:18438477ms step_avg:570.07ms +step:32345/57344 train_time:18438494ms step_avg:570.06ms +step:32346/57344 train_time:18438740ms step_avg:570.05ms +step:32347/57344 train_time:18439297ms step_avg:570.05ms +grad accum step:8087/14336 +step:32348/57344 train_time:18440647ms step_avg:570.07ms +step:32349/57344 train_time:18440664ms step_avg:570.05ms +step:32350/57344 train_time:18440912ms step_avg:570.04ms +step:32351/57344 train_time:18441460ms step_avg:570.04ms +grad accum step:8088/14336 +step:32352/57344 train_time:18442772ms step_avg:570.07ms +step:32353/57344 train_time:18442789ms step_avg:570.05ms +step:32354/57344 train_time:18443035ms step_avg:570.04ms +step:32355/57344 train_time:18443588ms step_avg:570.04ms +grad accum step:8089/14336 +step:32356/57344 train_time:18444902ms step_avg:570.06ms +step:32357/57344 train_time:18444919ms step_avg:570.04ms +step:32358/57344 train_time:18445168ms step_avg:570.03ms +step:32359/57344 train_time:18445717ms step_avg:570.03ms +grad accum step:8090/14336 +step:32360/57344 train_time:18447040ms step_avg:570.06ms +step:32361/57344 train_time:18447056ms step_avg:570.04ms +step:32362/57344 train_time:18447309ms step_avg:570.03ms +step:32363/57344 train_time:18447874ms step_avg:570.03ms +grad accum step:8091/14336 +step:32364/57344 train_time:18449209ms step_avg:570.05ms +step:32365/57344 train_time:18449226ms step_avg:570.04ms +step:32366/57344 train_time:18449476ms step_avg:570.03ms +step:32367/57344 train_time:18450036ms step_avg:570.03ms +grad accum step:8092/14336 +step:32368/57344 train_time:18451339ms step_avg:570.05ms +step:32369/57344 train_time:18451356ms step_avg:570.03ms +step:32370/57344 train_time:18451600ms step_avg:570.02ms +step:32371/57344 train_time:18452143ms step_avg:570.02ms +grad accum step:8093/14336 +step:32372/57344 train_time:18453425ms step_avg:570.04ms +step:32373/57344 train_time:18453442ms step_avg:570.03ms +step:32374/57344 train_time:18453686ms step_avg:570.02ms +step:32375/57344 train_time:18454239ms step_avg:570.02ms +grad accum step:8094/14336 +step:32376/57344 train_time:18455530ms step_avg:570.04ms +step:32377/57344 train_time:18455547ms step_avg:570.02ms +step:32378/57344 train_time:18455799ms step_avg:570.01ms +step:32379/57344 train_time:18456350ms step_avg:570.01ms +grad accum step:8095/14336 +step:32380/57344 train_time:18459738ms step_avg:570.10ms +step:32381/57344 train_time:18459755ms step_avg:570.08ms +step:32382/57344 train_time:18460003ms step_avg:570.07ms +step:32383/57344 train_time:18460554ms step_avg:570.07ms +grad accum step:8096/14336 +step:32384/57344 train_time:18461908ms step_avg:570.09ms +step:32384/57344 val_loss:6.210797 train_time:18461908ms step_avg:570.09ms +step:32385/57344 train_time:18461921ms step_avg:570.08ms +step:32386/57344 train_time:18462164ms step_avg:570.07ms +step:32387/57344 train_time:18462763ms step_avg:570.07ms +grad accum step:8097/14336 +step:32388/57344 train_time:18464088ms step_avg:570.09ms +step:32389/57344 train_time:18464105ms step_avg:570.07ms +step:32390/57344 train_time:18464350ms step_avg:570.06ms +step:32391/57344 train_time:18464891ms step_avg:570.06ms +grad accum step:8098/14336 +step:32392/57344 train_time:18466244ms step_avg:570.09ms +step:32393/57344 train_time:18466261ms step_avg:570.07ms +step:32394/57344 train_time:18466511ms step_avg:570.06ms +step:32395/57344 train_time:18467070ms step_avg:570.06ms +grad accum step:8099/14336 +step:32396/57344 train_time:18468375ms step_avg:570.08ms +step:32397/57344 train_time:18468392ms step_avg:570.06ms +step:32398/57344 train_time:18468643ms step_avg:570.06ms +step:32399/57344 train_time:18469200ms step_avg:570.05ms +grad accum step:8100/14336 +step:32400/57344 train_time:18470518ms step_avg:570.08ms +step:32401/57344 train_time:18470535ms step_avg:570.06ms +step:32402/57344 train_time:18470783ms step_avg:570.05ms +step:32403/57344 train_time:18471333ms step_avg:570.05ms +grad accum step:8101/14336 +step:32404/57344 train_time:18472651ms step_avg:570.07ms +step:32405/57344 train_time:18472668ms step_avg:570.06ms +step:32406/57344 train_time:18472911ms step_avg:570.05ms +step:32407/57344 train_time:18473459ms step_avg:570.05ms +grad accum step:8102/14336 +step:32408/57344 train_time:18474741ms step_avg:570.07ms +step:32409/57344 train_time:18474758ms step_avg:570.05ms +step:32410/57344 train_time:18475005ms step_avg:570.04ms +step:32411/57344 train_time:18475550ms step_avg:570.04ms +grad accum step:8103/14336 +step:32412/57344 train_time:18476833ms step_avg:570.06ms +step:32413/57344 train_time:18476850ms step_avg:570.04ms +step:32414/57344 train_time:18477096ms step_avg:570.03ms +step:32415/57344 train_time:18477641ms step_avg:570.03ms +grad accum step:8104/14336 +step:32416/57344 train_time:18478938ms step_avg:570.06ms +step:32417/57344 train_time:18478955ms step_avg:570.04ms +step:32418/57344 train_time:18479205ms step_avg:570.03ms +step:32419/57344 train_time:18479760ms step_avg:570.03ms +grad accum step:8105/14336 +step:32420/57344 train_time:18481068ms step_avg:570.05ms +step:32421/57344 train_time:18481085ms step_avg:570.03ms +step:32422/57344 train_time:18481330ms step_avg:570.02ms +step:32423/57344 train_time:18481878ms step_avg:570.02ms +grad accum step:8106/14336 +step:32424/57344 train_time:18483202ms step_avg:570.05ms +step:32425/57344 train_time:18483219ms step_avg:570.03ms +step:32426/57344 train_time:18483467ms step_avg:570.02ms +step:32427/57344 train_time:18484013ms step_avg:570.02ms +grad accum step:8107/14336 +step:32428/57344 train_time:18485307ms step_avg:570.04ms +step:32429/57344 train_time:18485324ms step_avg:570.02ms +step:32430/57344 train_time:18485568ms step_avg:570.01ms +step:32431/57344 train_time:18486115ms step_avg:570.01ms +grad accum step:8108/14336 +step:32432/57344 train_time:18487392ms step_avg:570.04ms +step:32433/57344 train_time:18487409ms step_avg:570.02ms +step:32434/57344 train_time:18487654ms step_avg:570.01ms +step:32435/57344 train_time:18488203ms step_avg:570.01ms +grad accum step:8109/14336 +step:32436/57344 train_time:18489510ms step_avg:570.03ms +step:32437/57344 train_time:18489527ms step_avg:570.01ms +step:32438/57344 train_time:18489775ms step_avg:570.00ms +step:32439/57344 train_time:18490320ms step_avg:570.00ms +grad accum step:8110/14336 +step:32440/57344 train_time:18491597ms step_avg:570.02ms +step:32441/57344 train_time:18491614ms step_avg:570.01ms +step:32442/57344 train_time:18491864ms step_avg:570.00ms +step:32443/57344 train_time:18492414ms step_avg:570.00ms +grad accum step:8111/14336 +step:32444/57344 train_time:18493715ms step_avg:570.02ms +step:32445/57344 train_time:18493732ms step_avg:570.00ms +step:32446/57344 train_time:18493980ms step_avg:569.99ms +step:32447/57344 train_time:18494531ms step_avg:569.99ms +grad accum step:8112/14336 +step:32448/57344 train_time:18495809ms step_avg:570.01ms +step:32448/57344 val_loss:6.225993 train_time:18495810ms step_avg:570.01ms +step:32449/57344 train_time:18495822ms step_avg:570.00ms +step:32450/57344 train_time:18496042ms step_avg:569.99ms +step:32451/57344 train_time:18496590ms step_avg:569.99ms +grad accum step:8113/14336 +step:32452/57344 train_time:18497911ms step_avg:570.01ms +step:32453/57344 train_time:18497927ms step_avg:569.99ms +step:32454/57344 train_time:18498171ms step_avg:569.98ms +step:32455/57344 train_time:18498718ms step_avg:569.98ms +grad accum step:8114/14336 +step:32456/57344 train_time:18500013ms step_avg:570.00ms +step:32457/57344 train_time:18500030ms step_avg:569.99ms +step:32458/57344 train_time:18500276ms step_avg:569.98ms +step:32459/57344 train_time:18500822ms step_avg:569.98ms +grad accum step:8115/14336 +step:32460/57344 train_time:18502100ms step_avg:570.00ms +step:32461/57344 train_time:18502117ms step_avg:569.98ms +step:32462/57344 train_time:18502365ms step_avg:569.97ms +step:32463/57344 train_time:18502915ms step_avg:569.97ms +grad accum step:8116/14336 +step:32464/57344 train_time:18504239ms step_avg:569.99ms +step:32465/57344 train_time:18504256ms step_avg:569.98ms +step:32466/57344 train_time:18504503ms step_avg:569.97ms +step:32467/57344 train_time:18505059ms step_avg:569.97ms +grad accum step:8117/14336 +step:32468/57344 train_time:18506366ms step_avg:569.99ms +step:32469/57344 train_time:18506382ms step_avg:569.97ms +step:32470/57344 train_time:18506633ms step_avg:569.96ms +step:32471/57344 train_time:18507197ms step_avg:569.96ms +grad accum step:8118/14336 +step:32472/57344 train_time:18508524ms step_avg:569.98ms +step:32473/57344 train_time:18508541ms step_avg:569.97ms +step:32474/57344 train_time:18508787ms step_avg:569.96ms +step:32475/57344 train_time:18509337ms step_avg:569.96ms +grad accum step:8119/14336 +step:32476/57344 train_time:18510654ms step_avg:569.98ms +step:32477/57344 train_time:18510670ms step_avg:569.96ms +step:32478/57344 train_time:18510917ms step_avg:569.95ms +step:32479/57344 train_time:18511462ms step_avg:569.95ms +grad accum step:8120/14336 +step:32480/57344 train_time:18512753ms step_avg:569.97ms +step:32481/57344 train_time:18512770ms step_avg:569.96ms +step:32482/57344 train_time:18513017ms step_avg:569.95ms +step:32483/57344 train_time:18513564ms step_avg:569.95ms +grad accum step:8121/14336 +step:32484/57344 train_time:18514850ms step_avg:569.97ms +step:32485/57344 train_time:18514867ms step_avg:569.95ms +step:32486/57344 train_time:18515117ms step_avg:569.94ms +step:32487/57344 train_time:18515670ms step_avg:569.94ms +grad accum step:8122/14336 +step:32488/57344 train_time:18516978ms step_avg:569.96ms +step:32489/57344 train_time:18516993ms step_avg:569.95ms +step:32490/57344 train_time:18517243ms step_avg:569.94ms +step:32491/57344 train_time:18517799ms step_avg:569.94ms +grad accum step:8123/14336 +step:32492/57344 train_time:18519101ms step_avg:569.96ms +step:32493/57344 train_time:18519118ms step_avg:569.94ms +step:32494/57344 train_time:18519366ms step_avg:569.93ms +step:32495/57344 train_time:18519917ms step_avg:569.93ms +grad accum step:8124/14336 +step:32496/57344 train_time:18521205ms step_avg:569.95ms +step:32497/57344 train_time:18521222ms step_avg:569.94ms +step:32498/57344 train_time:18521475ms step_avg:569.93ms +step:32499/57344 train_time:18522036ms step_avg:569.93ms +grad accum step:8125/14336 +step:32500/57344 train_time:18523339ms step_avg:569.95ms +step:32501/57344 train_time:18523356ms step_avg:569.93ms +step:32502/57344 train_time:18523600ms step_avg:569.92ms +step:32503/57344 train_time:18524137ms step_avg:569.92ms +grad accum step:8126/14336 +step:32504/57344 train_time:18525452ms step_avg:569.94ms +step:32505/57344 train_time:18525469ms step_avg:569.93ms +step:32506/57344 train_time:18525715ms step_avg:569.92ms +step:32507/57344 train_time:18526263ms step_avg:569.92ms +grad accum step:8127/14336 +step:32508/57344 train_time:18527554ms step_avg:569.94ms +step:32509/57344 train_time:18527570ms step_avg:569.92ms +step:32510/57344 train_time:18527815ms step_avg:569.91ms +step:32511/57344 train_time:18528366ms step_avg:569.91ms +grad accum step:8128/14336 +step:32512/57344 train_time:18529683ms step_avg:569.93ms +step:32512/57344 val_loss:6.246869 train_time:18529684ms step_avg:569.93ms +step:32513/57344 train_time:18529695ms step_avg:569.92ms +step:32514/57344 train_time:18529918ms step_avg:569.91ms +step:32515/57344 train_time:18530466ms step_avg:569.91ms +grad accum step:8129/14336 +step:32516/57344 train_time:18531767ms step_avg:569.93ms +step:32517/57344 train_time:18531784ms step_avg:569.91ms +step:32518/57344 train_time:18532033ms step_avg:569.90ms +step:32519/57344 train_time:18532596ms step_avg:569.90ms +grad accum step:8130/14336 +step:32520/57344 train_time:18533899ms step_avg:569.92ms +step:32521/57344 train_time:18533916ms step_avg:569.91ms +step:32522/57344 train_time:18534156ms step_avg:569.90ms +step:32523/57344 train_time:18534686ms step_avg:569.89ms +grad accum step:8131/14336 +step:32524/57344 train_time:18536002ms step_avg:569.92ms +step:32525/57344 train_time:18536019ms step_avg:569.90ms +step:32526/57344 train_time:18536264ms step_avg:569.89ms +step:32527/57344 train_time:18536807ms step_avg:569.89ms +grad accum step:8132/14336 +step:32528/57344 train_time:18538114ms step_avg:569.91ms +step:32529/57344 train_time:18538131ms step_avg:569.90ms +step:32530/57344 train_time:18538384ms step_avg:569.89ms +step:32531/57344 train_time:18538947ms step_avg:569.89ms +grad accum step:8133/14336 +step:32532/57344 train_time:18540238ms step_avg:569.91ms +step:32533/57344 train_time:18540255ms step_avg:569.89ms +step:32534/57344 train_time:18540508ms step_avg:569.88ms +step:32535/57344 train_time:18541071ms step_avg:569.88ms +grad accum step:8134/14336 +step:32536/57344 train_time:18542375ms step_avg:569.90ms +step:32537/57344 train_time:18542392ms step_avg:569.89ms +step:32538/57344 train_time:18542642ms step_avg:569.88ms +step:32539/57344 train_time:18543202ms step_avg:569.88ms +grad accum step:8135/14336 +step:32540/57344 train_time:18544497ms step_avg:569.90ms +step:32541/57344 train_time:18544514ms step_avg:569.88ms +step:32542/57344 train_time:18544769ms step_avg:569.87ms +step:32543/57344 train_time:18545333ms step_avg:569.87ms +grad accum step:8136/14336 +step:32544/57344 train_time:18546644ms step_avg:569.89ms +step:32545/57344 train_time:18546660ms step_avg:569.88ms +step:32546/57344 train_time:18546913ms step_avg:569.87ms +step:32547/57344 train_time:18547480ms step_avg:569.87ms +grad accum step:8137/14336 +step:32548/57344 train_time:18548837ms step_avg:569.89ms +step:32549/57344 train_time:18548853ms step_avg:569.87ms +step:32550/57344 train_time:18549103ms step_avg:569.86ms +step:32551/57344 train_time:18549657ms step_avg:569.86ms +grad accum step:8138/14336 +step:32552/57344 train_time:18550990ms step_avg:569.89ms +step:32553/57344 train_time:18551007ms step_avg:569.87ms +step:32554/57344 train_time:18551256ms step_avg:569.86ms +step:32555/57344 train_time:18551810ms step_avg:569.86ms +grad accum step:8139/14336 +step:32556/57344 train_time:18553104ms step_avg:569.88ms +step:32557/57344 train_time:18553119ms step_avg:569.87ms +step:32558/57344 train_time:18553366ms step_avg:569.86ms +step:32559/57344 train_time:18553906ms step_avg:569.85ms +grad accum step:8140/14336 +step:32560/57344 train_time:18555203ms step_avg:569.88ms +step:32561/57344 train_time:18555220ms step_avg:569.86ms +step:32562/57344 train_time:18555465ms step_avg:569.85ms +step:32563/57344 train_time:18556008ms step_avg:569.85ms +grad accum step:8141/14336 +step:32564/57344 train_time:18557327ms step_avg:569.87ms +step:32565/57344 train_time:18557344ms step_avg:569.86ms +step:32566/57344 train_time:18557598ms step_avg:569.85ms +step:32567/57344 train_time:18558158ms step_avg:569.85ms +grad accum step:8142/14336 +step:32568/57344 train_time:18559481ms step_avg:569.87ms +step:32569/57344 train_time:18559495ms step_avg:569.85ms +step:32570/57344 train_time:18559742ms step_avg:569.84ms +step:32571/57344 train_time:18560287ms step_avg:569.84ms +grad accum step:8143/14336 +step:32572/57344 train_time:18561578ms step_avg:569.86ms +step:32573/57344 train_time:18561595ms step_avg:569.85ms +step:32574/57344 train_time:18561841ms step_avg:569.84ms +step:32575/57344 train_time:18562385ms step_avg:569.84ms +grad accum step:8144/14336 +step:32576/57344 train_time:18563668ms step_avg:569.86ms +step:32576/57344 val_loss:6.263676 train_time:18563669ms step_avg:569.86ms +step:32577/57344 train_time:18563680ms step_avg:569.84ms +step:32578/57344 train_time:18563906ms step_avg:569.83ms +step:32579/57344 train_time:18564460ms step_avg:569.83ms +grad accum step:8145/14336 +step:32580/57344 train_time:18565759ms step_avg:569.85ms +step:32581/57344 train_time:18565779ms step_avg:569.83ms +step:32582/57344 train_time:18566011ms step_avg:569.82ms +step:32583/57344 train_time:18566564ms step_avg:569.82ms +grad accum step:8146/14336 +step:32584/57344 train_time:18567876ms step_avg:569.85ms +step:32585/57344 train_time:18567893ms step_avg:569.83ms +step:32586/57344 train_time:18568140ms step_avg:569.82ms +step:32587/57344 train_time:18568686ms step_avg:569.82ms +grad accum step:8147/14336 +step:32588/57344 train_time:18569958ms step_avg:569.84ms +step:32589/57344 train_time:18569975ms step_avg:569.82ms +step:32590/57344 train_time:18570218ms step_avg:569.81ms +step:32591/57344 train_time:18570771ms step_avg:569.81ms +grad accum step:8148/14336 +step:32592/57344 train_time:18572088ms step_avg:569.84ms +step:32593/57344 train_time:18572103ms step_avg:569.82ms +step:32594/57344 train_time:18572348ms step_avg:569.81ms +step:32595/57344 train_time:18572895ms step_avg:569.81ms +grad accum step:8149/14336 +step:32596/57344 train_time:18574231ms step_avg:569.83ms +step:32597/57344 train_time:18574247ms step_avg:569.81ms +step:32598/57344 train_time:18574497ms step_avg:569.80ms +step:32599/57344 train_time:18575047ms step_avg:569.80ms +grad accum step:8150/14336 +step:32600/57344 train_time:18576377ms step_avg:569.83ms +step:32601/57344 train_time:18576392ms step_avg:569.81ms +step:32602/57344 train_time:18576642ms step_avg:569.80ms +step:32603/57344 train_time:18577200ms step_avg:569.80ms +grad accum step:8151/14336 +step:32604/57344 train_time:18578480ms step_avg:569.82ms +step:32605/57344 train_time:18578501ms step_avg:569.81ms +step:32606/57344 train_time:18578737ms step_avg:569.80ms +step:32607/57344 train_time:18580525ms step_avg:569.83ms +grad accum step:8152/14336 +step:32608/57344 train_time:18584736ms step_avg:569.94ms +step:32609/57344 train_time:18584755ms step_avg:569.93ms +step:32610/57344 train_time:18586799ms step_avg:569.97ms +step:32611/57344 train_time:18588845ms step_avg:570.02ms +grad accum step:8153/14336 +step:32612/57344 train_time:18592929ms step_avg:570.13ms +step:32613/57344 train_time:18592946ms step_avg:570.11ms +step:32614/57344 train_time:18594988ms step_avg:570.15ms +step:32615/57344 train_time:18595660ms step_avg:570.16ms +grad accum step:8154/14336 +step:32616/57344 train_time:18596707ms step_avg:570.17ms +step:32617/57344 train_time:18596722ms step_avg:570.15ms +step:32618/57344 train_time:18596959ms step_avg:570.14ms +step:32619/57344 train_time:18597509ms step_avg:570.14ms +grad accum step:8155/14336 +step:32620/57344 train_time:18600167ms step_avg:570.21ms +step:32621/57344 train_time:18600185ms step_avg:570.19ms +step:32622/57344 train_time:18600402ms step_avg:570.18ms +step:32623/57344 train_time:18600944ms step_avg:570.18ms +grad accum step:8156/14336 +step:32624/57344 train_time:18602229ms step_avg:570.20ms +step:32625/57344 train_time:18602245ms step_avg:570.18ms +step:32626/57344 train_time:18602493ms step_avg:570.17ms +step:32627/57344 train_time:18603046ms step_avg:570.17ms +grad accum step:8157/14336 +step:32628/57344 train_time:18604380ms step_avg:570.20ms +step:32629/57344 train_time:18604396ms step_avg:570.18ms +step:32630/57344 train_time:18604648ms step_avg:570.17ms +step:32631/57344 train_time:18605205ms step_avg:570.17ms +grad accum step:8158/14336 +step:32632/57344 train_time:18606541ms step_avg:570.19ms +step:32633/57344 train_time:18606558ms step_avg:570.18ms +step:32634/57344 train_time:18606806ms step_avg:570.17ms +step:32635/57344 train_time:18607365ms step_avg:570.17ms +grad accum step:8159/14336 +step:32636/57344 train_time:18608677ms step_avg:570.19ms +step:32637/57344 train_time:18608693ms step_avg:570.17ms +step:32638/57344 train_time:18608936ms step_avg:570.16ms +step:32639/57344 train_time:18609484ms step_avg:570.16ms +grad accum step:8160/14336 +step:32640/57344 train_time:18610783ms step_avg:570.18ms +step:32640/57344 val_loss:6.276979 train_time:18610784ms step_avg:570.18ms +step:32641/57344 train_time:18610796ms step_avg:570.17ms +step:32642/57344 train_time:18611018ms step_avg:570.16ms +step:32643/57344 train_time:18611565ms step_avg:570.15ms +grad accum step:8161/14336 +step:32644/57344 train_time:18612871ms step_avg:570.18ms +step:32645/57344 train_time:18612886ms step_avg:570.16ms +step:32646/57344 train_time:18613128ms step_avg:570.15ms +step:32647/57344 train_time:18613667ms step_avg:570.15ms +grad accum step:8162/14336 +step:32648/57344 train_time:18615008ms step_avg:570.17ms +step:32649/57344 train_time:18615026ms step_avg:570.16ms +step:32650/57344 train_time:18615274ms step_avg:570.15ms +step:32651/57344 train_time:18615841ms step_avg:570.15ms +grad accum step:8163/14336 +step:32652/57344 train_time:18617191ms step_avg:570.17ms +step:32653/57344 train_time:18617207ms step_avg:570.15ms +step:32654/57344 train_time:18617452ms step_avg:570.14ms +step:32655/57344 train_time:18618004ms step_avg:570.14ms +grad accum step:8164/14336 +step:32656/57344 train_time:18619342ms step_avg:570.17ms +step:32657/57344 train_time:18619356ms step_avg:570.15ms +step:32658/57344 train_time:18619599ms step_avg:570.14ms +step:32659/57344 train_time:18620145ms step_avg:570.14ms +grad accum step:8165/14336 +step:32660/57344 train_time:18621485ms step_avg:570.16ms +step:32661/57344 train_time:18621497ms step_avg:570.14ms +step:32662/57344 train_time:18621741ms step_avg:570.13ms +step:32663/57344 train_time:18622294ms step_avg:570.13ms +grad accum step:8166/14336 +step:32664/57344 train_time:18623632ms step_avg:570.16ms +step:32665/57344 train_time:18623647ms step_avg:570.14ms +step:32666/57344 train_time:18623896ms step_avg:570.13ms +step:32667/57344 train_time:18624443ms step_avg:570.13ms +grad accum step:8167/14336 +step:32668/57344 train_time:18625770ms step_avg:570.15ms +step:32669/57344 train_time:18625786ms step_avg:570.14ms +step:32670/57344 train_time:18626035ms step_avg:570.13ms +step:32671/57344 train_time:18626588ms step_avg:570.13ms +grad accum step:8168/14336 +step:32672/57344 train_time:18627884ms step_avg:570.15ms +step:32673/57344 train_time:18627901ms step_avg:570.13ms +step:32674/57344 train_time:18628153ms step_avg:570.12ms +step:32675/57344 train_time:18628710ms step_avg:570.12ms +grad accum step:8169/14336 +step:32676/57344 train_time:18629997ms step_avg:570.14ms +step:32677/57344 train_time:18630013ms step_avg:570.13ms +step:32678/57344 train_time:18630257ms step_avg:570.12ms +step:32679/57344 train_time:18630800ms step_avg:570.12ms +grad accum step:8170/14336 +step:32680/57344 train_time:18635429ms step_avg:570.24ms +step:32681/57344 train_time:18635457ms step_avg:570.22ms +step:32682/57344 train_time:18635728ms step_avg:570.21ms +step:32683/57344 train_time:18636281ms step_avg:570.21ms +grad accum step:8171/14336 +step:32684/57344 train_time:18637576ms step_avg:570.24ms +step:32685/57344 train_time:18637592ms step_avg:570.22ms +step:32686/57344 train_time:18637836ms step_avg:570.21ms +step:32687/57344 train_time:18638384ms step_avg:570.21ms +grad accum step:8172/14336 +step:32688/57344 train_time:18639676ms step_avg:570.23ms +step:32689/57344 train_time:18639693ms step_avg:570.21ms +step:32690/57344 train_time:18639930ms step_avg:570.20ms +step:32691/57344 train_time:18641904ms step_avg:570.25ms +grad accum step:8173/14336 +step:32692/57344 train_time:18645994ms step_avg:570.35ms +step:32693/57344 train_time:18646012ms step_avg:570.34ms +step:32694/57344 train_time:18648054ms step_avg:570.38ms +step:32695/57344 train_time:18650095ms step_avg:570.43ms +grad accum step:8174/14336 +step:32696/57344 train_time:18655524ms step_avg:570.58ms +step:32697/57344 train_time:18655544ms step_avg:570.56ms +step:32698/57344 train_time:18657587ms step_avg:570.60ms +step:32699/57344 train_time:18659631ms step_avg:570.65ms +grad accum step:8175/14336 +step:32700/57344 train_time:18679529ms step_avg:571.24ms +step:32701/57344 train_time:18683568ms step_avg:571.35ms +step:32702/57344 train_time:18685679ms step_avg:571.39ms +step:32703/57344 train_time:18688174ms step_avg:571.45ms +grad accum step:8176/14336 +step:32704/57344 train_time:18690496ms step_avg:571.50ms +step:32704/57344 val_loss:6.297549 train_time:18690503ms step_avg:571.51ms +step:32705/57344 train_time:18690514ms step_avg:571.49ms +step:32706/57344 train_time:18690739ms step_avg:571.48ms +step:32707/57344 train_time:18691291ms step_avg:571.48ms +grad accum step:8177/14336 +step:32708/57344 train_time:18692618ms step_avg:571.50ms +step:32709/57344 train_time:18692637ms step_avg:571.48ms +step:32710/57344 train_time:18692874ms step_avg:571.47ms +step:32711/57344 train_time:18693409ms step_avg:571.47ms +grad accum step:8178/14336 +step:32712/57344 train_time:18694695ms step_avg:571.49ms +step:32713/57344 train_time:18694710ms step_avg:571.48ms +step:32714/57344 train_time:18694957ms step_avg:571.47ms +step:32715/57344 train_time:18695512ms step_avg:571.47ms +grad accum step:8179/14336 +step:32716/57344 train_time:18696856ms step_avg:571.49ms +step:32717/57344 train_time:18696872ms step_avg:571.47ms +step:32718/57344 train_time:18697116ms step_avg:571.46ms +step:32719/57344 train_time:18697668ms step_avg:571.46ms +grad accum step:8180/14336 +step:32720/57344 train_time:18699001ms step_avg:571.49ms +step:32721/57344 train_time:18699017ms step_avg:571.47ms +step:32722/57344 train_time:18699248ms step_avg:571.46ms +step:32723/57344 train_time:18699785ms step_avg:571.46ms +grad accum step:8181/14336 +step:32724/57344 train_time:18701105ms step_avg:571.48ms +step:32725/57344 train_time:18701119ms step_avg:571.46ms +step:32726/57344 train_time:18701367ms step_avg:571.45ms +step:32727/57344 train_time:18701924ms step_avg:571.45ms +grad accum step:8182/14336 +step:32728/57344 train_time:18703220ms step_avg:571.47ms +step:32729/57344 train_time:18703237ms step_avg:571.46ms +step:32730/57344 train_time:18703480ms step_avg:571.45ms +step:32731/57344 train_time:18704015ms step_avg:571.45ms +grad accum step:8183/14336 +step:32732/57344 train_time:18705327ms step_avg:571.47ms +step:32733/57344 train_time:18705343ms step_avg:571.45ms +step:32734/57344 train_time:18705592ms step_avg:571.44ms +step:32735/57344 train_time:18706145ms step_avg:571.44ms +grad accum step:8184/14336 +step:32736/57344 train_time:18707430ms step_avg:571.46ms +step:32737/57344 train_time:18707445ms step_avg:571.45ms +step:32738/57344 train_time:18707694ms step_avg:571.44ms +step:32739/57344 train_time:18708245ms step_avg:571.44ms +grad accum step:8185/14336 +step:32740/57344 train_time:18709565ms step_avg:571.46ms +step:32741/57344 train_time:18709579ms step_avg:571.44ms +step:32742/57344 train_time:18709839ms step_avg:571.43ms +step:32743/57344 train_time:18710425ms step_avg:571.43ms +grad accum step:8186/14336 +step:32744/57344 train_time:18711722ms step_avg:571.45ms +step:32745/57344 train_time:18711868ms step_avg:571.44ms +step:32746/57344 train_time:18712083ms step_avg:571.43ms +step:32747/57344 train_time:18712637ms step_avg:571.43ms +grad accum step:8187/14336 +step:32748/57344 train_time:18713929ms step_avg:571.45ms +step:32749/57344 train_time:18713945ms step_avg:571.44ms +step:32750/57344 train_time:18714192ms step_avg:571.43ms +step:32751/57344 train_time:18714746ms step_avg:571.43ms +grad accum step:8188/14336 +step:32752/57344 train_time:18716063ms step_avg:571.45ms +step:32753/57344 train_time:18716078ms step_avg:571.43ms +step:32754/57344 train_time:18716321ms step_avg:571.42ms +step:32755/57344 train_time:18716872ms step_avg:571.42ms +grad accum step:8189/14336 +step:32756/57344 train_time:18718219ms step_avg:571.44ms +step:32757/57344 train_time:18718233ms step_avg:571.43ms +step:32758/57344 train_time:18718481ms step_avg:571.42ms +step:32759/57344 train_time:18719029ms step_avg:571.42ms +grad accum step:8190/14336 +step:32760/57344 train_time:18720313ms step_avg:571.44ms +step:32761/57344 train_time:18720330ms step_avg:571.42ms +step:32762/57344 train_time:18720580ms step_avg:571.41ms +step:32763/57344 train_time:18721160ms step_avg:571.41ms +grad accum step:8191/14336 +step:32764/57344 train_time:18722481ms step_avg:571.43ms +step:32765/57344 train_time:18722497ms step_avg:571.42ms +step:32766/57344 train_time:18722747ms step_avg:571.41ms +step:32767/57344 train_time:18723298ms step_avg:571.41ms +grad accum step:8192/14336 +step:32768/57344 train_time:18724630ms step_avg:571.43ms +step:32768/57344 val_loss:6.321110 train_time:18724630ms step_avg:571.43ms +step:32769/57344 train_time:18724642ms step_avg:571.41ms +step:32770/57344 train_time:18724869ms step_avg:571.40ms +step:32771/57344 train_time:18725436ms step_avg:571.40ms +grad accum step:8193/14336 +step:32772/57344 train_time:18726782ms step_avg:571.43ms +step:32773/57344 train_time:18726799ms step_avg:571.41ms +step:32774/57344 train_time:18727044ms step_avg:571.40ms +step:32775/57344 train_time:18727593ms step_avg:571.40ms +grad accum step:8194/14336 +step:32776/57344 train_time:18728908ms step_avg:571.42ms +step:32777/57344 train_time:18728924ms step_avg:571.40ms +step:32778/57344 train_time:18729175ms step_avg:571.39ms +step:32779/57344 train_time:18729744ms step_avg:571.39ms +grad accum step:8195/14336 +step:32780/57344 train_time:18731054ms step_avg:571.42ms +step:32781/57344 train_time:18731068ms step_avg:571.40ms +step:32782/57344 train_time:18731317ms step_avg:571.39ms +step:32783/57344 train_time:18731873ms step_avg:571.39ms +grad accum step:8196/14336 +step:32784/57344 train_time:18733197ms step_avg:571.41ms +step:32785/57344 train_time:18733212ms step_avg:571.40ms +step:32786/57344 train_time:18733461ms step_avg:571.39ms +step:32787/57344 train_time:18734018ms step_avg:571.39ms +grad accum step:8197/14336 +step:32788/57344 train_time:18735338ms step_avg:571.41ms +step:32789/57344 train_time:18735355ms step_avg:571.39ms +step:32790/57344 train_time:18735597ms step_avg:571.38ms +step:32791/57344 train_time:18736149ms step_avg:571.38ms +grad accum step:8198/14336 +step:32792/57344 train_time:18737485ms step_avg:571.40ms +step:32793/57344 train_time:18737500ms step_avg:571.39ms +step:32794/57344 train_time:18737752ms step_avg:571.38ms +step:32795/57344 train_time:18738310ms step_avg:571.38ms +grad accum step:8199/14336 +step:32796/57344 train_time:18739604ms step_avg:571.40ms +step:32797/57344 train_time:18739624ms step_avg:571.38ms +step:32798/57344 train_time:18739864ms step_avg:571.37ms +step:32799/57344 train_time:18740421ms step_avg:571.37ms +grad accum step:8200/14336 +step:32800/57344 train_time:18741702ms step_avg:571.39ms +step:32801/57344 train_time:18741719ms step_avg:571.38ms +step:32802/57344 train_time:18741961ms step_avg:571.37ms +step:32803/57344 train_time:18742510ms step_avg:571.37ms +grad accum step:8201/14336 +step:32804/57344 train_time:18743811ms step_avg:571.39ms +step:32805/57344 train_time:18743830ms step_avg:571.37ms +step:32806/57344 train_time:18744070ms step_avg:571.36ms +step:32807/57344 train_time:18744610ms step_avg:571.36ms +grad accum step:8202/14336 +step:32808/57344 train_time:18745903ms step_avg:571.38ms +step:32809/57344 train_time:18745920ms step_avg:571.37ms +step:32810/57344 train_time:18746171ms step_avg:571.36ms +step:32811/57344 train_time:18746724ms step_avg:571.35ms +grad accum step:8203/14336 +step:32812/57344 train_time:18748204ms step_avg:571.38ms +step:32813/57344 train_time:18748222ms step_avg:571.37ms +step:32814/57344 train_time:18748438ms step_avg:571.35ms +step:32815/57344 train_time:18748985ms step_avg:571.35ms +grad accum step:8204/14336 +step:32816/57344 train_time:18750283ms step_avg:571.38ms +step:32817/57344 train_time:18750299ms step_avg:571.36ms +step:32818/57344 train_time:18750548ms step_avg:571.35ms +step:32819/57344 train_time:18751096ms step_avg:571.35ms +grad accum step:8205/14336 +step:32820/57344 train_time:18752403ms step_avg:571.37ms +step:32821/57344 train_time:18752420ms step_avg:571.35ms +step:32822/57344 train_time:18752670ms step_avg:571.34ms +step:32823/57344 train_time:18753226ms step_avg:571.34ms +grad accum step:8206/14336 +step:32824/57344 train_time:18754517ms step_avg:571.37ms +step:32825/57344 train_time:18754531ms step_avg:571.35ms +step:32826/57344 train_time:18754779ms step_avg:571.34ms +step:32827/57344 train_time:18755328ms step_avg:571.34ms +grad accum step:8207/14336 +step:32828/57344 train_time:18756630ms step_avg:571.36ms +step:32829/57344 train_time:18756648ms step_avg:571.34ms +step:32830/57344 train_time:18756891ms step_avg:571.33ms +step:32831/57344 train_time:18757434ms step_avg:571.33ms +grad accum step:8208/14336 +step:32832/57344 train_time:18758747ms step_avg:571.36ms +step:32832/57344 val_loss:6.319574 train_time:18758748ms step_avg:571.36ms +step:32833/57344 train_time:18758759ms step_avg:571.34ms +step:32834/57344 train_time:18758987ms step_avg:571.33ms +step:32835/57344 train_time:18759542ms step_avg:571.33ms +grad accum step:8209/14336 +step:32836/57344 train_time:18760856ms step_avg:571.35ms +step:32837/57344 train_time:18760872ms step_avg:571.33ms +step:32838/57344 train_time:18761119ms step_avg:571.32ms +step:32839/57344 train_time:18761687ms step_avg:571.32ms +grad accum step:8210/14336 +step:32840/57344 train_time:18762996ms step_avg:571.35ms +step:32841/57344 train_time:18763009ms step_avg:571.33ms +step:32842/57344 train_time:18763258ms step_avg:571.32ms +step:32843/57344 train_time:18763821ms step_avg:571.32ms +grad accum step:8211/14336 +step:32844/57344 train_time:18765141ms step_avg:571.34ms +step:32845/57344 train_time:18765157ms step_avg:571.32ms +step:32846/57344 train_time:18765401ms step_avg:571.31ms +step:32847/57344 train_time:18765958ms step_avg:571.31ms +grad accum step:8212/14336 +step:32848/57344 train_time:18767282ms step_avg:571.34ms +step:32849/57344 train_time:18767295ms step_avg:571.32ms +step:32850/57344 train_time:18767527ms step_avg:571.31ms +step:32851/57344 train_time:18768076ms step_avg:571.31ms +grad accum step:8213/14336 +step:32852/57344 train_time:18769406ms step_avg:571.33ms +step:32853/57344 train_time:18769423ms step_avg:571.32ms +step:32854/57344 train_time:18769676ms step_avg:571.31ms +step:32855/57344 train_time:18770237ms step_avg:571.31ms +grad accum step:8214/14336 +step:32856/57344 train_time:18771542ms step_avg:571.33ms +step:32857/57344 train_time:18771559ms step_avg:571.31ms +step:32858/57344 train_time:18771807ms step_avg:571.30ms +step:32859/57344 train_time:18772361ms step_avg:571.30ms +grad accum step:8215/14336 +step:32860/57344 train_time:18773668ms step_avg:571.32ms +step:32861/57344 train_time:18773686ms step_avg:571.31ms +step:32862/57344 train_time:18773934ms step_avg:571.30ms +step:32863/57344 train_time:18774504ms step_avg:571.30ms +grad accum step:8216/14336 +step:32864/57344 train_time:18775854ms step_avg:571.32ms +step:32865/57344 train_time:18775870ms step_avg:571.30ms +step:32866/57344 train_time:18776122ms step_avg:571.29ms +step:32867/57344 train_time:18776687ms step_avg:571.29ms +grad accum step:8217/14336 +step:32868/57344 train_time:18777972ms step_avg:571.31ms +step:32869/57344 train_time:18777986ms step_avg:571.30ms +step:32870/57344 train_time:18778234ms step_avg:571.29ms +step:32871/57344 train_time:18778794ms step_avg:571.29ms +grad accum step:8218/14336 +step:32872/57344 train_time:18780138ms step_avg:571.31ms +step:32873/57344 train_time:18780153ms step_avg:571.29ms +step:32874/57344 train_time:18780403ms step_avg:571.28ms +step:32875/57344 train_time:18780960ms step_avg:571.28ms +grad accum step:8219/14336 +step:32876/57344 train_time:18782284ms step_avg:571.31ms +step:32877/57344 train_time:18782302ms step_avg:571.29ms +step:32878/57344 train_time:18782539ms step_avg:571.28ms +step:32879/57344 train_time:18783065ms step_avg:571.28ms +grad accum step:8220/14336 +step:32880/57344 train_time:18784380ms step_avg:571.30ms +step:32881/57344 train_time:18784397ms step_avg:571.28ms +step:32882/57344 train_time:18784642ms step_avg:571.27ms +step:32883/57344 train_time:18785177ms step_avg:571.27ms +grad accum step:8221/14336 +step:32884/57344 train_time:18786506ms step_avg:571.30ms +step:32885/57344 train_time:18786526ms step_avg:571.28ms +step:32886/57344 train_time:18786769ms step_avg:571.27ms +step:32887/57344 train_time:18787322ms step_avg:571.27ms +grad accum step:8222/14336 +step:32888/57344 train_time:18788605ms step_avg:571.29ms +step:32889/57344 train_time:18788621ms step_avg:571.27ms +step:32890/57344 train_time:18788871ms step_avg:571.26ms +step:32891/57344 train_time:18789441ms step_avg:571.26ms +grad accum step:8223/14336 +step:32892/57344 train_time:18790774ms step_avg:571.29ms +step:32893/57344 train_time:18790789ms step_avg:571.27ms +step:32894/57344 train_time:18791037ms step_avg:571.26ms +step:32895/57344 train_time:18791583ms step_avg:571.26ms +grad accum step:8224/14336 +step:32896/57344 train_time:18792938ms step_avg:571.28ms +step:32896/57344 val_loss:6.337848 train_time:18792940ms step_avg:571.28ms +step:32897/57344 train_time:18792952ms step_avg:571.27ms +step:32898/57344 train_time:18793175ms step_avg:571.26ms +step:32899/57344 train_time:18793723ms step_avg:571.26ms +grad accum step:8225/14336 +step:32900/57344 train_time:18795009ms step_avg:571.28ms +step:32901/57344 train_time:18795032ms step_avg:571.26ms +step:32902/57344 train_time:18795273ms step_avg:571.25ms +step:32903/57344 train_time:18795839ms step_avg:571.25ms +grad accum step:8226/14336 +step:32904/57344 train_time:18797154ms step_avg:571.27ms +step:32905/57344 train_time:18797212ms step_avg:571.26ms +step:32906/57344 train_time:18797430ms step_avg:571.25ms +step:32907/57344 train_time:18797980ms step_avg:571.25ms +grad accum step:8227/14336 +step:32908/57344 train_time:18799302ms step_avg:571.27ms +step:32909/57344 train_time:18799319ms step_avg:571.25ms +step:32910/57344 train_time:18799567ms step_avg:571.24ms +step:32911/57344 train_time:18800120ms step_avg:571.24ms +grad accum step:8228/14336 +step:32912/57344 train_time:18801437ms step_avg:571.26ms +step:32913/57344 train_time:18801457ms step_avg:571.25ms +step:32914/57344 train_time:18801697ms step_avg:571.24ms +step:32915/57344 train_time:18802240ms step_avg:571.24ms +grad accum step:8229/14336 +step:32916/57344 train_time:18803569ms step_avg:571.26ms +step:32917/57344 train_time:18803584ms step_avg:571.24ms +step:32918/57344 train_time:18803828ms step_avg:571.23ms +step:32919/57344 train_time:18804367ms step_avg:571.23ms +grad accum step:8230/14336 +step:32920/57344 train_time:18805668ms step_avg:571.25ms +step:32921/57344 train_time:18805684ms step_avg:571.24ms +step:32922/57344 train_time:18805934ms step_avg:571.23ms +step:32923/57344 train_time:18806503ms step_avg:571.23ms +grad accum step:8231/14336 +step:32924/57344 train_time:18807852ms step_avg:571.25ms +step:32925/57344 train_time:18807869ms step_avg:571.23ms +step:32926/57344 train_time:18808130ms step_avg:571.22ms +step:32927/57344 train_time:18808709ms step_avg:571.22ms +grad accum step:8232/14336 +step:32928/57344 train_time:18810011ms step_avg:571.25ms +step:32929/57344 train_time:18810028ms step_avg:571.23ms +step:32930/57344 train_time:18810276ms step_avg:571.22ms +step:32931/57344 train_time:18810829ms step_avg:571.22ms +grad accum step:8233/14336 +step:32932/57344 train_time:18812104ms step_avg:571.24ms +step:32933/57344 train_time:18812121ms step_avg:571.22ms +step:32934/57344 train_time:18812367ms step_avg:571.21ms +step:32935/57344 train_time:18812915ms step_avg:571.21ms +grad accum step:8234/14336 +step:32936/57344 train_time:18814206ms step_avg:571.24ms +step:32937/57344 train_time:18814223ms step_avg:571.22ms +step:32938/57344 train_time:18814474ms step_avg:571.21ms +step:32939/57344 train_time:18815037ms step_avg:571.21ms +grad accum step:8235/14336 +step:32940/57344 train_time:18816372ms step_avg:571.23ms +step:32941/57344 train_time:18816387ms step_avg:571.21ms +step:32942/57344 train_time:18816646ms step_avg:571.21ms +step:32943/57344 train_time:18817226ms step_avg:571.21ms +grad accum step:8236/14336 +step:32944/57344 train_time:18818563ms step_avg:571.23ms +step:32945/57344 train_time:18818580ms step_avg:571.21ms +step:32946/57344 train_time:18818820ms step_avg:571.20ms +step:32947/57344 train_time:18819360ms step_avg:571.20ms +grad accum step:8237/14336 +step:32948/57344 train_time:18820719ms step_avg:571.22ms +step:32949/57344 train_time:18820736ms step_avg:571.21ms +step:32950/57344 train_time:18820985ms step_avg:571.20ms +step:32951/57344 train_time:18821539ms step_avg:571.20ms +grad accum step:8238/14336 +step:32952/57344 train_time:18822864ms step_avg:571.22ms +step:32953/57344 train_time:18822881ms step_avg:571.20ms +step:32954/57344 train_time:18823131ms step_avg:571.19ms +step:32955/57344 train_time:18823682ms step_avg:571.19ms +grad accum step:8239/14336 +step:32956/57344 train_time:18824975ms step_avg:571.22ms +step:32957/57344 train_time:18824992ms step_avg:571.20ms +step:32958/57344 train_time:18825237ms step_avg:571.19ms +step:32959/57344 train_time:18825787ms step_avg:571.19ms +grad accum step:8240/14336 +step:32960/57344 train_time:18827088ms step_avg:571.21ms +step:32960/57344 val_loss:6.347533 train_time:18827088ms step_avg:571.21ms +step:32961/57344 train_time:18828268ms step_avg:571.23ms +step:32962/57344 train_time:18828302ms step_avg:571.21ms +step:32963/57344 train_time:18828760ms step_avg:571.21ms +grad accum step:8241/14336 +step:32964/57344 train_time:18830711ms step_avg:571.25ms +step:32965/57344 train_time:18830723ms step_avg:571.23ms +step:32966/57344 train_time:18830944ms step_avg:571.22ms +step:32967/57344 train_time:18831514ms step_avg:571.22ms +grad accum step:8242/14336 +step:32968/57344 train_time:18832856ms step_avg:571.25ms +step:32969/57344 train_time:18832873ms step_avg:571.23ms +step:32970/57344 train_time:18833118ms step_avg:571.22ms +step:32971/57344 train_time:18833670ms step_avg:571.22ms +grad accum step:8243/14336 +step:32972/57344 train_time:18834992ms step_avg:571.24ms +step:32973/57344 train_time:18835008ms step_avg:571.23ms +step:32974/57344 train_time:18835256ms step_avg:571.22ms +step:32975/57344 train_time:18835806ms step_avg:571.21ms +grad accum step:8244/14336 +step:32976/57344 train_time:18837095ms step_avg:571.24ms +step:32977/57344 train_time:18837112ms step_avg:571.22ms +step:32978/57344 train_time:18837360ms step_avg:571.21ms +step:32979/57344 train_time:18837903ms step_avg:571.21ms +grad accum step:8245/14336 +step:32980/57344 train_time:18839225ms step_avg:571.23ms +step:32981/57344 train_time:18839242ms step_avg:571.21ms +step:32982/57344 train_time:18839499ms step_avg:571.21ms +step:32983/57344 train_time:18840078ms step_avg:571.21ms +grad accum step:8246/14336 +step:32984/57344 train_time:18841399ms step_avg:571.23ms +step:32985/57344 train_time:18841414ms step_avg:571.21ms +step:32986/57344 train_time:18841661ms step_avg:571.20ms +step:32987/57344 train_time:18842203ms step_avg:571.20ms +grad accum step:8247/14336 +step:32988/57344 train_time:18843495ms step_avg:571.22ms +step:32989/57344 train_time:18843512ms step_avg:571.21ms +step:32990/57344 train_time:18843761ms step_avg:571.20ms +step:32991/57344 train_time:18844307ms step_avg:571.20ms +grad accum step:8248/14336 +step:32992/57344 train_time:18845620ms step_avg:571.22ms +step:32993/57344 train_time:18845637ms step_avg:571.20ms +step:32994/57344 train_time:18845886ms step_avg:571.19ms +step:32995/57344 train_time:18846434ms step_avg:571.19ms +grad accum step:8249/14336 +step:32996/57344 train_time:18847715ms step_avg:571.21ms +step:32997/57344 train_time:18847732ms step_avg:571.20ms +step:32998/57344 train_time:18847973ms step_avg:571.19ms +step:32999/57344 train_time:18848515ms step_avg:571.18ms +grad accum step:8250/14336 +step:33000/57344 train_time:18849800ms step_avg:571.21ms +step:33001/57344 train_time:18849816ms step_avg:571.19ms +step:33002/57344 train_time:18850064ms step_avg:571.18ms +step:33003/57344 train_time:18850608ms step_avg:571.18ms +grad accum step:8251/14336 +step:33004/57344 train_time:18851934ms step_avg:571.20ms +step:33005/57344 train_time:18851950ms step_avg:571.18ms +step:33006/57344 train_time:18852198ms step_avg:571.17ms +step:33007/57344 train_time:18852740ms step_avg:571.17ms +grad accum step:8252/14336 +step:33008/57344 train_time:18854059ms step_avg:571.20ms +step:33009/57344 train_time:18854076ms step_avg:571.18ms +step:33010/57344 train_time:18854324ms step_avg:571.17ms +step:33011/57344 train_time:18854889ms step_avg:571.17ms +grad accum step:8253/14336 +step:33012/57344 train_time:18856236ms step_avg:571.19ms +step:33013/57344 train_time:18856253ms step_avg:571.18ms +step:33014/57344 train_time:18856498ms step_avg:571.17ms +step:33015/57344 train_time:18857046ms step_avg:571.17ms +grad accum step:8254/14336 +step:33016/57344 train_time:18858376ms step_avg:571.19ms +step:33017/57344 train_time:18858392ms step_avg:571.17ms +step:33018/57344 train_time:18858640ms step_avg:571.16ms +step:33019/57344 train_time:18859195ms step_avg:571.16ms +grad accum step:8255/14336 +step:33020/57344 train_time:18860515ms step_avg:571.18ms +step:33021/57344 train_time:18860535ms step_avg:571.17ms +step:33022/57344 train_time:18860774ms step_avg:571.16ms +step:33023/57344 train_time:18861320ms step_avg:571.16ms +grad accum step:8256/14336 +step:33024/57344 train_time:18862601ms step_avg:571.18ms +step:33024/57344 val_loss:6.363330 train_time:18862601ms step_avg:571.18ms +step:33025/57344 train_time:18862613ms step_avg:571.16ms +step:33026/57344 train_time:18862837ms step_avg:571.15ms +step:33027/57344 train_time:18863393ms step_avg:571.15ms +grad accum step:8257/14336 +step:33028/57344 train_time:18864724ms step_avg:571.17ms +step:33029/57344 train_time:18864742ms step_avg:571.16ms +step:33030/57344 train_time:18864984ms step_avg:571.15ms +step:33031/57344 train_time:18865540ms step_avg:571.15ms +grad accum step:8258/14336 +step:33032/57344 train_time:18866815ms step_avg:571.17ms +step:33033/57344 train_time:18866832ms step_avg:571.15ms +step:33034/57344 train_time:18867076ms step_avg:571.14ms +step:33035/57344 train_time:18867621ms step_avg:571.14ms +grad accum step:8259/14336 +step:33036/57344 train_time:18868925ms step_avg:571.16ms +step:33037/57344 train_time:18868940ms step_avg:571.15ms +step:33038/57344 train_time:18869182ms step_avg:571.14ms +step:33039/57344 train_time:18869721ms step_avg:571.13ms +grad accum step:8260/14336 +step:33040/57344 train_time:18871025ms step_avg:571.16ms +step:33041/57344 train_time:18871041ms step_avg:571.14ms +step:33042/57344 train_time:18871285ms step_avg:571.13ms +step:33043/57344 train_time:18871830ms step_avg:571.13ms +grad accum step:8261/14336 +step:33044/57344 train_time:18873106ms step_avg:571.15ms +step:33045/57344 train_time:18873123ms step_avg:571.13ms +step:33046/57344 train_time:18873381ms step_avg:571.12ms +step:33047/57344 train_time:18873955ms step_avg:571.12ms +grad accum step:8262/14336 +step:33048/57344 train_time:18875280ms step_avg:571.15ms +step:33049/57344 train_time:18875295ms step_avg:571.13ms +step:33050/57344 train_time:18875540ms step_avg:571.12ms +step:33051/57344 train_time:18876087ms step_avg:571.12ms +grad accum step:8263/14336 +step:33052/57344 train_time:18877445ms step_avg:571.14ms +step:33053/57344 train_time:18877460ms step_avg:571.13ms +step:33054/57344 train_time:18877705ms step_avg:571.12ms +step:33055/57344 train_time:18878258ms step_avg:571.12ms +grad accum step:8264/14336 +step:33056/57344 train_time:18879559ms step_avg:571.14ms +step:33057/57344 train_time:18879576ms step_avg:571.12ms +step:33058/57344 train_time:18879823ms step_avg:571.11ms +step:33059/57344 train_time:18880365ms step_avg:571.11ms +grad accum step:8265/14336 +step:33060/57344 train_time:18881667ms step_avg:571.13ms +step:33061/57344 train_time:18881684ms step_avg:571.12ms +step:33062/57344 train_time:18881930ms step_avg:571.11ms +step:33063/57344 train_time:18882479ms step_avg:571.11ms +grad accum step:8266/14336 +step:33064/57344 train_time:18883806ms step_avg:571.13ms +step:33065/57344 train_time:18883821ms step_avg:571.11ms +step:33066/57344 train_time:18884063ms step_avg:571.10ms +step:33067/57344 train_time:18884611ms step_avg:571.10ms +grad accum step:8267/14336 +step:33068/57344 train_time:18885932ms step_avg:571.12ms +step:33069/57344 train_time:18885947ms step_avg:571.11ms +step:33070/57344 train_time:18886197ms step_avg:571.10ms +step:33071/57344 train_time:18886757ms step_avg:571.10ms +grad accum step:8268/14336 +step:33072/57344 train_time:18888060ms step_avg:571.12ms +step:33073/57344 train_time:18888077ms step_avg:571.10ms +step:33074/57344 train_time:18888325ms step_avg:571.09ms +step:33075/57344 train_time:18888868ms step_avg:571.09ms +grad accum step:8269/14336 +step:33076/57344 train_time:18890178ms step_avg:571.11ms +step:33077/57344 train_time:18890195ms step_avg:571.10ms +step:33078/57344 train_time:18890445ms step_avg:571.09ms +step:33079/57344 train_time:18890998ms step_avg:571.09ms +grad accum step:8270/14336 +step:33080/57344 train_time:18892304ms step_avg:571.11ms +step:33081/57344 train_time:18892321ms step_avg:571.09ms +step:33082/57344 train_time:18892563ms step_avg:571.08ms +step:33083/57344 train_time:18893111ms step_avg:571.08ms +grad accum step:8271/14336 +step:33084/57344 train_time:18894403ms step_avg:571.10ms +step:33085/57344 train_time:18894420ms step_avg:571.09ms +step:33086/57344 train_time:18894670ms step_avg:571.08ms +step:33087/57344 train_time:18895227ms step_avg:571.08ms +grad accum step:8272/14336 +step:33088/57344 train_time:18896556ms step_avg:571.10ms +step:33088/57344 val_loss:6.366555 train_time:18896556ms step_avg:571.10ms +step:33089/57344 train_time:18896568ms step_avg:571.08ms +step:33090/57344 train_time:18896791ms step_avg:571.07ms +step:33091/57344 train_time:18897336ms step_avg:571.07ms +grad accum step:8273/14336 +step:33092/57344 train_time:18898656ms step_avg:571.09ms +step:33093/57344 train_time:18898672ms step_avg:571.08ms +step:33094/57344 train_time:18898919ms step_avg:571.07ms +step:33095/57344 train_time:18899461ms step_avg:571.07ms +grad accum step:8274/14336 +step:33096/57344 train_time:18900733ms step_avg:571.09ms +step:33097/57344 train_time:18900750ms step_avg:571.07ms +step:33098/57344 train_time:18900996ms step_avg:571.06ms +step:33099/57344 train_time:18901538ms step_avg:571.06ms +grad accum step:8275/14336 +step:33100/57344 train_time:18902833ms step_avg:571.08ms +step:33101/57344 train_time:18902850ms step_avg:571.07ms +step:33102/57344 train_time:18903095ms step_avg:571.06ms +step:33103/57344 train_time:18903642ms step_avg:571.06ms +grad accum step:8276/14336 +step:33104/57344 train_time:18904950ms step_avg:571.08ms +step:33105/57344 train_time:18904966ms step_avg:571.06ms +step:33106/57344 train_time:18905216ms step_avg:571.05ms +step:33107/57344 train_time:18905763ms step_avg:571.05ms +grad accum step:8277/14336 +step:33108/57344 train_time:18907058ms step_avg:571.07ms +step:33109/57344 train_time:18907076ms step_avg:571.06ms +step:33110/57344 train_time:18907319ms step_avg:571.05ms +step:33111/57344 train_time:18907868ms step_avg:571.04ms +grad accum step:8278/14336 +step:33112/57344 train_time:18909152ms step_avg:571.07ms +step:33113/57344 train_time:18909168ms step_avg:571.05ms +step:33114/57344 train_time:18909416ms step_avg:571.04ms +step:33115/57344 train_time:18909968ms step_avg:571.04ms +grad accum step:8279/14336 +step:33116/57344 train_time:18911289ms step_avg:571.06ms +step:33117/57344 train_time:18911305ms step_avg:571.05ms +step:33118/57344 train_time:18911552ms step_avg:571.04ms +step:33119/57344 train_time:18912114ms step_avg:571.04ms +grad accum step:8280/14336 +step:33120/57344 train_time:18913428ms step_avg:571.06ms +step:33121/57344 train_time:18913446ms step_avg:571.04ms +step:33122/57344 train_time:18913690ms step_avg:571.03ms +step:33123/57344 train_time:18914247ms step_avg:571.03ms +grad accum step:8281/14336 +step:33124/57344 train_time:18915593ms step_avg:571.05ms +step:33125/57344 train_time:18915608ms step_avg:571.04ms +step:33126/57344 train_time:18915852ms step_avg:571.03ms +step:33127/57344 train_time:18916399ms step_avg:571.03ms +grad accum step:8282/14336 +step:33128/57344 train_time:18917694ms step_avg:571.05ms +step:33129/57344 train_time:18917709ms step_avg:571.03ms +step:33130/57344 train_time:18917967ms step_avg:571.02ms +step:33131/57344 train_time:18918544ms step_avg:571.02ms +grad accum step:8283/14336 +step:33132/57344 train_time:18919862ms step_avg:571.04ms +step:33133/57344 train_time:18919881ms step_avg:571.03ms +step:33134/57344 train_time:18920116ms step_avg:571.02ms +step:33135/57344 train_time:18920662ms step_avg:571.02ms +grad accum step:8284/14336 +step:33136/57344 train_time:18922001ms step_avg:571.04ms +step:33137/57344 train_time:18922017ms step_avg:571.02ms +step:33138/57344 train_time:18922267ms step_avg:571.01ms +step:33139/57344 train_time:18922824ms step_avg:571.01ms +grad accum step:8285/14336 +step:33140/57344 train_time:18924156ms step_avg:571.04ms +step:33141/57344 train_time:18924173ms step_avg:571.02ms +step:33142/57344 train_time:18924416ms step_avg:571.01ms +step:33143/57344 train_time:18924952ms step_avg:571.01ms +grad accum step:8286/14336 +step:33144/57344 train_time:18926269ms step_avg:571.03ms +step:33145/57344 train_time:18926285ms step_avg:571.01ms +step:33146/57344 train_time:18926528ms step_avg:571.00ms +step:33147/57344 train_time:18927078ms step_avg:571.00ms +grad accum step:8287/14336 +step:33148/57344 train_time:18928397ms step_avg:571.03ms +step:33149/57344 train_time:18928413ms step_avg:571.01ms +step:33150/57344 train_time:18928656ms step_avg:571.00ms +step:33151/57344 train_time:18929197ms step_avg:571.00ms +grad accum step:8288/14336 +step:33152/57344 train_time:18930495ms step_avg:571.02ms +step:33152/57344 val_loss:6.381602 train_time:18930496ms step_avg:571.02ms +step:33153/57344 train_time:18930508ms step_avg:571.00ms +step:33154/57344 train_time:18930724ms step_avg:570.99ms +step:33155/57344 train_time:18931269ms step_avg:570.99ms +grad accum step:8289/14336 +step:33156/57344 train_time:18932597ms step_avg:571.02ms +step:33157/57344 train_time:18932613ms step_avg:571.00ms +step:33158/57344 train_time:18932851ms step_avg:570.99ms +step:33159/57344 train_time:18933399ms step_avg:570.99ms +grad accum step:8290/14336 +step:33160/57344 train_time:18934724ms step_avg:571.01ms +step:33161/57344 train_time:18934740ms step_avg:570.99ms +step:33162/57344 train_time:18934985ms step_avg:570.98ms +step:33163/57344 train_time:18935532ms step_avg:570.98ms +grad accum step:8291/14336 +step:33164/57344 train_time:18936846ms step_avg:571.01ms +step:33165/57344 train_time:18936862ms step_avg:570.99ms +step:33166/57344 train_time:18937104ms step_avg:570.98ms +step:33167/57344 train_time:18937640ms step_avg:570.98ms +grad accum step:8292/14336 +step:33168/57344 train_time:18938940ms step_avg:571.00ms +step:33169/57344 train_time:18938957ms step_avg:570.98ms +step:33170/57344 train_time:18939203ms step_avg:570.97ms +step:33171/57344 train_time:18939763ms step_avg:570.97ms +grad accum step:8293/14336 +step:33172/57344 train_time:18941110ms step_avg:571.00ms +step:33173/57344 train_time:18941129ms step_avg:570.98ms +step:33174/57344 train_time:18941363ms step_avg:570.97ms +step:33175/57344 train_time:18941901ms step_avg:570.97ms +grad accum step:8294/14336 +step:33176/57344 train_time:18943205ms step_avg:570.99ms +step:33177/57344 train_time:18943221ms step_avg:570.97ms +step:33178/57344 train_time:18943467ms step_avg:570.96ms +step:33179/57344 train_time:18944011ms step_avg:570.96ms +grad accum step:8295/14336 +step:33180/57344 train_time:18945333ms step_avg:570.99ms +step:33181/57344 train_time:18945349ms step_avg:570.97ms +step:33182/57344 train_time:18945596ms step_avg:570.96ms +step:33183/57344 train_time:18946145ms step_avg:570.96ms +grad accum step:8296/14336 +step:33184/57344 train_time:18947461ms step_avg:570.98ms +step:33185/57344 train_time:18947476ms step_avg:570.97ms +step:33186/57344 train_time:18947721ms step_avg:570.96ms +step:33187/57344 train_time:18948265ms step_avg:570.95ms +grad accum step:8297/14336 +step:33188/57344 train_time:18949571ms step_avg:570.98ms +step:33189/57344 train_time:18949586ms step_avg:570.96ms +step:33190/57344 train_time:18949832ms step_avg:570.95ms +step:33191/57344 train_time:18950376ms step_avg:570.95ms +grad accum step:8298/14336 +step:33192/57344 train_time:18951709ms step_avg:570.97ms +step:33193/57344 train_time:18951723ms step_avg:570.96ms +step:33194/57344 train_time:18951971ms step_avg:570.95ms +step:33195/57344 train_time:18952515ms step_avg:570.94ms +grad accum step:8299/14336 +step:33196/57344 train_time:18953814ms step_avg:570.97ms +step:33197/57344 train_time:18953829ms step_avg:570.95ms +step:33198/57344 train_time:18954078ms step_avg:570.94ms +step:33199/57344 train_time:18954634ms step_avg:570.94ms +grad accum step:8300/14336 +step:33200/57344 train_time:18955931ms step_avg:570.96ms +step:33201/57344 train_time:18955948ms step_avg:570.95ms +step:33202/57344 train_time:18956192ms step_avg:570.94ms +step:33203/57344 train_time:18956739ms step_avg:570.93ms +grad accum step:8301/14336 +step:33204/57344 train_time:18958037ms step_avg:570.96ms +step:33205/57344 train_time:18958055ms step_avg:570.94ms +step:33206/57344 train_time:18958297ms step_avg:570.93ms +step:33207/57344 train_time:18958843ms step_avg:570.93ms +grad accum step:8302/14336 +step:33208/57344 train_time:18960118ms step_avg:570.95ms +step:33209/57344 train_time:18960135ms step_avg:570.93ms +step:33210/57344 train_time:18960388ms step_avg:570.92ms +step:33211/57344 train_time:18960946ms step_avg:570.92ms +grad accum step:8303/14336 +step:33212/57344 train_time:18962245ms step_avg:570.95ms +step:33213/57344 train_time:18962261ms step_avg:570.93ms +step:33214/57344 train_time:18962509ms step_avg:570.92ms +step:33215/57344 train_time:18963054ms step_avg:570.92ms +grad accum step:8304/14336 +step:33216/57344 train_time:18964360ms step_avg:570.94ms +step:33216/57344 val_loss:6.383929 train_time:18964364ms step_avg:570.94ms +step:33217/57344 train_time:18964376ms step_avg:570.92ms +step:33218/57344 train_time:18964607ms step_avg:570.91ms +step:33219/57344 train_time:18965158ms step_avg:570.91ms +grad accum step:8305/14336 +step:33220/57344 train_time:18966475ms step_avg:570.94ms +step:33221/57344 train_time:18966492ms step_avg:570.92ms +step:33222/57344 train_time:18966740ms step_avg:570.91ms +step:33223/57344 train_time:18967292ms step_avg:570.91ms +grad accum step:8306/14336 +step:33224/57344 train_time:18968593ms step_avg:570.93ms +step:33225/57344 train_time:18968609ms step_avg:570.91ms +step:33226/57344 train_time:18968861ms step_avg:570.90ms +step:33227/57344 train_time:18969418ms step_avg:570.90ms +grad accum step:8307/14336 +step:33228/57344 train_time:18970715ms step_avg:570.93ms +step:33229/57344 train_time:18970732ms step_avg:570.91ms +step:33230/57344 train_time:18970978ms step_avg:570.90ms +step:33231/57344 train_time:18971529ms step_avg:570.90ms +grad accum step:8308/14336 +step:33232/57344 train_time:18972810ms step_avg:570.92ms +step:33233/57344 train_time:18972827ms step_avg:570.90ms +step:33234/57344 train_time:18973079ms step_avg:570.89ms +step:33235/57344 train_time:18973636ms step_avg:570.89ms +grad accum step:8309/14336 +step:33236/57344 train_time:18974918ms step_avg:570.91ms +step:33237/57344 train_time:18974935ms step_avg:570.90ms +step:33238/57344 train_time:18975181ms step_avg:570.89ms +step:33239/57344 train_time:18975728ms step_avg:570.89ms +grad accum step:8310/14336 +step:33240/57344 train_time:18977026ms step_avg:570.91ms +step:33241/57344 train_time:18977043ms step_avg:570.89ms +step:33242/57344 train_time:18977289ms step_avg:570.88ms +step:33243/57344 train_time:18977836ms step_avg:570.88ms +grad accum step:8311/14336 +step:33244/57344 train_time:18979147ms step_avg:570.90ms +step:33245/57344 train_time:18979164ms step_avg:570.89ms +step:33246/57344 train_time:18979424ms step_avg:570.88ms +step:33247/57344 train_time:18980002ms step_avg:570.88ms +grad accum step:8312/14336 +step:33248/57344 train_time:18981328ms step_avg:570.90ms +step:33249/57344 train_time:18981345ms step_avg:570.88ms +step:33250/57344 train_time:18981601ms step_avg:570.88ms +step:33251/57344 train_time:18982161ms step_avg:570.87ms +grad accum step:8313/14336 +step:33252/57344 train_time:18983435ms step_avg:570.90ms +step:33253/57344 train_time:18983453ms step_avg:570.88ms +step:33254/57344 train_time:18983704ms step_avg:570.87ms +step:33255/57344 train_time:18984264ms step_avg:570.87ms +grad accum step:8314/14336 +step:33256/57344 train_time:18985585ms step_avg:570.89ms +step:33257/57344 train_time:18985602ms step_avg:570.88ms +step:33258/57344 train_time:18985846ms step_avg:570.87ms +step:33259/57344 train_time:18986388ms step_avg:570.86ms +grad accum step:8315/14336 +step:33260/57344 train_time:18987669ms step_avg:570.89ms +step:33261/57344 train_time:18987686ms step_avg:570.87ms +step:33262/57344 train_time:18987933ms step_avg:570.86ms +step:33263/57344 train_time:18988478ms step_avg:570.86ms +grad accum step:8316/14336 +step:33264/57344 train_time:18989800ms step_avg:570.88ms +step:33265/57344 train_time:18989817ms step_avg:570.86ms +step:33266/57344 train_time:18990063ms step_avg:570.86ms +step:33267/57344 train_time:18990602ms step_avg:570.85ms +grad accum step:8317/14336 +step:33268/57344 train_time:18991911ms step_avg:570.88ms +step:33269/57344 train_time:18991928ms step_avg:570.86ms +step:33270/57344 train_time:18992178ms step_avg:570.85ms +step:33271/57344 train_time:18992724ms step_avg:570.85ms +grad accum step:8318/14336 +step:33272/57344 train_time:18994021ms step_avg:570.87ms +step:33273/57344 train_time:18994038ms step_avg:570.85ms +step:33274/57344 train_time:18994290ms step_avg:570.84ms +step:33275/57344 train_time:18994848ms step_avg:570.84ms +grad accum step:8319/14336 +step:33276/57344 train_time:18996175ms step_avg:570.87ms +step:33277/57344 train_time:18996192ms step_avg:570.85ms +step:33278/57344 train_time:18996441ms step_avg:570.84ms +step:33279/57344 train_time:18996996ms step_avg:570.84ms +grad accum step:8320/14336 +step:33280/57344 train_time:19074756ms step_avg:573.16ms +step:33280/57344 val_loss:6.393970 train_time:19074756ms step_avg:573.16ms +step:33281/57344 train_time:19074768ms step_avg:573.14ms +step:33282/57344 train_time:19075025ms step_avg:573.13ms +step:33283/57344 train_time:19075579ms step_avg:573.13ms +grad accum step:8321/14336 +step:33284/57344 train_time:19076897ms step_avg:573.16ms +step:33285/57344 train_time:19076914ms step_avg:573.14ms +step:33286/57344 train_time:19077157ms step_avg:573.13ms +step:33287/57344 train_time:19077697ms step_avg:573.13ms +grad accum step:8322/14336 +step:33288/57344 train_time:19079009ms step_avg:573.15ms +step:33289/57344 train_time:19079026ms step_avg:573.13ms +step:33290/57344 train_time:19079267ms step_avg:573.12ms +step:33291/57344 train_time:19079799ms step_avg:573.12ms +grad accum step:8323/14336 +step:33292/57344 train_time:19081097ms step_avg:573.14ms +step:33293/57344 train_time:19081113ms step_avg:573.13ms +step:33294/57344 train_time:19081360ms step_avg:573.12ms +step:33295/57344 train_time:19081905ms step_avg:573.12ms +grad accum step:8324/14336 +step:33296/57344 train_time:19083230ms step_avg:573.14ms +step:33297/57344 train_time:19083247ms step_avg:573.12ms +step:33298/57344 train_time:19083494ms step_avg:573.11ms +step:33299/57344 train_time:19084048ms step_avg:573.11ms +grad accum step:8325/14336 +step:33300/57344 train_time:19085386ms step_avg:573.13ms +step:33301/57344 train_time:19085403ms step_avg:573.12ms +step:33302/57344 train_time:19085649ms step_avg:573.11ms +step:33303/57344 train_time:19086187ms step_avg:573.11ms +grad accum step:8326/14336 +step:33304/57344 train_time:19087462ms step_avg:573.13ms +step:33305/57344 train_time:19087479ms step_avg:573.11ms +step:33306/57344 train_time:19087722ms step_avg:573.10ms +step:33307/57344 train_time:19088252ms step_avg:573.10ms +grad accum step:8327/14336 +step:33308/57344 train_time:19089540ms step_avg:573.12ms +step:33309/57344 train_time:19089556ms step_avg:573.11ms +step:33310/57344 train_time:19089800ms step_avg:573.10ms +step:33311/57344 train_time:19090343ms step_avg:573.09ms +grad accum step:8328/14336 +step:33312/57344 train_time:19091654ms step_avg:573.12ms +step:33313/57344 train_time:19091671ms step_avg:573.10ms +step:33314/57344 train_time:19091917ms step_avg:573.09ms +step:33315/57344 train_time:19092468ms step_avg:573.09ms +grad accum step:8329/14336 +step:33316/57344 train_time:19093757ms step_avg:573.11ms +step:33317/57344 train_time:19093774ms step_avg:573.09ms +step:33318/57344 train_time:19094018ms step_avg:573.08ms +step:33319/57344 train_time:19108904ms step_avg:573.51ms +grad accum step:8330/14336 +step:33320/57344 train_time:19109993ms step_avg:573.53ms +step:33321/57344 train_time:19115283ms step_avg:573.67ms +step:33322/57344 train_time:19115559ms step_avg:573.66ms +step:33323/57344 train_time:19116109ms step_avg:573.66ms +grad accum step:8331/14336 +step:33324/57344 train_time:19117414ms step_avg:573.68ms +step:33325/57344 train_time:19117431ms step_avg:573.67ms +step:33326/57344 train_time:19117671ms step_avg:573.66ms +step:33327/57344 train_time:19118214ms step_avg:573.66ms +grad accum step:8332/14336 +step:33328/57344 train_time:19119497ms step_avg:573.68ms +step:33329/57344 train_time:19119514ms step_avg:573.66ms +step:33330/57344 train_time:19119759ms step_avg:573.65ms +step:33331/57344 train_time:19120312ms step_avg:573.65ms +grad accum step:8333/14336 +step:33332/57344 train_time:19121620ms step_avg:573.67ms +step:33333/57344 train_time:19121636ms step_avg:573.65ms +step:33334/57344 train_time:19121882ms step_avg:573.64ms +step:33335/57344 train_time:19122422ms step_avg:573.64ms +grad accum step:8334/14336 +step:33336/57344 train_time:19123697ms step_avg:573.67ms +step:33337/57344 train_time:19123714ms step_avg:573.65ms +step:33338/57344 train_time:19123962ms step_avg:573.64ms +step:33339/57344 train_time:19124506ms step_avg:573.64ms +grad accum step:8335/14336 +step:33340/57344 train_time:19125863ms step_avg:573.66ms +step:33341/57344 train_time:19125880ms step_avg:573.64ms +step:33342/57344 train_time:19126127ms step_avg:573.63ms +step:33343/57344 train_time:19126676ms step_avg:573.63ms +grad accum step:8336/14336 +step:33344/57344 train_time:19127970ms step_avg:573.66ms +step:33344/57344 val_loss:6.404038 train_time:19127971ms step_avg:573.66ms +step:33345/57344 train_time:19127983ms step_avg:573.64ms +step:33346/57344 train_time:19128206ms step_avg:573.63ms +step:33347/57344 train_time:19128750ms step_avg:573.63ms +grad accum step:8337/14336 +step:33348/57344 train_time:19130062ms step_avg:573.65ms +step:33349/57344 train_time:19130079ms step_avg:573.63ms +step:33350/57344 train_time:19130333ms step_avg:573.62ms +step:33351/57344 train_time:19130896ms step_avg:573.62ms +grad accum step:8338/14336 +step:33352/57344 train_time:19132169ms step_avg:573.64ms +step:33353/57344 train_time:19132186ms step_avg:573.63ms +step:33354/57344 train_time:19132431ms step_avg:573.62ms +step:33355/57344 train_time:19132977ms step_avg:573.62ms +grad accum step:8339/14336 +step:33356/57344 train_time:19134278ms step_avg:573.64ms +step:33357/57344 train_time:19134295ms step_avg:573.62ms +step:33358/57344 train_time:19134542ms step_avg:573.61ms +step:33359/57344 train_time:19135088ms step_avg:573.61ms +grad accum step:8340/14336 +step:33360/57344 train_time:19136399ms step_avg:573.63ms +step:33361/57344 train_time:19136416ms step_avg:573.62ms +step:33362/57344 train_time:19136662ms step_avg:573.61ms +step:33363/57344 train_time:19137209ms step_avg:573.61ms +grad accum step:8341/14336 +step:33364/57344 train_time:19138501ms step_avg:573.63ms +step:33365/57344 train_time:19138518ms step_avg:573.61ms +step:33366/57344 train_time:19138763ms step_avg:573.60ms +step:33367/57344 train_time:19139310ms step_avg:573.60ms +grad accum step:8342/14336 +step:33368/57344 train_time:19140597ms step_avg:573.62ms +step:33369/57344 train_time:19140614ms step_avg:573.60ms +step:33370/57344 train_time:19140860ms step_avg:573.59ms +step:33371/57344 train_time:19141403ms step_avg:573.59ms +grad accum step:8343/14336 +step:33372/57344 train_time:19142718ms step_avg:573.62ms +step:33373/57344 train_time:19142735ms step_avg:573.60ms +step:33374/57344 train_time:19142984ms step_avg:573.59ms +step:33375/57344 train_time:19143536ms step_avg:573.59ms +grad accum step:8344/14336 +step:33376/57344 train_time:19144822ms step_avg:573.61ms +step:33377/57344 train_time:19144839ms step_avg:573.59ms +step:33378/57344 train_time:19145089ms step_avg:573.58ms +step:33379/57344 train_time:19145649ms step_avg:573.58ms +grad accum step:8345/14336 +step:33380/57344 train_time:19146972ms step_avg:573.61ms +step:33381/57344 train_time:19146990ms step_avg:573.59ms +step:33382/57344 train_time:19147242ms step_avg:573.58ms +step:33383/57344 train_time:19147798ms step_avg:573.58ms +grad accum step:8346/14336 +step:33384/57344 train_time:19149100ms step_avg:573.60ms +step:33385/57344 train_time:19149117ms step_avg:573.58ms +step:33386/57344 train_time:19149362ms step_avg:573.57ms +step:33387/57344 train_time:19149906ms step_avg:573.57ms +grad accum step:8347/14336 +step:33388/57344 train_time:19151228ms step_avg:573.60ms +step:33389/57344 train_time:19151245ms step_avg:573.58ms +step:33390/57344 train_time:19151493ms step_avg:573.57ms +step:33391/57344 train_time:19152040ms step_avg:573.57ms +grad accum step:8348/14336 +step:33392/57344 train_time:19153339ms step_avg:573.59ms +step:33393/57344 train_time:19153356ms step_avg:573.57ms +step:33394/57344 train_time:19153603ms step_avg:573.56ms +step:33395/57344 train_time:19154148ms step_avg:573.56ms +grad accum step:8349/14336 +step:33396/57344 train_time:19155440ms step_avg:573.58ms +step:33397/57344 train_time:19155457ms step_avg:573.57ms +step:33398/57344 train_time:19155703ms step_avg:573.56ms +step:33399/57344 train_time:19156249ms step_avg:573.56ms +grad accum step:8350/14336 +step:33400/57344 train_time:19157554ms step_avg:573.58ms +step:33401/57344 train_time:19157571ms step_avg:573.56ms +step:33402/57344 train_time:19157820ms step_avg:573.55ms +step:33403/57344 train_time:19158372ms step_avg:573.55ms +grad accum step:8351/14336 +step:33404/57344 train_time:19159668ms step_avg:573.57ms +step:33405/57344 train_time:19159685ms step_avg:573.56ms +step:33406/57344 train_time:19159931ms step_avg:573.55ms +step:33407/57344 train_time:19160482ms step_avg:573.55ms +grad accum step:8352/14336 +step:33408/57344 train_time:19161801ms step_avg:573.57ms +step:33408/57344 val_loss:6.407627 train_time:19161802ms step_avg:573.57ms +step:33409/57344 train_time:19161813ms step_avg:573.55ms +step:33410/57344 train_time:19162036ms step_avg:573.54ms +step:33411/57344 train_time:19162581ms step_avg:573.54ms +grad accum step:8353/14336 +step:33412/57344 train_time:19163881ms step_avg:573.56ms +step:33413/57344 train_time:19163898ms step_avg:573.55ms +step:33414/57344 train_time:19164146ms step_avg:573.54ms +step:33415/57344 train_time:19164708ms step_avg:573.54ms +grad accum step:8354/14336 +step:33416/57344 train_time:19166056ms step_avg:573.56ms +step:33417/57344 train_time:19166073ms step_avg:573.54ms +step:33418/57344 train_time:19166323ms step_avg:573.53ms +step:33419/57344 train_time:19166875ms step_avg:573.53ms +grad accum step:8355/14336 +step:33420/57344 train_time:19168181ms step_avg:573.55ms +step:33421/57344 train_time:19168197ms step_avg:573.54ms +step:33422/57344 train_time:19168444ms step_avg:573.53ms +step:33423/57344 train_time:19168983ms step_avg:573.53ms +grad accum step:8356/14336 +step:33424/57344 train_time:19170280ms step_avg:573.55ms +step:33425/57344 train_time:19170297ms step_avg:573.53ms +step:33426/57344 train_time:19170544ms step_avg:573.52ms +step:33427/57344 train_time:19171095ms step_avg:573.52ms +grad accum step:8357/14336 +step:33428/57344 train_time:19172416ms step_avg:573.54ms +step:33429/57344 train_time:19172433ms step_avg:573.53ms +step:33430/57344 train_time:19172682ms step_avg:573.52ms +step:33431/57344 train_time:19173225ms step_avg:573.52ms +grad accum step:8358/14336 +step:33432/57344 train_time:19174503ms step_avg:573.54ms +step:33433/57344 train_time:19174520ms step_avg:573.52ms +step:33434/57344 train_time:19174763ms step_avg:573.51ms +step:33435/57344 train_time:19175318ms step_avg:573.51ms +grad accum step:8359/14336 +step:33436/57344 train_time:19176762ms step_avg:573.54ms +step:33437/57344 train_time:19176779ms step_avg:573.52ms +step:33438/57344 train_time:19177047ms step_avg:573.51ms +step:33439/57344 train_time:19177639ms step_avg:573.51ms +grad accum step:8360/14336 +step:33440/57344 train_time:19178926ms step_avg:573.53ms +step:33441/57344 train_time:19178943ms step_avg:573.52ms +step:33442/57344 train_time:19179188ms step_avg:573.51ms +step:33443/57344 train_time:19179731ms step_avg:573.51ms +grad accum step:8361/14336 +step:33444/57344 train_time:19181024ms step_avg:573.53ms +step:33445/57344 train_time:19181042ms step_avg:573.51ms +step:33446/57344 train_time:19181292ms step_avg:573.50ms +step:33447/57344 train_time:19181846ms step_avg:573.50ms +grad accum step:8362/14336 +step:33448/57344 train_time:19183134ms step_avg:573.52ms +step:33449/57344 train_time:19183151ms step_avg:573.50ms +step:33450/57344 train_time:19183394ms step_avg:573.49ms +step:33451/57344 train_time:19183937ms step_avg:573.49ms +grad accum step:8363/14336 +step:33452/57344 train_time:19185239ms step_avg:573.52ms +step:33453/57344 train_time:19185288ms step_avg:573.50ms +step:33454/57344 train_time:19185513ms step_avg:573.49ms +step:33455/57344 train_time:19186081ms step_avg:573.49ms +grad accum step:8364/14336 +step:33456/57344 train_time:19187415ms step_avg:573.51ms +step:33457/57344 train_time:19187431ms step_avg:573.50ms +step:33458/57344 train_time:19187680ms step_avg:573.49ms +step:33459/57344 train_time:19188233ms step_avg:573.48ms +grad accum step:8365/14336 +step:33460/57344 train_time:19189538ms step_avg:573.51ms +step:33461/57344 train_time:19189555ms step_avg:573.49ms +step:33462/57344 train_time:19189803ms step_avg:573.48ms +step:33463/57344 train_time:19190354ms step_avg:573.48ms +grad accum step:8366/14336 +step:33464/57344 train_time:19191708ms step_avg:573.50ms +step:33465/57344 train_time:19191724ms step_avg:573.49ms +step:33466/57344 train_time:19191971ms step_avg:573.48ms +step:33467/57344 train_time:19192534ms step_avg:573.48ms +grad accum step:8367/14336 +step:33468/57344 train_time:19193891ms step_avg:573.50ms +step:33469/57344 train_time:19193907ms step_avg:573.48ms +step:33470/57344 train_time:19194154ms step_avg:573.47ms +step:33471/57344 train_time:19194692ms step_avg:573.47ms +grad accum step:8368/14336 +step:33472/57344 train_time:19195995ms step_avg:573.49ms +step:33472/57344 val_loss:6.429492 train_time:19195995ms step_avg:573.49ms +step:33473/57344 train_time:19196007ms step_avg:573.48ms +step:33474/57344 train_time:19196232ms step_avg:573.47ms +step:33475/57344 train_time:19196782ms step_avg:573.47ms +grad accum step:8369/14336 +step:33476/57344 train_time:19198085ms step_avg:573.49ms +step:33477/57344 train_time:19198101ms step_avg:573.47ms +step:33478/57344 train_time:19198354ms step_avg:573.46ms +step:33479/57344 train_time:19198915ms step_avg:573.46ms +grad accum step:8370/14336 +step:33480/57344 train_time:19200196ms step_avg:573.48ms +step:33481/57344 train_time:19200212ms step_avg:573.47ms +step:33482/57344 train_time:19200459ms step_avg:573.46ms +step:33483/57344 train_time:19201005ms step_avg:573.46ms +grad accum step:8371/14336 +step:33484/57344 train_time:19202322ms step_avg:573.48ms +step:33485/57344 train_time:19202337ms step_avg:573.46ms +step:33486/57344 train_time:19202584ms step_avg:573.45ms +step:33487/57344 train_time:19203136ms step_avg:573.45ms +grad accum step:8372/14336 +step:33488/57344 train_time:19204438ms step_avg:573.47ms +step:33489/57344 train_time:19204455ms step_avg:573.46ms +step:33490/57344 train_time:19204706ms step_avg:573.45ms +step:33491/57344 train_time:19205262ms step_avg:573.45ms +grad accum step:8373/14336 +step:33492/57344 train_time:19206560ms step_avg:573.47ms +step:33493/57344 train_time:19206577ms step_avg:573.45ms +step:33494/57344 train_time:19206829ms step_avg:573.44ms +step:33495/57344 train_time:19207382ms step_avg:573.44ms +grad accum step:8374/14336 +step:33496/57344 train_time:19208675ms step_avg:573.46ms +step:33497/57344 train_time:19208691ms step_avg:573.45ms +step:33498/57344 train_time:19208944ms step_avg:573.44ms +step:33499/57344 train_time:19209509ms step_avg:573.44ms +grad accum step:8375/14336 +step:33500/57344 train_time:19210804ms step_avg:573.46ms +step:33501/57344 train_time:19210821ms step_avg:573.44ms +step:33502/57344 train_time:19211067ms step_avg:573.43ms +step:33503/57344 train_time:19211612ms step_avg:573.43ms +grad accum step:8376/14336 +step:33504/57344 train_time:19212911ms step_avg:573.45ms +step:33505/57344 train_time:19212928ms step_avg:573.43ms +step:33506/57344 train_time:19213177ms step_avg:573.42ms +step:33507/57344 train_time:19213726ms step_avg:573.42ms +grad accum step:8377/14336 +step:33508/57344 train_time:19215038ms step_avg:573.45ms +step:33509/57344 train_time:19215055ms step_avg:573.43ms +step:33510/57344 train_time:19215300ms step_avg:573.42ms +step:33511/57344 train_time:19215852ms step_avg:573.42ms +grad accum step:8378/14336 +step:33512/57344 train_time:19217174ms step_avg:573.44ms +step:33513/57344 train_time:19217186ms step_avg:573.42ms +step:33514/57344 train_time:19217432ms step_avg:573.42ms +step:33515/57344 train_time:19217977ms step_avg:573.41ms +grad accum step:8379/14336 +step:33516/57344 train_time:19219274ms step_avg:573.44ms +step:33517/57344 train_time:19219289ms step_avg:573.42ms +step:33518/57344 train_time:19219537ms step_avg:573.41ms +step:33519/57344 train_time:19220094ms step_avg:573.41ms +grad accum step:8380/14336 +step:33520/57344 train_time:19221379ms step_avg:573.43ms +step:33521/57344 train_time:19221395ms step_avg:573.41ms +step:33522/57344 train_time:19221635ms step_avg:573.40ms +step:33523/57344 train_time:19222177ms step_avg:573.40ms +grad accum step:8381/14336 +step:33524/57344 train_time:19223503ms step_avg:573.43ms +step:33525/57344 train_time:19223518ms step_avg:573.41ms +step:33526/57344 train_time:19223770ms step_avg:573.40ms +step:33527/57344 train_time:19224331ms step_avg:573.40ms +grad accum step:8382/14336 +step:33528/57344 train_time:19225641ms step_avg:573.42ms +step:33529/57344 train_time:19225664ms step_avg:573.40ms +step:33530/57344 train_time:19225902ms step_avg:573.39ms +step:33531/57344 train_time:19226453ms step_avg:573.39ms +grad accum step:8383/14336 +step:33532/57344 train_time:19227765ms step_avg:573.42ms +step:33533/57344 train_time:19227782ms step_avg:573.40ms +step:33534/57344 train_time:19228028ms step_avg:573.39ms +step:33535/57344 train_time:19228575ms step_avg:573.39ms +grad accum step:8384/14336 +step:33536/57344 train_time:19229887ms step_avg:573.41ms +step:33536/57344 val_loss:6.421330 train_time:19229887ms step_avg:573.41ms +step:33537/57344 train_time:19229899ms step_avg:573.39ms +step:33538/57344 train_time:19230122ms step_avg:573.38ms +step:33539/57344 train_time:19230676ms step_avg:573.38ms +grad accum step:8385/14336 +step:33540/57344 train_time:19231984ms step_avg:573.40ms +step:33541/57344 train_time:19231999ms step_avg:573.39ms +step:33542/57344 train_time:19232241ms step_avg:573.38ms +step:33543/57344 train_time:19232789ms step_avg:573.38ms +grad accum step:8386/14336 +step:33544/57344 train_time:19234097ms step_avg:573.40ms +step:33545/57344 train_time:19234115ms step_avg:573.38ms +step:33546/57344 train_time:19234361ms step_avg:573.37ms +step:33547/57344 train_time:19234916ms step_avg:573.37ms +grad accum step:8387/14336 +step:33548/57344 train_time:19236213ms step_avg:573.39ms +step:33549/57344 train_time:19236230ms step_avg:573.38ms +step:33550/57344 train_time:19236479ms step_avg:573.37ms +step:33551/57344 train_time:19237026ms step_avg:573.37ms +grad accum step:8388/14336 +step:33552/57344 train_time:19238305ms step_avg:573.39ms +step:33553/57344 train_time:19238321ms step_avg:573.37ms +step:33554/57344 train_time:19238570ms step_avg:573.36ms +step:33555/57344 train_time:19239121ms step_avg:573.36ms +grad accum step:8389/14336 +step:33556/57344 train_time:19240421ms step_avg:573.38ms +step:33557/57344 train_time:19240436ms step_avg:573.37ms +step:33558/57344 train_time:19240678ms step_avg:573.36ms +step:33559/57344 train_time:19241226ms step_avg:573.36ms +grad accum step:8390/14336 +step:33560/57344 train_time:19242535ms step_avg:573.38ms +step:33561/57344 train_time:19242549ms step_avg:573.36ms +step:33562/57344 train_time:19242791ms step_avg:573.35ms +step:33563/57344 train_time:19243330ms step_avg:573.35ms +grad accum step:8391/14336 +step:33564/57344 train_time:19244628ms step_avg:573.37ms +step:33565/57344 train_time:19244647ms step_avg:573.35ms +step:33566/57344 train_time:19244878ms step_avg:573.34ms +step:33567/57344 train_time:19245416ms step_avg:573.34ms +grad accum step:8392/14336 +step:33568/57344 train_time:19246694ms step_avg:573.36ms +step:33569/57344 train_time:19246722ms step_avg:573.35ms +step:33570/57344 train_time:19246951ms step_avg:573.34ms +step:33571/57344 train_time:19247503ms step_avg:573.34ms +grad accum step:8393/14336 +step:33572/57344 train_time:19248899ms step_avg:573.36ms +step:33573/57344 train_time:19248915ms step_avg:573.35ms +step:33574/57344 train_time:19249156ms step_avg:573.34ms +step:33575/57344 train_time:19249708ms step_avg:573.33ms +grad accum step:8394/14336 +step:33576/57344 train_time:19251019ms step_avg:573.36ms +step:33577/57344 train_time:19251033ms step_avg:573.34ms +step:33578/57344 train_time:19251283ms step_avg:573.33ms +step:33579/57344 train_time:19251832ms step_avg:573.33ms +grad accum step:8395/14336 +step:33580/57344 train_time:19253135ms step_avg:573.35ms +step:33581/57344 train_time:19253150ms step_avg:573.33ms +step:33582/57344 train_time:19253407ms step_avg:573.33ms +step:33583/57344 train_time:19253977ms step_avg:573.33ms +grad accum step:8396/14336 +step:33584/57344 train_time:19255273ms step_avg:573.35ms +step:33585/57344 train_time:19255289ms step_avg:573.33ms +step:33586/57344 train_time:19255534ms step_avg:573.32ms +step:33587/57344 train_time:19256079ms step_avg:573.32ms +grad accum step:8397/14336 +step:33588/57344 train_time:19257395ms step_avg:573.34ms +step:33589/57344 train_time:19257411ms step_avg:573.32ms +step:33590/57344 train_time:19257659ms step_avg:573.32ms +step:33591/57344 train_time:19258207ms step_avg:573.31ms +grad accum step:8398/14336 +step:33592/57344 train_time:19259520ms step_avg:573.34ms +step:33593/57344 train_time:19259536ms step_avg:573.32ms +step:33594/57344 train_time:19259790ms step_avg:573.31ms +step:33595/57344 train_time:19260345ms step_avg:573.31ms +grad accum step:8399/14336 +step:33596/57344 train_time:19261694ms step_avg:573.33ms +step:33597/57344 train_time:19261712ms step_avg:573.32ms +step:33598/57344 train_time:19261939ms step_avg:573.31ms +step:33599/57344 train_time:19262502ms step_avg:573.31ms +grad accum step:8400/14336 +step:33600/57344 train_time:19263818ms step_avg:573.33ms +step:33600/57344 val_loss:6.433446 train_time:19263823ms step_avg:573.33ms +step:33601/57344 train_time:19263835ms step_avg:573.31ms +step:33602/57344 train_time:19264060ms step_avg:573.30ms +step:33603/57344 train_time:19264605ms step_avg:573.30ms +grad accum step:8401/14336 +step:33604/57344 train_time:19265902ms step_avg:573.32ms +step:33605/57344 train_time:19265919ms step_avg:573.31ms +step:33606/57344 train_time:19266172ms step_avg:573.30ms +step:33607/57344 train_time:19266730ms step_avg:573.30ms +grad accum step:8402/14336 +step:33608/57344 train_time:19268052ms step_avg:573.32ms +step:33609/57344 train_time:19268069ms step_avg:573.30ms +step:33610/57344 train_time:19268320ms step_avg:573.29ms +step:33611/57344 train_time:19268877ms step_avg:573.29ms +grad accum step:8403/14336 +step:33612/57344 train_time:19270174ms step_avg:573.31ms +step:33613/57344 train_time:19270187ms step_avg:573.30ms +step:33614/57344 train_time:19270434ms step_avg:573.29ms +step:33615/57344 train_time:19270984ms step_avg:573.29ms +grad accum step:8404/14336 +step:33616/57344 train_time:19272291ms step_avg:573.31ms +step:33617/57344 train_time:19272308ms step_avg:573.29ms +step:33618/57344 train_time:19272563ms step_avg:573.28ms +step:33619/57344 train_time:19273137ms step_avg:573.28ms +grad accum step:8405/14336 +step:33620/57344 train_time:19274428ms step_avg:573.30ms +step:33621/57344 train_time:19274443ms step_avg:573.29ms +step:33622/57344 train_time:19274686ms step_avg:573.28ms +step:33623/57344 train_time:19275227ms step_avg:573.28ms +grad accum step:8406/14336 +step:33624/57344 train_time:19276553ms step_avg:573.30ms +step:33625/57344 train_time:19276567ms step_avg:573.28ms +step:33626/57344 train_time:19276817ms step_avg:573.27ms +step:33627/57344 train_time:19277376ms step_avg:573.27ms +grad accum step:8407/14336 +step:33628/57344 train_time:19278691ms step_avg:573.29ms +step:33629/57344 train_time:19278704ms step_avg:573.28ms +step:33630/57344 train_time:19278946ms step_avg:573.27ms +step:33631/57344 train_time:19279497ms step_avg:573.27ms +grad accum step:8408/14336 +step:33632/57344 train_time:19280772ms step_avg:573.29ms +step:33633/57344 train_time:19280789ms step_avg:573.27ms +step:33634/57344 train_time:19281038ms step_avg:573.26ms +step:33635/57344 train_time:19281583ms step_avg:573.26ms +grad accum step:8409/14336 +step:33636/57344 train_time:19282879ms step_avg:573.28ms +step:33637/57344 train_time:19282895ms step_avg:573.26ms +step:33638/57344 train_time:19283141ms step_avg:573.25ms +step:33639/57344 train_time:19283688ms step_avg:573.25ms +grad accum step:8410/14336 +step:33640/57344 train_time:19284985ms step_avg:573.28ms +step:33641/57344 train_time:19285002ms step_avg:573.26ms +step:33642/57344 train_time:19285244ms step_avg:573.25ms +step:33643/57344 train_time:19285795ms step_avg:573.25ms +grad accum step:8411/14336 +step:33644/57344 train_time:19287102ms step_avg:573.27ms +step:33645/57344 train_time:19287119ms step_avg:573.25ms +step:33646/57344 train_time:19287376ms step_avg:573.24ms +step:33647/57344 train_time:19287948ms step_avg:573.24ms +grad accum step:8412/14336 +step:33648/57344 train_time:19289472ms step_avg:573.27ms +step:33649/57344 train_time:19289492ms step_avg:573.26ms +step:33650/57344 train_time:19289715ms step_avg:573.25ms +step:33651/57344 train_time:19290270ms step_avg:573.25ms +grad accum step:8413/14336 +step:33652/57344 train_time:19291555ms step_avg:573.27ms +step:33653/57344 train_time:19291572ms step_avg:573.25ms +step:33654/57344 train_time:19291817ms step_avg:573.24ms +step:33655/57344 train_time:19292360ms step_avg:573.24ms +grad accum step:8414/14336 +step:33656/57344 train_time:19293680ms step_avg:573.26ms +step:33657/57344 train_time:19293722ms step_avg:573.25ms +step:33658/57344 train_time:19293942ms step_avg:573.23ms +step:33659/57344 train_time:19294494ms step_avg:573.23ms +grad accum step:8415/14336 +step:33660/57344 train_time:19295813ms step_avg:573.26ms +step:33661/57344 train_time:19295829ms step_avg:573.24ms +step:33662/57344 train_time:19296075ms step_avg:573.23ms +step:33663/57344 train_time:19296631ms step_avg:573.23ms +grad accum step:8416/14336 +step:33664/57344 train_time:19298005ms step_avg:573.25ms +step:33664/57344 val_loss:6.436284 train_time:19298011ms step_avg:573.25ms +step:33665/57344 train_time:19298023ms step_avg:573.24ms +step:33666/57344 train_time:19298246ms step_avg:573.23ms +step:33667/57344 train_time:19298787ms step_avg:573.23ms +grad accum step:8417/14336 +step:33668/57344 train_time:19300110ms step_avg:573.25ms +step:33669/57344 train_time:19300126ms step_avg:573.23ms +step:33670/57344 train_time:19300372ms step_avg:573.22ms +step:33671/57344 train_time:19300923ms step_avg:573.22ms +grad accum step:8418/14336 +step:33672/57344 train_time:19302259ms step_avg:573.24ms +step:33673/57344 train_time:19302275ms step_avg:573.23ms +step:33674/57344 train_time:19302522ms step_avg:573.22ms +step:33675/57344 train_time:19303068ms step_avg:573.22ms +grad accum step:8419/14336 +step:33676/57344 train_time:19304371ms step_avg:573.24ms +step:33677/57344 train_time:19304383ms step_avg:573.22ms +step:33678/57344 train_time:19304628ms step_avg:573.21ms +step:33679/57344 train_time:19305177ms step_avg:573.21ms +grad accum step:8420/14336 +step:33680/57344 train_time:19306487ms step_avg:573.23ms +step:33681/57344 train_time:19306503ms step_avg:573.22ms +step:33682/57344 train_time:19306755ms step_avg:573.21ms +step:33683/57344 train_time:19307308ms step_avg:573.21ms +grad accum step:8421/14336 +step:33684/57344 train_time:19308596ms step_avg:573.23ms +step:33685/57344 train_time:19308613ms step_avg:573.21ms +step:33686/57344 train_time:19308866ms step_avg:573.20ms +step:33687/57344 train_time:19309423ms step_avg:573.20ms +grad accum step:8422/14336 +step:33688/57344 train_time:19310727ms step_avg:573.22ms +step:33689/57344 train_time:19310745ms step_avg:573.21ms +step:33690/57344 train_time:19310989ms step_avg:573.20ms +step:33691/57344 train_time:19311528ms step_avg:573.20ms +grad accum step:8423/14336 +step:33692/57344 train_time:19312840ms step_avg:573.22ms +step:33693/57344 train_time:19312856ms step_avg:573.20ms +step:33694/57344 train_time:19313106ms step_avg:573.19ms +step:33695/57344 train_time:19313665ms step_avg:573.19ms +grad accum step:8424/14336 +step:33696/57344 train_time:19315012ms step_avg:573.21ms +step:33697/57344 train_time:19315027ms step_avg:573.20ms +step:33698/57344 train_time:19315273ms step_avg:573.19ms +step:33699/57344 train_time:19315816ms step_avg:573.19ms +grad accum step:8425/14336 +step:33700/57344 train_time:19317108ms step_avg:573.21ms +step:33701/57344 train_time:19317123ms step_avg:573.19ms +step:33702/57344 train_time:19317367ms step_avg:573.18ms +step:33703/57344 train_time:19317918ms step_avg:573.18ms +grad accum step:8426/14336 +step:33704/57344 train_time:19319256ms step_avg:573.20ms +step:33705/57344 train_time:19319269ms step_avg:573.19ms +step:33706/57344 train_time:19319514ms step_avg:573.18ms +step:33707/57344 train_time:19320070ms step_avg:573.18ms +grad accum step:8427/14336 +step:33708/57344 train_time:19321402ms step_avg:573.20ms +step:33709/57344 train_time:19321418ms step_avg:573.18ms +step:33710/57344 train_time:19321663ms step_avg:573.17ms +step:33711/57344 train_time:19322211ms step_avg:573.17ms +grad accum step:8428/14336 +step:33712/57344 train_time:19323517ms step_avg:573.19ms +step:33713/57344 train_time:19323537ms step_avg:573.18ms +step:33714/57344 train_time:19323782ms step_avg:573.17ms +step:33715/57344 train_time:19324350ms step_avg:573.17ms +grad accum step:8429/14336 +step:33716/57344 train_time:19325663ms step_avg:573.19ms +step:33717/57344 train_time:19325680ms step_avg:573.17ms +step:33718/57344 train_time:19325922ms step_avg:573.16ms +step:33719/57344 train_time:19326472ms step_avg:573.16ms +grad accum step:8430/14336 +step:33720/57344 train_time:19327797ms step_avg:573.18ms +step:33721/57344 train_time:19327813ms step_avg:573.17ms +step:33722/57344 train_time:19328057ms step_avg:573.16ms +step:33723/57344 train_time:19328610ms step_avg:573.16ms +grad accum step:8431/14336 +step:33724/57344 train_time:19329918ms step_avg:573.18ms +step:33725/57344 train_time:19329935ms step_avg:573.16ms +step:33726/57344 train_time:19330177ms step_avg:573.15ms +step:33727/57344 train_time:19330733ms step_avg:573.15ms +grad accum step:8432/14336 +step:33728/57344 train_time:19332091ms step_avg:573.18ms +step:33728/57344 val_loss:6.445465 train_time:19332092ms step_avg:573.18ms +step:33729/57344 train_time:19332104ms step_avg:573.16ms +step:33730/57344 train_time:19332333ms step_avg:573.15ms +step:33731/57344 train_time:19332891ms step_avg:573.15ms +grad accum step:8433/14336 +step:33732/57344 train_time:19334221ms step_avg:573.17ms +step:33733/57344 train_time:19334238ms step_avg:573.16ms +step:33734/57344 train_time:19334476ms step_avg:573.15ms +step:33735/57344 train_time:19335012ms step_avg:573.14ms +grad accum step:8434/14336 +step:33736/57344 train_time:19336294ms step_avg:573.16ms +step:33737/57344 train_time:19336309ms step_avg:573.15ms +step:33738/57344 train_time:19336552ms step_avg:573.14ms +step:33739/57344 train_time:19337097ms step_avg:573.14ms +grad accum step:8435/14336 +step:33740/57344 train_time:19338415ms step_avg:573.16ms +step:33741/57344 train_time:19338431ms step_avg:573.14ms +step:33742/57344 train_time:19338675ms step_avg:573.13ms +step:33743/57344 train_time:19339223ms step_avg:573.13ms +grad accum step:8436/14336 +step:33744/57344 train_time:19340561ms step_avg:573.16ms +step:33745/57344 train_time:19340577ms step_avg:573.14ms +step:33746/57344 train_time:19340819ms step_avg:573.13ms +step:33747/57344 train_time:19341370ms step_avg:573.13ms +grad accum step:8437/14336 +step:33748/57344 train_time:19342686ms step_avg:573.15ms +step:33749/57344 train_time:19342700ms step_avg:573.13ms +step:33750/57344 train_time:19342943ms step_avg:573.12ms +step:33751/57344 train_time:19343487ms step_avg:573.12ms +grad accum step:8438/14336 +step:33752/57344 train_time:19344783ms step_avg:573.14ms +step:33753/57344 train_time:19344800ms step_avg:573.13ms +step:33754/57344 train_time:19345051ms step_avg:573.12ms +step:33755/57344 train_time:19345606ms step_avg:573.12ms +grad accum step:8439/14336 +step:33756/57344 train_time:19346928ms step_avg:573.14ms +step:33757/57344 train_time:19346947ms step_avg:573.12ms +step:33758/57344 train_time:19347170ms step_avg:573.11ms +step:33759/57344 train_time:19347717ms step_avg:573.11ms +grad accum step:8440/14336 +step:33760/57344 train_time:19349054ms step_avg:573.14ms +step:33761/57344 train_time:19349069ms step_avg:573.12ms +step:33762/57344 train_time:19349314ms step_avg:573.11ms +step:33763/57344 train_time:19349858ms step_avg:573.11ms +grad accum step:8441/14336 +step:33764/57344 train_time:19351157ms step_avg:573.13ms +step:33765/57344 train_time:19351172ms step_avg:573.11ms +step:33766/57344 train_time:19351415ms step_avg:573.10ms +step:33767/57344 train_time:19351961ms step_avg:573.10ms +grad accum step:8442/14336 +step:33768/57344 train_time:19353278ms step_avg:573.12ms +step:33769/57344 train_time:19353295ms step_avg:573.11ms +step:33770/57344 train_time:19353538ms step_avg:573.10ms +step:33771/57344 train_time:19354084ms step_avg:573.10ms +grad accum step:8443/14336 +step:33772/57344 train_time:19355418ms step_avg:573.12ms +step:33773/57344 train_time:19355443ms step_avg:573.10ms +step:33774/57344 train_time:19355678ms step_avg:573.09ms +step:33775/57344 train_time:19356232ms step_avg:573.09ms +grad accum step:8444/14336 +step:33776/57344 train_time:19357529ms step_avg:573.11ms +step:33777/57344 train_time:19357546ms step_avg:573.10ms +step:33778/57344 train_time:19357794ms step_avg:573.09ms +step:33779/57344 train_time:19358354ms step_avg:573.09ms +grad accum step:8445/14336 +step:33780/57344 train_time:19359646ms step_avg:573.11ms +step:33781/57344 train_time:19359663ms step_avg:573.09ms +step:33782/57344 train_time:19359910ms step_avg:573.08ms +step:33783/57344 train_time:19360455ms step_avg:573.08ms +grad accum step:8446/14336 +step:33784/57344 train_time:19361757ms step_avg:573.10ms +step:33785/57344 train_time:19361774ms step_avg:573.09ms +step:33786/57344 train_time:19362025ms step_avg:573.08ms +step:33787/57344 train_time:19362583ms step_avg:573.08ms +grad accum step:8447/14336 +step:33788/57344 train_time:19363882ms step_avg:573.10ms +step:33789/57344 train_time:19363899ms step_avg:573.08ms +step:33790/57344 train_time:19364145ms step_avg:573.07ms +step:33791/57344 train_time:19364690ms step_avg:573.07ms +grad accum step:8448/14336 +step:33792/57344 train_time:19366000ms step_avg:573.09ms +step:33792/57344 val_loss:6.442889 train_time:19366003ms step_avg:573.09ms +step:33793/57344 train_time:19366014ms step_avg:573.08ms +step:33794/57344 train_time:19366240ms step_avg:573.07ms +step:33795/57344 train_time:19366794ms step_avg:573.07ms +grad accum step:8449/14336 +step:33796/57344 train_time:19368085ms step_avg:573.09ms +step:33797/57344 train_time:19368101ms step_avg:573.07ms +step:33798/57344 train_time:19368349ms step_avg:573.06ms +step:33799/57344 train_time:19368916ms step_avg:573.06ms +grad accum step:8450/14336 +step:33800/57344 train_time:19370234ms step_avg:573.08ms +step:33801/57344 train_time:19370247ms step_avg:573.07ms +step:33802/57344 train_time:19370499ms step_avg:573.06ms +step:33803/57344 train_time:19371060ms step_avg:573.06ms +grad accum step:8451/14336 +step:33804/57344 train_time:19372359ms step_avg:573.08ms +step:33805/57344 train_time:19372374ms step_avg:573.06ms +step:33806/57344 train_time:19372621ms step_avg:573.05ms +step:33807/57344 train_time:19373161ms step_avg:573.05ms +grad accum step:8452/14336 +step:33808/57344 train_time:19374445ms step_avg:573.07ms +step:33809/57344 train_time:19374462ms step_avg:573.06ms +step:33810/57344 train_time:19374709ms step_avg:573.05ms +step:33811/57344 train_time:19375261ms step_avg:573.05ms +grad accum step:8453/14336 +step:33812/57344 train_time:19376581ms step_avg:573.07ms +step:33813/57344 train_time:19376601ms step_avg:573.05ms +step:33814/57344 train_time:19376839ms step_avg:573.04ms +step:33815/57344 train_time:19377385ms step_avg:573.04ms +grad accum step:8454/14336 +step:33816/57344 train_time:19378686ms step_avg:573.06ms +step:33817/57344 train_time:19378705ms step_avg:573.05ms +step:33818/57344 train_time:19378956ms step_avg:573.04ms +step:33819/57344 train_time:19379528ms step_avg:573.04ms +grad accum step:8455/14336 +step:33820/57344 train_time:19380827ms step_avg:573.06ms +step:33821/57344 train_time:19380843ms step_avg:573.04ms +step:33822/57344 train_time:19381090ms step_avg:573.03ms +step:33823/57344 train_time:19381641ms step_avg:573.03ms +grad accum step:8456/14336 +step:33824/57344 train_time:19382926ms step_avg:573.05ms +step:33825/57344 train_time:19382941ms step_avg:573.04ms +step:33826/57344 train_time:19383187ms step_avg:573.03ms +step:33827/57344 train_time:19383732ms step_avg:573.03ms +grad accum step:8457/14336 +step:33828/57344 train_time:19385053ms step_avg:573.05ms +step:33829/57344 train_time:19385072ms step_avg:573.03ms +step:33830/57344 train_time:19385313ms step_avg:573.02ms +step:33831/57344 train_time:19385859ms step_avg:573.02ms +grad accum step:8458/14336 +step:33832/57344 train_time:19387178ms step_avg:573.04ms +step:33833/57344 train_time:19387195ms step_avg:573.03ms +step:33834/57344 train_time:19387444ms step_avg:573.02ms +step:33835/57344 train_time:19388001ms step_avg:573.02ms +grad accum step:8459/14336 +step:33836/57344 train_time:19389301ms step_avg:573.04ms +step:33837/57344 train_time:19389320ms step_avg:573.02ms +step:33838/57344 train_time:19389565ms step_avg:573.01ms +step:33839/57344 train_time:19390123ms step_avg:573.01ms +grad accum step:8460/14336 +step:33840/57344 train_time:19391451ms step_avg:573.03ms +step:33841/57344 train_time:19391469ms step_avg:573.02ms +step:33842/57344 train_time:19391711ms step_avg:573.01ms +step:33843/57344 train_time:19392254ms step_avg:573.01ms +grad accum step:8461/14336 +step:33844/57344 train_time:19393563ms step_avg:573.03ms +step:33845/57344 train_time:19393592ms step_avg:573.01ms +step:33846/57344 train_time:19393829ms step_avg:573.00ms +step:33847/57344 train_time:19394392ms step_avg:573.00ms +grad accum step:8462/14336 +step:33848/57344 train_time:19395689ms step_avg:573.02ms +step:33849/57344 train_time:19395706ms step_avg:573.01ms +step:33850/57344 train_time:19395951ms step_avg:573.00ms +step:33851/57344 train_time:19396496ms step_avg:573.00ms +grad accum step:8463/14336 +step:33852/57344 train_time:19397790ms step_avg:573.02ms +step:33853/57344 train_time:19397807ms step_avg:573.00ms +step:33854/57344 train_time:19398052ms step_avg:572.99ms +step:33855/57344 train_time:19398604ms step_avg:572.99ms +grad accum step:8464/14336 +step:33856/57344 train_time:19399917ms step_avg:573.01ms +step:33856/57344 val_loss:6.447213 train_time:19399918ms step_avg:573.01ms +step:33857/57344 train_time:19399930ms step_avg:573.00ms +step:33858/57344 train_time:19400154ms step_avg:572.99ms +step:33859/57344 train_time:19400703ms step_avg:572.99ms +grad accum step:8465/14336 +step:33860/57344 train_time:19401988ms step_avg:573.01ms +step:33861/57344 train_time:19402002ms step_avg:572.99ms +step:33862/57344 train_time:19402246ms step_avg:572.98ms +step:33863/57344 train_time:19402812ms step_avg:572.98ms +grad accum step:8466/14336 +step:33864/57344 train_time:19404112ms step_avg:573.00ms +step:33865/57344 train_time:19404127ms step_avg:572.98ms +step:33866/57344 train_time:19404372ms step_avg:572.98ms +step:33867/57344 train_time:19404916ms step_avg:572.97ms +grad accum step:8467/14336 +step:33868/57344 train_time:19406230ms step_avg:573.00ms +step:33869/57344 train_time:19406245ms step_avg:572.98ms +step:33870/57344 train_time:19406492ms step_avg:572.97ms +step:33871/57344 train_time:19407040ms step_avg:572.97ms +grad accum step:8468/14336 +step:33872/57344 train_time:19408338ms step_avg:572.99ms +step:33873/57344 train_time:19408357ms step_avg:572.97ms +step:33874/57344 train_time:19408597ms step_avg:572.96ms +step:33875/57344 train_time:19409145ms step_avg:572.96ms +grad accum step:8469/14336 +step:33876/57344 train_time:19410457ms step_avg:572.99ms +step:33877/57344 train_time:19410472ms step_avg:572.97ms +step:33878/57344 train_time:19410719ms step_avg:572.96ms +step:33879/57344 train_time:19411265ms step_avg:572.96ms +grad accum step:8470/14336 +step:33880/57344 train_time:19412564ms step_avg:572.98ms +step:33881/57344 train_time:19412582ms step_avg:572.96ms +step:33882/57344 train_time:19412822ms step_avg:572.95ms +step:33883/57344 train_time:19413366ms step_avg:572.95ms +grad accum step:8471/14336 +step:33884/57344 train_time:19414670ms step_avg:572.97ms +step:33885/57344 train_time:19414685ms step_avg:572.96ms +step:33886/57344 train_time:19414927ms step_avg:572.95ms +step:33887/57344 train_time:19415473ms step_avg:572.95ms +grad accum step:8472/14336 +step:33888/57344 train_time:19416792ms step_avg:572.97ms +step:33889/57344 train_time:19416809ms step_avg:572.95ms +step:33890/57344 train_time:19417059ms step_avg:572.94ms +step:33891/57344 train_time:19417626ms step_avg:572.94ms +grad accum step:8473/14336 +step:33892/57344 train_time:19418995ms step_avg:572.97ms +step:33893/57344 train_time:19419009ms step_avg:572.95ms +step:33894/57344 train_time:19419270ms step_avg:572.94ms +step:33895/57344 train_time:19419849ms step_avg:572.94ms +grad accum step:8474/14336 +step:33896/57344 train_time:19421165ms step_avg:572.96ms +step:33897/57344 train_time:19421182ms step_avg:572.95ms +step:33898/57344 train_time:19421426ms step_avg:572.94ms +step:33899/57344 train_time:19421962ms step_avg:572.94ms +grad accum step:8475/14336 +step:33900/57344 train_time:19423264ms step_avg:572.96ms +step:33901/57344 train_time:19423279ms step_avg:572.94ms +step:33902/57344 train_time:19423527ms step_avg:572.93ms +step:33903/57344 train_time:19424079ms step_avg:572.93ms +grad accum step:8476/14336 +step:33904/57344 train_time:19425403ms step_avg:572.95ms +step:33905/57344 train_time:19425425ms step_avg:572.94ms +step:33906/57344 train_time:19425664ms step_avg:572.93ms +step:33907/57344 train_time:19426218ms step_avg:572.93ms +grad accum step:8477/14336 +step:33908/57344 train_time:19427530ms step_avg:572.95ms +step:33909/57344 train_time:19427548ms step_avg:572.93ms +step:33910/57344 train_time:19427789ms step_avg:572.92ms +step:33911/57344 train_time:19428336ms step_avg:572.92ms +grad accum step:8478/14336 +step:33912/57344 train_time:19429619ms step_avg:572.94ms +step:33913/57344 train_time:19429637ms step_avg:572.93ms +step:33914/57344 train_time:19429876ms step_avg:572.92ms +step:33915/57344 train_time:19430428ms step_avg:572.92ms +grad accum step:8479/14336 +step:33916/57344 train_time:19431734ms step_avg:572.94ms +step:33917/57344 train_time:19431749ms step_avg:572.92ms +step:33918/57344 train_time:19431998ms step_avg:572.91ms +step:33919/57344 train_time:19432549ms step_avg:572.91ms +grad accum step:8480/14336 +step:33920/57344 train_time:19433839ms step_avg:572.93ms +step:33920/57344 val_loss:6.447993 train_time:19433846ms step_avg:572.93ms +step:33921/57344 train_time:19433858ms step_avg:572.92ms +step:33922/57344 train_time:19434079ms step_avg:572.90ms +step:33923/57344 train_time:19434612ms step_avg:572.90ms +grad accum step:8481/14336 +step:33924/57344 train_time:19437617ms step_avg:572.98ms +step:33925/57344 train_time:19438141ms step_avg:572.97ms +step:33926/57344 train_time:19438271ms step_avg:572.96ms +step:33927/57344 train_time:19438826ms step_avg:572.96ms +grad accum step:8482/14336 +step:33928/57344 train_time:19440123ms step_avg:572.98ms +step:33929/57344 train_time:19440140ms step_avg:572.97ms +step:33930/57344 train_time:19440383ms step_avg:572.96ms +step:33931/57344 train_time:19440929ms step_avg:572.95ms +grad accum step:8483/14336 +step:33932/57344 train_time:19442223ms step_avg:572.98ms +step:33933/57344 train_time:19442240ms step_avg:572.96ms +step:33934/57344 train_time:19442485ms step_avg:572.95ms +step:33935/57344 train_time:19443034ms step_avg:572.95ms +grad accum step:8484/14336 +step:33936/57344 train_time:19444352ms step_avg:572.97ms +step:33937/57344 train_time:19444366ms step_avg:572.95ms +step:33938/57344 train_time:19444611ms step_avg:572.95ms +step:33939/57344 train_time:19445155ms step_avg:572.94ms +grad accum step:8485/14336 +step:33940/57344 train_time:19446479ms step_avg:572.97ms +step:33941/57344 train_time:19446497ms step_avg:572.95ms +step:33942/57344 train_time:19446736ms step_avg:572.94ms +step:33943/57344 train_time:19447278ms step_avg:572.94ms +grad accum step:8486/14336 +step:33944/57344 train_time:19448585ms step_avg:572.96ms +step:33945/57344 train_time:19448610ms step_avg:572.94ms +step:33946/57344 train_time:19448850ms step_avg:572.93ms +step:33947/57344 train_time:19449417ms step_avg:572.93ms +grad accum step:8487/14336 +step:33948/57344 train_time:19450743ms step_avg:572.96ms +step:33949/57344 train_time:19450758ms step_avg:572.94ms +step:33950/57344 train_time:19451007ms step_avg:572.93ms +step:33951/57344 train_time:19451559ms step_avg:572.93ms +grad accum step:8488/14336 +step:33952/57344 train_time:19452855ms step_avg:572.95ms +step:33953/57344 train_time:19452871ms step_avg:572.94ms +step:33954/57344 train_time:19453119ms step_avg:572.93ms +step:33955/57344 train_time:19453674ms step_avg:572.93ms +grad accum step:8489/14336 +step:33956/57344 train_time:19455018ms step_avg:572.95ms +step:33957/57344 train_time:19455034ms step_avg:572.93ms +step:33958/57344 train_time:19455272ms step_avg:572.92ms +step:33959/57344 train_time:19455821ms step_avg:572.92ms +grad accum step:8490/14336 +step:33960/57344 train_time:19457126ms step_avg:572.94ms +step:33961/57344 train_time:19457144ms step_avg:572.93ms +step:33962/57344 train_time:19457392ms step_avg:572.92ms +step:33963/57344 train_time:19457944ms step_avg:572.92ms +grad accum step:8491/14336 +step:33964/57344 train_time:19459242ms step_avg:572.94ms +step:33965/57344 train_time:19459258ms step_avg:572.92ms +step:33966/57344 train_time:19459502ms step_avg:572.91ms +step:33967/57344 train_time:19460050ms step_avg:572.91ms +grad accum step:8492/14336 +step:33968/57344 train_time:19461332ms step_avg:572.93ms +step:33969/57344 train_time:19461347ms step_avg:572.91ms +step:33970/57344 train_time:19461590ms step_avg:572.91ms +step:33971/57344 train_time:19462136ms step_avg:572.90ms +grad accum step:8493/14336 +step:33972/57344 train_time:19463436ms step_avg:572.93ms +step:33973/57344 train_time:19463453ms step_avg:572.91ms +step:33974/57344 train_time:19463698ms step_avg:572.90ms +step:33975/57344 train_time:19464245ms step_avg:572.90ms +grad accum step:8494/14336 +step:33976/57344 train_time:19465544ms step_avg:572.92ms +step:33977/57344 train_time:19465559ms step_avg:572.90ms +step:33978/57344 train_time:19465807ms step_avg:572.89ms +step:33979/57344 train_time:19466354ms step_avg:572.89ms +grad accum step:8495/14336 +step:33980/57344 train_time:19467687ms step_avg:572.92ms +step:33981/57344 train_time:19467703ms step_avg:572.90ms +step:33982/57344 train_time:19467951ms step_avg:572.89ms +step:33983/57344 train_time:19468499ms step_avg:572.89ms +grad accum step:8496/14336 +step:33984/57344 train_time:19469819ms step_avg:572.91ms +step:33984/57344 val_loss:6.453583 train_time:19469827ms step_avg:572.91ms +step:33985/57344 train_time:19469839ms step_avg:572.90ms +step:33986/57344 train_time:19470067ms step_avg:572.88ms +step:33987/57344 train_time:19470627ms step_avg:572.88ms +grad accum step:8497/14336 +step:33988/57344 train_time:19471988ms step_avg:572.91ms +step:33989/57344 train_time:19472005ms step_avg:572.89ms +step:33990/57344 train_time:19472251ms step_avg:572.88ms +step:33991/57344 train_time:19472816ms step_avg:572.88ms +grad accum step:8498/14336 +step:33992/57344 train_time:19474137ms step_avg:572.90ms +step:33993/57344 train_time:19474157ms step_avg:572.89ms +step:33994/57344 train_time:19474395ms step_avg:572.88ms +step:33995/57344 train_time:19474936ms step_avg:572.88ms +grad accum step:8499/14336 +step:33996/57344 train_time:19476227ms step_avg:572.90ms +step:33997/57344 train_time:19476242ms step_avg:572.88ms +step:33998/57344 train_time:19476488ms step_avg:572.87ms +step:33999/57344 train_time:19477035ms step_avg:572.87ms +grad accum step:8500/14336 +step:34000/57344 train_time:19478370ms step_avg:572.89ms +step:34001/57344 train_time:19478387ms step_avg:572.88ms +step:34002/57344 train_time:19478636ms step_avg:572.87ms +step:34003/57344 train_time:19479183ms step_avg:572.87ms +grad accum step:8501/14336 +step:34004/57344 train_time:19480496ms step_avg:572.89ms +step:34005/57344 train_time:19480513ms step_avg:572.87ms +step:34006/57344 train_time:19480759ms step_avg:572.86ms +step:34007/57344 train_time:19481304ms step_avg:572.86ms +grad accum step:8502/14336 +step:34008/57344 train_time:19482607ms step_avg:572.88ms +step:34009/57344 train_time:19482624ms step_avg:572.87ms +step:34010/57344 train_time:19482860ms step_avg:572.86ms +step:34011/57344 train_time:19483418ms step_avg:572.86ms +grad accum step:8503/14336 +step:34012/57344 train_time:19484724ms step_avg:572.88ms +step:34013/57344 train_time:19484741ms step_avg:572.86ms +step:34014/57344 train_time:19484986ms step_avg:572.85ms +step:34015/57344 train_time:19485540ms step_avg:572.85ms +grad accum step:8504/14336 +step:34016/57344 train_time:19486873ms step_avg:572.87ms +step:34017/57344 train_time:19486891ms step_avg:572.86ms +step:34018/57344 train_time:19487137ms step_avg:572.85ms +step:34019/57344 train_time:19487685ms step_avg:572.85ms +grad accum step:8505/14336 +step:34020/57344 train_time:19488957ms step_avg:572.87ms +step:34021/57344 train_time:19488974ms step_avg:572.85ms +step:34022/57344 train_time:19489222ms step_avg:572.84ms +step:34023/57344 train_time:19489786ms step_avg:572.84ms +grad accum step:8506/14336 +step:34024/57344 train_time:19491085ms step_avg:572.86ms +step:34025/57344 train_time:19491101ms step_avg:572.85ms +step:34026/57344 train_time:19491345ms step_avg:572.84ms +step:34027/57344 train_time:19491892ms step_avg:572.84ms +grad accum step:8507/14336 +step:34028/57344 train_time:19493215ms step_avg:572.86ms +step:34029/57344 train_time:19493230ms step_avg:572.84ms +step:34030/57344 train_time:19493486ms step_avg:572.83ms +step:34031/57344 train_time:19494059ms step_avg:572.83ms +grad accum step:8508/14336 +step:34032/57344 train_time:19495384ms step_avg:572.85ms +step:34033/57344 train_time:19495400ms step_avg:572.84ms +step:34034/57344 train_time:19495664ms step_avg:572.83ms +step:34035/57344 train_time:19496255ms step_avg:572.83ms +grad accum step:8509/14336 +step:34036/57344 train_time:19497598ms step_avg:572.85ms +step:34037/57344 train_time:19497613ms step_avg:572.84ms +step:34038/57344 train_time:19497866ms step_avg:572.83ms +step:34039/57344 train_time:19498426ms step_avg:572.83ms +grad accum step:8510/14336 +step:34040/57344 train_time:19499709ms step_avg:572.85ms +step:34041/57344 train_time:19499726ms step_avg:572.83ms +step:34042/57344 train_time:19499970ms step_avg:572.82ms +step:34043/57344 train_time:19500511ms step_avg:572.82ms +grad accum step:8511/14336 +step:34044/57344 train_time:19501796ms step_avg:572.84ms +step:34045/57344 train_time:19501812ms step_avg:572.82ms +step:34046/57344 train_time:19502062ms step_avg:572.82ms +step:34047/57344 train_time:19502611ms step_avg:572.81ms +grad accum step:8512/14336 +step:34048/57344 train_time:19503979ms step_avg:572.84ms +step:34048/57344 val_loss:6.454758 train_time:19503987ms step_avg:572.84ms +step:34049/57344 train_time:19503999ms step_avg:572.82ms +step:34050/57344 train_time:19504229ms step_avg:572.81ms +step:34051/57344 train_time:19504787ms step_avg:572.81ms +grad accum step:8513/14336 +step:34052/57344 train_time:19506077ms step_avg:572.83ms +step:34053/57344 train_time:19506101ms step_avg:572.82ms +step:34054/57344 train_time:19506339ms step_avg:572.81ms +step:34055/57344 train_time:19506899ms step_avg:572.81ms +grad accum step:8514/14336 +step:34056/57344 train_time:19508190ms step_avg:572.83ms +step:34057/57344 train_time:19508206ms step_avg:572.81ms +step:34058/57344 train_time:19508455ms step_avg:572.80ms +step:34059/57344 train_time:19509014ms step_avg:572.80ms +grad accum step:8515/14336 +step:34060/57344 train_time:19510312ms step_avg:572.82ms +step:34061/57344 train_time:19510329ms step_avg:572.81ms +step:34062/57344 train_time:19510576ms step_avg:572.80ms +step:34063/57344 train_time:19511122ms step_avg:572.80ms +grad accum step:8516/14336 +step:34064/57344 train_time:19512442ms step_avg:572.82ms +step:34065/57344 train_time:19512459ms step_avg:572.80ms +step:34066/57344 train_time:19512714ms step_avg:572.79ms +step:34067/57344 train_time:19513282ms step_avg:572.79ms +grad accum step:8517/14336 +step:34068/57344 train_time:19514584ms step_avg:572.81ms +step:34069/57344 train_time:19514599ms step_avg:572.80ms +step:34070/57344 train_time:19514843ms step_avg:572.79ms +step:34071/57344 train_time:19515390ms step_avg:572.79ms +grad accum step:8518/14336 +step:34072/57344 train_time:19516715ms step_avg:572.81ms +step:34073/57344 train_time:19516737ms step_avg:572.79ms +step:34074/57344 train_time:19516955ms step_avg:572.78ms +step:34075/57344 train_time:19517498ms step_avg:572.78ms +grad accum step:8519/14336 +step:34076/57344 train_time:19518774ms step_avg:572.80ms +step:34077/57344 train_time:19518792ms step_avg:572.78ms +step:34078/57344 train_time:19519032ms step_avg:572.78ms +step:34079/57344 train_time:19519584ms step_avg:572.77ms +grad accum step:8520/14336 +step:34080/57344 train_time:19520911ms step_avg:572.80ms +step:34081/57344 train_time:19520931ms step_avg:572.78ms +step:34082/57344 train_time:19521171ms step_avg:572.77ms +step:34083/57344 train_time:19521717ms step_avg:572.77ms +grad accum step:8521/14336 +step:34084/57344 train_time:19523007ms step_avg:572.79ms +step:34085/57344 train_time:19523023ms step_avg:572.77ms +step:34086/57344 train_time:19523272ms step_avg:572.77ms +step:34087/57344 train_time:19523828ms step_avg:572.76ms +grad accum step:8522/14336 +step:34088/57344 train_time:19525137ms step_avg:572.79ms +step:34089/57344 train_time:19525153ms step_avg:572.77ms +step:34090/57344 train_time:19525401ms step_avg:572.76ms +step:34091/57344 train_time:19525951ms step_avg:572.76ms +grad accum step:8523/14336 +step:34092/57344 train_time:19527271ms step_avg:572.78ms +step:34093/57344 train_time:19527290ms step_avg:572.77ms +step:34094/57344 train_time:19527527ms step_avg:572.76ms +step:34095/57344 train_time:19528069ms step_avg:572.75ms +grad accum step:8524/14336 +step:34096/57344 train_time:19529376ms step_avg:572.78ms +step:34097/57344 train_time:19529395ms step_avg:572.76ms +step:34098/57344 train_time:19529637ms step_avg:572.75ms +step:34099/57344 train_time:19530182ms step_avg:572.75ms +grad accum step:8525/14336 +step:34100/57344 train_time:19531457ms step_avg:572.77ms +step:34101/57344 train_time:19531472ms step_avg:572.75ms +step:34102/57344 train_time:19531719ms step_avg:572.74ms +step:34103/57344 train_time:19532268ms step_avg:572.74ms +grad accum step:8526/14336 +step:34104/57344 train_time:19533583ms step_avg:572.77ms +step:34105/57344 train_time:19533599ms step_avg:572.75ms +step:34106/57344 train_time:19533843ms step_avg:572.74ms +step:34107/57344 train_time:19534389ms step_avg:572.74ms +grad accum step:8527/14336 +step:34108/57344 train_time:19535696ms step_avg:572.76ms +step:34109/57344 train_time:19535710ms step_avg:572.74ms +step:34110/57344 train_time:19535958ms step_avg:572.73ms +step:34111/57344 train_time:19536506ms step_avg:572.73ms +grad accum step:8528/14336 +step:34112/57344 train_time:19537796ms step_avg:572.75ms +step:34112/57344 val_loss:6.444719 train_time:19537802ms step_avg:572.75ms +step:34113/57344 train_time:19537813ms step_avg:572.74ms +step:34114/57344 train_time:19538037ms step_avg:572.73ms +step:34115/57344 train_time:19538588ms step_avg:572.73ms +grad accum step:8529/14336 +step:34116/57344 train_time:19539893ms step_avg:572.75ms +step:34117/57344 train_time:19539914ms step_avg:572.73ms +step:34118/57344 train_time:19540162ms step_avg:572.72ms +step:34119/57344 train_time:19540738ms step_avg:572.72ms +grad accum step:8530/14336 +step:34120/57344 train_time:19542084ms step_avg:572.75ms +step:34121/57344 train_time:19542099ms step_avg:572.73ms +step:34122/57344 train_time:19542345ms step_avg:572.72ms +step:34123/57344 train_time:19542886ms step_avg:572.72ms +grad accum step:8531/14336 +step:34124/57344 train_time:19544159ms step_avg:572.74ms +step:34125/57344 train_time:19544173ms step_avg:572.72ms +step:34126/57344 train_time:19544419ms step_avg:572.71ms +step:34127/57344 train_time:19544962ms step_avg:572.71ms +grad accum step:8532/14336 +step:34128/57344 train_time:19546284ms step_avg:572.73ms +step:34129/57344 train_time:19546303ms step_avg:572.72ms +step:34130/57344 train_time:19546542ms step_avg:572.71ms +step:34131/57344 train_time:19547092ms step_avg:572.71ms +grad accum step:8533/14336 +step:34132/57344 train_time:19548385ms step_avg:572.73ms +step:34133/57344 train_time:19548402ms step_avg:572.71ms +step:34134/57344 train_time:19548654ms step_avg:572.70ms +step:34135/57344 train_time:19549217ms step_avg:572.70ms +grad accum step:8534/14336 +step:34136/57344 train_time:19550536ms step_avg:572.72ms +step:34137/57344 train_time:19550553ms step_avg:572.71ms +step:34138/57344 train_time:19550799ms step_avg:572.70ms +step:34139/57344 train_time:19551347ms step_avg:572.70ms +grad accum step:8535/14336 +step:34140/57344 train_time:19552659ms step_avg:572.72ms +step:34141/57344 train_time:19552674ms step_avg:572.70ms +step:34142/57344 train_time:19552926ms step_avg:572.69ms +step:34143/57344 train_time:19553492ms step_avg:572.69ms +grad accum step:8536/14336 +step:34144/57344 train_time:19554795ms step_avg:572.72ms +step:34145/57344 train_time:19554812ms step_avg:572.70ms +step:34146/57344 train_time:19555063ms step_avg:572.69ms +step:34147/57344 train_time:19555611ms step_avg:572.69ms +grad accum step:8537/14336 +step:34148/57344 train_time:19556943ms step_avg:572.71ms +step:34149/57344 train_time:19556958ms step_avg:572.69ms +step:34150/57344 train_time:19557201ms step_avg:572.69ms +step:34151/57344 train_time:19557741ms step_avg:572.68ms +grad accum step:8538/14336 +step:34152/57344 train_time:19559033ms step_avg:572.71ms +step:34153/57344 train_time:19559050ms step_avg:572.69ms +step:34154/57344 train_time:19559293ms step_avg:572.68ms +step:34155/57344 train_time:19559836ms step_avg:572.68ms +grad accum step:8539/14336 +step:34156/57344 train_time:19561116ms step_avg:572.70ms +step:34157/57344 train_time:19561132ms step_avg:572.68ms +step:34158/57344 train_time:19561376ms step_avg:572.67ms +step:34159/57344 train_time:19561929ms step_avg:572.67ms +grad accum step:8540/14336 +step:34160/57344 train_time:19563258ms step_avg:572.69ms +step:34161/57344 train_time:19563275ms step_avg:572.68ms +step:34162/57344 train_time:19563522ms step_avg:572.67ms +step:34163/57344 train_time:19564071ms step_avg:572.67ms +grad accum step:8541/14336 +step:34164/57344 train_time:19565370ms step_avg:572.69ms +step:34165/57344 train_time:19565386ms step_avg:572.67ms +step:34166/57344 train_time:19565638ms step_avg:572.66ms +step:34167/57344 train_time:19566209ms step_avg:572.66ms +grad accum step:8542/14336 +step:34168/57344 train_time:19567506ms step_avg:572.69ms +step:34169/57344 train_time:19567520ms step_avg:572.67ms +step:34170/57344 train_time:19567770ms step_avg:572.66ms +step:34171/57344 train_time:19568330ms step_avg:572.66ms +grad accum step:8543/14336 +step:34172/57344 train_time:19569615ms step_avg:572.68ms +step:34173/57344 train_time:19569629ms step_avg:572.66ms +step:34174/57344 train_time:19569878ms step_avg:572.65ms +step:34175/57344 train_time:19570431ms step_avg:572.65ms +grad accum step:8544/14336 +step:34176/57344 train_time:19571878ms step_avg:572.68ms +step:34176/57344 val_loss:6.436030 train_time:19571893ms step_avg:572.68ms +step:34177/57344 train_time:19571905ms step_avg:572.66ms +step:34178/57344 train_time:19572129ms step_avg:572.65ms +step:34179/57344 train_time:19572680ms step_avg:572.65ms +grad accum step:8545/14336 +step:34180/57344 train_time:19573982ms step_avg:572.67ms +step:34181/57344 train_time:19574002ms step_avg:572.66ms +step:34182/57344 train_time:19574246ms step_avg:572.65ms +step:34183/57344 train_time:19574809ms step_avg:572.65ms +grad accum step:8546/14336 +step:34184/57344 train_time:19576132ms step_avg:572.67ms +step:34185/57344 train_time:19576147ms step_avg:572.65ms +step:34186/57344 train_time:19576397ms step_avg:572.64ms +step:34187/57344 train_time:19576956ms step_avg:572.64ms +grad accum step:8547/14336 +step:34188/57344 train_time:19578297ms step_avg:572.67ms +step:34189/57344 train_time:19578309ms step_avg:572.65ms +step:34190/57344 train_time:19578551ms step_avg:572.64ms +step:34191/57344 train_time:19579106ms step_avg:572.64ms +grad accum step:8548/14336 +step:34192/57344 train_time:19580447ms step_avg:572.66ms +step:34193/57344 train_time:19580463ms step_avg:572.65ms +step:34194/57344 train_time:19580710ms step_avg:572.64ms +step:34195/57344 train_time:19581253ms step_avg:572.63ms +grad accum step:8549/14336 +step:34196/57344 train_time:19582597ms step_avg:572.66ms +step:34197/57344 train_time:19582618ms step_avg:572.64ms +step:34198/57344 train_time:19582857ms step_avg:572.63ms +step:34199/57344 train_time:19583405ms step_avg:572.63ms +grad accum step:8550/14336 +step:34200/57344 train_time:19584721ms step_avg:572.65ms +step:34201/57344 train_time:19584733ms step_avg:572.64ms +step:34202/57344 train_time:19584961ms step_avg:572.63ms +step:34203/57344 train_time:19585505ms step_avg:572.63ms +grad accum step:8551/14336 +step:34204/57344 train_time:19586788ms step_avg:572.65ms +step:34205/57344 train_time:19586806ms step_avg:572.63ms +step:34206/57344 train_time:19587045ms step_avg:572.62ms +step:34207/57344 train_time:19587599ms step_avg:572.62ms +grad accum step:8552/14336 +step:34208/57344 train_time:19588893ms step_avg:572.64ms +step:34209/57344 train_time:19588910ms step_avg:572.62ms +step:34210/57344 train_time:19589157ms step_avg:572.61ms +step:34211/57344 train_time:19589716ms step_avg:572.61ms +grad accum step:8553/14336 +step:34212/57344 train_time:19591063ms step_avg:572.64ms +step:34213/57344 train_time:19591078ms step_avg:572.62ms +step:34214/57344 train_time:19591326ms step_avg:572.61ms +step:34215/57344 train_time:19591881ms step_avg:572.61ms +grad accum step:8554/14336 +step:34216/57344 train_time:19593169ms step_avg:572.63ms +step:34217/57344 train_time:19593183ms step_avg:572.62ms +step:34218/57344 train_time:19593434ms step_avg:572.61ms +step:34219/57344 train_time:19593992ms step_avg:572.61ms +grad accum step:8555/14336 +step:34220/57344 train_time:19596840ms step_avg:572.67ms +step:34221/57344 train_time:19596852ms step_avg:572.66ms +step:34222/57344 train_time:19597076ms step_avg:572.65ms +step:34223/57344 train_time:19597644ms step_avg:572.65ms +grad accum step:8556/14336 +step:34224/57344 train_time:19598931ms step_avg:572.67ms +step:34225/57344 train_time:19598944ms step_avg:572.65ms +step:34226/57344 train_time:19599187ms step_avg:572.64ms +step:34227/57344 train_time:19599732ms step_avg:572.64ms +grad accum step:8557/14336 +step:34228/57344 train_time:19601014ms step_avg:572.66ms +step:34229/57344 train_time:19601031ms step_avg:572.64ms +step:34230/57344 train_time:19601281ms step_avg:572.63ms +step:34231/57344 train_time:19601830ms step_avg:572.63ms +grad accum step:8558/14336 +step:34232/57344 train_time:19603131ms step_avg:572.66ms +step:34233/57344 train_time:19603148ms step_avg:572.64ms +step:34234/57344 train_time:19603395ms step_avg:572.63ms +step:34235/57344 train_time:19603940ms step_avg:572.63ms +grad accum step:8559/14336 +step:34236/57344 train_time:19605233ms step_avg:572.65ms +step:34237/57344 train_time:19605249ms step_avg:572.63ms +step:34238/57344 train_time:19605497ms step_avg:572.62ms +step:34239/57344 train_time:19606045ms step_avg:572.62ms +grad accum step:8560/14336 +step:34240/57344 train_time:19607347ms step_avg:572.64ms +step:34240/57344 val_loss:6.419268 train_time:19607349ms step_avg:572.64ms +step:34241/57344 train_time:19607360ms step_avg:572.63ms +step:34242/57344 train_time:19607582ms step_avg:572.62ms +step:34243/57344 train_time:19608133ms step_avg:572.62ms +grad accum step:8561/14336 +step:34244/57344 train_time:19609451ms step_avg:572.64ms +step:34245/57344 train_time:19609467ms step_avg:572.62ms +step:34246/57344 train_time:19609716ms step_avg:572.61ms +step:34247/57344 train_time:19610271ms step_avg:572.61ms +grad accum step:8562/14336 +step:34248/57344 train_time:19611605ms step_avg:572.64ms +step:34249/57344 train_time:19611622ms step_avg:572.62ms +step:34250/57344 train_time:19611861ms step_avg:572.61ms +step:34251/57344 train_time:19612416ms step_avg:572.61ms +grad accum step:8563/14336 +step:34252/57344 train_time:19613748ms step_avg:572.63ms +step:34253/57344 train_time:19613765ms step_avg:572.61ms +step:34254/57344 train_time:19614005ms step_avg:572.60ms +step:34255/57344 train_time:19614550ms step_avg:572.60ms +grad accum step:8564/14336 +step:34256/57344 train_time:19615850ms step_avg:572.63ms +step:34257/57344 train_time:19615867ms step_avg:572.61ms +step:34258/57344 train_time:19616128ms step_avg:572.60ms +step:34259/57344 train_time:19616714ms step_avg:572.60ms +grad accum step:8565/14336 +step:34260/57344 train_time:19618038ms step_avg:572.62ms +step:34261/57344 train_time:19618057ms step_avg:572.61ms +step:34262/57344 train_time:19618280ms step_avg:572.60ms +step:34263/57344 train_time:19618833ms step_avg:572.60ms +grad accum step:8566/14336 +step:34264/57344 train_time:19622344ms step_avg:572.68ms +step:34265/57344 train_time:19622359ms step_avg:572.66ms +step:34266/57344 train_time:19622603ms step_avg:572.66ms +step:34267/57344 train_time:19623157ms step_avg:572.65ms +grad accum step:8567/14336 +step:34268/57344 train_time:19624473ms step_avg:572.68ms +step:34269/57344 train_time:19624487ms step_avg:572.66ms +step:34270/57344 train_time:19624729ms step_avg:572.65ms +step:34271/57344 train_time:19625262ms step_avg:572.65ms +grad accum step:8568/14336 +step:34272/57344 train_time:19626544ms step_avg:572.67ms +step:34273/57344 train_time:19626561ms step_avg:572.65ms +step:34274/57344 train_time:19626816ms step_avg:572.64ms +step:34275/57344 train_time:19627380ms step_avg:572.64ms +grad accum step:8569/14336 +step:34276/57344 train_time:19628695ms step_avg:572.67ms +step:34277/57344 train_time:19628713ms step_avg:572.65ms +step:34278/57344 train_time:19628955ms step_avg:572.64ms +step:34279/57344 train_time:19629507ms step_avg:572.64ms +grad accum step:8570/14336 +step:34280/57344 train_time:19630831ms step_avg:572.66ms +step:34281/57344 train_time:19630846ms step_avg:572.65ms +step:34282/57344 train_time:19631092ms step_avg:572.64ms +step:34283/57344 train_time:19631635ms step_avg:572.63ms +grad accum step:8571/14336 +step:34284/57344 train_time:19632934ms step_avg:572.66ms +step:34285/57344 train_time:19632952ms step_avg:572.64ms +step:34286/57344 train_time:19633186ms step_avg:572.63ms +step:34287/57344 train_time:19633720ms step_avg:572.63ms +grad accum step:8572/14336 +step:34288/57344 train_time:19635039ms step_avg:572.65ms +step:34289/57344 train_time:19635054ms step_avg:572.63ms +step:34290/57344 train_time:19635303ms step_avg:572.62ms +step:34291/57344 train_time:19635857ms step_avg:572.62ms +grad accum step:8573/14336 +step:34292/57344 train_time:19637264ms step_avg:572.65ms +step:34293/57344 train_time:19637279ms step_avg:572.63ms +step:34294/57344 train_time:19637496ms step_avg:572.62ms +step:34295/57344 train_time:19638038ms step_avg:572.62ms +grad accum step:8574/14336 +step:34296/57344 train_time:19639381ms step_avg:572.64ms +step:34297/57344 train_time:19639401ms step_avg:572.63ms +step:34298/57344 train_time:19639637ms step_avg:572.62ms +step:34299/57344 train_time:19640197ms step_avg:572.62ms +grad accum step:8575/14336 +step:34300/57344 train_time:19641509ms step_avg:572.64ms +step:34301/57344 train_time:19641525ms step_avg:572.62ms +step:34302/57344 train_time:19641773ms step_avg:572.61ms +step:34303/57344 train_time:19642320ms step_avg:572.61ms +grad accum step:8576/14336 +step:34304/57344 train_time:19643601ms step_avg:572.63ms +step:34304/57344 val_loss:6.396570 train_time:19643605ms step_avg:572.63ms +step:34305/57344 train_time:19643617ms step_avg:572.62ms +step:34306/57344 train_time:19643840ms step_avg:572.61ms +step:34307/57344 train_time:19644378ms step_avg:572.61ms +grad accum step:8577/14336 +step:34308/57344 train_time:19645685ms step_avg:572.63ms +step:34309/57344 train_time:19645702ms step_avg:572.61ms +step:34310/57344 train_time:19645954ms step_avg:572.60ms +step:34311/57344 train_time:19646518ms step_avg:572.60ms +grad accum step:8578/14336 +step:34312/57344 train_time:19647822ms step_avg:572.62ms +step:34313/57344 train_time:19647838ms step_avg:572.61ms +step:34314/57344 train_time:19648090ms step_avg:572.60ms +step:34315/57344 train_time:19648644ms step_avg:572.60ms +grad accum step:8579/14336 +step:34316/57344 train_time:19649969ms step_avg:572.62ms +step:34317/57344 train_time:19649986ms step_avg:572.60ms +step:34318/57344 train_time:19650242ms step_avg:572.59ms +step:34319/57344 train_time:19650818ms step_avg:572.59ms +grad accum step:8580/14336 +step:34320/57344 train_time:19652097ms step_avg:572.61ms +step:34321/57344 train_time:19652114ms step_avg:572.60ms +step:34322/57344 train_time:19652363ms step_avg:572.59ms +step:34323/57344 train_time:19652915ms step_avg:572.59ms +grad accum step:8581/14336 +step:34324/57344 train_time:19654232ms step_avg:572.61ms +step:34325/57344 train_time:19654250ms step_avg:572.59ms +step:34326/57344 train_time:19654495ms step_avg:572.58ms +step:34327/57344 train_time:19655037ms step_avg:572.58ms +grad accum step:8582/14336 +step:34328/57344 train_time:19656367ms step_avg:572.60ms +step:34329/57344 train_time:19656384ms step_avg:572.59ms +step:34330/57344 train_time:19656632ms step_avg:572.58ms +step:34331/57344 train_time:19657182ms step_avg:572.58ms +grad accum step:8583/14336 +step:34332/57344 train_time:19658467ms step_avg:572.60ms +step:34333/57344 train_time:19658484ms step_avg:572.58ms +step:34334/57344 train_time:19658733ms step_avg:572.57ms +step:34335/57344 train_time:19659293ms step_avg:572.57ms +grad accum step:8584/14336 +step:34336/57344 train_time:19660600ms step_avg:572.59ms +step:34337/57344 train_time:19660617ms step_avg:572.58ms +step:34338/57344 train_time:19660868ms step_avg:572.57ms +step:34339/57344 train_time:19661420ms step_avg:572.57ms +grad accum step:8585/14336 +step:34340/57344 train_time:19662725ms step_avg:572.59ms +step:34341/57344 train_time:19662742ms step_avg:572.57ms +step:34342/57344 train_time:19662988ms step_avg:572.56ms +step:34343/57344 train_time:19663534ms step_avg:572.56ms +grad accum step:8586/14336 +step:34344/57344 train_time:19664841ms step_avg:572.58ms +step:34345/57344 train_time:19664858ms step_avg:572.57ms +step:34346/57344 train_time:19665102ms step_avg:572.56ms +step:34347/57344 train_time:19665639ms step_avg:572.56ms +grad accum step:8587/14336 +step:34348/57344 train_time:19666910ms step_avg:572.58ms +step:34349/57344 train_time:19666927ms step_avg:572.56ms +step:34350/57344 train_time:19667179ms step_avg:572.55ms +step:34351/57344 train_time:19667730ms step_avg:572.55ms +grad accum step:8588/14336 +step:34352/57344 train_time:19669035ms step_avg:572.57ms +step:34353/57344 train_time:19669052ms step_avg:572.56ms +step:34354/57344 train_time:19669302ms step_avg:572.55ms +step:34355/57344 train_time:19669863ms step_avg:572.55ms +grad accum step:8589/14336 +step:34356/57344 train_time:19671186ms step_avg:572.57ms +step:34357/57344 train_time:19671203ms step_avg:572.55ms +step:34358/57344 train_time:19671457ms step_avg:572.54ms +step:34359/57344 train_time:19672023ms step_avg:572.54ms +grad accum step:8590/14336 +step:34360/57344 train_time:19673365ms step_avg:572.57ms +step:34361/57344 train_time:19673382ms step_avg:572.55ms +step:34362/57344 train_time:19673631ms step_avg:572.54ms +step:34363/57344 train_time:19674186ms step_avg:572.54ms +grad accum step:8591/14336 +step:34364/57344 train_time:19675531ms step_avg:572.56ms +step:34365/57344 train_time:19675548ms step_avg:572.55ms +step:34366/57344 train_time:19675795ms step_avg:572.54ms +step:34367/57344 train_time:19676345ms step_avg:572.54ms +grad accum step:8592/14336 +step:34368/57344 train_time:19677683ms step_avg:572.56ms +step:34368/57344 val_loss:6.378235 train_time:19677684ms step_avg:572.56ms +step:34369/57344 train_time:19677696ms step_avg:572.54ms +step:34370/57344 train_time:19677917ms step_avg:572.53ms +step:34371/57344 train_time:19678465ms step_avg:572.53ms +grad accum step:8593/14336 +step:34372/57344 train_time:19679769ms step_avg:572.55ms +step:34373/57344 train_time:19679786ms step_avg:572.54ms +step:34374/57344 train_time:19680032ms step_avg:572.53ms +step:34375/57344 train_time:19680584ms step_avg:572.53ms +grad accum step:8594/14336 +step:34376/57344 train_time:19681917ms step_avg:572.55ms +step:34377/57344 train_time:19681934ms step_avg:572.53ms +step:34378/57344 train_time:19682178ms step_avg:572.52ms +step:34379/57344 train_time:19682726ms step_avg:572.52ms +grad accum step:8595/14336 +step:34380/57344 train_time:19684029ms step_avg:572.54ms +step:34381/57344 train_time:19684046ms step_avg:572.53ms +step:34382/57344 train_time:19684291ms step_avg:572.52ms +step:34383/57344 train_time:19684839ms step_avg:572.52ms +grad accum step:8596/14336 +step:34384/57344 train_time:19686230ms step_avg:572.54ms +step:34385/57344 train_time:19686247ms step_avg:572.52ms +step:34386/57344 train_time:19686502ms step_avg:572.52ms +step:34387/57344 train_time:19687073ms step_avg:572.51ms +grad accum step:8597/14336 +step:34388/57344 train_time:19688376ms step_avg:572.54ms +step:34389/57344 train_time:19688393ms step_avg:572.52ms +step:34390/57344 train_time:19688639ms step_avg:572.51ms +step:34391/57344 train_time:19689182ms step_avg:572.51ms +grad accum step:8598/14336 +step:34392/57344 train_time:19690462ms step_avg:572.53ms +step:34393/57344 train_time:19690478ms step_avg:572.51ms +step:34394/57344 train_time:19690731ms step_avg:572.50ms +step:34395/57344 train_time:19691293ms step_avg:572.50ms +grad accum step:8599/14336 +step:34396/57344 train_time:19692609ms step_avg:572.53ms +step:34397/57344 train_time:19692626ms step_avg:572.51ms +step:34398/57344 train_time:19692872ms step_avg:572.50ms +step:34399/57344 train_time:19693419ms step_avg:572.50ms +grad accum step:8600/14336 +step:34400/57344 train_time:19694713ms step_avg:572.52ms +step:34401/57344 train_time:19694730ms step_avg:572.50ms +step:34402/57344 train_time:19694980ms step_avg:572.50ms +step:34403/57344 train_time:19695533ms step_avg:572.49ms +grad accum step:8601/14336 +step:34404/57344 train_time:19696847ms step_avg:572.52ms +step:34405/57344 train_time:19696864ms step_avg:572.50ms +step:34406/57344 train_time:19697111ms step_avg:572.49ms +step:34407/57344 train_time:19697647ms step_avg:572.49ms +grad accum step:8602/14336 +step:34408/57344 train_time:19698948ms step_avg:572.51ms +step:34409/57344 train_time:19698965ms step_avg:572.49ms +step:34410/57344 train_time:19699209ms step_avg:572.48ms +step:34411/57344 train_time:19699751ms step_avg:572.48ms +grad accum step:8603/14336 +step:34412/57344 train_time:19701072ms step_avg:572.51ms +step:34413/57344 train_time:19701089ms step_avg:572.49ms +step:34414/57344 train_time:19701334ms step_avg:572.48ms +step:34415/57344 train_time:19701877ms step_avg:572.48ms +grad accum step:8604/14336 +step:34416/57344 train_time:19703192ms step_avg:572.50ms +step:34417/57344 train_time:19703209ms step_avg:572.48ms +step:34418/57344 train_time:19703458ms step_avg:572.48ms +step:34419/57344 train_time:19704004ms step_avg:572.47ms +grad accum step:8605/14336 +step:34420/57344 train_time:19705274ms step_avg:572.49ms +step:34421/57344 train_time:19705291ms step_avg:572.48ms +step:34422/57344 train_time:19705539ms step_avg:572.47ms +step:34423/57344 train_time:19706094ms step_avg:572.47ms +grad accum step:8606/14336 +step:34424/57344 train_time:19707420ms step_avg:572.49ms +step:34425/57344 train_time:19707437ms step_avg:572.47ms +step:34426/57344 train_time:19707690ms step_avg:572.47ms +step:34427/57344 train_time:19708246ms step_avg:572.46ms +grad accum step:8607/14336 +step:34428/57344 train_time:19709552ms step_avg:572.49ms +step:34429/57344 train_time:19709569ms step_avg:572.47ms +step:34430/57344 train_time:19709820ms step_avg:572.46ms +step:34431/57344 train_time:19710374ms step_avg:572.46ms +grad accum step:8608/14336 +step:34432/57344 train_time:19711673ms step_avg:572.48ms +step:34432/57344 val_loss:6.352869 train_time:19711674ms step_avg:572.48ms +step:34433/57344 train_time:19711686ms step_avg:572.46ms +step:34434/57344 train_time:19711909ms step_avg:572.45ms +step:34435/57344 train_time:19712445ms step_avg:572.45ms +grad accum step:8609/14336 +step:34436/57344 train_time:19713750ms step_avg:572.48ms +step:34437/57344 train_time:19713767ms step_avg:572.46ms +step:34438/57344 train_time:19714018ms step_avg:572.45ms +step:34439/57344 train_time:19714579ms step_avg:572.45ms +grad accum step:8610/14336 +step:34440/57344 train_time:19716010ms step_avg:572.47ms +step:34441/57344 train_time:19716026ms step_avg:572.46ms +step:34442/57344 train_time:19716291ms step_avg:572.45ms +step:34443/57344 train_time:19716875ms step_avg:572.45ms +grad accum step:8611/14336 +step:34444/57344 train_time:19718183ms step_avg:572.47ms +step:34445/57344 train_time:19718200ms step_avg:572.45ms +step:34446/57344 train_time:19718446ms step_avg:572.45ms +step:34447/57344 train_time:19718998ms step_avg:572.44ms +grad accum step:8612/14336 +step:34448/57344 train_time:19720305ms step_avg:572.47ms +step:34449/57344 train_time:19720322ms step_avg:572.45ms +step:34450/57344 train_time:19720573ms step_avg:572.44ms +step:34451/57344 train_time:19721130ms step_avg:572.44ms +grad accum step:8613/14336 +step:34452/57344 train_time:19722425ms step_avg:572.46ms +step:34453/57344 train_time:19722442ms step_avg:572.44ms +step:34454/57344 train_time:19722689ms step_avg:572.44ms +step:34455/57344 train_time:19723234ms step_avg:572.43ms +grad accum step:8614/14336 +step:34456/57344 train_time:19724505ms step_avg:572.45ms +step:34457/57344 train_time:19724522ms step_avg:572.44ms +step:34458/57344 train_time:19724771ms step_avg:572.43ms +step:34459/57344 train_time:19725322ms step_avg:572.43ms +grad accum step:8615/14336 +step:34460/57344 train_time:19726616ms step_avg:572.45ms +step:34461/57344 train_time:19726630ms step_avg:572.43ms +step:34462/57344 train_time:19726876ms step_avg:572.42ms +step:34463/57344 train_time:19727427ms step_avg:572.42ms +grad accum step:8616/14336 +step:34464/57344 train_time:19728745ms step_avg:572.45ms +step:34465/57344 train_time:19728762ms step_avg:572.43ms +step:34466/57344 train_time:19729012ms step_avg:572.42ms +step:34467/57344 train_time:19729566ms step_avg:572.42ms +grad accum step:8617/14336 +step:34468/57344 train_time:19730849ms step_avg:572.44ms +step:34469/57344 train_time:19730866ms step_avg:572.42ms +step:34470/57344 train_time:19731115ms step_avg:572.41ms +step:34471/57344 train_time:19731661ms step_avg:572.41ms +grad accum step:8618/14336 +step:34472/57344 train_time:19732960ms step_avg:572.43ms +step:34473/57344 train_time:19732977ms step_avg:572.42ms +step:34474/57344 train_time:19733220ms step_avg:572.41ms +step:34475/57344 train_time:19733759ms step_avg:572.41ms +grad accum step:8619/14336 +step:34476/57344 train_time:19735062ms step_avg:572.43ms +step:34477/57344 train_time:19735078ms step_avg:572.41ms +step:34478/57344 train_time:19735325ms step_avg:572.40ms +step:34479/57344 train_time:19735882ms step_avg:572.40ms +grad accum step:8620/14336 +step:34480/57344 train_time:19737209ms step_avg:572.42ms +step:34481/57344 train_time:19737227ms step_avg:572.41ms +step:34482/57344 train_time:19737473ms step_avg:572.40ms +step:34483/57344 train_time:19738043ms step_avg:572.40ms +grad accum step:8621/14336 +step:34484/57344 train_time:19739348ms step_avg:572.42ms +step:34485/57344 train_time:19739360ms step_avg:572.40ms +step:34486/57344 train_time:19739603ms step_avg:572.39ms +step:34487/57344 train_time:19740146ms step_avg:572.39ms +grad accum step:8622/14336 +step:34488/57344 train_time:19741456ms step_avg:572.42ms +step:34489/57344 train_time:19741473ms step_avg:572.40ms +step:34490/57344 train_time:19741729ms step_avg:572.39ms +step:34491/57344 train_time:19742303ms step_avg:572.39ms +grad accum step:8623/14336 +step:34492/57344 train_time:19743679ms step_avg:572.41ms +step:34493/57344 train_time:19743698ms step_avg:572.40ms +step:34494/57344 train_time:19743940ms step_avg:572.39ms +step:34495/57344 train_time:19744494ms step_avg:572.39ms +grad accum step:8624/14336 +step:34496/57344 train_time:19745819ms step_avg:572.41ms +step:34496/57344 val_loss:6.326517 train_time:19745820ms step_avg:572.41ms +step:34497/57344 train_time:19745832ms step_avg:572.39ms +step:34498/57344 train_time:19746049ms step_avg:572.38ms +step:34499/57344 train_time:19746589ms step_avg:572.38ms +grad accum step:8625/14336 +step:34500/57344 train_time:19747921ms step_avg:572.40ms +step:34501/57344 train_time:19747936ms step_avg:572.39ms +step:34502/57344 train_time:19748184ms step_avg:572.38ms +step:34503/57344 train_time:19748731ms step_avg:572.38ms +grad accum step:8626/14336 +step:34504/57344 train_time:19750029ms step_avg:572.40ms +step:34505/57344 train_time:19750046ms step_avg:572.38ms +step:34506/57344 train_time:19750287ms step_avg:572.37ms +step:34507/57344 train_time:19750838ms step_avg:572.37ms +grad accum step:8627/14336 +step:34508/57344 train_time:19752134ms step_avg:572.39ms +step:34509/57344 train_time:19752149ms step_avg:572.38ms +step:34510/57344 train_time:19752390ms step_avg:572.37ms +step:34511/57344 train_time:19752944ms step_avg:572.37ms +grad accum step:8628/14336 +step:34512/57344 train_time:19754263ms step_avg:572.39ms +step:34513/57344 train_time:19754280ms step_avg:572.37ms +step:34514/57344 train_time:19754533ms step_avg:572.36ms +step:34515/57344 train_time:19755095ms step_avg:572.36ms +grad accum step:8629/14336 +step:34516/57344 train_time:19756392ms step_avg:572.38ms +step:34517/57344 train_time:19756408ms step_avg:572.37ms +step:34518/57344 train_time:19756655ms step_avg:572.36ms +step:34519/57344 train_time:19757214ms step_avg:572.36ms +grad accum step:8630/14336 +step:34520/57344 train_time:19758524ms step_avg:572.38ms +step:34521/57344 train_time:19758542ms step_avg:572.36ms +step:34522/57344 train_time:19758783ms step_avg:572.35ms +step:34523/57344 train_time:19759335ms step_avg:572.35ms +grad accum step:8631/14336 +step:34524/57344 train_time:19760643ms step_avg:572.37ms +step:34525/57344 train_time:19760662ms step_avg:572.36ms +step:34526/57344 train_time:19760902ms step_avg:572.35ms +step:34527/57344 train_time:19761443ms step_avg:572.35ms +grad accum step:8632/14336 +step:34528/57344 train_time:19762760ms step_avg:572.37ms +step:34529/57344 train_time:19762777ms step_avg:572.35ms +step:34530/57344 train_time:19763028ms step_avg:572.34ms +step:34531/57344 train_time:19763572ms step_avg:572.34ms +grad accum step:8633/14336 +step:34532/57344 train_time:19764848ms step_avg:572.36ms +step:34533/57344 train_time:19764871ms step_avg:572.35ms +step:34534/57344 train_time:19765108ms step_avg:572.34ms +step:34535/57344 train_time:19765660ms step_avg:572.34ms +grad accum step:8634/14336 +step:34536/57344 train_time:19766994ms step_avg:572.36ms +step:34537/57344 train_time:19767011ms step_avg:572.34ms +step:34538/57344 train_time:19767252ms step_avg:572.33ms +step:34539/57344 train_time:19767807ms step_avg:572.33ms +grad accum step:8635/14336 +step:34540/57344 train_time:19769138ms step_avg:572.35ms +step:34541/57344 train_time:19769154ms step_avg:572.34ms +step:34542/57344 train_time:19769395ms step_avg:572.33ms +step:34543/57344 train_time:19769939ms step_avg:572.33ms +grad accum step:8636/14336 +step:34544/57344 train_time:19771254ms step_avg:572.35ms +step:34545/57344 train_time:19771269ms step_avg:572.33ms +step:34546/57344 train_time:19771515ms step_avg:572.32ms +step:34547/57344 train_time:19772075ms step_avg:572.32ms +grad accum step:8637/14336 +step:34548/57344 train_time:19773427ms step_avg:572.35ms +step:34549/57344 train_time:19773442ms step_avg:572.33ms +step:34550/57344 train_time:19773686ms step_avg:572.32ms +step:34551/57344 train_time:19774227ms step_avg:572.32ms +grad accum step:8638/14336 +step:34552/57344 train_time:19775544ms step_avg:572.34ms +step:34553/57344 train_time:19775560ms step_avg:572.33ms +step:34554/57344 train_time:19775803ms step_avg:572.32ms +step:34555/57344 train_time:19776345ms step_avg:572.32ms +grad accum step:8639/14336 +step:34556/57344 train_time:19777638ms step_avg:572.34ms +step:34557/57344 train_time:19777655ms step_avg:572.32ms +step:34558/57344 train_time:19777905ms step_avg:572.31ms +step:34559/57344 train_time:19778458ms step_avg:572.31ms +grad accum step:8640/14336 +step:34560/57344 train_time:19779770ms step_avg:572.33ms +step:34560/57344 val_loss:6.299848 train_time:19779775ms step_avg:572.33ms +step:34561/57344 train_time:19779787ms step_avg:572.32ms +step:34562/57344 train_time:19780007ms step_avg:572.31ms +step:34563/57344 train_time:19780548ms step_avg:572.30ms +grad accum step:8641/14336 +step:34564/57344 train_time:19781852ms step_avg:572.33ms +step:34565/57344 train_time:19781877ms step_avg:572.31ms +step:34566/57344 train_time:19782117ms step_avg:572.30ms +step:34567/57344 train_time:19782679ms step_avg:572.30ms +grad accum step:8642/14336 +step:34568/57344 train_time:19783970ms step_avg:572.32ms +step:34569/57344 train_time:19783986ms step_avg:572.30ms +step:34570/57344 train_time:19784233ms step_avg:572.29ms +step:34571/57344 train_time:19784776ms step_avg:572.29ms +grad accum step:8643/14336 +step:34572/57344 train_time:19786071ms step_avg:572.31ms +step:34573/57344 train_time:19786088ms step_avg:572.30ms +step:34574/57344 train_time:19786336ms step_avg:572.29ms +step:34575/57344 train_time:19786888ms step_avg:572.29ms +grad accum step:8644/14336 +step:34576/57344 train_time:19788195ms step_avg:572.31ms +step:34577/57344 train_time:19788212ms step_avg:572.29ms +step:34578/57344 train_time:19788461ms step_avg:572.28ms +step:34579/57344 train_time:19789008ms step_avg:572.28ms +grad accum step:8645/14336 +step:34580/57344 train_time:19790374ms step_avg:572.31ms +step:34581/57344 train_time:19790391ms step_avg:572.29ms +step:34582/57344 train_time:19790629ms step_avg:572.28ms +step:34583/57344 train_time:19791169ms step_avg:572.28ms +grad accum step:8646/14336 +step:34584/57344 train_time:19792512ms step_avg:572.30ms +step:34585/57344 train_time:19792527ms step_avg:572.29ms +step:34586/57344 train_time:19792771ms step_avg:572.28ms +step:34587/57344 train_time:19793313ms step_avg:572.28ms +grad accum step:8647/14336 +step:34588/57344 train_time:19794621ms step_avg:572.30ms +step:34589/57344 train_time:19794636ms step_avg:572.28ms +step:34590/57344 train_time:19794884ms step_avg:572.27ms +step:34591/57344 train_time:19795439ms step_avg:572.27ms +grad accum step:8648/14336 +step:34592/57344 train_time:19796774ms step_avg:572.29ms +step:34593/57344 train_time:19796790ms step_avg:572.28ms +step:34594/57344 train_time:19797037ms step_avg:572.27ms +step:34595/57344 train_time:19797580ms step_avg:572.27ms +grad accum step:8649/14336 +step:34596/57344 train_time:19798917ms step_avg:572.29ms +step:34597/57344 train_time:19798931ms step_avg:572.27ms +step:34598/57344 train_time:19799179ms step_avg:572.26ms +step:34599/57344 train_time:19799735ms step_avg:572.26ms +grad accum step:8650/14336 +step:34600/57344 train_time:19801062ms step_avg:572.29ms +step:34601/57344 train_time:19801077ms step_avg:572.27ms +step:34602/57344 train_time:19801326ms step_avg:572.26ms +step:34603/57344 train_time:19801875ms step_avg:572.26ms +grad accum step:8651/14336 +step:34604/57344 train_time:19803174ms step_avg:572.28ms +step:34605/57344 train_time:19803191ms step_avg:572.26ms +step:34606/57344 train_time:19803440ms step_avg:572.25ms +step:34607/57344 train_time:19803995ms step_avg:572.25ms +grad accum step:8652/14336 +step:34608/57344 train_time:19805309ms step_avg:572.28ms +step:34609/57344 train_time:19805324ms step_avg:572.26ms +step:34610/57344 train_time:19805577ms step_avg:572.25ms +step:34611/57344 train_time:19806145ms step_avg:572.25ms +grad accum step:8653/14336 +step:34612/57344 train_time:19807480ms step_avg:572.27ms +step:34613/57344 train_time:19807495ms step_avg:572.26ms +step:34614/57344 train_time:19807741ms step_avg:572.25ms +step:34615/57344 train_time:19808282ms step_avg:572.25ms +grad accum step:8654/14336 +step:34616/57344 train_time:19809587ms step_avg:572.27ms +step:34617/57344 train_time:19809604ms step_avg:572.25ms +step:34618/57344 train_time:19809852ms step_avg:572.24ms +step:34619/57344 train_time:19810404ms step_avg:572.24ms +grad accum step:8655/14336 +step:34620/57344 train_time:19811764ms step_avg:572.26ms +step:34621/57344 train_time:19811783ms step_avg:572.25ms +step:34622/57344 train_time:19812026ms step_avg:572.24ms +step:34623/57344 train_time:19812568ms step_avg:572.24ms +grad accum step:8656/14336 +step:34624/57344 train_time:19813842ms step_avg:572.26ms +step:34624/57344 val_loss:6.275382 train_time:19813842ms step_avg:572.26ms +step:34625/57344 train_time:19813854ms step_avg:572.24ms +step:34626/57344 train_time:19814079ms step_avg:572.23ms +step:34627/57344 train_time:19814629ms step_avg:572.23ms +grad accum step:8657/14336 +step:34628/57344 train_time:19815910ms step_avg:572.25ms +step:34629/57344 train_time:19815924ms step_avg:572.23ms +step:34630/57344 train_time:19816170ms step_avg:572.23ms +step:34631/57344 train_time:19816718ms step_avg:572.22ms +grad accum step:8658/14336 +step:34632/57344 train_time:19818085ms step_avg:572.25ms +step:34633/57344 train_time:19818105ms step_avg:572.23ms +step:34634/57344 train_time:19818345ms step_avg:572.22ms +step:34635/57344 train_time:19818905ms step_avg:572.22ms +grad accum step:8659/14336 +step:34636/57344 train_time:19820213ms step_avg:572.24ms +step:34637/57344 train_time:19820230ms step_avg:572.23ms +step:34638/57344 train_time:19820477ms step_avg:572.22ms +step:34639/57344 train_time:19821023ms step_avg:572.22ms +grad accum step:8660/14336 +step:34640/57344 train_time:19822338ms step_avg:572.24ms +step:34641/57344 train_time:19822355ms step_avg:572.22ms +step:34642/57344 train_time:19822604ms step_avg:572.21ms +step:34643/57344 train_time:19823154ms step_avg:572.21ms +grad accum step:8661/14336 +step:34644/57344 train_time:19824473ms step_avg:572.23ms +step:34645/57344 train_time:19824492ms step_avg:572.22ms +step:34646/57344 train_time:19824729ms step_avg:572.21ms +step:34647/57344 train_time:19825287ms step_avg:572.21ms +grad accum step:8662/14336 +step:34648/57344 train_time:19826636ms step_avg:572.23ms +step:34649/57344 train_time:19826650ms step_avg:572.21ms +step:34650/57344 train_time:19826893ms step_avg:572.20ms +step:34651/57344 train_time:19827441ms step_avg:572.20ms +grad accum step:8663/14336 +step:34652/57344 train_time:19828747ms step_avg:572.23ms +step:34653/57344 train_time:19828764ms step_avg:572.21ms +step:34654/57344 train_time:19829028ms step_avg:572.20ms +step:34655/57344 train_time:19829614ms step_avg:572.20ms +grad accum step:8664/14336 +step:34656/57344 train_time:19830910ms step_avg:572.22ms +step:34657/57344 train_time:19830927ms step_avg:572.21ms +step:34658/57344 train_time:19831175ms step_avg:572.20ms +step:34659/57344 train_time:19831720ms step_avg:572.20ms +grad accum step:8665/14336 +step:34660/57344 train_time:19833077ms step_avg:572.22ms +step:34661/57344 train_time:19833097ms step_avg:572.20ms +step:34662/57344 train_time:19833337ms step_avg:572.19ms +step:34663/57344 train_time:19833896ms step_avg:572.19ms +grad accum step:8666/14336 +step:34664/57344 train_time:19835231ms step_avg:572.21ms +step:34665/57344 train_time:19835248ms step_avg:572.20ms +step:34666/57344 train_time:19835490ms step_avg:572.19ms +step:34667/57344 train_time:19836033ms step_avg:572.19ms +grad accum step:8667/14336 +step:34668/57344 train_time:19837347ms step_avg:572.21ms +step:34669/57344 train_time:19837363ms step_avg:572.19ms +step:34670/57344 train_time:19837607ms step_avg:572.18ms +step:34671/57344 train_time:19838152ms step_avg:572.18ms +grad accum step:8668/14336 +step:34672/57344 train_time:19839453ms step_avg:572.20ms +step:34673/57344 train_time:19839468ms step_avg:572.19ms +step:34674/57344 train_time:19839711ms step_avg:572.18ms +step:34675/57344 train_time:19840260ms step_avg:572.18ms +grad accum step:8669/14336 +step:34676/57344 train_time:19841562ms step_avg:572.20ms +step:34677/57344 train_time:19841579ms step_avg:572.18ms +step:34678/57344 train_time:19841824ms step_avg:572.17ms +step:34679/57344 train_time:19842362ms step_avg:572.17ms +grad accum step:8670/14336 +step:34680/57344 train_time:19843690ms step_avg:572.19ms +step:34681/57344 train_time:19843707ms step_avg:572.18ms +step:34682/57344 train_time:19843943ms step_avg:572.17ms +step:34683/57344 train_time:19844489ms step_avg:572.17ms +grad accum step:8671/14336 +step:34684/57344 train_time:19845787ms step_avg:572.19ms +step:34685/57344 train_time:19845802ms step_avg:572.17ms +step:34686/57344 train_time:19846043ms step_avg:572.16ms +step:34687/57344 train_time:19846589ms step_avg:572.16ms +grad accum step:8672/14336 +step:34688/57344 train_time:19847892ms step_avg:572.18ms +step:34688/57344 val_loss:6.239287 train_time:19847898ms step_avg:572.18ms +step:34689/57344 train_time:19847910ms step_avg:572.17ms +step:34690/57344 train_time:19848132ms step_avg:572.16ms +step:34691/57344 train_time:19848686ms step_avg:572.16ms +grad accum step:8673/14336 +step:34692/57344 train_time:19850008ms step_avg:572.18ms +step:34693/57344 train_time:19850025ms step_avg:572.16ms +step:34694/57344 train_time:19850271ms step_avg:572.15ms +step:34695/57344 train_time:19850820ms step_avg:572.15ms +grad accum step:8674/14336 +step:34696/57344 train_time:19852135ms step_avg:572.17ms +step:34697/57344 train_time:19852152ms step_avg:572.16ms +step:34698/57344 train_time:19852409ms step_avg:572.15ms +step:34699/57344 train_time:19852991ms step_avg:572.15ms +grad accum step:8675/14336 +step:34700/57344 train_time:19854316ms step_avg:572.17ms +step:34701/57344 train_time:19854333ms step_avg:572.15ms +step:34702/57344 train_time:19854580ms step_avg:572.15ms +step:34703/57344 train_time:19855122ms step_avg:572.14ms +grad accum step:8676/14336 +step:34704/57344 train_time:19856418ms step_avg:572.17ms +step:34705/57344 train_time:19856435ms step_avg:572.15ms +step:34706/57344 train_time:19856682ms step_avg:572.14ms +step:34707/57344 train_time:19857222ms step_avg:572.14ms +grad accum step:8677/14336 +step:34708/57344 train_time:19858505ms step_avg:572.16ms +step:34709/57344 train_time:19858520ms step_avg:572.14ms +step:34710/57344 train_time:19858769ms step_avg:572.13ms +step:34711/57344 train_time:19859332ms step_avg:572.13ms +grad accum step:8678/14336 +step:34712/57344 train_time:19860652ms step_avg:572.16ms +step:34713/57344 train_time:19860666ms step_avg:572.14ms +step:34714/57344 train_time:19860912ms step_avg:572.13ms +step:34715/57344 train_time:19861460ms step_avg:572.13ms +grad accum step:8679/14336 +step:34716/57344 train_time:19862808ms step_avg:572.15ms +step:34717/57344 train_time:19862825ms step_avg:572.14ms +step:34718/57344 train_time:19863077ms step_avg:572.13ms +step:34719/57344 train_time:19863655ms step_avg:572.13ms +grad accum step:8680/14336 +step:34720/57344 train_time:19864995ms step_avg:572.15ms +step:34721/57344 train_time:19865010ms step_avg:572.13ms +step:34722/57344 train_time:19865254ms step_avg:572.12ms +step:34723/57344 train_time:19865802ms step_avg:572.12ms +grad accum step:8681/14336 +step:34724/57344 train_time:19867080ms step_avg:572.14ms +step:34725/57344 train_time:19867096ms step_avg:572.13ms +step:34726/57344 train_time:19867349ms step_avg:572.12ms +step:34727/57344 train_time:19867918ms step_avg:572.12ms +grad accum step:8682/14336 +step:34728/57344 train_time:19869229ms step_avg:572.14ms +step:34729/57344 train_time:19869244ms step_avg:572.12ms +step:34730/57344 train_time:19869487ms step_avg:572.11ms +step:34731/57344 train_time:19870029ms step_avg:572.11ms +grad accum step:8683/14336 +step:34732/57344 train_time:19871376ms step_avg:572.13ms +step:34733/57344 train_time:19871391ms step_avg:572.12ms +step:34734/57344 train_time:19871632ms step_avg:572.11ms +step:34735/57344 train_time:19872177ms step_avg:572.11ms +grad accum step:8684/14336 +step:34736/57344 train_time:19873495ms step_avg:572.13ms +step:34737/57344 train_time:19873510ms step_avg:572.11ms +step:34738/57344 train_time:19873758ms step_avg:572.10ms +step:34739/57344 train_time:19874311ms step_avg:572.10ms +grad accum step:8685/14336 +step:34740/57344 train_time:19875629ms step_avg:572.13ms +step:34741/57344 train_time:19875645ms step_avg:572.11ms +step:34742/57344 train_time:19875890ms step_avg:572.10ms +step:34743/57344 train_time:19876448ms step_avg:572.10ms +grad accum step:8686/14336 +step:34744/57344 train_time:19877743ms step_avg:572.12ms +step:34745/57344 train_time:19877759ms step_avg:572.10ms +step:34746/57344 train_time:19878005ms step_avg:572.09ms +step:34747/57344 train_time:19878555ms step_avg:572.09ms +grad accum step:8687/14336 +step:34748/57344 train_time:19879908ms step_avg:572.12ms +step:34749/57344 train_time:19879925ms step_avg:572.10ms +step:34750/57344 train_time:19880176ms step_avg:572.09ms +step:34751/57344 train_time:19880739ms step_avg:572.09ms +grad accum step:8688/14336 +step:34752/57344 train_time:19882033ms step_avg:572.11ms +step:34752/57344 val_loss:6.222122 train_time:19882034ms step_avg:572.11ms +step:34753/57344 train_time:19882045ms step_avg:572.10ms +step:34754/57344 train_time:19882268ms step_avg:572.09ms +step:34755/57344 train_time:19882818ms step_avg:572.09ms +grad accum step:8689/14336 +step:34756/57344 train_time:19884161ms step_avg:572.11ms +step:34757/57344 train_time:19884175ms step_avg:572.09ms +step:34758/57344 train_time:19884421ms step_avg:572.08ms +step:34759/57344 train_time:19884974ms step_avg:572.08ms +grad accum step:8690/14336 +step:34760/57344 train_time:19886306ms step_avg:572.10ms +step:34761/57344 train_time:19886322ms step_avg:572.09ms +step:34762/57344 train_time:19886565ms step_avg:572.08ms +step:34763/57344 train_time:19887115ms step_avg:572.08ms +grad accum step:8691/14336 +step:34764/57344 train_time:19888403ms step_avg:572.10ms +step:34765/57344 train_time:19888421ms step_avg:572.08ms +step:34766/57344 train_time:19888661ms step_avg:572.07ms +step:34767/57344 train_time:19889220ms step_avg:572.07ms +grad accum step:8692/14336 +step:34768/57344 train_time:19890501ms step_avg:572.09ms +step:34769/57344 train_time:19890518ms step_avg:572.08ms +step:34770/57344 train_time:19890766ms step_avg:572.07ms +step:34771/57344 train_time:19891311ms step_avg:572.07ms +grad accum step:8693/14336 +step:34772/57344 train_time:19892622ms step_avg:572.09ms +step:34773/57344 train_time:19892639ms step_avg:572.07ms +step:34774/57344 train_time:19892885ms step_avg:572.06ms +step:34775/57344 train_time:19893428ms step_avg:572.06ms +grad accum step:8694/14336 +step:34776/57344 train_time:19894726ms step_avg:572.08ms +step:34777/57344 train_time:19894743ms step_avg:572.07ms +step:34778/57344 train_time:19894992ms step_avg:572.06ms +step:34779/57344 train_time:19895540ms step_avg:572.06ms +grad accum step:8695/14336 +step:34780/57344 train_time:19896875ms step_avg:572.08ms +step:34781/57344 train_time:19896892ms step_avg:572.06ms +step:34782/57344 train_time:19897146ms step_avg:572.05ms +step:34783/57344 train_time:19897698ms step_avg:572.05ms +grad accum step:8696/14336 +step:34784/57344 train_time:19899018ms step_avg:572.07ms +step:34785/57344 train_time:19899031ms step_avg:572.06ms +step:34786/57344 train_time:19899280ms step_avg:572.05ms +step:34787/57344 train_time:19899844ms step_avg:572.05ms +grad accum step:8697/14336 +step:34788/57344 train_time:19901331ms step_avg:572.07ms +step:34789/57344 train_time:19901344ms step_avg:572.06ms +step:34790/57344 train_time:19901569ms step_avg:572.05ms +step:34791/57344 train_time:19902137ms step_avg:572.05ms +grad accum step:8698/14336 +step:34792/57344 train_time:19903453ms step_avg:572.07ms +step:34793/57344 train_time:19903466ms step_avg:572.05ms +step:34794/57344 train_time:19903712ms step_avg:572.04ms +step:34795/57344 train_time:19904252ms step_avg:572.04ms +grad accum step:8699/14336 +step:34796/57344 train_time:19905549ms step_avg:572.06ms +step:34797/57344 train_time:19905566ms step_avg:572.05ms +step:34798/57344 train_time:19905823ms step_avg:572.04ms +step:34799/57344 train_time:19906395ms step_avg:572.04ms +grad accum step:8700/14336 +step:34800/57344 train_time:19907694ms step_avg:572.06ms +step:34801/57344 train_time:19907710ms step_avg:572.04ms +step:34802/57344 train_time:19907956ms step_avg:572.03ms +step:34803/57344 train_time:19908504ms step_avg:572.03ms +grad accum step:8701/14336 +step:34804/57344 train_time:19909896ms step_avg:572.06ms +step:34805/57344 train_time:19909913ms step_avg:572.04ms +step:34806/57344 train_time:19910156ms step_avg:572.03ms +step:34807/57344 train_time:19910716ms step_avg:572.03ms +grad accum step:8702/14336 +step:34808/57344 train_time:19912027ms step_avg:572.05ms +step:34809/57344 train_time:19912043ms step_avg:572.04ms +step:34810/57344 train_time:19912289ms step_avg:572.03ms +step:34811/57344 train_time:19912838ms step_avg:572.03ms +grad accum step:8703/14336 +step:34812/57344 train_time:19914155ms step_avg:572.05ms +step:34813/57344 train_time:19914168ms step_avg:572.03ms +step:34814/57344 train_time:19914407ms step_avg:572.02ms +step:34815/57344 train_time:19914939ms step_avg:572.02ms +grad accum step:8704/14336 +step:34816/57344 train_time:19916241ms step_avg:572.04ms +step:34816/57344 val_loss:6.199553 train_time:19916253ms step_avg:572.04ms +step:34817/57344 train_time:19916265ms step_avg:572.03ms +step:34818/57344 train_time:19916490ms step_avg:572.02ms +step:34819/57344 train_time:19917046ms step_avg:572.02ms +grad accum step:8705/14336 +step:34820/57344 train_time:19918422ms step_avg:572.04ms +step:34821/57344 train_time:19918436ms step_avg:572.02ms +step:34822/57344 train_time:19918652ms step_avg:572.01ms +step:34823/57344 train_time:19919186ms step_avg:572.01ms +grad accum step:8706/14336 +step:34824/57344 train_time:19920463ms step_avg:572.03ms +step:34825/57344 train_time:19920479ms step_avg:572.02ms +step:34826/57344 train_time:19920734ms step_avg:572.01ms +step:34827/57344 train_time:19921314ms step_avg:572.01ms +grad accum step:8707/14336 +step:34828/57344 train_time:19922625ms step_avg:572.03ms +step:34829/57344 train_time:19922641ms step_avg:572.01ms +step:34830/57344 train_time:19922893ms step_avg:572.00ms +step:34831/57344 train_time:19923451ms step_avg:572.00ms +grad accum step:8708/14336 +step:34832/57344 train_time:19924781ms step_avg:572.03ms +step:34833/57344 train_time:19924797ms step_avg:572.01ms +step:34834/57344 train_time:19925054ms step_avg:572.00ms +step:34835/57344 train_time:19925622ms step_avg:572.00ms +grad accum step:8709/14336 +step:34836/57344 train_time:19926910ms step_avg:572.02ms +step:34837/57344 train_time:19926927ms step_avg:572.00ms +step:34838/57344 train_time:19927168ms step_avg:572.00ms +step:34839/57344 train_time:19927714ms step_avg:571.99ms +grad accum step:8710/14336 +step:34840/57344 train_time:19929017ms step_avg:572.02ms +step:34841/57344 train_time:19929031ms step_avg:572.00ms +step:34842/57344 train_time:19929276ms step_avg:571.99ms +step:34843/57344 train_time:19929825ms step_avg:571.99ms +grad accum step:8711/14336 +step:34844/57344 train_time:19931130ms step_avg:572.01ms +step:34845/57344 train_time:19931146ms step_avg:571.99ms +step:34846/57344 train_time:19931393ms step_avg:571.99ms +step:34847/57344 train_time:19931960ms step_avg:571.98ms +grad accum step:8712/14336 +step:34848/57344 train_time:19933309ms step_avg:572.01ms +step:34849/57344 train_time:19933326ms step_avg:571.99ms +step:34850/57344 train_time:19933567ms step_avg:571.98ms +step:34851/57344 train_time:19934117ms step_avg:571.98ms +grad accum step:8713/14336 +step:34852/57344 train_time:19935416ms step_avg:572.00ms +step:34853/57344 train_time:19935436ms step_avg:571.99ms +step:34854/57344 train_time:19935677ms step_avg:571.98ms +step:34855/57344 train_time:19936230ms step_avg:571.98ms +grad accum step:8714/14336 +step:34856/57344 train_time:19937566ms step_avg:572.00ms +step:34857/57344 train_time:19937597ms step_avg:571.98ms +step:34858/57344 train_time:19937821ms step_avg:571.97ms +step:34859/57344 train_time:19938381ms step_avg:571.97ms +grad accum step:8715/14336 +step:34860/57344 train_time:19939719ms step_avg:571.99ms +step:34861/57344 train_time:19939737ms step_avg:571.98ms +step:34862/57344 train_time:19939981ms step_avg:571.97ms +step:34863/57344 train_time:19940534ms step_avg:571.97ms +grad accum step:8716/14336 +step:34864/57344 train_time:19941855ms step_avg:571.99ms +step:34865/57344 train_time:19941869ms step_avg:571.97ms +step:34866/57344 train_time:19942117ms step_avg:571.96ms +step:34867/57344 train_time:19942673ms step_avg:571.96ms +grad accum step:8717/14336 +step:34868/57344 train_time:19943985ms step_avg:571.99ms +step:34869/57344 train_time:19944001ms step_avg:571.97ms +step:34870/57344 train_time:19944248ms step_avg:571.96ms +step:34871/57344 train_time:19944793ms step_avg:571.96ms +grad accum step:8718/14336 +step:34872/57344 train_time:19946066ms step_avg:571.98ms +step:34873/57344 train_time:19946083ms step_avg:571.96ms +step:34874/57344 train_time:19946331ms step_avg:571.95ms +step:34875/57344 train_time:19946877ms step_avg:571.95ms +grad accum step:8719/14336 +step:34876/57344 train_time:19948181ms step_avg:571.97ms +step:34877/57344 train_time:19948196ms step_avg:571.96ms +step:34878/57344 train_time:19948442ms step_avg:571.95ms +step:34879/57344 train_time:19948986ms step_avg:571.95ms +grad accum step:8720/14336 +step:34880/57344 train_time:19950298ms step_avg:571.97ms +step:34880/57344 val_loss:6.168828 train_time:19950300ms step_avg:571.97ms +step:34881/57344 train_time:19950312ms step_avg:571.95ms +step:34882/57344 train_time:19950536ms step_avg:571.94ms +step:34883/57344 train_time:19951078ms step_avg:571.94ms +grad accum step:8721/14336 +step:34884/57344 train_time:19952432ms step_avg:571.97ms +step:34885/57344 train_time:19952448ms step_avg:571.95ms +step:34886/57344 train_time:19952703ms step_avg:571.94ms +step:34887/57344 train_time:19953268ms step_avg:571.94ms +grad accum step:8722/14336 +step:34888/57344 train_time:19954583ms step_avg:571.96ms +step:34889/57344 train_time:19954599ms step_avg:571.95ms +step:34890/57344 train_time:19954842ms step_avg:571.94ms +step:34891/57344 train_time:19955390ms step_avg:571.94ms +grad accum step:8723/14336 +step:34892/57344 train_time:19956731ms step_avg:571.96ms +step:34893/57344 train_time:19956745ms step_avg:571.94ms +step:34894/57344 train_time:19957004ms step_avg:571.93ms +step:34895/57344 train_time:19957589ms step_avg:571.93ms +grad accum step:8724/14336 +step:34896/57344 train_time:19958909ms step_avg:571.95ms +step:34897/57344 train_time:19958925ms step_avg:571.94ms +step:34898/57344 train_time:19959181ms step_avg:571.93ms +step:34899/57344 train_time:19959758ms step_avg:571.93ms +grad accum step:8725/14336 +step:34900/57344 train_time:19961052ms step_avg:571.95ms +step:34901/57344 train_time:19961072ms step_avg:571.93ms +step:34902/57344 train_time:19961310ms step_avg:571.92ms +step:34903/57344 train_time:19961855ms step_avg:571.92ms +grad accum step:8726/14336 +step:34904/57344 train_time:19963135ms step_avg:571.94ms +step:34905/57344 train_time:19963151ms step_avg:571.93ms +step:34906/57344 train_time:19963398ms step_avg:571.92ms +step:34907/57344 train_time:19963955ms step_avg:571.92ms +grad accum step:8727/14336 +step:34908/57344 train_time:19965281ms step_avg:571.94ms +step:34909/57344 train_time:19965295ms step_avg:571.92ms +step:34910/57344 train_time:19965543ms step_avg:571.91ms +step:34911/57344 train_time:19966096ms step_avg:571.91ms +grad accum step:8728/14336 +step:34912/57344 train_time:19967403ms step_avg:571.94ms +step:34913/57344 train_time:19967419ms step_avg:571.92ms +step:34914/57344 train_time:19967668ms step_avg:571.91ms +step:34915/57344 train_time:19968223ms step_avg:571.91ms +grad accum step:8729/14336 +step:34916/57344 train_time:19969552ms step_avg:571.93ms +step:34917/57344 train_time:19969567ms step_avg:571.92ms +step:34918/57344 train_time:19969822ms step_avg:571.91ms +step:34919/57344 train_time:19970387ms step_avg:571.91ms +grad accum step:8730/14336 +step:34920/57344 train_time:19971686ms step_avg:571.93ms +step:34921/57344 train_time:19971703ms step_avg:571.91ms +step:34922/57344 train_time:19971938ms step_avg:571.90ms +step:34923/57344 train_time:19972492ms step_avg:571.90ms +grad accum step:8731/14336 +step:34924/57344 train_time:19973858ms step_avg:571.92ms +step:34925/57344 train_time:19973874ms step_avg:571.91ms +step:34926/57344 train_time:19974117ms step_avg:571.90ms +step:34927/57344 train_time:19974669ms step_avg:571.90ms +grad accum step:8732/14336 +step:34928/57344 train_time:19976037ms step_avg:571.92ms +step:34929/57344 train_time:19976053ms step_avg:571.90ms +step:34930/57344 train_time:19976299ms step_avg:571.90ms +step:34931/57344 train_time:19976858ms step_avg:571.89ms +grad accum step:8733/14336 +step:34932/57344 train_time:19978223ms step_avg:571.92ms +step:34933/57344 train_time:19978243ms step_avg:571.90ms +step:34934/57344 train_time:19978490ms step_avg:571.89ms +step:34935/57344 train_time:19979054ms step_avg:571.89ms +grad accum step:8734/14336 +step:34936/57344 train_time:19980350ms step_avg:571.91ms +step:34937/57344 train_time:19980365ms step_avg:571.90ms +step:34938/57344 train_time:19980611ms step_avg:571.89ms +step:34939/57344 train_time:19981157ms step_avg:571.89ms +grad accum step:8735/14336 +step:34940/57344 train_time:19982472ms step_avg:571.91ms +step:34941/57344 train_time:19982487ms step_avg:571.89ms +step:34942/57344 train_time:19982736ms step_avg:571.88ms +step:34943/57344 train_time:19983287ms step_avg:571.88ms +grad accum step:8736/14336 +step:34944/57344 train_time:19984588ms step_avg:571.90ms +step:34944/57344 val_loss:6.146615 train_time:19984595ms step_avg:571.90ms +step:34945/57344 train_time:19984607ms step_avg:571.89ms +step:34946/57344 train_time:19984835ms step_avg:571.88ms +step:34947/57344 train_time:19985386ms step_avg:571.88ms +grad accum step:8737/14336 +step:34948/57344 train_time:19986707ms step_avg:571.90ms +step:34949/57344 train_time:19986722ms step_avg:571.88ms +step:34950/57344 train_time:19986969ms step_avg:571.87ms +step:34951/57344 train_time:19987505ms step_avg:571.87ms +grad accum step:8738/14336 +step:34952/57344 train_time:19988781ms step_avg:571.89ms +step:34953/57344 train_time:19988796ms step_avg:571.88ms +step:34954/57344 train_time:19989045ms step_avg:571.87ms +step:34955/57344 train_time:19989596ms step_avg:571.87ms +grad accum step:8739/14336 +step:34956/57344 train_time:19990919ms step_avg:571.89ms +step:34957/57344 train_time:19990935ms step_avg:571.87ms +step:34958/57344 train_time:19991187ms step_avg:571.86ms +step:34959/57344 train_time:19991756ms step_avg:571.86ms +grad accum step:8740/14336 +step:34960/57344 train_time:19993092ms step_avg:571.88ms +step:34961/57344 train_time:19993108ms step_avg:571.87ms +step:34962/57344 train_time:19993362ms step_avg:571.86ms +step:34963/57344 train_time:19993929ms step_avg:571.86ms +grad accum step:8741/14336 +step:34964/57344 train_time:19995258ms step_avg:571.88ms +step:34965/57344 train_time:19995277ms step_avg:571.87ms +step:34966/57344 train_time:19995524ms step_avg:571.86ms +step:34967/57344 train_time:19996087ms step_avg:571.86ms +grad accum step:8742/14336 +step:34968/57344 train_time:19997446ms step_avg:571.88ms +step:34969/57344 train_time:19997463ms step_avg:571.86ms +step:34970/57344 train_time:19997707ms step_avg:571.85ms +step:34971/57344 train_time:19998253ms step_avg:571.85ms +grad accum step:8743/14336 +step:34972/57344 train_time:19999615ms step_avg:571.88ms +step:34973/57344 train_time:19999630ms step_avg:571.86ms +step:34974/57344 train_time:19999873ms step_avg:571.85ms +step:34975/57344 train_time:20000420ms step_avg:571.85ms +grad accum step:8744/14336 +step:34976/57344 train_time:20001721ms step_avg:571.87ms +step:34977/57344 train_time:20001738ms step_avg:571.85ms +step:34978/57344 train_time:20001982ms step_avg:571.84ms +step:34979/57344 train_time:20002524ms step_avg:571.84ms +grad accum step:8745/14336 +step:34980/57344 train_time:20003823ms step_avg:571.86ms +step:34981/57344 train_time:20003840ms step_avg:571.85ms +step:34982/57344 train_time:20004090ms step_avg:571.84ms +step:34983/57344 train_time:20004646ms step_avg:571.84ms +grad accum step:8746/14336 +step:34984/57344 train_time:20005947ms step_avg:571.86ms +step:34985/57344 train_time:20005964ms step_avg:571.84ms +step:34986/57344 train_time:20006212ms step_avg:571.83ms +step:34987/57344 train_time:20006761ms step_avg:571.83ms +grad accum step:8747/14336 +step:34988/57344 train_time:20008067ms step_avg:571.86ms +step:34989/57344 train_time:20008084ms step_avg:571.84ms +step:34990/57344 train_time:20008332ms step_avg:571.83ms +step:34991/57344 train_time:20008882ms step_avg:571.83ms +grad accum step:8748/14336 +step:34992/57344 train_time:20010191ms step_avg:571.85ms +step:34993/57344 train_time:20010207ms step_avg:571.83ms +step:34994/57344 train_time:20010453ms step_avg:571.83ms +step:34995/57344 train_time:20011007ms step_avg:571.82ms +grad accum step:8749/14336 +step:34996/57344 train_time:20012340ms step_avg:571.85ms +step:34997/57344 train_time:20012355ms step_avg:571.83ms +step:34998/57344 train_time:20012606ms step_avg:571.82ms +step:34999/57344 train_time:20013164ms step_avg:571.82ms +grad accum step:8750/14336 +step:35000/57344 train_time:20014469ms step_avg:571.84ms +step:35001/57344 train_time:20014486ms step_avg:571.83ms +step:35002/57344 train_time:20014753ms step_avg:571.82ms +step:35003/57344 train_time:20015353ms step_avg:571.82ms +grad accum step:8751/14336 +step:35004/57344 train_time:20016675ms step_avg:571.84ms +step:35005/57344 train_time:20016691ms step_avg:571.82ms +step:35006/57344 train_time:20016938ms step_avg:571.81ms +step:35007/57344 train_time:20017488ms step_avg:571.81ms +grad accum step:8752/14336 +step:35008/57344 train_time:20018803ms step_avg:571.84ms +step:35008/57344 val_loss:6.132919 train_time:20018805ms step_avg:571.84ms +step:35009/57344 train_time:20018817ms step_avg:571.82ms +step:35010/57344 train_time:20019041ms step_avg:571.81ms +step:35011/57344 train_time:20019592ms step_avg:571.81ms +grad accum step:8753/14336 +step:35012/57344 train_time:20020905ms step_avg:571.83ms +step:35013/57344 train_time:20020924ms step_avg:571.81ms +step:35014/57344 train_time:20021166ms step_avg:571.80ms +step:35015/57344 train_time:20021726ms step_avg:571.80ms +grad accum step:8754/14336 +step:35016/57344 train_time:20023042ms step_avg:571.83ms +step:35017/57344 train_time:20023059ms step_avg:571.81ms +step:35018/57344 train_time:20023308ms step_avg:571.80ms +step:35019/57344 train_time:20023863ms step_avg:571.80ms +grad accum step:8755/14336 +step:35020/57344 train_time:20025173ms step_avg:571.82ms +step:35021/57344 train_time:20025189ms step_avg:571.81ms +step:35022/57344 train_time:20025439ms step_avg:571.80ms +step:35023/57344 train_time:20025995ms step_avg:571.80ms +grad accum step:8756/14336 +step:35024/57344 train_time:20027297ms step_avg:571.82ms +step:35025/57344 train_time:20027313ms step_avg:571.80ms +step:35026/57344 train_time:20027565ms step_avg:571.79ms +step:35027/57344 train_time:20028123ms step_avg:571.79ms +grad accum step:8757/14336 +step:35028/57344 train_time:20029448ms step_avg:571.81ms +step:35029/57344 train_time:20029463ms step_avg:571.80ms +step:35030/57344 train_time:20029694ms step_avg:571.79ms +step:35031/57344 train_time:20030255ms step_avg:571.79ms +grad accum step:8758/14336 +step:35032/57344 train_time:20031598ms step_avg:571.81ms +step:35033/57344 train_time:20031612ms step_avg:571.79ms +step:35034/57344 train_time:20031856ms step_avg:571.78ms +step:35035/57344 train_time:20032399ms step_avg:571.78ms +grad accum step:8759/14336 +step:35036/57344 train_time:20033724ms step_avg:571.80ms +step:35037/57344 train_time:20033738ms step_avg:571.79ms +step:35038/57344 train_time:20033985ms step_avg:571.78ms +step:35039/57344 train_time:20034529ms step_avg:571.78ms +grad accum step:8760/14336 +step:35040/57344 train_time:20035819ms step_avg:571.80ms +step:35041/57344 train_time:20035834ms step_avg:571.78ms +step:35042/57344 train_time:20036079ms step_avg:571.77ms +step:35043/57344 train_time:20036631ms step_avg:571.77ms +grad accum step:8761/14336 +step:35044/57344 train_time:20037987ms step_avg:571.80ms +step:35045/57344 train_time:20038004ms step_avg:571.78ms +step:35046/57344 train_time:20038254ms step_avg:571.77ms +step:35047/57344 train_time:20038805ms step_avg:571.77ms +grad accum step:8762/14336 +step:35048/57344 train_time:20040127ms step_avg:571.79ms +step:35049/57344 train_time:20040143ms step_avg:571.78ms +step:35050/57344 train_time:20040387ms step_avg:571.77ms +step:35051/57344 train_time:20040926ms step_avg:571.76ms +grad accum step:8763/14336 +step:35052/57344 train_time:20042252ms step_avg:571.79ms +step:35053/57344 train_time:20042268ms step_avg:571.77ms +step:35054/57344 train_time:20042511ms step_avg:571.76ms +step:35055/57344 train_time:20043061ms step_avg:571.76ms +grad accum step:8764/14336 +step:35056/57344 train_time:20044383ms step_avg:571.78ms +step:35057/57344 train_time:20044397ms step_avg:571.77ms +step:35058/57344 train_time:20044644ms step_avg:571.76ms +step:35059/57344 train_time:20045188ms step_avg:571.76ms +grad accum step:8765/14336 +step:35060/57344 train_time:20046556ms step_avg:571.78ms +step:35061/57344 train_time:20046573ms step_avg:571.76ms +step:35062/57344 train_time:20046831ms step_avg:571.75ms +step:35063/57344 train_time:20047421ms step_avg:571.75ms +grad accum step:8766/14336 +step:35064/57344 train_time:20048751ms step_avg:571.78ms +step:35065/57344 train_time:20048768ms step_avg:571.76ms +step:35066/57344 train_time:20049017ms step_avg:571.75ms +step:35067/57344 train_time:20049572ms step_avg:571.75ms +grad accum step:8767/14336 +step:35068/57344 train_time:20050864ms step_avg:571.77ms +step:35069/57344 train_time:20050881ms step_avg:571.76ms +step:35070/57344 train_time:20051131ms step_avg:571.75ms +step:35071/57344 train_time:20051678ms step_avg:571.75ms +grad accum step:8768/14336 +step:35072/57344 train_time:20053075ms step_avg:571.77ms +step:35072/57344 val_loss:6.118301 train_time:20053076ms step_avg:571.77ms +step:35073/57344 train_time:20053088ms step_avg:571.75ms +step:35074/57344 train_time:20053310ms step_avg:571.74ms +step:35075/57344 train_time:20053855ms step_avg:571.74ms +grad accum step:8769/14336 +step:35076/57344 train_time:20055157ms step_avg:571.76ms +step:35077/57344 train_time:20055173ms step_avg:571.75ms +step:35078/57344 train_time:20055430ms step_avg:571.74ms +step:35079/57344 train_time:20056012ms step_avg:571.74ms +grad accum step:8770/14336 +step:35080/57344 train_time:20057326ms step_avg:571.76ms +step:35081/57344 train_time:20057342ms step_avg:571.74ms +step:35082/57344 train_time:20057589ms step_avg:571.73ms +step:35083/57344 train_time:20058137ms step_avg:571.73ms +grad accum step:8771/14336 +step:35084/57344 train_time:20059468ms step_avg:571.76ms +step:35085/57344 train_time:20059484ms step_avg:571.74ms +step:35086/57344 train_time:20059731ms step_avg:571.73ms +step:35087/57344 train_time:20060278ms step_avg:571.73ms +grad accum step:8772/14336 +step:35088/57344 train_time:20061596ms step_avg:571.75ms +step:35089/57344 train_time:20061613ms step_avg:571.74ms +step:35090/57344 train_time:20061862ms step_avg:571.73ms +step:35091/57344 train_time:20062414ms step_avg:571.73ms +grad accum step:8773/14336 +step:35092/57344 train_time:20063706ms step_avg:571.75ms +step:35093/57344 train_time:20063721ms step_avg:571.73ms +step:35094/57344 train_time:20063967ms step_avg:571.72ms +step:35095/57344 train_time:20064517ms step_avg:571.72ms +grad accum step:8774/14336 +step:35096/57344 train_time:20065843ms step_avg:571.74ms +step:35097/57344 train_time:20065859ms step_avg:571.73ms +step:35098/57344 train_time:20066106ms step_avg:571.72ms +step:35099/57344 train_time:20066660ms step_avg:571.72ms +grad accum step:8775/14336 +step:35100/57344 train_time:20067997ms step_avg:571.74ms +step:35101/57344 train_time:20068013ms step_avg:571.72ms +step:35102/57344 train_time:20068257ms step_avg:571.71ms +step:35103/57344 train_time:20068804ms step_avg:571.71ms +grad accum step:8776/14336 +step:35104/57344 train_time:20070110ms step_avg:571.73ms +step:35105/57344 train_time:20070127ms step_avg:571.72ms +step:35106/57344 train_time:20070374ms step_avg:571.71ms +step:35107/57344 train_time:20070923ms step_avg:571.71ms +grad accum step:8777/14336 +step:35108/57344 train_time:20072247ms step_avg:571.73ms +step:35109/57344 train_time:20072264ms step_avg:571.71ms +step:35110/57344 train_time:20072523ms step_avg:571.70ms +step:35111/57344 train_time:20073100ms step_avg:571.70ms +grad accum step:8778/14336 +step:35112/57344 train_time:20074418ms step_avg:571.73ms +step:35113/57344 train_time:20074433ms step_avg:571.71ms +step:35114/57344 train_time:20074683ms step_avg:571.70ms +step:35115/57344 train_time:20075233ms step_avg:571.70ms +grad accum step:8779/14336 +step:35116/57344 train_time:20076576ms step_avg:571.72ms +step:35117/57344 train_time:20076591ms step_avg:571.71ms +step:35118/57344 train_time:20076836ms step_avg:571.70ms +step:35119/57344 train_time:20077387ms step_avg:571.70ms +grad accum step:8780/14336 +step:35120/57344 train_time:20078714ms step_avg:571.72ms +step:35121/57344 train_time:20078729ms step_avg:571.70ms +step:35122/57344 train_time:20078975ms step_avg:571.69ms +step:35123/57344 train_time:20079520ms step_avg:571.69ms +grad accum step:8781/14336 +step:35124/57344 train_time:20080816ms step_avg:571.71ms +step:35125/57344 train_time:20080829ms step_avg:571.70ms +step:35126/57344 train_time:20081079ms step_avg:571.69ms +step:35127/57344 train_time:20081630ms step_avg:571.69ms +grad accum step:8782/14336 +step:35128/57344 train_time:20082938ms step_avg:571.71ms +step:35129/57344 train_time:20082953ms step_avg:571.69ms +step:35130/57344 train_time:20083202ms step_avg:571.68ms +step:35131/57344 train_time:20083752ms step_avg:571.68ms +grad accum step:8783/14336 +step:35132/57344 train_time:20085049ms step_avg:571.70ms +step:35133/57344 train_time:20085063ms step_avg:571.69ms +step:35134/57344 train_time:20085313ms step_avg:571.68ms +step:35135/57344 train_time:20085875ms step_avg:571.68ms +grad accum step:8784/14336 +step:35136/57344 train_time:20087191ms step_avg:571.70ms +step:35136/57344 val_loss:6.089632 train_time:20087191ms step_avg:571.70ms +step:35137/57344 train_time:20087203ms step_avg:571.68ms +step:35138/57344 train_time:20087423ms step_avg:571.67ms +step:35139/57344 train_time:20087961ms step_avg:571.67ms +grad accum step:8785/14336 +step:35140/57344 train_time:20089298ms step_avg:571.69ms +step:35141/57344 train_time:20089315ms step_avg:571.68ms +step:35142/57344 train_time:20089568ms step_avg:571.67ms +step:35143/57344 train_time:20090128ms step_avg:571.67ms +grad accum step:8786/14336 +step:35144/57344 train_time:20091442ms step_avg:571.69ms +step:35145/57344 train_time:20091460ms step_avg:571.67ms +step:35146/57344 train_time:20091713ms step_avg:571.66ms +step:35147/57344 train_time:20092284ms step_avg:571.66ms +grad accum step:8787/14336 +step:35148/57344 train_time:20093598ms step_avg:571.69ms +step:35149/57344 train_time:20093616ms step_avg:571.67ms +step:35150/57344 train_time:20093860ms step_avg:571.66ms +step:35151/57344 train_time:20094400ms step_avg:571.66ms +grad accum step:8788/14336 +step:35152/57344 train_time:20095719ms step_avg:571.68ms +step:35153/57344 train_time:20095733ms step_avg:571.66ms +step:35154/57344 train_time:20095984ms step_avg:571.66ms +step:35155/57344 train_time:20096545ms step_avg:571.66ms +grad accum step:8789/14336 +step:35156/57344 train_time:20097849ms step_avg:571.68ms +step:35157/57344 train_time:20097865ms step_avg:571.66ms +step:35158/57344 train_time:20098112ms step_avg:571.65ms +step:35159/57344 train_time:20098659ms step_avg:571.65ms +grad accum step:8790/14336 +step:35160/57344 train_time:20100002ms step_avg:571.67ms +step:35161/57344 train_time:20100022ms step_avg:571.66ms +step:35162/57344 train_time:20100263ms step_avg:571.65ms +step:35163/57344 train_time:20100802ms step_avg:571.65ms +grad accum step:8791/14336 +step:35164/57344 train_time:20102097ms step_avg:571.67ms +step:35165/57344 train_time:20102113ms step_avg:571.65ms +step:35166/57344 train_time:20102365ms step_avg:571.64ms +step:35167/57344 train_time:20102930ms step_avg:571.64ms +grad accum step:8792/14336 +step:35168/57344 train_time:20104286ms step_avg:571.66ms +step:35169/57344 train_time:20104303ms step_avg:571.65ms +step:35170/57344 train_time:20104550ms step_avg:571.64ms +step:35171/57344 train_time:20105104ms step_avg:571.64ms +grad accum step:8793/14336 +step:35172/57344 train_time:20106442ms step_avg:571.66ms +step:35173/57344 train_time:20106458ms step_avg:571.64ms +step:35174/57344 train_time:20106709ms step_avg:571.64ms +step:35175/57344 train_time:20107271ms step_avg:571.64ms +grad accum step:8794/14336 +step:35176/57344 train_time:20108582ms step_avg:571.66ms +step:35177/57344 train_time:20108600ms step_avg:571.64ms +step:35178/57344 train_time:20108841ms step_avg:571.63ms +step:35179/57344 train_time:20109385ms step_avg:571.63ms +grad accum step:8795/14336 +step:35180/57344 train_time:20110672ms step_avg:571.65ms +step:35181/57344 train_time:20110689ms step_avg:571.63ms +step:35182/57344 train_time:20110935ms step_avg:571.63ms +step:35183/57344 train_time:20111477ms step_avg:571.62ms +grad accum step:8796/14336 +step:35184/57344 train_time:20112768ms step_avg:571.65ms +step:35185/57344 train_time:20112785ms step_avg:571.63ms +step:35186/57344 train_time:20113033ms step_avg:571.62ms +step:35187/57344 train_time:20113582ms step_avg:571.62ms +grad accum step:8797/14336 +step:35188/57344 train_time:20114879ms step_avg:571.64ms +step:35189/57344 train_time:20114895ms step_avg:571.62ms +step:35190/57344 train_time:20115136ms step_avg:571.62ms +step:35191/57344 train_time:20115682ms step_avg:571.61ms +grad accum step:8798/14336 +step:35192/57344 train_time:20117015ms step_avg:571.64ms +step:35193/57344 train_time:20117028ms step_avg:571.62ms +step:35194/57344 train_time:20117264ms step_avg:571.61ms +step:35195/57344 train_time:20117806ms step_avg:571.61ms +grad accum step:8799/14336 +step:35196/57344 train_time:20119147ms step_avg:571.63ms +step:35197/57344 train_time:20119164ms step_avg:571.62ms +step:35198/57344 train_time:20119413ms step_avg:571.61ms +step:35199/57344 train_time:20119973ms step_avg:571.61ms +grad accum step:8800/14336 +step:35200/57344 train_time:20121252ms step_avg:571.63ms +step:35200/57344 val_loss:6.074313 train_time:20121263ms step_avg:571.63ms +step:35201/57344 train_time:20121275ms step_avg:571.61ms +step:35202/57344 train_time:20121509ms step_avg:571.60ms +step:35203/57344 train_time:20122080ms step_avg:571.60ms +grad accum step:8801/14336 +step:35204/57344 train_time:20123390ms step_avg:571.62ms +step:35205/57344 train_time:20123407ms step_avg:571.61ms +step:35206/57344 train_time:20123655ms step_avg:571.60ms +step:35207/57344 train_time:20124198ms step_avg:571.60ms +grad accum step:8802/14336 +step:35208/57344 train_time:20125502ms step_avg:571.62ms +step:35209/57344 train_time:20125517ms step_avg:571.60ms +step:35210/57344 train_time:20125764ms step_avg:571.59ms +step:35211/57344 train_time:20126310ms step_avg:571.59ms +grad accum step:8803/14336 +step:35212/57344 train_time:20127631ms step_avg:571.61ms +step:35213/57344 train_time:20127647ms step_avg:571.60ms +step:35214/57344 train_time:20127891ms step_avg:571.59ms +step:35215/57344 train_time:20128457ms step_avg:571.59ms +grad accum step:8804/14336 +step:35216/57344 train_time:20129780ms step_avg:571.61ms +step:35217/57344 train_time:20129796ms step_avg:571.59ms +step:35218/57344 train_time:20130042ms step_avg:571.58ms +step:35219/57344 train_time:20130598ms step_avg:571.58ms +grad accum step:8805/14336 +step:35220/57344 train_time:20131931ms step_avg:571.61ms +step:35221/57344 train_time:20131946ms step_avg:571.59ms +step:35222/57344 train_time:20132190ms step_avg:571.58ms +step:35223/57344 train_time:20132735ms step_avg:571.58ms +grad accum step:8806/14336 +step:35224/57344 train_time:20134021ms step_avg:571.60ms +step:35225/57344 train_time:20134036ms step_avg:571.58ms +step:35226/57344 train_time:20134279ms step_avg:571.57ms +step:35227/57344 train_time:20134828ms step_avg:571.57ms +grad accum step:8807/14336 +step:35228/57344 train_time:20136136ms step_avg:571.59ms +step:35229/57344 train_time:20136154ms step_avg:571.58ms +step:35230/57344 train_time:20136392ms step_avg:571.57ms +step:35231/57344 train_time:20136943ms step_avg:571.57ms +grad accum step:8808/14336 +step:35232/57344 train_time:20138245ms step_avg:571.59ms +step:35233/57344 train_time:20138260ms step_avg:571.57ms +step:35234/57344 train_time:20138504ms step_avg:571.56ms +step:35235/57344 train_time:20139066ms step_avg:571.56ms +grad accum step:8809/14336 +step:35236/57344 train_time:20140385ms step_avg:571.59ms +step:35237/57344 train_time:20140400ms step_avg:571.57ms +step:35238/57344 train_time:20140648ms step_avg:571.56ms +step:35239/57344 train_time:20141197ms step_avg:571.56ms +grad accum step:8810/14336 +step:35240/57344 train_time:20142499ms step_avg:571.58ms +step:35241/57344 train_time:20142513ms step_avg:571.56ms +step:35242/57344 train_time:20142757ms step_avg:571.56ms +step:35243/57344 train_time:20143304ms step_avg:571.55ms +grad accum step:8811/14336 +step:35244/57344 train_time:20144623ms step_avg:571.58ms +step:35245/57344 train_time:20144640ms step_avg:571.56ms +step:35246/57344 train_time:20144890ms step_avg:571.55ms +step:35247/57344 train_time:20145449ms step_avg:571.55ms +grad accum step:8812/14336 +step:35248/57344 train_time:20146766ms step_avg:571.57ms +step:35249/57344 train_time:20146781ms step_avg:571.56ms +step:35250/57344 train_time:20147031ms step_avg:571.55ms +step:35251/57344 train_time:20147586ms step_avg:571.55ms +grad accum step:8813/14336 +step:35252/57344 train_time:20148934ms step_avg:571.57ms +step:35253/57344 train_time:20148952ms step_avg:571.55ms +step:35254/57344 train_time:20149194ms step_avg:571.54ms +step:35255/57344 train_time:20149732ms step_avg:571.54ms +grad accum step:8814/14336 +step:35256/57344 train_time:20151040ms step_avg:571.56ms +step:35257/57344 train_time:20151059ms step_avg:571.55ms +step:35258/57344 train_time:20151299ms step_avg:571.54ms +step:35259/57344 train_time:20151838ms step_avg:571.54ms +grad accum step:8815/14336 +step:35260/57344 train_time:20153117ms step_avg:571.56ms +step:35261/57344 train_time:20153132ms step_avg:571.54ms +step:35262/57344 train_time:20153375ms step_avg:571.53ms +step:35263/57344 train_time:20153923ms step_avg:571.53ms +grad accum step:8816/14336 +step:35264/57344 train_time:20155214ms step_avg:571.55ms +step:35264/57344 val_loss:6.051312 train_time:20155222ms step_avg:571.55ms +step:35265/57344 train_time:20155233ms step_avg:571.54ms +step:35266/57344 train_time:20155460ms step_avg:571.53ms +step:35267/57344 train_time:20156013ms step_avg:571.53ms +grad accum step:8817/14336 +step:35268/57344 train_time:20157306ms step_avg:571.55ms +step:35269/57344 train_time:20157322ms step_avg:571.53ms +step:35270/57344 train_time:20157571ms step_avg:571.52ms +step:35271/57344 train_time:20158124ms step_avg:571.52ms +grad accum step:8818/14336 +step:35272/57344 train_time:20159430ms step_avg:571.54ms +step:35273/57344 train_time:20159444ms step_avg:571.53ms +step:35274/57344 train_time:20159695ms step_avg:571.52ms +step:35275/57344 train_time:20160265ms step_avg:571.52ms +grad accum step:8819/14336 +step:35276/57344 train_time:20161661ms step_avg:571.54ms +step:35277/57344 train_time:20161677ms step_avg:571.52ms +step:35278/57344 train_time:20161924ms step_avg:571.52ms +step:35279/57344 train_time:20162486ms step_avg:571.52ms +grad accum step:8820/14336 +step:35280/57344 train_time:20163810ms step_avg:571.54ms +step:35281/57344 train_time:20163825ms step_avg:571.52ms +step:35282/57344 train_time:20164064ms step_avg:571.51ms +step:35283/57344 train_time:20164608ms step_avg:571.51ms +grad accum step:8821/14336 +step:35284/57344 train_time:20165910ms step_avg:571.53ms +step:35285/57344 train_time:20165927ms step_avg:571.52ms +step:35286/57344 train_time:20166170ms step_avg:571.51ms +step:35287/57344 train_time:20166713ms step_avg:571.51ms +grad accum step:8822/14336 +step:35288/57344 train_time:20168015ms step_avg:571.53ms +step:35289/57344 train_time:20168032ms step_avg:571.51ms +step:35290/57344 train_time:20168279ms step_avg:571.50ms +step:35291/57344 train_time:20169179ms step_avg:571.51ms +grad accum step:8823/14336 +step:35292/57344 train_time:20170274ms step_avg:571.53ms +step:35293/57344 train_time:20170286ms step_avg:571.51ms +step:35294/57344 train_time:20170509ms step_avg:571.50ms +step:35295/57344 train_time:20171049ms step_avg:571.50ms +grad accum step:8824/14336 +step:35296/57344 train_time:20172379ms step_avg:571.52ms +step:35297/57344 train_time:20172396ms step_avg:571.50ms +step:35298/57344 train_time:20172646ms step_avg:571.50ms +step:35299/57344 train_time:20173197ms step_avg:571.49ms +grad accum step:8825/14336 +step:35300/57344 train_time:20174488ms step_avg:571.52ms +step:35301/57344 train_time:20174505ms step_avg:571.50ms +step:35302/57344 train_time:20174749ms step_avg:571.49ms +step:35303/57344 train_time:20175293ms step_avg:571.49ms +grad accum step:8826/14336 +step:35304/57344 train_time:20176591ms step_avg:571.51ms +step:35305/57344 train_time:20176608ms step_avg:571.49ms +step:35306/57344 train_time:20176854ms step_avg:571.49ms +step:35307/57344 train_time:20177400ms step_avg:571.48ms +grad accum step:8827/14336 +step:35308/57344 train_time:20178726ms step_avg:571.51ms +step:35309/57344 train_time:20178743ms step_avg:571.49ms +step:35310/57344 train_time:20178990ms step_avg:571.48ms +step:35311/57344 train_time:20179541ms step_avg:571.48ms +grad accum step:8828/14336 +step:35312/57344 train_time:20180841ms step_avg:571.50ms +step:35313/57344 train_time:20180858ms step_avg:571.49ms +step:35314/57344 train_time:20181108ms step_avg:571.48ms +step:35315/57344 train_time:20181665ms step_avg:571.48ms +grad accum step:8829/14336 +step:35316/57344 train_time:20182977ms step_avg:571.50ms +step:35317/57344 train_time:20182994ms step_avg:571.48ms +step:35318/57344 train_time:20183244ms step_avg:571.47ms +step:35319/57344 train_time:20183796ms step_avg:571.47ms +grad accum step:8830/14336 +step:35320/57344 train_time:20185234ms step_avg:571.50ms +step:35321/57344 train_time:20185251ms step_avg:571.48ms +step:35322/57344 train_time:20185499ms step_avg:571.47ms +step:35323/57344 train_time:20186055ms step_avg:571.47ms +grad accum step:8831/14336 +step:35324/57344 train_time:20187381ms step_avg:571.49ms +step:35325/57344 train_time:20187398ms step_avg:571.48ms +step:35326/57344 train_time:20187650ms step_avg:571.47ms +step:35327/57344 train_time:20188212ms step_avg:571.47ms +grad accum step:8832/14336 +step:35328/57344 train_time:20189538ms step_avg:571.49ms +step:35328/57344 val_loss:6.050741 train_time:20189539ms step_avg:571.49ms +step:35329/57344 train_time:20189551ms step_avg:571.47ms +step:35330/57344 train_time:20189778ms step_avg:571.46ms +step:35331/57344 train_time:20190339ms step_avg:571.46ms +grad accum step:8833/14336 +step:35332/57344 train_time:20191658ms step_avg:571.48ms +step:35333/57344 train_time:20191674ms step_avg:571.47ms +step:35334/57344 train_time:20191923ms step_avg:571.46ms +step:35335/57344 train_time:20192486ms step_avg:571.46ms +grad accum step:8834/14336 +step:35336/57344 train_time:20193804ms step_avg:571.48ms +step:35337/57344 train_time:20193821ms step_avg:571.46ms +step:35338/57344 train_time:20194073ms step_avg:571.45ms +step:35339/57344 train_time:20194628ms step_avg:571.45ms +grad accum step:8835/14336 +step:35340/57344 train_time:20195963ms step_avg:571.48ms +step:35341/57344 train_time:20195980ms step_avg:571.46ms +step:35342/57344 train_time:20196244ms step_avg:571.45ms +step:35343/57344 train_time:20196828ms step_avg:571.45ms +grad accum step:8836/14336 +step:35344/57344 train_time:20198123ms step_avg:571.47ms +step:35345/57344 train_time:20198140ms step_avg:571.46ms +step:35346/57344 train_time:20198380ms step_avg:571.45ms +step:35347/57344 train_time:20198930ms step_avg:571.45ms +grad accum step:8837/14336 +step:35348/57344 train_time:20200280ms step_avg:571.47ms +step:35349/57344 train_time:20200294ms step_avg:571.45ms +step:35350/57344 train_time:20200543ms step_avg:571.44ms +step:35351/57344 train_time:20201087ms step_avg:571.44ms +grad accum step:8838/14336 +step:35352/57344 train_time:20202405ms step_avg:571.46ms +step:35353/57344 train_time:20202428ms step_avg:571.45ms +step:35354/57344 train_time:20202662ms step_avg:571.44ms +step:35355/57344 train_time:20203206ms step_avg:571.44ms +grad accum step:8839/14336 +step:35356/57344 train_time:20204508ms step_avg:571.46ms +step:35357/57344 train_time:20204525ms step_avg:571.44ms +step:35358/57344 train_time:20204771ms step_avg:571.43ms +step:35359/57344 train_time:20205333ms step_avg:571.43ms +grad accum step:8840/14336 +step:35360/57344 train_time:20206656ms step_avg:571.46ms +step:35361/57344 train_time:20206673ms step_avg:571.44ms +step:35362/57344 train_time:20206919ms step_avg:571.43ms +step:35363/57344 train_time:20207476ms step_avg:571.43ms +grad accum step:8841/14336 +step:35364/57344 train_time:20208798ms step_avg:571.45ms +step:35365/57344 train_time:20208814ms step_avg:571.44ms +step:35366/57344 train_time:20209059ms step_avg:571.43ms +step:35367/57344 train_time:20209607ms step_avg:571.43ms +grad accum step:8842/14336 +step:35368/57344 train_time:20210900ms step_avg:571.45ms +step:35369/57344 train_time:20210917ms step_avg:571.43ms +step:35370/57344 train_time:20211163ms step_avg:571.42ms +step:35371/57344 train_time:20211709ms step_avg:571.42ms +grad accum step:8843/14336 +step:35372/57344 train_time:20213021ms step_avg:571.44ms +step:35373/57344 train_time:20213039ms step_avg:571.43ms +step:35374/57344 train_time:20213288ms step_avg:571.42ms +step:35375/57344 train_time:20213845ms step_avg:571.42ms +grad accum step:8844/14336 +step:35376/57344 train_time:20215198ms step_avg:571.44ms +step:35377/57344 train_time:20215214ms step_avg:571.42ms +step:35378/57344 train_time:20215464ms step_avg:571.41ms +step:35379/57344 train_time:20216017ms step_avg:571.41ms +grad accum step:8845/14336 +step:35380/57344 train_time:20217293ms step_avg:571.43ms +step:35381/57344 train_time:20217309ms step_avg:571.42ms +step:35382/57344 train_time:20217555ms step_avg:571.41ms +step:35383/57344 train_time:20218098ms step_avg:571.41ms +grad accum step:8846/14336 +step:35384/57344 train_time:20219388ms step_avg:571.43ms +step:35385/57344 train_time:20219403ms step_avg:571.41ms +step:35386/57344 train_time:20219646ms step_avg:571.40ms +step:35387/57344 train_time:20220186ms step_avg:571.40ms +grad accum step:8847/14336 +step:35388/57344 train_time:20221493ms step_avg:571.42ms +step:35389/57344 train_time:20221510ms step_avg:571.41ms +step:35390/57344 train_time:20221764ms step_avg:571.40ms +step:35391/57344 train_time:20222331ms step_avg:571.40ms +grad accum step:8848/14336 +step:35392/57344 train_time:20223646ms step_avg:571.42ms +step:35392/57344 val_loss:6.023050 train_time:20223647ms step_avg:571.42ms +step:35393/57344 train_time:20223659ms step_avg:571.40ms +step:35394/57344 train_time:20223884ms step_avg:571.39ms +step:35395/57344 train_time:20224445ms step_avg:571.39ms +grad accum step:8849/14336 +step:35396/57344 train_time:20225810ms step_avg:571.42ms +step:35397/57344 train_time:20225825ms step_avg:571.40ms +step:35398/57344 train_time:20226072ms step_avg:571.39ms +step:35399/57344 train_time:20226629ms step_avg:571.39ms +grad accum step:8850/14336 +step:35400/57344 train_time:20227940ms step_avg:571.41ms +step:35401/57344 train_time:20227957ms step_avg:571.40ms +step:35402/57344 train_time:20228208ms step_avg:571.39ms +step:35403/57344 train_time:20228775ms step_avg:571.39ms +grad accum step:8851/14336 +step:35404/57344 train_time:20230069ms step_avg:571.41ms +step:35405/57344 train_time:20230084ms step_avg:571.39ms +step:35406/57344 train_time:20230331ms step_avg:571.38ms +step:35407/57344 train_time:20230881ms step_avg:571.38ms +grad accum step:8852/14336 +step:35408/57344 train_time:20232161ms step_avg:571.40ms +step:35409/57344 train_time:20232178ms step_avg:571.39ms +step:35410/57344 train_time:20232428ms step_avg:571.38ms +step:35411/57344 train_time:20232981ms step_avg:571.38ms +grad accum step:8853/14336 +step:35412/57344 train_time:20234258ms step_avg:571.40ms +step:35413/57344 train_time:20234275ms step_avg:571.38ms +step:35414/57344 train_time:20234520ms step_avg:571.37ms +step:35415/57344 train_time:20235066ms step_avg:571.37ms +grad accum step:8854/14336 +step:35416/57344 train_time:20236385ms step_avg:571.39ms +step:35417/57344 train_time:20236404ms step_avg:571.38ms +step:35418/57344 train_time:20236638ms step_avg:571.37ms +step:35419/57344 train_time:20237176ms step_avg:571.36ms +grad accum step:8855/14336 +step:35420/57344 train_time:20238518ms step_avg:571.39ms +step:35421/57344 train_time:20238537ms step_avg:571.37ms +step:35422/57344 train_time:20238780ms step_avg:571.36ms +step:35423/57344 train_time:20239334ms step_avg:571.36ms +grad accum step:8856/14336 +step:35424/57344 train_time:20240681ms step_avg:571.38ms +step:35425/57344 train_time:20240693ms step_avg:571.37ms +step:35426/57344 train_time:20240935ms step_avg:571.36ms +step:35427/57344 train_time:20241493ms step_avg:571.36ms +grad accum step:8857/14336 +step:35428/57344 train_time:20242829ms step_avg:571.38ms +step:35429/57344 train_time:20242844ms step_avg:571.36ms +step:35430/57344 train_time:20243090ms step_avg:571.35ms +step:35431/57344 train_time:20243647ms step_avg:571.35ms +grad accum step:8858/14336 +step:35432/57344 train_time:20244988ms step_avg:571.38ms +step:35433/57344 train_time:20245002ms step_avg:571.36ms +step:35434/57344 train_time:20245248ms step_avg:571.35ms +step:35435/57344 train_time:20245786ms step_avg:571.35ms +grad accum step:8859/14336 +step:35436/57344 train_time:20247093ms step_avg:571.37ms +step:35437/57344 train_time:20247109ms step_avg:571.36ms +step:35438/57344 train_time:20247356ms step_avg:571.35ms +step:35439/57344 train_time:20247904ms step_avg:571.35ms +grad accum step:8860/14336 +step:35440/57344 train_time:20249211ms step_avg:571.37ms +step:35441/57344 train_time:20249227ms step_avg:571.35ms +step:35442/57344 train_time:20249474ms step_avg:571.34ms +step:35443/57344 train_time:20250021ms step_avg:571.34ms +grad accum step:8861/14336 +step:35444/57344 train_time:20251337ms step_avg:571.36ms +step:35445/57344 train_time:20251353ms step_avg:571.35ms +step:35446/57344 train_time:20251601ms step_avg:571.34ms +step:35447/57344 train_time:20252154ms step_avg:571.34ms +grad accum step:8862/14336 +step:35448/57344 train_time:20253508ms step_avg:571.36ms +step:35449/57344 train_time:20253529ms step_avg:571.34ms +step:35450/57344 train_time:20253762ms step_avg:571.33ms +step:35451/57344 train_time:20254307ms step_avg:571.33ms +grad accum step:8863/14336 +step:35452/57344 train_time:20255616ms step_avg:571.35ms +step:35453/57344 train_time:20255631ms step_avg:571.34ms +step:35454/57344 train_time:20255881ms step_avg:571.33ms +step:35455/57344 train_time:20256448ms step_avg:571.33ms +grad accum step:8864/14336 +step:35456/57344 train_time:20257871ms step_avg:571.35ms +step:35456/57344 val_loss:6.012047 train_time:20257875ms step_avg:571.35ms +step:35457/57344 train_time:20257887ms step_avg:571.34ms +step:35458/57344 train_time:20258114ms step_avg:571.33ms +step:35459/57344 train_time:20258668ms step_avg:571.33ms +grad accum step:8865/14336 +step:35460/57344 train_time:20259965ms step_avg:571.35ms +step:35461/57344 train_time:20259983ms step_avg:571.33ms +step:35462/57344 train_time:20260228ms step_avg:571.32ms +step:35463/57344 train_time:20260790ms step_avg:571.32ms +grad accum step:8866/14336 +step:35464/57344 train_time:20262126ms step_avg:571.34ms +step:35465/57344 train_time:20262141ms step_avg:571.33ms +step:35466/57344 train_time:20262391ms step_avg:571.32ms +step:35467/57344 train_time:20262948ms step_avg:571.32ms +grad accum step:8867/14336 +step:35468/57344 train_time:20264275ms step_avg:571.34ms +step:35469/57344 train_time:20264292ms step_avg:571.32ms +step:35470/57344 train_time:20264549ms step_avg:571.32ms +step:35471/57344 train_time:20265125ms step_avg:571.32ms +grad accum step:8868/14336 +step:35472/57344 train_time:20266438ms step_avg:571.34ms +step:35473/57344 train_time:20266453ms step_avg:571.32ms +step:35474/57344 train_time:20266700ms step_avg:571.31ms +step:35475/57344 train_time:20267247ms step_avg:571.31ms +grad accum step:8869/14336 +step:35476/57344 train_time:20268562ms step_avg:571.33ms +step:35477/57344 train_time:20268584ms step_avg:571.32ms +step:35478/57344 train_time:20268821ms step_avg:571.31ms +step:35479/57344 train_time:20269369ms step_avg:571.31ms +grad accum step:8870/14336 +step:35480/57344 train_time:20270685ms step_avg:571.33ms +step:35481/57344 train_time:20270701ms step_avg:571.31ms +step:35482/57344 train_time:20270941ms step_avg:571.30ms +step:35483/57344 train_time:20271477ms step_avg:571.30ms +grad accum step:8871/14336 +step:35484/57344 train_time:20272771ms step_avg:571.32ms +step:35485/57344 train_time:20272787ms step_avg:571.31ms +step:35486/57344 train_time:20273032ms step_avg:571.30ms +step:35487/57344 train_time:20273580ms step_avg:571.30ms +grad accum step:8872/14336 +step:35488/57344 train_time:20274881ms step_avg:571.32ms +step:35489/57344 train_time:20274899ms step_avg:571.30ms +step:35490/57344 train_time:20275140ms step_avg:571.29ms +step:35491/57344 train_time:20275677ms step_avg:571.29ms +grad accum step:8873/14336 +step:35492/57344 train_time:20276966ms step_avg:571.31ms +step:35493/57344 train_time:20276983ms step_avg:571.30ms +step:35494/57344 train_time:20277228ms step_avg:571.29ms +step:35495/57344 train_time:20277777ms step_avg:571.29ms +grad accum step:8874/14336 +step:35496/57344 train_time:20279161ms step_avg:571.31ms +step:35497/57344 train_time:20279178ms step_avg:571.29ms +step:35498/57344 train_time:20279424ms step_avg:571.28ms +step:35499/57344 train_time:20279975ms step_avg:571.28ms +grad accum step:8875/14336 +step:35500/57344 train_time:20281307ms step_avg:571.30ms +step:35501/57344 train_time:20281324ms step_avg:571.29ms +step:35502/57344 train_time:20281572ms step_avg:571.28ms +step:35503/57344 train_time:20282126ms step_avg:571.28ms +grad accum step:8876/14336 +step:35504/57344 train_time:20283433ms step_avg:571.30ms +step:35505/57344 train_time:20283449ms step_avg:571.28ms +step:35506/57344 train_time:20283694ms step_avg:571.28ms +step:35507/57344 train_time:20284235ms step_avg:571.27ms +grad accum step:8877/14336 +step:35508/57344 train_time:20285543ms step_avg:571.29ms +step:35509/57344 train_time:20285560ms step_avg:571.28ms +step:35510/57344 train_time:20285806ms step_avg:571.27ms +step:35511/57344 train_time:20286363ms step_avg:571.27ms +grad accum step:8878/14336 +step:35512/57344 train_time:20287680ms step_avg:571.29ms +step:35513/57344 train_time:20287695ms step_avg:571.28ms +step:35514/57344 train_time:20287942ms step_avg:571.27ms +step:35515/57344 train_time:20288492ms step_avg:571.27ms +grad accum step:8879/14336 +step:35516/57344 train_time:20289802ms step_avg:571.29ms +step:35517/57344 train_time:20289816ms step_avg:571.27ms +step:35518/57344 train_time:20290066ms step_avg:571.26ms +step:35519/57344 train_time:20290616ms step_avg:571.26ms +grad accum step:8880/14336 +step:35520/57344 train_time:20291931ms step_avg:571.28ms +step:35520/57344 val_loss:6.000558 train_time:20291931ms step_avg:571.28ms +step:35521/57344 train_time:20291943ms step_avg:571.27ms +step:35522/57344 train_time:20292170ms step_avg:571.26ms +step:35523/57344 train_time:20292721ms step_avg:571.26ms +grad accum step:8881/14336 +step:35524/57344 train_time:20294011ms step_avg:571.28ms +step:35525/57344 train_time:20294028ms step_avg:571.26ms +step:35526/57344 train_time:20294282ms step_avg:571.25ms +step:35527/57344 train_time:20294856ms step_avg:571.25ms +grad accum step:8882/14336 +step:35528/57344 train_time:20296179ms step_avg:571.27ms +step:35529/57344 train_time:20296196ms step_avg:571.26ms +step:35530/57344 train_time:20296444ms step_avg:571.25ms +step:35531/57344 train_time:20296992ms step_avg:571.25ms +grad accum step:8883/14336 +step:35532/57344 train_time:20298302ms step_avg:571.27ms +step:35533/57344 train_time:20298319ms step_avg:571.25ms +step:35534/57344 train_time:20298560ms step_avg:571.24ms +step:35535/57344 train_time:20299097ms step_avg:571.24ms +grad accum step:8884/14336 +step:35536/57344 train_time:20300381ms step_avg:571.26ms +step:35537/57344 train_time:20300398ms step_avg:571.25ms +step:35538/57344 train_time:20300651ms step_avg:571.24ms +step:35539/57344 train_time:20301214ms step_avg:571.24ms +grad accum step:8885/14336 +step:35540/57344 train_time:20302535ms step_avg:571.26ms +step:35541/57344 train_time:20302551ms step_avg:571.24ms +step:35542/57344 train_time:20302800ms step_avg:571.23ms +step:35543/57344 train_time:20303357ms step_avg:571.23ms +grad accum step:8886/14336 +step:35544/57344 train_time:20304662ms step_avg:571.25ms +step:35545/57344 train_time:20304679ms step_avg:571.24ms +step:35546/57344 train_time:20304929ms step_avg:571.23ms +step:35547/57344 train_time:20305484ms step_avg:571.23ms +grad accum step:8887/14336 +step:35548/57344 train_time:20306814ms step_avg:571.25ms +step:35549/57344 train_time:20306831ms step_avg:571.23ms +step:35550/57344 train_time:20307075ms step_avg:571.23ms +step:35551/57344 train_time:20307623ms step_avg:571.23ms +grad accum step:8888/14336 +step:35552/57344 train_time:20308944ms step_avg:571.25ms +step:35553/57344 train_time:20308961ms step_avg:571.23ms +step:35554/57344 train_time:20309209ms step_avg:571.22ms +step:35555/57344 train_time:20309760ms step_avg:571.22ms +grad accum step:8889/14336 +step:35556/57344 train_time:20311062ms step_avg:571.24ms +step:35557/57344 train_time:20311079ms step_avg:571.23ms +step:35558/57344 train_time:20311327ms step_avg:571.22ms +step:35559/57344 train_time:20311878ms step_avg:571.22ms +grad accum step:8890/14336 +step:35560/57344 train_time:20313209ms step_avg:571.24ms +step:35561/57344 train_time:20313226ms step_avg:571.22ms +step:35562/57344 train_time:20313473ms step_avg:571.21ms +step:35563/57344 train_time:20314020ms step_avg:571.21ms +grad accum step:8891/14336 +step:35564/57344 train_time:20315336ms step_avg:571.23ms +step:35565/57344 train_time:20315352ms step_avg:571.22ms +step:35566/57344 train_time:20315597ms step_avg:571.21ms +step:35567/57344 train_time:20316140ms step_avg:571.21ms +grad accum step:8892/14336 +step:35568/57344 train_time:20317439ms step_avg:571.23ms +step:35569/57344 train_time:20317455ms step_avg:571.21ms +step:35570/57344 train_time:20317713ms step_avg:571.20ms +step:35571/57344 train_time:20318286ms step_avg:571.20ms +grad accum step:8893/14336 +step:35572/57344 train_time:20319585ms step_avg:571.22ms +step:35573/57344 train_time:20319602ms step_avg:571.21ms +step:35574/57344 train_time:20319853ms step_avg:571.20ms +step:35575/57344 train_time:20320412ms step_avg:571.20ms +grad accum step:8894/14336 +step:35576/57344 train_time:20321732ms step_avg:571.22ms +step:35577/57344 train_time:20321748ms step_avg:571.20ms +step:35578/57344 train_time:20321994ms step_avg:571.20ms +step:35579/57344 train_time:20322535ms step_avg:571.19ms +grad accum step:8895/14336 +step:35580/57344 train_time:20323871ms step_avg:571.22ms +step:35581/57344 train_time:20323888ms step_avg:571.20ms +step:35582/57344 train_time:20324132ms step_avg:571.19ms +step:35583/57344 train_time:20324680ms step_avg:571.19ms +grad accum step:8896/14336 +step:35584/57344 train_time:20326000ms step_avg:571.21ms +step:35584/57344 val_loss:5.983496 train_time:20326001ms step_avg:571.21ms +step:35585/57344 train_time:20326012ms step_avg:571.20ms +step:35586/57344 train_time:20326234ms step_avg:571.19ms +step:35587/57344 train_time:20326780ms step_avg:571.19ms +grad accum step:8897/14336 +step:35588/57344 train_time:20328073ms step_avg:571.21ms +step:35589/57344 train_time:20328090ms step_avg:571.19ms +step:35590/57344 train_time:20328338ms step_avg:571.18ms +step:35591/57344 train_time:20328888ms step_avg:571.18ms +grad accum step:8898/14336 +step:35592/57344 train_time:20330231ms step_avg:571.20ms +step:35593/57344 train_time:20330247ms step_avg:571.19ms +step:35594/57344 train_time:20330492ms step_avg:571.18ms +step:35595/57344 train_time:20331032ms step_avg:571.18ms +grad accum step:8899/14336 +step:35596/57344 train_time:20332344ms step_avg:571.20ms +step:35597/57344 train_time:20332361ms step_avg:571.18ms +step:35598/57344 train_time:20332608ms step_avg:571.17ms +step:35599/57344 train_time:20333153ms step_avg:571.17ms +grad accum step:8900/14336 +step:35600/57344 train_time:20334447ms step_avg:571.19ms +step:35601/57344 train_time:20334464ms step_avg:571.18ms +step:35602/57344 train_time:20334712ms step_avg:571.17ms +step:35603/57344 train_time:20335260ms step_avg:571.17ms +grad accum step:8901/14336 +step:35604/57344 train_time:20336576ms step_avg:571.19ms +step:35605/57344 train_time:20336592ms step_avg:571.17ms +step:35606/57344 train_time:20336843ms step_avg:571.16ms +step:35607/57344 train_time:20337407ms step_avg:571.16ms +grad accum step:8902/14336 +step:35608/57344 train_time:20338735ms step_avg:571.18ms +step:35609/57344 train_time:20338751ms step_avg:571.17ms +step:35610/57344 train_time:20338995ms step_avg:571.16ms +step:35611/57344 train_time:20339535ms step_avg:571.16ms +grad accum step:8903/14336 +step:35612/57344 train_time:20340859ms step_avg:571.18ms +step:35613/57344 train_time:20340876ms step_avg:571.16ms +step:35614/57344 train_time:20341123ms step_avg:571.16ms +step:35615/57344 train_time:20341675ms step_avg:571.15ms +grad accum step:8904/14336 +step:35616/57344 train_time:20343032ms step_avg:571.18ms +step:35617/57344 train_time:20343048ms step_avg:571.16ms +step:35618/57344 train_time:20343296ms step_avg:571.15ms +step:35619/57344 train_time:20343840ms step_avg:571.15ms +grad accum step:8905/14336 +step:35620/57344 train_time:20345148ms step_avg:571.17ms +step:35621/57344 train_time:20345165ms step_avg:571.16ms +step:35622/57344 train_time:20345412ms step_avg:571.15ms +step:35623/57344 train_time:20345962ms step_avg:571.15ms +grad accum step:8906/14336 +step:35624/57344 train_time:20347296ms step_avg:571.17ms +step:35625/57344 train_time:20347313ms step_avg:571.15ms +step:35626/57344 train_time:20347559ms step_avg:571.14ms +step:35627/57344 train_time:20348101ms step_avg:571.14ms +grad accum step:8907/14336 +step:35628/57344 train_time:20349406ms step_avg:571.16ms +step:35629/57344 train_time:20349423ms step_avg:571.15ms +step:35630/57344 train_time:20349675ms step_avg:571.14ms +step:35631/57344 train_time:20350239ms step_avg:571.14ms +grad accum step:8908/14336 +step:35632/57344 train_time:20351550ms step_avg:571.16ms +step:35633/57344 train_time:20351567ms step_avg:571.14ms +step:35634/57344 train_time:20351819ms step_avg:571.13ms +step:35635/57344 train_time:20352374ms step_avg:571.13ms +grad accum step:8909/14336 +step:35636/57344 train_time:20353690ms step_avg:571.16ms +step:35637/57344 train_time:20353707ms step_avg:571.14ms +step:35638/57344 train_time:20353959ms step_avg:571.13ms +step:35639/57344 train_time:20354516ms step_avg:571.13ms +grad accum step:8910/14336 +step:35640/57344 train_time:20355852ms step_avg:571.15ms +step:35641/57344 train_time:20355869ms step_avg:571.14ms +step:35642/57344 train_time:20356120ms step_avg:571.13ms +step:35643/57344 train_time:20356682ms step_avg:571.13ms +grad accum step:8911/14336 +step:35644/57344 train_time:20357984ms step_avg:571.15ms +step:35645/57344 train_time:20358001ms step_avg:571.13ms +step:35646/57344 train_time:20358243ms step_avg:571.12ms +step:35647/57344 train_time:20358786ms step_avg:571.12ms +grad accum step:8912/14336 +step:35648/57344 train_time:20360066ms step_avg:571.14ms +step:35648/57344 val_loss:5.968700 train_time:20360067ms step_avg:571.14ms +step:35649/57344 train_time:20361695ms step_avg:571.17ms +step:35650/57344 train_time:20361968ms step_avg:571.16ms +step:35651/57344 train_time:20362349ms step_avg:571.16ms +grad accum step:8913/14336 +step:35652/57344 train_time:20363756ms step_avg:571.18ms +step:35653/57344 train_time:20363767ms step_avg:571.17ms +step:35654/57344 train_time:20363987ms step_avg:571.16ms +step:35655/57344 train_time:20364530ms step_avg:571.15ms +grad accum step:8914/14336 +step:35656/57344 train_time:20365817ms step_avg:571.18ms +step:35657/57344 train_time:20365833ms step_avg:571.16ms +step:35658/57344 train_time:20366072ms step_avg:571.15ms +step:35659/57344 train_time:20366604ms step_avg:571.15ms +grad accum step:8915/14336 +step:35660/57344 train_time:20367882ms step_avg:571.17ms +step:35661/57344 train_time:20367898ms step_avg:571.15ms +step:35662/57344 train_time:20368144ms step_avg:571.14ms +step:35663/57344 train_time:20368697ms step_avg:571.14ms +grad accum step:8916/14336 +step:35664/57344 train_time:20370033ms step_avg:571.17ms +step:35665/57344 train_time:20370049ms step_avg:571.15ms +step:35666/57344 train_time:20370294ms step_avg:571.14ms +step:35667/57344 train_time:20370834ms step_avg:571.14ms +grad accum step:8917/14336 +step:35668/57344 train_time:20372119ms step_avg:571.16ms +step:35669/57344 train_time:20372134ms step_avg:571.14ms +step:35670/57344 train_time:20372376ms step_avg:571.13ms +step:35671/57344 train_time:20372913ms step_avg:571.13ms +grad accum step:8918/14336 +step:35672/57344 train_time:20374177ms step_avg:571.15ms +step:35673/57344 train_time:20374193ms step_avg:571.14ms +step:35674/57344 train_time:20374437ms step_avg:571.13ms +step:35675/57344 train_time:20374986ms step_avg:571.13ms +grad accum step:8919/14336 +step:35676/57344 train_time:20376269ms step_avg:571.15ms +step:35677/57344 train_time:20376286ms step_avg:571.13ms +step:35678/57344 train_time:20376529ms step_avg:571.12ms +step:35679/57344 train_time:20377076ms step_avg:571.12ms +grad accum step:8920/14336 +step:35680/57344 train_time:20378355ms step_avg:571.14ms +step:35681/57344 train_time:20378372ms step_avg:571.13ms +step:35682/57344 train_time:20378613ms step_avg:571.12ms +step:35683/57344 train_time:20379165ms step_avg:571.12ms +grad accum step:8921/14336 +step:35684/57344 train_time:20380441ms step_avg:571.14ms +step:35685/57344 train_time:20380458ms step_avg:571.12ms +step:35686/57344 train_time:20380702ms step_avg:571.11ms +step:35687/57344 train_time:20381253ms step_avg:571.11ms +grad accum step:8922/14336 +step:35688/57344 train_time:20382533ms step_avg:571.13ms +step:35689/57344 train_time:20382550ms step_avg:571.12ms +step:35690/57344 train_time:20382796ms step_avg:571.11ms +step:35691/57344 train_time:20383346ms step_avg:571.11ms +grad accum step:8923/14336 +step:35692/57344 train_time:20384625ms step_avg:571.13ms +step:35693/57344 train_time:20384642ms step_avg:571.11ms +step:35694/57344 train_time:20384898ms step_avg:571.10ms +step:35695/57344 train_time:20385478ms step_avg:571.10ms +grad accum step:8924/14336 +step:35696/57344 train_time:20386820ms step_avg:571.12ms +step:35697/57344 train_time:20386836ms step_avg:571.11ms +step:35698/57344 train_time:20387075ms step_avg:571.10ms +step:35699/57344 train_time:20387617ms step_avg:571.10ms +grad accum step:8925/14336 +step:35700/57344 train_time:20388967ms step_avg:571.12ms +step:35701/57344 train_time:20388983ms step_avg:571.10ms +step:35702/57344 train_time:20389227ms step_avg:571.09ms +step:35703/57344 train_time:20389764ms step_avg:571.09ms +grad accum step:8926/14336 +step:35704/57344 train_time:20391042ms step_avg:571.11ms +step:35705/57344 train_time:20391058ms step_avg:571.10ms +step:35706/57344 train_time:20391310ms step_avg:571.09ms +step:35707/57344 train_time:20391880ms step_avg:571.09ms +grad accum step:8927/14336 +step:35708/57344 train_time:20405287ms step_avg:571.45ms +step:35709/57344 train_time:20405304ms step_avg:571.43ms +step:35710/57344 train_time:20405547ms step_avg:571.42ms +step:35711/57344 train_time:20406112ms step_avg:571.42ms +grad accum step:8928/14336 +step:35712/57344 train_time:20407470ms step_avg:571.45ms +step:35712/57344 val_loss:5.975152 train_time:20407470ms step_avg:571.45ms +step:35713/57344 train_time:20407482ms step_avg:571.43ms +step:35714/57344 train_time:20407750ms step_avg:571.42ms +step:35715/57344 train_time:20408293ms step_avg:571.42ms +grad accum step:8929/14336 +step:35716/57344 train_time:20409563ms step_avg:571.44ms +step:35717/57344 train_time:20409580ms step_avg:571.42ms +step:35718/57344 train_time:20409823ms step_avg:571.42ms +step:35719/57344 train_time:20410373ms step_avg:571.42ms +grad accum step:8930/14336 +step:35720/57344 train_time:20411711ms step_avg:571.44ms +step:35721/57344 train_time:20411728ms step_avg:571.42ms +step:35722/57344 train_time:20411970ms step_avg:571.41ms +step:35723/57344 train_time:20412500ms step_avg:571.41ms +grad accum step:8931/14336 +step:35724/57344 train_time:20413782ms step_avg:571.43ms +step:35725/57344 train_time:20413799ms step_avg:571.41ms +step:35726/57344 train_time:20414041ms step_avg:571.41ms +step:35727/57344 train_time:20414586ms step_avg:571.40ms +grad accum step:8932/14336 +step:35728/57344 train_time:20415879ms step_avg:571.43ms +step:35729/57344 train_time:20415897ms step_avg:571.41ms +step:35730/57344 train_time:20416133ms step_avg:571.40ms +step:35731/57344 train_time:20416685ms step_avg:571.40ms +grad accum step:8933/14336 +step:35732/57344 train_time:20417990ms step_avg:571.42ms +step:35733/57344 train_time:20418006ms step_avg:571.40ms +step:35734/57344 train_time:20418260ms step_avg:571.40ms +step:35735/57344 train_time:20418823ms step_avg:571.40ms +grad accum step:8934/14336 +step:35736/57344 train_time:20420114ms step_avg:571.42ms +step:35737/57344 train_time:20420131ms step_avg:571.40ms +step:35738/57344 train_time:20420373ms step_avg:571.39ms +step:35739/57344 train_time:20420918ms step_avg:571.39ms +grad accum step:8935/14336 +step:35740/57344 train_time:20422219ms step_avg:571.41ms +step:35741/57344 train_time:20422235ms step_avg:571.40ms +step:35742/57344 train_time:20422483ms step_avg:571.39ms +step:35743/57344 train_time:20423027ms step_avg:571.39ms +grad accum step:8936/14336 +step:35744/57344 train_time:20424329ms step_avg:571.41ms +step:35745/57344 train_time:20424347ms step_avg:571.39ms +step:35746/57344 train_time:20424586ms step_avg:571.38ms +step:35747/57344 train_time:20425130ms step_avg:571.38ms +grad accum step:8937/14336 +step:35748/57344 train_time:20426427ms step_avg:571.40ms +step:35749/57344 train_time:20426444ms step_avg:571.39ms +step:35750/57344 train_time:20426698ms step_avg:571.38ms +step:35751/57344 train_time:20427275ms step_avg:571.38ms +grad accum step:8938/14336 +step:35752/57344 train_time:20428586ms step_avg:571.40ms +step:35753/57344 train_time:20428605ms step_avg:571.38ms +step:35754/57344 train_time:20428847ms step_avg:571.37ms +step:35755/57344 train_time:20429396ms step_avg:571.37ms +grad accum step:8939/14336 +step:35756/57344 train_time:20430698ms step_avg:571.39ms +step:35757/57344 train_time:20430714ms step_avg:571.38ms +step:35758/57344 train_time:20430959ms step_avg:571.37ms +step:35759/57344 train_time:20431506ms step_avg:571.37ms +grad accum step:8940/14336 +step:35760/57344 train_time:20432825ms step_avg:571.39ms +step:35761/57344 train_time:20432839ms step_avg:571.37ms +step:35762/57344 train_time:20433086ms step_avg:571.36ms +step:35763/57344 train_time:20433631ms step_avg:571.36ms +grad accum step:8941/14336 +step:35764/57344 train_time:20434931ms step_avg:571.38ms +step:35765/57344 train_time:20434953ms step_avg:571.37ms +step:35766/57344 train_time:20435189ms step_avg:571.36ms +step:35767/57344 train_time:20435736ms step_avg:571.36ms +grad accum step:8942/14336 +step:35768/57344 train_time:20437049ms step_avg:571.38ms +step:35769/57344 train_time:20437062ms step_avg:571.36ms +step:35770/57344 train_time:20437313ms step_avg:571.35ms +step:35771/57344 train_time:20437872ms step_avg:571.35ms +grad accum step:8943/14336 +step:35772/57344 train_time:20439171ms step_avg:571.37ms +step:35773/57344 train_time:20439189ms step_avg:571.36ms +step:35774/57344 train_time:20439435ms step_avg:571.35ms +step:35775/57344 train_time:20439994ms step_avg:571.35ms +grad accum step:8944/14336 +step:35776/57344 train_time:20441299ms step_avg:571.37ms +step:35776/57344 val_loss:5.949347 train_time:20441302ms step_avg:571.37ms +step:35777/57344 train_time:20441314ms step_avg:571.35ms +step:35778/57344 train_time:20441549ms step_avg:571.34ms +step:35779/57344 train_time:20442122ms step_avg:571.34ms +grad accum step:8945/14336 +step:35780/57344 train_time:20443432ms step_avg:571.36ms +step:35781/57344 train_time:20443450ms step_avg:571.35ms +step:35782/57344 train_time:20443695ms step_avg:571.34ms +step:35783/57344 train_time:20444252ms step_avg:571.34ms +grad accum step:8946/14336 +step:35784/57344 train_time:20445585ms step_avg:571.36ms +step:35785/57344 train_time:20445600ms step_avg:571.35ms +step:35786/57344 train_time:20445849ms step_avg:571.34ms +step:35787/57344 train_time:20446414ms step_avg:571.34ms +grad accum step:8947/14336 +step:35788/57344 train_time:20447717ms step_avg:571.36ms +step:35789/57344 train_time:20447732ms step_avg:571.34ms +step:35790/57344 train_time:20447983ms step_avg:571.33ms +step:35791/57344 train_time:20448553ms step_avg:571.33ms +grad accum step:8948/14336 +step:35792/57344 train_time:20449930ms step_avg:571.35ms +step:35793/57344 train_time:20450008ms step_avg:571.34ms +step:35794/57344 train_time:20450230ms step_avg:571.33ms +step:35795/57344 train_time:20450784ms step_avg:571.33ms +grad accum step:8949/14336 +step:35796/57344 train_time:20452110ms step_avg:571.35ms +step:35797/57344 train_time:20452125ms step_avg:571.34ms +step:35798/57344 train_time:20452371ms step_avg:571.33ms +step:35799/57344 train_time:20452915ms step_avg:571.33ms +grad accum step:8950/14336 +step:35800/57344 train_time:20454240ms step_avg:571.35ms +step:35801/57344 train_time:20454255ms step_avg:571.33ms +step:35802/57344 train_time:20454507ms step_avg:571.32ms +step:35803/57344 train_time:20455082ms step_avg:571.32ms +grad accum step:8951/14336 +step:35804/57344 train_time:20456401ms step_avg:571.34ms +step:35805/57344 train_time:20456417ms step_avg:571.33ms +step:35806/57344 train_time:20456673ms step_avg:571.32ms +step:35807/57344 train_time:20457241ms step_avg:571.32ms +grad accum step:8952/14336 +step:35808/57344 train_time:20458583ms step_avg:571.34ms +step:35809/57344 train_time:20458599ms step_avg:571.33ms +step:35810/57344 train_time:20458845ms step_avg:571.32ms +step:35811/57344 train_time:20459389ms step_avg:571.32ms +grad accum step:8953/14336 +step:35812/57344 train_time:20460695ms step_avg:571.34ms +step:35813/57344 train_time:20460713ms step_avg:571.32ms +step:35814/57344 train_time:20460953ms step_avg:571.31ms +step:35815/57344 train_time:20461502ms step_avg:571.31ms +grad accum step:8954/14336 +step:35816/57344 train_time:20462807ms step_avg:571.33ms +step:35817/57344 train_time:20462824ms step_avg:571.32ms +step:35818/57344 train_time:20463070ms step_avg:571.31ms +step:35819/57344 train_time:20463621ms step_avg:571.31ms +grad accum step:8955/14336 +step:35820/57344 train_time:20464975ms step_avg:571.33ms +step:35821/57344 train_time:20464992ms step_avg:571.31ms +step:35822/57344 train_time:20465236ms step_avg:571.30ms +step:35823/57344 train_time:20465793ms step_avg:571.30ms +grad accum step:8956/14336 +step:35824/57344 train_time:20467141ms step_avg:571.32ms +step:35825/57344 train_time:20467157ms step_avg:571.31ms +step:35826/57344 train_time:20467404ms step_avg:571.30ms +step:35827/57344 train_time:20467948ms step_avg:571.30ms +grad accum step:8957/14336 +step:35828/57344 train_time:20469246ms step_avg:571.32ms +step:35829/57344 train_time:20469260ms step_avg:571.30ms +step:35830/57344 train_time:20469509ms step_avg:571.30ms +step:35831/57344 train_time:20470058ms step_avg:571.29ms +grad accum step:8958/14336 +step:35832/57344 train_time:20471373ms step_avg:571.32ms +step:35833/57344 train_time:20471387ms step_avg:571.30ms +step:35834/57344 train_time:20471637ms step_avg:571.29ms +step:35835/57344 train_time:20472199ms step_avg:571.29ms +grad accum step:8959/14336 +step:35836/57344 train_time:20473501ms step_avg:571.31ms +step:35837/57344 train_time:20473519ms step_avg:571.30ms +step:35838/57344 train_time:20473754ms step_avg:571.29ms +step:35839/57344 train_time:20474293ms step_avg:571.29ms +grad accum step:8960/14336 +step:35840/57344 train_time:20475594ms step_avg:571.31ms +step:35840/57344 val_loss:5.953311 train_time:20475595ms step_avg:571.31ms +step:35841/57344 train_time:20475607ms step_avg:571.29ms +step:35842/57344 train_time:20475831ms step_avg:571.28ms +step:35843/57344 train_time:20476391ms step_avg:571.28ms +grad accum step:8961/14336 +step:35844/57344 train_time:20477735ms step_avg:571.30ms +step:35845/57344 train_time:20477750ms step_avg:571.29ms +step:35846/57344 train_time:20477993ms step_avg:571.28ms +step:35847/57344 train_time:20478539ms step_avg:571.28ms +grad accum step:8962/14336 +step:35848/57344 train_time:20479836ms step_avg:571.30ms +step:35849/57344 train_time:20479852ms step_avg:571.28ms +step:35850/57344 train_time:20480102ms step_avg:571.27ms +step:35851/57344 train_time:20480650ms step_avg:571.27ms +grad accum step:8963/14336 +step:35852/57344 train_time:20481975ms step_avg:571.29ms +step:35853/57344 train_time:20481991ms step_avg:571.28ms +step:35854/57344 train_time:20482235ms step_avg:571.27ms +step:35855/57344 train_time:20482785ms step_avg:571.27ms +grad accum step:8964/14336 +step:35856/57344 train_time:20484097ms step_avg:571.29ms +step:35857/57344 train_time:20484114ms step_avg:571.27ms +step:35858/57344 train_time:20484363ms step_avg:571.26ms +step:35859/57344 train_time:20484922ms step_avg:571.26ms +grad accum step:8965/14336 +step:35860/57344 train_time:20486250ms step_avg:571.28ms +step:35861/57344 train_time:20486267ms step_avg:571.27ms +step:35862/57344 train_time:20486515ms step_avg:571.26ms +step:35863/57344 train_time:20487055ms step_avg:571.26ms +grad accum step:8966/14336 +step:35864/57344 train_time:20488376ms step_avg:571.28ms +step:35865/57344 train_time:20488392ms step_avg:571.26ms +step:35866/57344 train_time:20488641ms step_avg:571.26ms +step:35867/57344 train_time:20489194ms step_avg:571.25ms +grad accum step:8967/14336 +step:35868/57344 train_time:20490520ms step_avg:571.28ms +step:35869/57344 train_time:20490536ms step_avg:571.26ms +step:35870/57344 train_time:20490782ms step_avg:571.25ms +step:35871/57344 train_time:20491342ms step_avg:571.25ms +grad accum step:8968/14336 +step:35872/57344 train_time:20492650ms step_avg:571.27ms +step:35873/57344 train_time:20492665ms step_avg:571.26ms +step:35874/57344 train_time:20492907ms step_avg:571.25ms +step:35875/57344 train_time:20493465ms step_avg:571.25ms +grad accum step:8969/14336 +step:35876/57344 train_time:20494760ms step_avg:571.27ms +step:35877/57344 train_time:20494776ms step_avg:571.25ms +step:35878/57344 train_time:20495024ms step_avg:571.24ms +step:35879/57344 train_time:20495571ms step_avg:571.24ms +grad accum step:8970/14336 +step:35880/57344 train_time:20496900ms step_avg:571.26ms +step:35881/57344 train_time:20496916ms step_avg:571.25ms +step:35882/57344 train_time:20497156ms step_avg:571.24ms +step:35883/57344 train_time:20497706ms step_avg:571.24ms +grad accum step:8971/14336 +step:35884/57344 train_time:20499008ms step_avg:571.26ms +step:35885/57344 train_time:20499024ms step_avg:571.24ms +step:35886/57344 train_time:20499286ms step_avg:571.23ms +step:35887/57344 train_time:20499880ms step_avg:571.23ms +grad accum step:8972/14336 +step:35888/57344 train_time:20501163ms step_avg:571.25ms +step:35889/57344 train_time:20501178ms step_avg:571.24ms +step:35890/57344 train_time:20501424ms step_avg:571.23ms +step:35891/57344 train_time:20501975ms step_avg:571.23ms +grad accum step:8973/14336 +step:35892/57344 train_time:20503277ms step_avg:571.25ms +step:35893/57344 train_time:20503292ms step_avg:571.23ms +step:35894/57344 train_time:20503536ms step_avg:571.22ms +step:35895/57344 train_time:20504084ms step_avg:571.22ms +grad accum step:8974/14336 +step:35896/57344 train_time:20505397ms step_avg:571.24ms +step:35897/57344 train_time:20505413ms step_avg:571.23ms +step:35898/57344 train_time:20505664ms step_avg:571.22ms +step:35899/57344 train_time:20506227ms step_avg:571.22ms +grad accum step:8975/14336 +step:35900/57344 train_time:20507562ms step_avg:571.24ms +step:35901/57344 train_time:20507579ms step_avg:571.23ms +step:35902/57344 train_time:20507826ms step_avg:571.22ms +step:35903/57344 train_time:20508375ms step_avg:571.22ms +grad accum step:8976/14336 +step:35904/57344 train_time:20509713ms step_avg:571.24ms +step:35904/57344 val_loss:5.963206 train_time:20509716ms step_avg:571.24ms +step:35905/57344 train_time:20509728ms step_avg:571.22ms +step:35906/57344 train_time:20509948ms step_avg:571.21ms +step:35907/57344 train_time:20510487ms step_avg:571.21ms +grad accum step:8977/14336 +step:35908/57344 train_time:20511822ms step_avg:571.23ms +step:35909/57344 train_time:20511842ms step_avg:571.22ms +step:35910/57344 train_time:20512085ms step_avg:571.21ms +step:35911/57344 train_time:20512639ms step_avg:571.21ms +grad accum step:8978/14336 +step:35912/57344 train_time:20513939ms step_avg:571.23ms +step:35913/57344 train_time:20513953ms step_avg:571.21ms +step:35914/57344 train_time:20514213ms step_avg:571.20ms +step:35915/57344 train_time:20514789ms step_avg:571.20ms +grad accum step:8979/14336 +step:35916/57344 train_time:20516118ms step_avg:571.23ms +step:35917/57344 train_time:20516137ms step_avg:571.21ms +step:35918/57344 train_time:20516376ms step_avg:571.20ms +step:35919/57344 train_time:20516921ms step_avg:571.20ms +grad accum step:8980/14336 +step:35920/57344 train_time:20518227ms step_avg:571.22ms +step:35921/57344 train_time:20518241ms step_avg:571.20ms +step:35922/57344 train_time:20518486ms step_avg:571.20ms +step:35923/57344 train_time:20519030ms step_avg:571.19ms +grad accum step:8981/14336 +step:35924/57344 train_time:20520333ms step_avg:571.22ms +step:35925/57344 train_time:20520348ms step_avg:571.20ms +step:35926/57344 train_time:20520601ms step_avg:571.19ms +step:35927/57344 train_time:20521163ms step_avg:571.19ms +grad accum step:8982/14336 +step:35928/57344 train_time:20522477ms step_avg:571.21ms +step:35929/57344 train_time:20522493ms step_avg:571.20ms +step:35930/57344 train_time:20522738ms step_avg:571.19ms +step:35931/57344 train_time:20523292ms step_avg:571.19ms +grad accum step:8983/14336 +step:35932/57344 train_time:20524621ms step_avg:571.21ms +step:35933/57344 train_time:20524640ms step_avg:571.19ms +step:35934/57344 train_time:20524880ms step_avg:571.18ms +step:35935/57344 train_time:20525425ms step_avg:571.18ms +grad accum step:8984/14336 +step:35936/57344 train_time:20526724ms step_avg:571.20ms +step:35937/57344 train_time:20526738ms step_avg:571.19ms +step:35938/57344 train_time:20526983ms step_avg:571.18ms +step:35939/57344 train_time:20527525ms step_avg:571.18ms +grad accum step:8985/14336 +step:35940/57344 train_time:20528827ms step_avg:571.20ms +step:35941/57344 train_time:20528843ms step_avg:571.18ms +step:35942/57344 train_time:20529085ms step_avg:571.17ms +step:35943/57344 train_time:20529631ms step_avg:571.17ms +grad accum step:8986/14336 +step:35944/57344 train_time:20530911ms step_avg:571.19ms +step:35945/57344 train_time:20530926ms step_avg:571.18ms +step:35946/57344 train_time:20531170ms step_avg:571.17ms +step:35947/57344 train_time:20531716ms step_avg:571.17ms +grad accum step:8987/14336 +step:35948/57344 train_time:20533054ms step_avg:571.19ms +step:35949/57344 train_time:20533070ms step_avg:571.17ms +step:35950/57344 train_time:20533311ms step_avg:571.16ms +step:35951/57344 train_time:20533857ms step_avg:571.16ms +grad accum step:8988/14336 +step:35952/57344 train_time:20535164ms step_avg:571.18ms +step:35953/57344 train_time:20535182ms step_avg:571.17ms +step:35954/57344 train_time:20535424ms step_avg:571.16ms +step:35955/57344 train_time:20535973ms step_avg:571.16ms +grad accum step:8989/14336 +step:35956/57344 train_time:20537297ms step_avg:571.18ms +step:35957/57344 train_time:20537315ms step_avg:571.16ms +step:35958/57344 train_time:20537558ms step_avg:571.15ms +step:35959/57344 train_time:20538111ms step_avg:571.15ms +grad accum step:8990/14336 +step:35960/57344 train_time:20539437ms step_avg:571.17ms +step:35961/57344 train_time:20539454ms step_avg:571.16ms +step:35962/57344 train_time:20539701ms step_avg:571.15ms +step:35963/57344 train_time:20540251ms step_avg:571.15ms +grad accum step:8991/14336 +step:35964/57344 train_time:20541564ms step_avg:571.17ms +step:35965/57344 train_time:20541583ms step_avg:571.15ms +step:35966/57344 train_time:20541819ms step_avg:571.15ms +step:35967/57344 train_time:20542356ms step_avg:571.14ms +grad accum step:8992/14336 +step:35968/57344 train_time:20543752ms step_avg:571.17ms +step:35968/57344 val_loss:5.960691 train_time:20543756ms step_avg:571.17ms +step:35969/57344 train_time:20543768ms step_avg:571.15ms +step:35970/57344 train_time:20543986ms step_avg:571.14ms +step:35971/57344 train_time:20544530ms step_avg:571.14ms +grad accum step:8993/14336 +step:35972/57344 train_time:20545824ms step_avg:571.16ms +step:35973/57344 train_time:20545840ms step_avg:571.15ms +step:35974/57344 train_time:20546091ms step_avg:571.14ms +step:35975/57344 train_time:20546640ms step_avg:571.14ms +grad accum step:8994/14336 +step:35976/57344 train_time:20547935ms step_avg:571.16ms +step:35977/57344 train_time:20547954ms step_avg:571.14ms +step:35978/57344 train_time:20548193ms step_avg:571.13ms +step:35979/57344 train_time:20548735ms step_avg:571.13ms +grad accum step:8995/14336 +step:35980/57344 train_time:20550093ms step_avg:571.15ms +step:35981/57344 train_time:20550107ms step_avg:571.14ms +step:35982/57344 train_time:20550351ms step_avg:571.13ms +step:35983/57344 train_time:20550901ms step_avg:571.13ms +grad accum step:8996/14336 +step:35984/57344 train_time:20552247ms step_avg:571.15ms +step:35985/57344 train_time:20552262ms step_avg:571.13ms +step:35986/57344 train_time:20552503ms step_avg:571.12ms +step:35987/57344 train_time:20553044ms step_avg:571.12ms +grad accum step:8997/14336 +step:35988/57344 train_time:20554320ms step_avg:571.14ms +step:35989/57344 train_time:20554336ms step_avg:571.13ms +step:35990/57344 train_time:20554585ms step_avg:571.12ms +step:35991/57344 train_time:20555132ms step_avg:571.12ms +grad accum step:8998/14336 +step:35992/57344 train_time:20556428ms step_avg:571.14ms +step:35993/57344 train_time:20556444ms step_avg:571.12ms +step:35994/57344 train_time:20556693ms step_avg:571.11ms +step:35995/57344 train_time:20557242ms step_avg:571.11ms +grad accum step:8999/14336 +step:35996/57344 train_time:20558602ms step_avg:571.14ms +step:35997/57344 train_time:20558620ms step_avg:571.12ms +step:35998/57344 train_time:20558866ms step_avg:571.11ms +step:35999/57344 train_time:20559440ms step_avg:571.11ms +grad accum step:9000/14336 +step:36000/57344 train_time:20560741ms step_avg:571.13ms +step:36001/57344 train_time:20560758ms step_avg:571.12ms +step:36002/57344 train_time:20561005ms step_avg:571.11ms +step:36003/57344 train_time:20561557ms step_avg:571.11ms +grad accum step:9001/14336 +step:36004/57344 train_time:20562897ms step_avg:571.13ms +step:36005/57344 train_time:20562913ms step_avg:571.11ms +step:36006/57344 train_time:20563167ms step_avg:571.10ms +step:36007/57344 train_time:20563730ms step_avg:571.10ms +grad accum step:9002/14336 +step:36008/57344 train_time:20565043ms step_avg:571.12ms +step:36009/57344 train_time:20565059ms step_avg:571.11ms +step:36010/57344 train_time:20565308ms step_avg:571.10ms +step:36011/57344 train_time:20565858ms step_avg:571.10ms +grad accum step:9003/14336 +step:36012/57344 train_time:20567191ms step_avg:571.12ms +step:36013/57344 train_time:20567205ms step_avg:571.11ms +step:36014/57344 train_time:20567456ms step_avg:571.10ms +step:36015/57344 train_time:20568018ms step_avg:571.10ms +grad accum step:9004/14336 +step:36016/57344 train_time:20569336ms step_avg:571.12ms +step:36017/57344 train_time:20569352ms step_avg:571.10ms +step:36018/57344 train_time:20569604ms step_avg:571.09ms +step:36019/57344 train_time:20570161ms step_avg:571.09ms +grad accum step:9005/14336 +step:36020/57344 train_time:20571475ms step_avg:571.11ms +step:36021/57344 train_time:20571491ms step_avg:571.10ms +step:36022/57344 train_time:20571740ms step_avg:571.09ms +step:36023/57344 train_time:20572290ms step_avg:571.09ms +grad accum step:9006/14336 +step:36024/57344 train_time:20573589ms step_avg:571.11ms +step:36025/57344 train_time:20573604ms step_avg:571.09ms +step:36026/57344 train_time:20573851ms step_avg:571.08ms +step:36027/57344 train_time:20574399ms step_avg:571.08ms +grad accum step:9007/14336 +step:36028/57344 train_time:20575695ms step_avg:571.10ms +step:36029/57344 train_time:20575714ms step_avg:571.09ms +step:36030/57344 train_time:20575956ms step_avg:571.08ms +step:36031/57344 train_time:20576512ms step_avg:571.08ms +grad accum step:9008/14336 +step:36032/57344 train_time:20577829ms step_avg:571.10ms +step:36032/57344 val_loss:5.970256 train_time:20577836ms step_avg:571.10ms +step:36033/57344 train_time:20577848ms step_avg:571.08ms +step:36034/57344 train_time:20578074ms step_avg:571.07ms +step:36035/57344 train_time:20578627ms step_avg:571.07ms +grad accum step:9009/14336 +step:36036/57344 train_time:20579948ms step_avg:571.09ms +step:36037/57344 train_time:20579970ms step_avg:571.08ms +step:36038/57344 train_time:20580203ms step_avg:571.07ms +step:36039/57344 train_time:20580745ms step_avg:571.07ms +grad accum step:9010/14336 +step:36040/57344 train_time:20582042ms step_avg:571.09ms +step:36041/57344 train_time:20582059ms step_avg:571.07ms +step:36042/57344 train_time:20582305ms step_avg:571.06ms +step:36043/57344 train_time:20582850ms step_avg:571.06ms +grad accum step:9011/14336 +step:36044/57344 train_time:20584157ms step_avg:571.08ms +step:36045/57344 train_time:20584174ms step_avg:571.07ms +step:36046/57344 train_time:20584420ms step_avg:571.06ms +step:36047/57344 train_time:20584981ms step_avg:571.06ms +grad accum step:9012/14336 +step:36048/57344 train_time:20586328ms step_avg:571.08ms +step:36049/57344 train_time:20586345ms step_avg:571.07ms +step:36050/57344 train_time:20586592ms step_avg:571.06ms +step:36051/57344 train_time:20587140ms step_avg:571.06ms +grad accum step:9013/14336 +step:36052/57344 train_time:20588414ms step_avg:571.08ms +step:36053/57344 train_time:20588428ms step_avg:571.06ms +step:36054/57344 train_time:20588676ms step_avg:571.05ms +step:36055/57344 train_time:20589224ms step_avg:571.05ms +grad accum step:9014/14336 +step:36056/57344 train_time:20590521ms step_avg:571.07ms +step:36057/57344 train_time:20590538ms step_avg:571.06ms +step:36058/57344 train_time:20590785ms step_avg:571.05ms +step:36059/57344 train_time:20591339ms step_avg:571.05ms +grad accum step:9015/14336 +step:36060/57344 train_time:20592699ms step_avg:571.07ms +step:36061/57344 train_time:20592714ms step_avg:571.05ms +step:36062/57344 train_time:20592970ms step_avg:571.04ms +step:36063/57344 train_time:20593540ms step_avg:571.04ms +grad accum step:9016/14336 +step:36064/57344 train_time:20594827ms step_avg:571.06ms +step:36065/57344 train_time:20594841ms step_avg:571.05ms +step:36066/57344 train_time:20595101ms step_avg:571.04ms +step:36067/57344 train_time:20595685ms step_avg:571.04ms +grad accum step:9017/14336 +step:36068/57344 train_time:20597001ms step_avg:571.06ms +step:36069/57344 train_time:20597021ms step_avg:571.04ms +step:36070/57344 train_time:20597264ms step_avg:571.04ms +step:36071/57344 train_time:20597820ms step_avg:571.04ms +grad accum step:9018/14336 +step:36072/57344 train_time:20599123ms step_avg:571.06ms +step:36073/57344 train_time:20599138ms step_avg:571.04ms +step:36074/57344 train_time:20599387ms step_avg:571.03ms +step:36075/57344 train_time:20599930ms step_avg:571.03ms +grad accum step:9019/14336 +step:36076/57344 train_time:20601227ms step_avg:571.05ms +step:36077/57344 train_time:20601245ms step_avg:571.04ms +step:36078/57344 train_time:20601490ms step_avg:571.03ms +step:36079/57344 train_time:20602039ms step_avg:571.03ms +grad accum step:9020/14336 +step:36080/57344 train_time:20603337ms step_avg:571.05ms +step:36081/57344 train_time:20603353ms step_avg:571.03ms +step:36082/57344 train_time:20603597ms step_avg:571.02ms +step:36083/57344 train_time:20604137ms step_avg:571.02ms +grad accum step:9021/14336 +step:36084/57344 train_time:20605441ms step_avg:571.04ms +step:36085/57344 train_time:20605457ms step_avg:571.03ms +step:36086/57344 train_time:20605708ms step_avg:571.02ms +step:36087/57344 train_time:20606265ms step_avg:571.02ms +grad accum step:9022/14336 +step:36088/57344 train_time:20607543ms step_avg:571.04ms +step:36089/57344 train_time:20607560ms step_avg:571.02ms +step:36090/57344 train_time:20607807ms step_avg:571.01ms +step:36091/57344 train_time:20608364ms step_avg:571.01ms +grad accum step:9023/14336 +step:36092/57344 train_time:20609690ms step_avg:571.03ms +step:36093/57344 train_time:20609706ms step_avg:571.02ms +step:36094/57344 train_time:20609947ms step_avg:571.01ms +step:36095/57344 train_time:20610495ms step_avg:571.01ms +grad accum step:9024/14336 +step:36096/57344 train_time:20611833ms step_avg:571.03ms +step:36096/57344 val_loss:5.975174 train_time:20611835ms step_avg:571.03ms +step:36097/57344 train_time:20611847ms step_avg:571.01ms +step:36098/57344 train_time:20612070ms step_avg:571.00ms +step:36099/57344 train_time:20612611ms step_avg:571.00ms +grad accum step:9025/14336 +step:36100/57344 train_time:20613924ms step_avg:571.02ms +step:36101/57344 train_time:20613941ms step_avg:571.01ms +step:36102/57344 train_time:20614192ms step_avg:571.00ms +step:36103/57344 train_time:20614736ms step_avg:571.00ms +grad accum step:9026/14336 +step:36104/57344 train_time:20616066ms step_avg:571.02ms +step:36105/57344 train_time:20616080ms step_avg:571.00ms +step:36106/57344 train_time:20616325ms step_avg:570.99ms +step:36107/57344 train_time:20616872ms step_avg:570.99ms +grad accum step:9027/14336 +step:36108/57344 train_time:20618217ms step_avg:571.02ms +step:36109/57344 train_time:20618231ms step_avg:571.00ms +step:36110/57344 train_time:20618474ms step_avg:570.99ms +step:36111/57344 train_time:20619017ms step_avg:570.99ms +grad accum step:9028/14336 +step:36112/57344 train_time:20620336ms step_avg:571.01ms +step:36113/57344 train_time:20620348ms step_avg:571.00ms +step:36114/57344 train_time:20620579ms step_avg:570.99ms +step:36115/57344 train_time:20621122ms step_avg:570.98ms +grad accum step:9029/14336 +step:36116/57344 train_time:20622431ms step_avg:571.01ms +step:36117/57344 train_time:20622452ms step_avg:570.99ms +step:36118/57344 train_time:20622700ms step_avg:570.98ms +step:36119/57344 train_time:20623270ms step_avg:570.98ms +grad accum step:9030/14336 +step:36120/57344 train_time:20624549ms step_avg:571.00ms +step:36121/57344 train_time:20624562ms step_avg:570.99ms +step:36122/57344 train_time:20624808ms step_avg:570.98ms +step:36123/57344 train_time:20625355ms step_avg:570.98ms +grad accum step:9031/14336 +step:36124/57344 train_time:20626637ms step_avg:571.00ms +step:36125/57344 train_time:20626653ms step_avg:570.98ms +step:36126/57344 train_time:20626898ms step_avg:570.97ms +step:36127/57344 train_time:20627438ms step_avg:570.97ms +grad accum step:9032/14336 +step:36128/57344 train_time:20628718ms step_avg:570.99ms +step:36129/57344 train_time:20628734ms step_avg:570.97ms +step:36130/57344 train_time:20628981ms step_avg:570.97ms +step:36131/57344 train_time:20629536ms step_avg:570.96ms +grad accum step:9033/14336 +step:36132/57344 train_time:20630850ms step_avg:570.99ms +step:36133/57344 train_time:20630862ms step_avg:570.97ms +step:36134/57344 train_time:20631111ms step_avg:570.96ms +step:36135/57344 train_time:20631657ms step_avg:570.96ms +grad accum step:9034/14336 +step:36136/57344 train_time:20632934ms step_avg:570.98ms +step:36137/57344 train_time:20632951ms step_avg:570.96ms +step:36138/57344 train_time:20633199ms step_avg:570.96ms +step:36139/57344 train_time:20633748ms step_avg:570.96ms +grad accum step:9035/14336 +step:36140/57344 train_time:20635041ms step_avg:570.98ms +step:36141/57344 train_time:20635056ms step_avg:570.96ms +step:36142/57344 train_time:20635301ms step_avg:570.95ms +step:36143/57344 train_time:20635847ms step_avg:570.95ms +grad accum step:9036/14336 +step:36144/57344 train_time:20637139ms step_avg:570.97ms +step:36145/57344 train_time:20637154ms step_avg:570.95ms +step:36146/57344 train_time:20637395ms step_avg:570.95ms +step:36147/57344 train_time:20637931ms step_avg:570.94ms +grad accum step:9037/14336 +step:36148/57344 train_time:20639244ms step_avg:570.97ms +step:36149/57344 train_time:20639260ms step_avg:570.95ms +step:36150/57344 train_time:20639503ms step_avg:570.94ms +step:36151/57344 train_time:20640046ms step_avg:570.94ms +grad accum step:9038/14336 +step:36152/57344 train_time:20641417ms step_avg:570.96ms +step:36153/57344 train_time:20641433ms step_avg:570.95ms +step:36154/57344 train_time:20641670ms step_avg:570.94ms +step:36155/57344 train_time:20642219ms step_avg:570.94ms +grad accum step:9039/14336 +step:36156/57344 train_time:20643526ms step_avg:570.96ms +step:36157/57344 train_time:20643542ms step_avg:570.94ms +step:36158/57344 train_time:20643789ms step_avg:570.93ms +step:36159/57344 train_time:20644335ms step_avg:570.93ms +grad accum step:9040/14336 +step:36160/57344 train_time:20645625ms step_avg:570.95ms +step:36160/57344 val_loss:5.979783 train_time:20645639ms step_avg:570.95ms +step:36161/57344 train_time:20645650ms step_avg:570.94ms +step:36162/57344 train_time:20645874ms step_avg:570.93ms +step:36163/57344 train_time:20646413ms step_avg:570.93ms +grad accum step:9041/14336 +step:36164/57344 train_time:20647818ms step_avg:570.95ms +step:36165/57344 train_time:20647833ms step_avg:570.93ms +step:36166/57344 train_time:20648054ms step_avg:570.92ms +step:36167/57344 train_time:20648596ms step_avg:570.92ms +grad accum step:9042/14336 +step:36168/57344 train_time:20649922ms step_avg:570.94ms +step:36169/57344 train_time:20649939ms step_avg:570.93ms +step:36170/57344 train_time:20650193ms step_avg:570.92ms +step:36171/57344 train_time:20650767ms step_avg:570.92ms +grad accum step:9043/14336 +step:36172/57344 train_time:20652062ms step_avg:570.94ms +step:36173/57344 train_time:20652080ms step_avg:570.93ms +step:36174/57344 train_time:20652323ms step_avg:570.92ms +step:36175/57344 train_time:20652874ms step_avg:570.92ms +grad accum step:9044/14336 +step:36176/57344 train_time:20654188ms step_avg:570.94ms +step:36177/57344 train_time:20654222ms step_avg:570.92ms +step:36178/57344 train_time:20654450ms step_avg:570.91ms +step:36179/57344 train_time:20655010ms step_avg:570.91ms +grad accum step:9045/14336 +step:36180/57344 train_time:20656316ms step_avg:570.93ms +step:36181/57344 train_time:20656332ms step_avg:570.92ms +step:36182/57344 train_time:20656572ms step_avg:570.91ms +step:36183/57344 train_time:20657113ms step_avg:570.91ms +grad accum step:9046/14336 +step:36184/57344 train_time:20658424ms step_avg:570.93ms +step:36185/57344 train_time:20658441ms step_avg:570.91ms +step:36186/57344 train_time:20658684ms step_avg:570.90ms +step:36187/57344 train_time:20659239ms step_avg:570.90ms +grad accum step:9047/14336 +step:36188/57344 train_time:20660545ms step_avg:570.92ms +step:36189/57344 train_time:20660571ms step_avg:570.91ms +step:36190/57344 train_time:20660794ms step_avg:570.90ms +step:36191/57344 train_time:20661352ms step_avg:570.90ms +grad accum step:9048/14336 +step:36192/57344 train_time:20662644ms step_avg:570.92ms +step:36193/57344 train_time:20662661ms step_avg:570.90ms +step:36194/57344 train_time:20662910ms step_avg:570.89ms +step:36195/57344 train_time:20663455ms step_avg:570.89ms +grad accum step:9049/14336 +step:36196/57344 train_time:20664759ms step_avg:570.91ms +step:36197/57344 train_time:20664777ms step_avg:570.90ms +step:36198/57344 train_time:20665024ms step_avg:570.89ms +step:36199/57344 train_time:20665590ms step_avg:570.89ms +grad accum step:9050/14336 +step:36200/57344 train_time:20666932ms step_avg:570.91ms +step:36201/57344 train_time:20666949ms step_avg:570.89ms +step:36202/57344 train_time:20667191ms step_avg:570.89ms +step:36203/57344 train_time:20667736ms step_avg:570.88ms +grad accum step:9051/14336 +step:36204/57344 train_time:20669042ms step_avg:570.90ms +step:36205/57344 train_time:20669069ms step_avg:570.89ms +step:36206/57344 train_time:20669298ms step_avg:570.88ms +step:36207/57344 train_time:20669868ms step_avg:570.88ms +grad accum step:9052/14336 +step:36208/57344 train_time:20671187ms step_avg:570.90ms +step:36209/57344 train_time:20671206ms step_avg:570.89ms +step:36210/57344 train_time:20671428ms step_avg:570.88ms +step:36211/57344 train_time:20671981ms step_avg:570.88ms +grad accum step:9053/14336 +step:36212/57344 train_time:20673271ms step_avg:570.90ms +step:36213/57344 train_time:20673290ms step_avg:570.88ms +step:36214/57344 train_time:20673525ms step_avg:570.87ms +step:36215/57344 train_time:20674073ms step_avg:570.87ms +grad accum step:9054/14336 +step:36216/57344 train_time:20675352ms step_avg:570.89ms +step:36217/57344 train_time:20675366ms step_avg:570.87ms +step:36218/57344 train_time:20675614ms step_avg:570.87ms +step:36219/57344 train_time:20676167ms step_avg:570.87ms +grad accum step:9055/14336 +step:36220/57344 train_time:20677533ms step_avg:570.89ms +step:36221/57344 train_time:20677548ms step_avg:570.87ms +step:36222/57344 train_time:20677799ms step_avg:570.86ms +step:36223/57344 train_time:20678353ms step_avg:570.86ms +grad accum step:9056/14336 +step:36224/57344 train_time:20679672ms step_avg:570.88ms +step:36224/57344 val_loss:5.993697 train_time:20679675ms step_avg:570.88ms +step:36225/57344 train_time:20679940ms step_avg:570.87ms +step:36226/57344 train_time:20679973ms step_avg:570.86ms +step:36227/57344 train_time:20680534ms step_avg:570.86ms +grad accum step:9057/14336 +step:36228/57344 train_time:20681918ms step_avg:570.88ms +step:36229/57344 train_time:20681930ms step_avg:570.87ms +step:36230/57344 train_time:20682161ms step_avg:570.86ms +step:36231/57344 train_time:20682709ms step_avg:570.86ms +grad accum step:9058/14336 +step:36232/57344 train_time:20684004ms step_avg:570.88ms +step:36233/57344 train_time:20684021ms step_avg:570.86ms +step:36234/57344 train_time:20684267ms step_avg:570.85ms +step:36235/57344 train_time:20684810ms step_avg:570.85ms +grad accum step:9059/14336 +step:36236/57344 train_time:20686088ms step_avg:570.87ms +step:36237/57344 train_time:20686102ms step_avg:570.86ms +step:36238/57344 train_time:20686351ms step_avg:570.85ms +step:36239/57344 train_time:20686916ms step_avg:570.85ms +grad accum step:9060/14336 +step:36240/57344 train_time:20688218ms step_avg:570.87ms +step:36241/57344 train_time:20688232ms step_avg:570.85ms +step:36242/57344 train_time:20688481ms step_avg:570.84ms +step:36243/57344 train_time:20689034ms step_avg:570.84ms +grad accum step:9061/14336 +step:36244/57344 train_time:20690376ms step_avg:570.86ms +step:36245/57344 train_time:20690390ms step_avg:570.85ms +step:36246/57344 train_time:20690631ms step_avg:570.84ms +step:36247/57344 train_time:20691192ms step_avg:570.84ms +grad accum step:9062/14336 +step:36248/57344 train_time:20692515ms step_avg:570.86ms +step:36249/57344 train_time:20692532ms step_avg:570.84ms +step:36250/57344 train_time:20692780ms step_avg:570.84ms +step:36251/57344 train_time:20693343ms step_avg:570.84ms +grad accum step:9063/14336 +step:36252/57344 train_time:20694666ms step_avg:570.86ms +step:36253/57344 train_time:20694687ms step_avg:570.84ms +step:36254/57344 train_time:20694917ms step_avg:570.83ms +step:36255/57344 train_time:20695462ms step_avg:570.83ms +grad accum step:9064/14336 +step:36256/57344 train_time:20696743ms step_avg:570.85ms +step:36257/57344 train_time:20696763ms step_avg:570.83ms +step:36258/57344 train_time:20697003ms step_avg:570.83ms +step:36259/57344 train_time:20697549ms step_avg:570.83ms +grad accum step:9065/14336 +step:36260/57344 train_time:20698852ms step_avg:570.85ms +step:36261/57344 train_time:20698874ms step_avg:570.83ms +step:36262/57344 train_time:20699097ms step_avg:570.82ms +step:36263/57344 train_time:20699656ms step_avg:570.82ms +grad accum step:9066/14336 +step:36264/57344 train_time:20700973ms step_avg:570.84ms +step:36265/57344 train_time:20700990ms step_avg:570.83ms +step:36266/57344 train_time:20701233ms step_avg:570.82ms +step:36267/57344 train_time:20701776ms step_avg:570.82ms +grad accum step:9067/14336 +step:36268/57344 train_time:20703088ms step_avg:570.84ms +step:36269/57344 train_time:20703105ms step_avg:570.82ms +step:36270/57344 train_time:20703354ms step_avg:570.81ms +step:36271/57344 train_time:20703913ms step_avg:570.81ms +grad accum step:9068/14336 +step:36272/57344 train_time:20705281ms step_avg:570.83ms +step:36273/57344 train_time:20705292ms step_avg:570.82ms +step:36274/57344 train_time:20705528ms step_avg:570.81ms +step:36275/57344 train_time:20706076ms step_avg:570.81ms +grad accum step:9069/14336 +step:36276/57344 train_time:20707368ms step_avg:570.83ms +step:36277/57344 train_time:20707384ms step_avg:570.81ms +step:36278/57344 train_time:20707630ms step_avg:570.80ms +step:36279/57344 train_time:20708178ms step_avg:570.80ms +grad accum step:9070/14336 +step:36280/57344 train_time:20709477ms step_avg:570.82ms +step:36281/57344 train_time:20709494ms step_avg:570.81ms +step:36282/57344 train_time:20709739ms step_avg:570.80ms +step:36283/57344 train_time:20710285ms step_avg:570.80ms +grad accum step:9071/14336 +step:36284/57344 train_time:20711894ms step_avg:570.83ms +step:36285/57344 train_time:20712150ms step_avg:570.82ms +step:36286/57344 train_time:20712302ms step_avg:570.81ms +step:36287/57344 train_time:20712840ms step_avg:570.81ms +grad accum step:9072/14336 +step:36288/57344 train_time:20714143ms step_avg:570.83ms +step:36288/57344 val_loss:5.994636 train_time:20714144ms step_avg:570.83ms +step:36289/57344 train_time:20714156ms step_avg:570.81ms +step:36290/57344 train_time:20714375ms step_avg:570.80ms +step:36291/57344 train_time:20714919ms step_avg:570.80ms +grad accum step:9073/14336 +step:36292/57344 train_time:20716247ms step_avg:570.82ms +step:36293/57344 train_time:20716263ms step_avg:570.81ms +step:36294/57344 train_time:20716508ms step_avg:570.80ms +step:36295/57344 train_time:20717068ms step_avg:570.80ms +grad accum step:9074/14336 +step:36296/57344 train_time:20718373ms step_avg:570.82ms +step:36297/57344 train_time:20718385ms step_avg:570.80ms +step:36298/57344 train_time:20718630ms step_avg:570.79ms +step:36299/57344 train_time:20719181ms step_avg:570.79ms +grad accum step:9075/14336 +step:36300/57344 train_time:20720531ms step_avg:570.81ms +step:36301/57344 train_time:20720546ms step_avg:570.80ms +step:36302/57344 train_time:20720777ms step_avg:570.79ms +step:36303/57344 train_time:20721317ms step_avg:570.79ms +grad accum step:9076/14336 +step:36304/57344 train_time:20722595ms step_avg:570.81ms +step:36305/57344 train_time:20722611ms step_avg:570.79ms +step:36306/57344 train_time:20722863ms step_avg:570.78ms +step:36307/57344 train_time:20723429ms step_avg:570.78ms +grad accum step:9077/14336 +step:36308/57344 train_time:20724747ms step_avg:570.80ms +step:36309/57344 train_time:20724762ms step_avg:570.79ms +step:36310/57344 train_time:20725006ms step_avg:570.78ms +step:36311/57344 train_time:20725552ms step_avg:570.78ms +grad accum step:9078/14336 +step:36312/57344 train_time:20726833ms step_avg:570.80ms +step:36313/57344 train_time:20726850ms step_avg:570.78ms +step:36314/57344 train_time:20727090ms step_avg:570.77ms +step:36315/57344 train_time:20727640ms step_avg:570.77ms +grad accum step:9079/14336 +step:36316/57344 train_time:20729055ms step_avg:570.80ms +step:36317/57344 train_time:20729072ms step_avg:570.78ms +step:36318/57344 train_time:20729295ms step_avg:570.77ms +step:36319/57344 train_time:20729853ms step_avg:570.77ms +grad accum step:9080/14336 +step:36320/57344 train_time:20731146ms step_avg:570.79ms +step:36321/57344 train_time:20731162ms step_avg:570.78ms +step:36322/57344 train_time:20731407ms step_avg:570.77ms +step:36323/57344 train_time:20731966ms step_avg:570.77ms +grad accum step:9081/14336 +step:36324/57344 train_time:20733320ms step_avg:570.79ms +step:36325/57344 train_time:20733336ms step_avg:570.77ms +step:36326/57344 train_time:20733579ms step_avg:570.76ms +step:36327/57344 train_time:20734126ms step_avg:570.76ms +grad accum step:9082/14336 +step:36328/57344 train_time:20735430ms step_avg:570.78ms +step:36329/57344 train_time:20735446ms step_avg:570.77ms +step:36330/57344 train_time:20735701ms step_avg:570.76ms +step:36331/57344 train_time:20736271ms step_avg:570.76ms +grad accum step:9083/14336 +step:36332/57344 train_time:20737583ms step_avg:570.78ms +step:36333/57344 train_time:20737607ms step_avg:570.77ms +step:36334/57344 train_time:20737850ms step_avg:570.76ms +step:36335/57344 train_time:20738411ms step_avg:570.76ms +grad accum step:9084/14336 +step:36336/57344 train_time:20739783ms step_avg:570.78ms +step:36337/57344 train_time:20739796ms step_avg:570.76ms +step:36338/57344 train_time:20740045ms step_avg:570.75ms +step:36339/57344 train_time:20740603ms step_avg:570.75ms +grad accum step:9085/14336 +step:36340/57344 train_time:20741902ms step_avg:570.77ms +step:36341/57344 train_time:20741918ms step_avg:570.76ms +step:36342/57344 train_time:20742158ms step_avg:570.75ms +step:36343/57344 train_time:20742720ms step_avg:570.75ms +grad accum step:9086/14336 +step:36344/57344 train_time:20744102ms step_avg:570.77ms +step:36345/57344 train_time:20744122ms step_avg:570.76ms +step:36346/57344 train_time:20744341ms step_avg:570.75ms +step:36347/57344 train_time:20744893ms step_avg:570.75ms +grad accum step:9087/14336 +step:36348/57344 train_time:20746186ms step_avg:570.77ms +step:36349/57344 train_time:20746202ms step_avg:570.75ms +step:36350/57344 train_time:20746449ms step_avg:570.74ms +step:36351/57344 train_time:20747018ms step_avg:570.74ms +grad accum step:9088/14336 +step:36352/57344 train_time:20748378ms step_avg:570.76ms +step:36352/57344 val_loss:6.008160 train_time:20748389ms step_avg:570.76ms +step:36353/57344 train_time:20748401ms step_avg:570.75ms +step:36354/57344 train_time:20748621ms step_avg:570.74ms +step:36355/57344 train_time:20749164ms step_avg:570.74ms +grad accum step:9089/14336 +step:36356/57344 train_time:20750450ms step_avg:570.76ms +step:36357/57344 train_time:20750469ms step_avg:570.74ms +step:36358/57344 train_time:20750708ms step_avg:570.73ms +step:36359/57344 train_time:20751257ms step_avg:570.73ms +grad accum step:9090/14336 +step:36360/57344 train_time:20752583ms step_avg:570.75ms +step:36361/57344 train_time:20752600ms step_avg:570.74ms +step:36362/57344 train_time:20752851ms step_avg:570.73ms +step:36363/57344 train_time:20753430ms step_avg:570.73ms +grad accum step:9091/14336 +step:36364/57344 train_time:20754764ms step_avg:570.75ms +step:36365/57344 train_time:20754778ms step_avg:570.73ms +step:36366/57344 train_time:20755023ms step_avg:570.73ms +step:36367/57344 train_time:20755573ms step_avg:570.73ms +grad accum step:9092/14336 +step:36368/57344 train_time:20756872ms step_avg:570.75ms +step:36369/57344 train_time:20756891ms step_avg:570.73ms +step:36370/57344 train_time:20757139ms step_avg:570.72ms +step:36371/57344 train_time:20757694ms step_avg:570.72ms +grad accum step:9093/14336 +step:36372/57344 train_time:20759009ms step_avg:570.74ms +step:36373/57344 train_time:20759086ms step_avg:570.73ms +step:36374/57344 train_time:20759305ms step_avg:570.72ms +step:36375/57344 train_time:20759860ms step_avg:570.72ms +grad accum step:9094/14336 +step:36376/57344 train_time:20761180ms step_avg:570.74ms +step:36377/57344 train_time:20761228ms step_avg:570.72ms +step:36378/57344 train_time:20761456ms step_avg:570.71ms +step:36379/57344 train_time:20762017ms step_avg:570.71ms +grad accum step:9095/14336 +step:36380/57344 train_time:20763424ms step_avg:570.74ms +step:36381/57344 train_time:20763437ms step_avg:570.72ms +step:36382/57344 train_time:20763656ms step_avg:570.71ms +step:36383/57344 train_time:20764213ms step_avg:570.71ms +grad accum step:9096/14336 +step:36384/57344 train_time:20765670ms step_avg:570.74ms +step:36385/57344 train_time:20765711ms step_avg:570.72ms +step:36386/57344 train_time:20765926ms step_avg:570.71ms +step:36387/57344 train_time:20766483ms step_avg:570.71ms +grad accum step:9097/14336 +step:36388/57344 train_time:20767823ms step_avg:570.73ms +step:36389/57344 train_time:20767841ms step_avg:570.72ms +step:36390/57344 train_time:20768076ms step_avg:570.71ms +step:36391/57344 train_time:20768626ms step_avg:570.71ms +grad accum step:9098/14336 +step:36392/57344 train_time:20769930ms step_avg:570.73ms +step:36393/57344 train_time:20769947ms step_avg:570.71ms +step:36394/57344 train_time:20770187ms step_avg:570.70ms +step:36395/57344 train_time:20770733ms step_avg:570.70ms +grad accum step:9099/14336 +step:36396/57344 train_time:20772067ms step_avg:570.72ms +step:36397/57344 train_time:20772085ms step_avg:570.71ms +step:36398/57344 train_time:20772316ms step_avg:570.70ms +step:36399/57344 train_time:20772866ms step_avg:570.70ms +grad accum step:9100/14336 +step:36400/57344 train_time:20774206ms step_avg:570.72ms +step:36401/57344 train_time:20774234ms step_avg:570.71ms +step:36402/57344 train_time:20774451ms step_avg:570.70ms +step:36403/57344 train_time:20774995ms step_avg:570.69ms +grad accum step:9101/14336 +step:36404/57344 train_time:20776292ms step_avg:570.71ms +step:36405/57344 train_time:20776305ms step_avg:570.70ms +step:36406/57344 train_time:20776552ms step_avg:570.69ms +step:36407/57344 train_time:20777099ms step_avg:570.69ms +grad accum step:9102/14336 +step:36408/57344 train_time:20778415ms step_avg:570.71ms +step:36409/57344 train_time:20778431ms step_avg:570.69ms +step:36410/57344 train_time:20778679ms step_avg:570.69ms +step:36411/57344 train_time:20779228ms step_avg:570.69ms +grad accum step:9103/14336 +step:36412/57344 train_time:20780600ms step_avg:570.71ms +step:36413/57344 train_time:20780612ms step_avg:570.69ms +step:36414/57344 train_time:20780852ms step_avg:570.68ms +step:36415/57344 train_time:20781409ms step_avg:570.68ms +grad accum step:9104/14336 +step:36416/57344 train_time:20790766ms step_avg:570.92ms +step:36416/57344 val_loss:6.015017 train_time:20790767ms step_avg:570.92ms +step:36417/57344 train_time:20790779ms step_avg:570.91ms +step:36418/57344 train_time:20791006ms step_avg:570.90ms +step:36419/57344 train_time:20791570ms step_avg:570.90ms +grad accum step:9105/14336 +step:36420/57344 train_time:20792854ms step_avg:570.92ms +step:36421/57344 train_time:20792869ms step_avg:570.90ms +step:36422/57344 train_time:20793112ms step_avg:570.89ms +step:36423/57344 train_time:20793658ms step_avg:570.89ms +grad accum step:9106/14336 +step:36424/57344 train_time:20794991ms step_avg:570.91ms +step:36425/57344 train_time:20795006ms step_avg:570.90ms +step:36426/57344 train_time:20795230ms step_avg:570.89ms +step:36427/57344 train_time:20795788ms step_avg:570.89ms +grad accum step:9107/14336 +step:36428/57344 train_time:20797081ms step_avg:570.91ms +step:36429/57344 train_time:20797097ms step_avg:570.89ms +step:36430/57344 train_time:20797332ms step_avg:570.88ms +step:36431/57344 train_time:20797877ms step_avg:570.88ms +grad accum step:9108/14336 +step:36432/57344 train_time:20799153ms step_avg:570.90ms +step:36433/57344 train_time:20799170ms step_avg:570.89ms +step:36434/57344 train_time:20799413ms step_avg:570.88ms +step:36435/57344 train_time:20799947ms step_avg:570.88ms +grad accum step:9109/14336 +step:36436/57344 train_time:20801273ms step_avg:570.90ms +step:36437/57344 train_time:20801286ms step_avg:570.88ms +step:36438/57344 train_time:20801532ms step_avg:570.87ms +step:36439/57344 train_time:20802080ms step_avg:570.87ms +grad accum step:9110/14336 +step:36440/57344 train_time:20803396ms step_avg:570.89ms +step:36441/57344 train_time:20803409ms step_avg:570.88ms +step:36442/57344 train_time:20803661ms step_avg:570.87ms +step:36443/57344 train_time:20804214ms step_avg:570.87ms +grad accum step:9111/14336 +step:36444/57344 train_time:20805489ms step_avg:570.89ms +step:36445/57344 train_time:20805504ms step_avg:570.87ms +step:36446/57344 train_time:20805748ms step_avg:570.87ms +step:36447/57344 train_time:20806294ms step_avg:570.86ms +grad accum step:9112/14336 +step:36448/57344 train_time:20807588ms step_avg:570.88ms +step:36449/57344 train_time:20807602ms step_avg:570.87ms +step:36450/57344 train_time:20807850ms step_avg:570.86ms +step:36451/57344 train_time:20808409ms step_avg:570.86ms +grad accum step:9113/14336 +step:36452/57344 train_time:20809736ms step_avg:570.88ms +step:36453/57344 train_time:20809753ms step_avg:570.87ms +step:36454/57344 train_time:20809988ms step_avg:570.86ms +step:36455/57344 train_time:20810526ms step_avg:570.86ms +grad accum step:9114/14336 +step:36456/57344 train_time:20811822ms step_avg:570.88ms +step:36457/57344 train_time:20811837ms step_avg:570.86ms +step:36458/57344 train_time:20812078ms step_avg:570.85ms +step:36459/57344 train_time:20812625ms step_avg:570.85ms +grad accum step:9115/14336 +step:36460/57344 train_time:20813934ms step_avg:570.87ms +step:36461/57344 train_time:20813951ms step_avg:570.86ms +step:36462/57344 train_time:20814205ms step_avg:570.85ms +step:36463/57344 train_time:20814775ms step_avg:570.85ms +grad accum step:9116/14336 +step:36464/57344 train_time:20816095ms step_avg:570.87ms +step:36465/57344 train_time:20816108ms step_avg:570.85ms +step:36466/57344 train_time:20816354ms step_avg:570.84ms +step:36467/57344 train_time:20816901ms step_avg:570.84ms +grad accum step:9117/14336 +step:36468/57344 train_time:20818188ms step_avg:570.86ms +step:36469/57344 train_time:20818205ms step_avg:570.85ms +step:36470/57344 train_time:20818454ms step_avg:570.84ms +step:36471/57344 train_time:20819001ms step_avg:570.84ms +grad accum step:9118/14336 +step:36472/57344 train_time:20820293ms step_avg:570.86ms +step:36473/57344 train_time:20820315ms step_avg:570.84ms +step:36474/57344 train_time:20820536ms step_avg:570.83ms +step:36475/57344 train_time:20821082ms step_avg:570.83ms +grad accum step:9119/14336 +step:36476/57344 train_time:20822357ms step_avg:570.85ms +step:36477/57344 train_time:20822372ms step_avg:570.84ms +step:36478/57344 train_time:20822619ms step_avg:570.83ms +step:36479/57344 train_time:20823168ms step_avg:570.83ms +grad accum step:9120/14336 +step:36480/57344 train_time:20824492ms step_avg:570.85ms +step:36480/57344 val_loss:6.016824 train_time:20824497ms step_avg:570.85ms +step:36481/57344 train_time:20824509ms step_avg:570.83ms +step:36482/57344 train_time:20824733ms step_avg:570.82ms +step:36483/57344 train_time:20825281ms step_avg:570.82ms +grad accum step:9121/14336 +step:36484/57344 train_time:20826581ms step_avg:570.84ms +step:36485/57344 train_time:20826598ms step_avg:570.83ms +step:36486/57344 train_time:20826846ms step_avg:570.82ms +step:36487/57344 train_time:20827402ms step_avg:570.82ms +grad accum step:9122/14336 +step:36488/57344 train_time:20828738ms step_avg:570.84ms +step:36489/57344 train_time:20828779ms step_avg:570.82ms +step:36490/57344 train_time:20828994ms step_avg:570.81ms +step:36491/57344 train_time:20829537ms step_avg:570.81ms +grad accum step:9123/14336 +step:36492/57344 train_time:20830818ms step_avg:570.83ms +step:36493/57344 train_time:20830835ms step_avg:570.82ms +step:36494/57344 train_time:20831088ms step_avg:570.81ms +step:36495/57344 train_time:20831649ms step_avg:570.81ms +grad accum step:9124/14336 +step:36496/57344 train_time:20832934ms step_avg:570.83ms +step:36497/57344 train_time:20832946ms step_avg:570.81ms +step:36498/57344 train_time:20833189ms step_avg:570.80ms +step:36499/57344 train_time:20833740ms step_avg:570.80ms +grad accum step:9125/14336 +step:36500/57344 train_time:20835061ms step_avg:570.82ms +step:36501/57344 train_time:20835077ms step_avg:570.81ms +step:36502/57344 train_time:20835324ms step_avg:570.80ms +step:36503/57344 train_time:20835874ms step_avg:570.80ms +grad accum step:9126/14336 +step:36504/57344 train_time:20837216ms step_avg:570.82ms +step:36505/57344 train_time:20837238ms step_avg:570.81ms +step:36506/57344 train_time:20837457ms step_avg:570.80ms +step:36507/57344 train_time:20838005ms step_avg:570.79ms +grad accum step:9127/14336 +step:36508/57344 train_time:20839292ms step_avg:570.81ms +step:36509/57344 train_time:20839309ms step_avg:570.80ms +step:36510/57344 train_time:20839555ms step_avg:570.79ms +step:36511/57344 train_time:20840097ms step_avg:570.79ms +grad accum step:9128/14336 +step:36512/57344 train_time:20841386ms step_avg:570.81ms +step:36513/57344 train_time:20841403ms step_avg:570.79ms +step:36514/57344 train_time:20841649ms step_avg:570.79ms +step:36515/57344 train_time:20842194ms step_avg:570.78ms +grad accum step:9129/14336 +step:36516/57344 train_time:20843475ms step_avg:570.80ms +step:36517/57344 train_time:20843492ms step_avg:570.79ms +step:36518/57344 train_time:20843742ms step_avg:570.78ms +step:36519/57344 train_time:20844302ms step_avg:570.78ms +grad accum step:9130/14336 +step:36520/57344 train_time:20845607ms step_avg:570.80ms +step:36521/57344 train_time:20845624ms step_avg:570.78ms +step:36522/57344 train_time:20845874ms step_avg:570.78ms +step:36523/57344 train_time:20846437ms step_avg:570.78ms +grad accum step:9131/14336 +step:36524/57344 train_time:20847749ms step_avg:570.80ms +step:36525/57344 train_time:20847764ms step_avg:570.78ms +step:36526/57344 train_time:20848013ms step_avg:570.77ms +step:36527/57344 train_time:20848570ms step_avg:570.77ms +grad accum step:9132/14336 +step:36528/57344 train_time:20849879ms step_avg:570.79ms +step:36529/57344 train_time:20849896ms step_avg:570.78ms +step:36530/57344 train_time:20850146ms step_avg:570.77ms +step:36531/57344 train_time:20850699ms step_avg:570.77ms +grad accum step:9133/14336 +step:36532/57344 train_time:20852025ms step_avg:570.79ms +step:36533/57344 train_time:20852042ms step_avg:570.77ms +step:36534/57344 train_time:20852288ms step_avg:570.76ms +step:36535/57344 train_time:20852836ms step_avg:570.76ms +grad accum step:9134/14336 +step:36536/57344 train_time:20854132ms step_avg:570.78ms +step:36537/57344 train_time:20854148ms step_avg:570.77ms +step:36538/57344 train_time:20854391ms step_avg:570.76ms +step:36539/57344 train_time:20854935ms step_avg:570.76ms +grad accum step:9135/14336 +step:36540/57344 train_time:20856235ms step_avg:570.78ms +step:36541/57344 train_time:20856252ms step_avg:570.76ms +step:36542/57344 train_time:20856499ms step_avg:570.75ms +step:36543/57344 train_time:20857048ms step_avg:570.75ms +grad accum step:9136/14336 +step:36544/57344 train_time:20858353ms step_avg:570.77ms +step:36544/57344 val_loss:6.023655 train_time:20858353ms step_avg:570.77ms +step:36545/57344 train_time:20858365ms step_avg:570.76ms +step:36546/57344 train_time:20858588ms step_avg:570.75ms +step:36547/57344 train_time:20859142ms step_avg:570.75ms +grad accum step:9137/14336 +step:36548/57344 train_time:20860457ms step_avg:570.77ms +step:36549/57344 train_time:20860472ms step_avg:570.75ms +step:36550/57344 train_time:20860724ms step_avg:570.74ms +step:36551/57344 train_time:20861292ms step_avg:570.74ms +grad accum step:9138/14336 +step:36552/57344 train_time:20862640ms step_avg:570.77ms +step:36553/57344 train_time:20862652ms step_avg:570.75ms +step:36554/57344 train_time:20862880ms step_avg:570.74ms +step:36555/57344 train_time:20863438ms step_avg:570.74ms +grad accum step:9139/14336 +step:36556/57344 train_time:20864711ms step_avg:570.76ms +step:36557/57344 train_time:20864728ms step_avg:570.75ms +step:36558/57344 train_time:20864974ms step_avg:570.74ms +step:36559/57344 train_time:20865519ms step_avg:570.74ms +grad accum step:9140/14336 +step:36560/57344 train_time:20866819ms step_avg:570.76ms +step:36561/57344 train_time:20866836ms step_avg:570.74ms +step:36562/57344 train_time:20867085ms step_avg:570.73ms +step:36563/57344 train_time:20867629ms step_avg:570.73ms +grad accum step:9141/14336 +step:36564/57344 train_time:20868933ms step_avg:570.75ms +step:36565/57344 train_time:20868950ms step_avg:570.74ms +step:36566/57344 train_time:20869200ms step_avg:570.73ms +step:36567/57344 train_time:20869747ms step_avg:570.73ms +grad accum step:9142/14336 +step:36568/57344 train_time:20871104ms step_avg:570.75ms +step:36569/57344 train_time:20871127ms step_avg:570.73ms +step:36570/57344 train_time:20871365ms step_avg:570.72ms +step:36571/57344 train_time:20871943ms step_avg:570.72ms +grad accum step:9143/14336 +step:36572/57344 train_time:20873344ms step_avg:570.75ms +step:36573/57344 train_time:20873360ms step_avg:570.73ms +step:36574/57344 train_time:20873627ms step_avg:570.72ms +step:36575/57344 train_time:20874222ms step_avg:570.72ms +grad accum step:9144/14336 +step:36576/57344 train_time:20875536ms step_avg:570.74ms +step:36577/57344 train_time:20875551ms step_avg:570.73ms +step:36578/57344 train_time:20875793ms step_avg:570.72ms +step:36579/57344 train_time:20876333ms step_avg:570.72ms +grad accum step:9145/14336 +step:36580/57344 train_time:20877625ms step_avg:570.74ms +step:36581/57344 train_time:20877642ms step_avg:570.72ms +step:36582/57344 train_time:20877888ms step_avg:570.71ms +step:36583/57344 train_time:20878431ms step_avg:570.71ms +grad accum step:9146/14336 +step:36584/57344 train_time:20879793ms step_avg:570.74ms +step:36585/57344 train_time:20879809ms step_avg:570.72ms +step:36586/57344 train_time:20880055ms step_avg:570.71ms +step:36587/57344 train_time:20880597ms step_avg:570.71ms +grad accum step:9147/14336 +step:36588/57344 train_time:20881911ms step_avg:570.73ms +step:36589/57344 train_time:20881924ms step_avg:570.72ms +step:36590/57344 train_time:20882149ms step_avg:570.71ms +step:36591/57344 train_time:20882694ms step_avg:570.71ms +grad accum step:9148/14336 +step:36592/57344 train_time:20883977ms step_avg:570.73ms +step:36593/57344 train_time:20883994ms step_avg:570.71ms +step:36594/57344 train_time:20884242ms step_avg:570.70ms +step:36595/57344 train_time:20884784ms step_avg:570.70ms +grad accum step:9149/14336 +step:36596/57344 train_time:20886065ms step_avg:570.72ms +step:36597/57344 train_time:20886081ms step_avg:570.70ms +step:36598/57344 train_time:20886330ms step_avg:570.70ms +step:36599/57344 train_time:20886883ms step_avg:570.70ms +grad accum step:9150/14336 +step:36600/57344 train_time:20888208ms step_avg:570.72ms +step:36601/57344 train_time:20888225ms step_avg:570.70ms +step:36602/57344 train_time:20888488ms step_avg:570.69ms +step:36603/57344 train_time:20889069ms step_avg:570.69ms +grad accum step:9151/14336 +step:36604/57344 train_time:20890359ms step_avg:570.71ms +step:36605/57344 train_time:20890376ms step_avg:570.70ms +step:36606/57344 train_time:20890624ms step_avg:570.69ms +step:36607/57344 train_time:20891169ms step_avg:570.69ms +grad accum step:9152/14336 +step:36608/57344 train_time:20892491ms step_avg:570.71ms +step:36608/57344 val_loss:6.038690 train_time:20892492ms step_avg:570.71ms +step:36609/57344 train_time:20892504ms step_avg:570.69ms +step:36610/57344 train_time:20892724ms step_avg:570.68ms +step:36611/57344 train_time:20893284ms step_avg:570.68ms +grad accum step:9153/14336 +step:36612/57344 train_time:20894654ms step_avg:570.71ms +step:36613/57344 train_time:20894671ms step_avg:570.69ms +step:36614/57344 train_time:20894914ms step_avg:570.68ms +step:36615/57344 train_time:20895458ms step_avg:570.68ms +grad accum step:9154/14336 +step:36616/57344 train_time:20896759ms step_avg:570.70ms +step:36617/57344 train_time:20896771ms step_avg:570.68ms +step:36618/57344 train_time:20897012ms step_avg:570.68ms +step:36619/57344 train_time:20897554ms step_avg:570.68ms +grad accum step:9155/14336 +step:36620/57344 train_time:20898878ms step_avg:570.70ms +step:36621/57344 train_time:20898895ms step_avg:570.68ms +step:36622/57344 train_time:20899141ms step_avg:570.67ms +step:36623/57344 train_time:20899684ms step_avg:570.67ms +grad accum step:9156/14336 +step:36624/57344 train_time:20900957ms step_avg:570.69ms +step:36625/57344 train_time:20900974ms step_avg:570.68ms +step:36626/57344 train_time:20901222ms step_avg:570.67ms +step:36627/57344 train_time:20901769ms step_avg:570.67ms +grad accum step:9157/14336 +step:36628/57344 train_time:20903111ms step_avg:570.69ms +step:36629/57344 train_time:20903126ms step_avg:570.67ms +step:36630/57344 train_time:20903354ms step_avg:570.66ms +step:36631/57344 train_time:20903911ms step_avg:570.66ms +grad accum step:9158/14336 +step:36632/57344 train_time:20905211ms step_avg:570.68ms +step:36633/57344 train_time:20905228ms step_avg:570.67ms +step:36634/57344 train_time:20905469ms step_avg:570.66ms +step:36635/57344 train_time:20906018ms step_avg:570.66ms +grad accum step:9159/14336 +step:36636/57344 train_time:20907353ms step_avg:570.68ms +step:36637/57344 train_time:20907367ms step_avg:570.66ms +step:36638/57344 train_time:20907614ms step_avg:570.65ms +step:36639/57344 train_time:20908158ms step_avg:570.65ms +grad accum step:9160/14336 +step:36640/57344 train_time:20909453ms step_avg:570.67ms +step:36641/57344 train_time:20909469ms step_avg:570.66ms +step:36642/57344 train_time:20909715ms step_avg:570.65ms +step:36643/57344 train_time:20910260ms step_avg:570.65ms +grad accum step:9161/14336 +step:36644/57344 train_time:20911580ms step_avg:570.67ms +step:36645/57344 train_time:20911597ms step_avg:570.65ms +step:36646/57344 train_time:20911846ms step_avg:570.64ms +step:36647/57344 train_time:20912409ms step_avg:570.64ms +grad accum step:9162/14336 +step:36648/57344 train_time:20913749ms step_avg:570.67ms +step:36649/57344 train_time:20913764ms step_avg:570.65ms +step:36650/57344 train_time:20914012ms step_avg:570.64ms +step:36651/57344 train_time:20914571ms step_avg:570.64ms +grad accum step:9163/14336 +step:36652/57344 train_time:20915892ms step_avg:570.66ms +step:36653/57344 train_time:20915931ms step_avg:570.65ms +step:36654/57344 train_time:20916157ms step_avg:570.64ms +step:36655/57344 train_time:20916721ms step_avg:570.64ms +grad accum step:9164/14336 +step:36656/57344 train_time:20918009ms step_avg:570.66ms +step:36657/57344 train_time:20918026ms step_avg:570.64ms +step:36658/57344 train_time:20918268ms step_avg:570.63ms +step:36659/57344 train_time:20918816ms step_avg:570.63ms +grad accum step:9165/14336 +step:36660/57344 train_time:20920130ms step_avg:570.65ms +step:36661/57344 train_time:20920145ms step_avg:570.64ms +step:36662/57344 train_time:20920379ms step_avg:570.63ms +step:36663/57344 train_time:20920934ms step_avg:570.63ms +grad accum step:9166/14336 +step:36664/57344 train_time:20949142ms step_avg:571.38ms +step:36665/57344 train_time:20949155ms step_avg:571.37ms +step:36666/57344 train_time:20949425ms step_avg:571.36ms +step:36667/57344 train_time:20949971ms step_avg:571.36ms +grad accum step:9167/14336 +step:36668/57344 train_time:20951253ms step_avg:571.38ms +step:36669/57344 train_time:20951270ms step_avg:571.36ms +step:36670/57344 train_time:20951512ms step_avg:571.35ms +step:36671/57344 train_time:20952050ms step_avg:571.35ms +grad accum step:9168/14336 +step:36672/57344 train_time:20953341ms step_avg:571.37ms +step:36672/57344 val_loss:6.041234 train_time:20953342ms step_avg:571.37ms +step:36673/57344 train_time:20953354ms step_avg:571.36ms +step:36674/57344 train_time:20953642ms step_avg:571.35ms +step:36675/57344 train_time:20954195ms step_avg:571.35ms +grad accum step:9169/14336 +step:36676/57344 train_time:20955562ms step_avg:571.37ms +step:36677/57344 train_time:20955577ms step_avg:571.35ms +step:36678/57344 train_time:20955827ms step_avg:571.35ms +step:36679/57344 train_time:20956382ms step_avg:571.35ms +grad accum step:9170/14336 +step:36680/57344 train_time:20957663ms step_avg:571.36ms +step:36681/57344 train_time:20957680ms step_avg:571.35ms +step:36682/57344 train_time:20957935ms step_avg:571.34ms +step:36683/57344 train_time:20958506ms step_avg:571.34ms +grad accum step:9171/14336 +step:36684/57344 train_time:20959814ms step_avg:571.36ms +step:36685/57344 train_time:20959830ms step_avg:571.35ms +step:36686/57344 train_time:20960072ms step_avg:571.34ms +step:36687/57344 train_time:20960617ms step_avg:571.34ms +grad accum step:9172/14336 +step:36688/57344 train_time:20961928ms step_avg:571.36ms +step:36689/57344 train_time:20961945ms step_avg:571.34ms +step:36690/57344 train_time:20962190ms step_avg:571.33ms +step:36691/57344 train_time:20962741ms step_avg:571.33ms +grad accum step:9173/14336 +step:36692/57344 train_time:20964046ms step_avg:571.35ms +step:36693/57344 train_time:20964062ms step_avg:571.34ms +step:36694/57344 train_time:20964307ms step_avg:571.33ms +step:36695/57344 train_time:20964849ms step_avg:571.33ms +grad accum step:9174/14336 +step:36696/57344 train_time:20966146ms step_avg:571.35ms +step:36697/57344 train_time:20966177ms step_avg:571.33ms +step:36698/57344 train_time:20966395ms step_avg:571.32ms +step:36699/57344 train_time:20966943ms step_avg:571.32ms +grad accum step:9175/14336 +step:36700/57344 train_time:20968233ms step_avg:571.34ms +step:36701/57344 train_time:20968249ms step_avg:571.33ms +step:36702/57344 train_time:20968496ms step_avg:571.32ms +step:36703/57344 train_time:20969055ms step_avg:571.32ms +grad accum step:9176/14336 +step:36704/57344 train_time:20970381ms step_avg:571.34ms +step:36705/57344 train_time:20970403ms step_avg:571.32ms +step:36706/57344 train_time:20970627ms step_avg:571.31ms +step:36707/57344 train_time:20971178ms step_avg:571.31ms +grad accum step:9177/14336 +step:36708/57344 train_time:20972492ms step_avg:571.33ms +step:36709/57344 train_time:20972508ms step_avg:571.32ms +step:36710/57344 train_time:20972756ms step_avg:571.31ms +step:36711/57344 train_time:20973303ms step_avg:571.31ms +grad accum step:9178/14336 +step:36712/57344 train_time:20974581ms step_avg:571.33ms +step:36713/57344 train_time:20974598ms step_avg:571.31ms +step:36714/57344 train_time:20974846ms step_avg:571.30ms +step:36715/57344 train_time:20975392ms step_avg:571.30ms +grad accum step:9179/14336 +step:36716/57344 train_time:20976695ms step_avg:571.32ms +step:36717/57344 train_time:20976712ms step_avg:571.31ms +step:36718/57344 train_time:20976961ms step_avg:571.30ms +step:36719/57344 train_time:20977515ms step_avg:571.30ms +grad accum step:9180/14336 +step:36720/57344 train_time:20978803ms step_avg:571.32ms +step:36721/57344 train_time:20978817ms step_avg:571.30ms +step:36722/57344 train_time:20979075ms step_avg:571.29ms +step:36723/57344 train_time:20979647ms step_avg:571.29ms +grad accum step:9181/14336 +step:36724/57344 train_time:20980969ms step_avg:571.31ms +step:36725/57344 train_time:20980986ms step_avg:571.30ms +step:36726/57344 train_time:20981227ms step_avg:571.29ms +step:36727/57344 train_time:20981766ms step_avg:571.29ms +grad accum step:9182/14336 +step:36728/57344 train_time:20983079ms step_avg:571.31ms +step:36729/57344 train_time:20983095ms step_avg:571.30ms +step:36730/57344 train_time:20983344ms step_avg:571.29ms +step:36731/57344 train_time:20983891ms step_avg:571.29ms +grad accum step:9183/14336 +step:36732/57344 train_time:20985185ms step_avg:571.31ms +step:36733/57344 train_time:20985203ms step_avg:571.29ms +step:36734/57344 train_time:20985447ms step_avg:571.28ms +step:36735/57344 train_time:20985995ms step_avg:571.28ms +grad accum step:9184/14336 +step:36736/57344 train_time:20987288ms step_avg:571.30ms +step:36736/57344 val_loss:6.044783 train_time:20987288ms step_avg:571.30ms +step:36737/57344 train_time:20987300ms step_avg:571.29ms +step:36738/57344 train_time:20987523ms step_avg:571.28ms +step:36739/57344 train_time:20988069ms step_avg:571.27ms +grad accum step:9185/14336 +step:36740/57344 train_time:20989340ms step_avg:571.29ms +step:36741/57344 train_time:20989357ms step_avg:571.28ms +step:36742/57344 train_time:20989608ms step_avg:571.27ms +step:36743/57344 train_time:20990163ms step_avg:571.27ms +grad accum step:9186/14336 +step:36744/57344 train_time:20991526ms step_avg:571.29ms +step:36745/57344 train_time:20991543ms step_avg:571.28ms +step:36746/57344 train_time:20991797ms step_avg:571.27ms +step:36747/57344 train_time:20992362ms step_avg:571.27ms +grad accum step:9187/14336 +step:36748/57344 train_time:20993661ms step_avg:571.29ms +step:36749/57344 train_time:20993674ms step_avg:571.27ms +step:36750/57344 train_time:20993925ms step_avg:571.26ms +step:36751/57344 train_time:20994493ms step_avg:571.26ms +grad accum step:9188/14336 +step:36752/57344 train_time:20995837ms step_avg:571.28ms +step:36753/57344 train_time:20995855ms step_avg:571.27ms +step:36754/57344 train_time:20996094ms step_avg:571.26ms +step:36755/57344 train_time:20996657ms step_avg:571.26ms +grad accum step:9189/14336 +step:36756/57344 train_time:20997966ms step_avg:571.28ms +step:36757/57344 train_time:20997982ms step_avg:571.26ms +step:36758/57344 train_time:20998231ms step_avg:571.26ms +step:36759/57344 train_time:20998788ms step_avg:571.26ms +grad accum step:9190/14336 +step:36760/57344 train_time:21000110ms step_avg:571.28ms +step:36761/57344 train_time:21000125ms step_avg:571.26ms +step:36762/57344 train_time:21000385ms step_avg:571.25ms +step:36763/57344 train_time:21000962ms step_avg:571.25ms +grad accum step:9191/14336 +step:36764/57344 train_time:21002501ms step_avg:571.28ms +step:36765/57344 train_time:21002517ms step_avg:571.26ms +step:36766/57344 train_time:21002746ms step_avg:571.25ms +step:36767/57344 train_time:21003298ms step_avg:571.25ms +grad accum step:9192/14336 +step:36768/57344 train_time:21004592ms step_avg:571.27ms +step:36769/57344 train_time:21004609ms step_avg:571.26ms +step:36770/57344 train_time:21004852ms step_avg:571.25ms +step:36771/57344 train_time:21005401ms step_avg:571.25ms +grad accum step:9193/14336 +step:36772/57344 train_time:21006724ms step_avg:571.27ms +step:36773/57344 train_time:21006740ms step_avg:571.25ms +step:36774/57344 train_time:21006981ms step_avg:571.25ms +step:36775/57344 train_time:21007530ms step_avg:571.24ms +grad accum step:9194/14336 +step:36776/57344 train_time:21008834ms step_avg:571.26ms +step:36777/57344 train_time:21008848ms step_avg:571.25ms +step:36778/57344 train_time:21009098ms step_avg:571.24ms +step:36779/57344 train_time:21009657ms step_avg:571.24ms +grad accum step:9195/14336 +step:36780/57344 train_time:21011015ms step_avg:571.26ms +step:36781/57344 train_time:21011030ms step_avg:571.25ms +step:36782/57344 train_time:21011266ms step_avg:571.24ms +step:36783/57344 train_time:21011852ms step_avg:571.24ms +grad accum step:9196/14336 +step:36784/57344 train_time:21013198ms step_avg:571.26ms +step:36785/57344 train_time:21013213ms step_avg:571.24ms +step:36786/57344 train_time:21013466ms step_avg:571.24ms +step:36787/57344 train_time:21014038ms step_avg:571.24ms +grad accum step:9197/14336 +step:36788/57344 train_time:21015344ms step_avg:571.26ms +step:36789/57344 train_time:21015362ms step_avg:571.24ms +step:36790/57344 train_time:21015613ms step_avg:571.23ms +step:36791/57344 train_time:21016204ms step_avg:571.23ms +grad accum step:9198/14336 +step:36792/57344 train_time:21017555ms step_avg:571.25ms +step:36793/57344 train_time:21017575ms step_avg:571.24ms +step:36794/57344 train_time:21017810ms step_avg:571.23ms +step:36795/57344 train_time:21018368ms step_avg:571.23ms +grad accum step:9199/14336 +step:36796/57344 train_time:21019756ms step_avg:571.25ms +step:36797/57344 train_time:21019786ms step_avg:571.24ms +step:36798/57344 train_time:21020004ms step_avg:571.23ms +step:36799/57344 train_time:21020558ms step_avg:571.23ms +grad accum step:9200/14336 +step:36800/57344 train_time:21021901ms step_avg:571.25ms +step:36800/57344 val_loss:6.058211 train_time:21021907ms step_avg:571.25ms +step:36801/57344 train_time:21021919ms step_avg:571.23ms +step:36802/57344 train_time:21022146ms step_avg:571.22ms +step:36803/57344 train_time:21022709ms step_avg:571.22ms +grad accum step:9201/14336 +step:36804/57344 train_time:21024041ms step_avg:571.24ms +step:36805/57344 train_time:21024057ms step_avg:571.23ms +step:36806/57344 train_time:21024308ms step_avg:571.22ms +step:36807/57344 train_time:21024871ms step_avg:571.22ms +grad accum step:9202/14336 +step:36808/57344 train_time:21026199ms step_avg:571.24ms +step:36809/57344 train_time:21026223ms step_avg:571.23ms +step:36810/57344 train_time:21026467ms step_avg:571.22ms +step:36811/57344 train_time:21027031ms step_avg:571.22ms +grad accum step:9203/14336 +step:36812/57344 train_time:21028346ms step_avg:571.24ms +step:36813/57344 train_time:21028363ms step_avg:571.22ms +step:36814/57344 train_time:21028609ms step_avg:571.21ms +step:36815/57344 train_time:21029157ms step_avg:571.21ms +grad accum step:9204/14336 +step:36816/57344 train_time:21030508ms step_avg:571.23ms +step:36817/57344 train_time:21030525ms step_avg:571.22ms +step:36818/57344 train_time:21030774ms step_avg:571.21ms +step:36819/57344 train_time:21031327ms step_avg:571.21ms +grad accum step:9205/14336 +step:36820/57344 train_time:21032647ms step_avg:571.23ms +step:36821/57344 train_time:21032664ms step_avg:571.21ms +step:36822/57344 train_time:21032913ms step_avg:571.21ms +step:36823/57344 train_time:21033470ms step_avg:571.20ms +grad accum step:9206/14336 +step:36824/57344 train_time:21034775ms step_avg:571.22ms +step:36825/57344 train_time:21034793ms step_avg:571.21ms +step:36826/57344 train_time:21035040ms step_avg:571.20ms +step:36827/57344 train_time:21035586ms step_avg:571.20ms +grad accum step:9207/14336 +step:36828/57344 train_time:21036887ms step_avg:571.22ms +step:36829/57344 train_time:21036903ms step_avg:571.20ms +step:36830/57344 train_time:21037151ms step_avg:571.20ms +step:36831/57344 train_time:21037703ms step_avg:571.20ms +grad accum step:9208/14336 +step:36832/57344 train_time:21039019ms step_avg:571.22ms +step:36833/57344 train_time:21039036ms step_avg:571.20ms +step:36834/57344 train_time:21039284ms step_avg:571.19ms +step:36835/57344 train_time:21039831ms step_avg:571.19ms +grad accum step:9209/14336 +step:36836/57344 train_time:21041146ms step_avg:571.21ms +step:36837/57344 train_time:21041161ms step_avg:571.20ms +step:36838/57344 train_time:21041408ms step_avg:571.19ms +step:36839/57344 train_time:21041955ms step_avg:571.19ms +grad accum step:9210/14336 +step:36840/57344 train_time:21043276ms step_avg:571.21ms +step:36841/57344 train_time:21043288ms step_avg:571.19ms +step:36842/57344 train_time:21043529ms step_avg:571.18ms +step:36843/57344 train_time:21044079ms step_avg:571.18ms +grad accum step:9211/14336 +step:36844/57344 train_time:21045382ms step_avg:571.20ms +step:36845/57344 train_time:21045398ms step_avg:571.19ms +step:36846/57344 train_time:21045641ms step_avg:571.18ms +step:36847/57344 train_time:21046191ms step_avg:571.18ms +grad accum step:9212/14336 +step:36848/57344 train_time:21047509ms step_avg:571.20ms +step:36849/57344 train_time:21047522ms step_avg:571.18ms +step:36850/57344 train_time:21047760ms step_avg:571.17ms +step:36851/57344 train_time:21048304ms step_avg:571.17ms +grad accum step:9213/14336 +step:36852/57344 train_time:21049623ms step_avg:571.19ms +step:36853/57344 train_time:21049660ms step_avg:571.18ms +step:36854/57344 train_time:21049881ms step_avg:571.17ms +step:36855/57344 train_time:21050433ms step_avg:571.17ms +grad accum step:9214/14336 +step:36856/57344 train_time:21051767ms step_avg:571.19ms +step:36857/57344 train_time:21051783ms step_avg:571.17ms +step:36858/57344 train_time:21052032ms step_avg:571.17ms +step:36859/57344 train_time:21052584ms step_avg:571.17ms +grad accum step:9215/14336 +step:36860/57344 train_time:21053912ms step_avg:571.19ms +step:36861/57344 train_time:21053928ms step_avg:571.17ms +step:36862/57344 train_time:21054175ms step_avg:571.16ms +step:36863/57344 train_time:21054717ms step_avg:571.16ms +grad accum step:9216/14336 +step:36864/57344 train_time:21056047ms step_avg:571.18ms +step:36864/57344 val_loss:6.055545 train_time:21056047ms step_avg:571.18ms +step:36865/57344 train_time:21056059ms step_avg:571.17ms +step:36866/57344 train_time:21056281ms step_avg:571.16ms +step:36867/57344 train_time:21056830ms step_avg:571.16ms +grad accum step:9217/14336 +step:36868/57344 train_time:21058112ms step_avg:571.18ms +step:36869/57344 train_time:21058129ms step_avg:571.16ms +step:36870/57344 train_time:21058377ms step_avg:571.15ms +step:36871/57344 train_time:21058941ms step_avg:571.15ms +grad accum step:9218/14336 +step:36872/57344 train_time:21060284ms step_avg:571.17ms +step:36873/57344 train_time:21060299ms step_avg:571.16ms +step:36874/57344 train_time:21060542ms step_avg:571.15ms +step:36875/57344 train_time:21061091ms step_avg:571.15ms +grad accum step:9219/14336 +step:36876/57344 train_time:21062414ms step_avg:571.17ms +step:36877/57344 train_time:21062432ms step_avg:571.15ms +step:36878/57344 train_time:21062666ms step_avg:571.14ms +step:36879/57344 train_time:21063221ms step_avg:571.14ms +grad accum step:9220/14336 +step:36880/57344 train_time:21064541ms step_avg:571.16ms +step:36881/57344 train_time:21064565ms step_avg:571.15ms +step:36882/57344 train_time:21064803ms step_avg:571.14ms +step:36883/57344 train_time:21065354ms step_avg:571.14ms +grad accum step:9221/14336 +step:36884/57344 train_time:21066663ms step_avg:571.16ms +step:36885/57344 train_time:21066680ms step_avg:571.14ms +step:36886/57344 train_time:21066923ms step_avg:571.14ms +step:36887/57344 train_time:21067474ms step_avg:571.14ms +grad accum step:9222/14336 +step:36888/57344 train_time:21068784ms step_avg:571.16ms +step:36889/57344 train_time:21068799ms step_avg:571.14ms +step:36890/57344 train_time:21069052ms step_avg:571.13ms +step:36891/57344 train_time:21069613ms step_avg:571.13ms +grad accum step:9223/14336 +step:36892/57344 train_time:21070920ms step_avg:571.15ms +step:36893/57344 train_time:21070934ms step_avg:571.14ms +step:36894/57344 train_time:21071187ms step_avg:571.13ms +step:36895/57344 train_time:21071756ms step_avg:571.13ms +grad accum step:9224/14336 +step:36896/57344 train_time:21073077ms step_avg:571.15ms +step:36897/57344 train_time:21073094ms step_avg:571.13ms +step:36898/57344 train_time:21073337ms step_avg:571.12ms +step:36899/57344 train_time:21073888ms step_avg:571.12ms +grad accum step:9225/14336 +step:36900/57344 train_time:21075204ms step_avg:571.14ms +step:36901/57344 train_time:21075221ms step_avg:571.13ms +step:36902/57344 train_time:21075463ms step_avg:571.12ms +step:36903/57344 train_time:21076006ms step_avg:571.12ms +grad accum step:9226/14336 +step:36904/57344 train_time:21077315ms step_avg:571.14ms +step:36905/57344 train_time:21077330ms step_avg:571.12ms +step:36906/57344 train_time:21077551ms step_avg:571.11ms +step:36907/57344 train_time:21078100ms step_avg:571.11ms +grad accum step:9227/14336 +step:36908/57344 train_time:21079427ms step_avg:571.13ms +step:36909/57344 train_time:21079443ms step_avg:571.12ms +step:36910/57344 train_time:21079690ms step_avg:571.11ms +step:36911/57344 train_time:21080239ms step_avg:571.11ms +grad accum step:9228/14336 +step:36912/57344 train_time:21081555ms step_avg:571.13ms +step:36913/57344 train_time:21081570ms step_avg:571.12ms +step:36914/57344 train_time:21081821ms step_avg:571.11ms +step:36915/57344 train_time:21082376ms step_avg:571.11ms +grad accum step:9229/14336 +step:36916/57344 train_time:21083683ms step_avg:571.13ms +step:36917/57344 train_time:21083701ms step_avg:571.11ms +step:36918/57344 train_time:21083939ms step_avg:571.10ms +step:36919/57344 train_time:21084499ms step_avg:571.10ms +grad accum step:9230/14336 +step:36920/57344 train_time:21085843ms step_avg:571.12ms +step:36921/57344 train_time:21085860ms step_avg:571.11ms +step:36922/57344 train_time:21086103ms step_avg:571.10ms +step:36923/57344 train_time:21086650ms step_avg:571.10ms +grad accum step:9231/14336 +step:36924/57344 train_time:21087946ms step_avg:571.12ms +step:36925/57344 train_time:21087958ms step_avg:571.10ms +step:36926/57344 train_time:21088208ms step_avg:571.09ms +step:36927/57344 train_time:21088761ms step_avg:571.09ms +grad accum step:9232/14336 +step:36928/57344 train_time:21090067ms step_avg:571.11ms +step:36928/57344 val_loss:6.061031 train_time:21090075ms step_avg:571.11ms +step:36929/57344 train_time:21090087ms step_avg:571.10ms +step:36930/57344 train_time:21090314ms step_avg:571.09ms +step:36931/57344 train_time:21090880ms step_avg:571.09ms +grad accum step:9233/14336 +step:36932/57344 train_time:21092214ms step_avg:571.11ms +step:36933/57344 train_time:21092231ms step_avg:571.09ms +step:36934/57344 train_time:21092477ms step_avg:571.09ms +step:36935/57344 train_time:21093032ms step_avg:571.09ms +grad accum step:9234/14336 +step:36936/57344 train_time:21094325ms step_avg:571.10ms +step:36937/57344 train_time:21094344ms step_avg:571.09ms +step:36938/57344 train_time:21094585ms step_avg:571.08ms +step:36939/57344 train_time:21095134ms step_avg:571.08ms +grad accum step:9235/14336 +step:36940/57344 train_time:21096443ms step_avg:571.10ms +step:36941/57344 train_time:21096466ms step_avg:571.09ms +step:36942/57344 train_time:21096701ms step_avg:571.08ms +step:36943/57344 train_time:21097242ms step_avg:571.08ms +grad accum step:9236/14336 +step:36944/57344 train_time:21098681ms step_avg:571.10ms +step:36945/57344 train_time:21098696ms step_avg:571.08ms +step:36946/57344 train_time:21098914ms step_avg:571.07ms +step:36947/57344 train_time:21099457ms step_avg:571.07ms +grad accum step:9237/14336 +step:36948/57344 train_time:21100783ms step_avg:571.09ms +step:36949/57344 train_time:21100798ms step_avg:571.08ms +step:36950/57344 train_time:21101036ms step_avg:571.07ms +step:36951/57344 train_time:21101591ms step_avg:571.07ms +grad accum step:9238/14336 +step:36952/57344 train_time:21102885ms step_avg:571.09ms +step:36953/57344 train_time:21102904ms step_avg:571.07ms +step:36954/57344 train_time:21103145ms step_avg:571.07ms +step:36955/57344 train_time:21103688ms step_avg:571.06ms +grad accum step:9239/14336 +step:36956/57344 train_time:21105029ms step_avg:571.09ms +step:36957/57344 train_time:21105048ms step_avg:571.07ms +step:36958/57344 train_time:21105271ms step_avg:571.06ms +step:36959/57344 train_time:21105819ms step_avg:571.06ms +grad accum step:9240/14336 +step:36960/57344 train_time:21107159ms step_avg:571.08ms +step:36961/57344 train_time:21107182ms step_avg:571.07ms +step:36962/57344 train_time:21107412ms step_avg:571.06ms +step:36963/57344 train_time:21107954ms step_avg:571.06ms +grad accum step:9241/14336 +step:36964/57344 train_time:21109234ms step_avg:571.08ms +step:36965/57344 train_time:21109253ms step_avg:571.06ms +step:36966/57344 train_time:21109493ms step_avg:571.05ms +step:36967/57344 train_time:21110036ms step_avg:571.05ms +grad accum step:9242/14336 +step:36968/57344 train_time:21111342ms step_avg:571.07ms +step:36969/57344 train_time:21111358ms step_avg:571.06ms +step:36970/57344 train_time:21111607ms step_avg:571.05ms +step:36971/57344 train_time:21112167ms step_avg:571.05ms +grad accum step:9243/14336 +step:36972/57344 train_time:21113521ms step_avg:571.07ms +step:36973/57344 train_time:21113536ms step_avg:571.05ms +step:36974/57344 train_time:21113784ms step_avg:571.04ms +step:36975/57344 train_time:21114332ms step_avg:571.04ms +grad accum step:9244/14336 +step:36976/57344 train_time:21115657ms step_avg:571.06ms +step:36977/57344 train_time:21115680ms step_avg:571.05ms +step:36978/57344 train_time:21115914ms step_avg:571.04ms +step:36979/57344 train_time:21116457ms step_avg:571.04ms +grad accum step:9245/14336 +step:36980/57344 train_time:21117771ms step_avg:571.06ms +step:36981/57344 train_time:21117789ms step_avg:571.04ms +step:36982/57344 train_time:21118028ms step_avg:571.04ms +step:36983/57344 train_time:21118571ms step_avg:571.03ms +grad accum step:9246/14336 +step:36984/57344 train_time:21119874ms step_avg:571.05ms +step:36985/57344 train_time:21119895ms step_avg:571.04ms +step:36986/57344 train_time:21120130ms step_avg:571.03ms +step:36987/57344 train_time:21120695ms step_avg:571.03ms +grad accum step:9247/14336 +step:36988/57344 train_time:21122030ms step_avg:571.05ms +step:36989/57344 train_time:21122047ms step_avg:571.04ms +step:36990/57344 train_time:21122291ms step_avg:571.03ms +step:36991/57344 train_time:21122837ms step_avg:571.03ms +grad accum step:9248/14336 +step:36992/57344 train_time:21124157ms step_avg:571.05ms +step:36992/57344 val_loss:6.064785 train_time:21124167ms step_avg:571.05ms +step:36993/57344 train_time:21124179ms step_avg:571.03ms +step:36994/57344 train_time:21124400ms step_avg:571.02ms +step:36995/57344 train_time:21124947ms step_avg:571.02ms +grad accum step:9249/14336 +step:36996/57344 train_time:21126356ms step_avg:571.04ms +step:36997/57344 train_time:21126374ms step_avg:571.03ms +step:36998/57344 train_time:21126591ms step_avg:571.02ms +step:36999/57344 train_time:21127134ms step_avg:571.02ms +grad accum step:9250/14336 +step:37000/57344 train_time:21128456ms step_avg:571.04ms +step:37001/57344 train_time:21128473ms step_avg:571.02ms +step:37002/57344 train_time:21128796ms step_avg:571.02ms +step:37003/57344 train_time:21129246ms step_avg:571.01ms +grad accum step:9251/14336 +step:37004/57344 train_time:21130557ms step_avg:571.03ms +step:37005/57344 train_time:21130573ms step_avg:571.02ms +step:37006/57344 train_time:21130818ms step_avg:571.01ms +step:37007/57344 train_time:21131362ms step_avg:571.01ms +grad accum step:9252/14336 +step:37008/57344 train_time:21132683ms step_avg:571.03ms +step:37009/57344 train_time:21132699ms step_avg:571.02ms +step:37010/57344 train_time:21132942ms step_avg:571.01ms +step:37011/57344 train_time:21133486ms step_avg:571.01ms +grad accum step:9253/14336 +step:37012/57344 train_time:21134764ms step_avg:571.02ms +step:37013/57344 train_time:21135074ms step_avg:571.02ms +step:37014/57344 train_time:21135289ms step_avg:571.01ms +step:37015/57344 train_time:21135844ms step_avg:571.01ms +grad accum step:9254/14336 +step:37016/57344 train_time:21137167ms step_avg:571.03ms +step:37017/57344 train_time:21137183ms step_avg:571.01ms +step:37018/57344 train_time:21137435ms step_avg:571.00ms +step:37019/57344 train_time:21138009ms step_avg:571.00ms +grad accum step:9255/14336 +step:37020/57344 train_time:21139322ms step_avg:571.02ms +step:37021/57344 train_time:21139337ms step_avg:571.01ms +step:37022/57344 train_time:21139593ms step_avg:571.00ms +step:37023/57344 train_time:21140165ms step_avg:571.00ms +grad accum step:9256/14336 +step:37024/57344 train_time:21141476ms step_avg:571.02ms +step:37025/57344 train_time:21141491ms step_avg:571.01ms +step:37026/57344 train_time:21141739ms step_avg:571.00ms +step:37027/57344 train_time:21142289ms step_avg:571.00ms +grad accum step:9257/14336 +step:37028/57344 train_time:21143605ms step_avg:571.02ms +step:37029/57344 train_time:21143619ms step_avg:571.00ms +step:37030/57344 train_time:21143867ms step_avg:570.99ms +step:37031/57344 train_time:21144423ms step_avg:570.99ms +grad accum step:9258/14336 +step:37032/57344 train_time:21145738ms step_avg:571.01ms +step:37033/57344 train_time:21145759ms step_avg:571.00ms +step:37034/57344 train_time:21145990ms step_avg:570.99ms +step:37035/57344 train_time:21146539ms step_avg:570.99ms +grad accum step:9259/14336 +step:37036/57344 train_time:21147865ms step_avg:571.01ms +step:37037/57344 train_time:21147882ms step_avg:570.99ms +step:37038/57344 train_time:21148137ms step_avg:570.98ms +step:37039/57344 train_time:21148702ms step_avg:570.98ms +grad accum step:9260/14336 +step:37040/57344 train_time:21150023ms step_avg:571.00ms +step:37041/57344 train_time:21150038ms step_avg:570.99ms +step:37042/57344 train_time:21150288ms step_avg:570.98ms +step:37043/57344 train_time:21150846ms step_avg:570.98ms +grad accum step:9261/14336 +step:37044/57344 train_time:21152166ms step_avg:571.00ms +step:37045/57344 train_time:21152184ms step_avg:570.99ms +step:37046/57344 train_time:21152423ms step_avg:570.98ms +step:37047/57344 train_time:21152968ms step_avg:570.98ms +grad accum step:9262/14336 +step:37048/57344 train_time:21154271ms step_avg:571.00ms +step:37049/57344 train_time:21154290ms step_avg:570.98ms +step:37050/57344 train_time:21154534ms step_avg:570.97ms +step:37051/57344 train_time:21155090ms step_avg:570.97ms +grad accum step:9263/14336 +step:37052/57344 train_time:21156366ms step_avg:570.99ms +step:37053/57344 train_time:21156381ms step_avg:570.98ms +step:37054/57344 train_time:21156628ms step_avg:570.97ms +step:37055/57344 train_time:21157175ms step_avg:570.97ms +grad accum step:9264/14336 +step:37056/57344 train_time:21158469ms step_avg:570.99ms +step:37056/57344 val_loss:6.068419 train_time:21158473ms step_avg:570.99ms +step:37057/57344 train_time:21158485ms step_avg:570.97ms +step:37058/57344 train_time:21158710ms step_avg:570.96ms +step:37059/57344 train_time:21159255ms step_avg:570.96ms +grad accum step:9265/14336 +step:37060/57344 train_time:21160713ms step_avg:570.99ms +step:37061/57344 train_time:21160735ms step_avg:570.97ms +step:37062/57344 train_time:21160955ms step_avg:570.96ms +step:37063/57344 train_time:21161515ms step_avg:570.96ms +grad accum step:9266/14336 +step:37064/57344 train_time:21162873ms step_avg:570.98ms +step:37065/57344 train_time:21162920ms step_avg:570.97ms +step:37066/57344 train_time:21163146ms step_avg:570.96ms +step:37067/57344 train_time:21163714ms step_avg:570.96ms +grad accum step:9267/14336 +step:37068/57344 train_time:21165050ms step_avg:570.98ms +step:37069/57344 train_time:21165068ms step_avg:570.96ms +step:37070/57344 train_time:21165312ms step_avg:570.96ms +step:37071/57344 train_time:21165874ms step_avg:570.96ms +grad accum step:9268/14336 +step:37072/57344 train_time:21167230ms step_avg:570.98ms +step:37073/57344 train_time:21167250ms step_avg:570.96ms +step:37074/57344 train_time:21167468ms step_avg:570.95ms +step:37075/57344 train_time:21168020ms step_avg:570.95ms +grad accum step:9269/14336 +step:37076/57344 train_time:21169338ms step_avg:570.97ms +step:37077/57344 train_time:21169355ms step_avg:570.96ms +step:37078/57344 train_time:21169605ms step_avg:570.95ms +step:37079/57344 train_time:21170157ms step_avg:570.95ms +grad accum step:9270/14336 +step:37080/57344 train_time:21171456ms step_avg:570.97ms +step:37081/57344 train_time:21171471ms step_avg:570.95ms +step:37082/57344 train_time:21171725ms step_avg:570.94ms +step:37083/57344 train_time:21172282ms step_avg:570.94ms +grad accum step:9271/14336 +step:37084/57344 train_time:21173563ms step_avg:570.96ms +step:37085/57344 train_time:21173580ms step_avg:570.95ms +step:37086/57344 train_time:21173827ms step_avg:570.94ms +step:37087/57344 train_time:21174383ms step_avg:570.94ms +grad accum step:9272/14336 +step:37088/57344 train_time:21175728ms step_avg:570.96ms +step:37089/57344 train_time:21175748ms step_avg:570.94ms +step:37090/57344 train_time:21175993ms step_avg:570.94ms +step:37091/57344 train_time:21176562ms step_avg:570.94ms +grad accum step:9273/14336 +step:37092/57344 train_time:21177889ms step_avg:570.96ms +step:37093/57344 train_time:21177902ms step_avg:570.94ms +step:37094/57344 train_time:21178152ms step_avg:570.93ms +step:37095/57344 train_time:21178702ms step_avg:570.93ms +grad accum step:9274/14336 +step:37096/57344 train_time:21180054ms step_avg:570.95ms +step:37097/57344 train_time:21180085ms step_avg:570.94ms +step:37098/57344 train_time:21180303ms step_avg:570.93ms +step:37099/57344 train_time:21180844ms step_avg:570.93ms +grad accum step:9275/14336 +step:37100/57344 train_time:21182144ms step_avg:570.95ms +step:37101/57344 train_time:21182159ms step_avg:570.93ms +step:37102/57344 train_time:21182408ms step_avg:570.92ms +step:37103/57344 train_time:21182981ms step_avg:570.92ms +grad accum step:9276/14336 +step:37104/57344 train_time:21184315ms step_avg:570.94ms +step:37105/57344 train_time:21184333ms step_avg:570.93ms +step:37106/57344 train_time:21184578ms step_avg:570.92ms +step:37107/57344 train_time:21185145ms step_avg:570.92ms +grad accum step:9277/14336 +step:37108/57344 train_time:21186464ms step_avg:570.94ms +step:37109/57344 train_time:21186484ms step_avg:570.93ms +step:37110/57344 train_time:21186727ms step_avg:570.92ms +step:37111/57344 train_time:21187285ms step_avg:570.92ms +grad accum step:9278/14336 +step:37112/57344 train_time:21188565ms step_avg:570.94ms +step:37113/57344 train_time:21188582ms step_avg:570.92ms +step:37114/57344 train_time:21188828ms step_avg:570.91ms +step:37115/57344 train_time:21189382ms step_avg:570.91ms +grad accum step:9279/14336 +step:37116/57344 train_time:21190692ms step_avg:570.93ms +step:37117/57344 train_time:21190710ms step_avg:570.92ms +step:37118/57344 train_time:21190951ms step_avg:570.91ms +step:37119/57344 train_time:21191495ms step_avg:570.91ms +grad accum step:9280/14336 +step:37120/57344 train_time:21192800ms step_avg:570.93ms +step:37120/57344 val_loss:6.068573 train_time:21192804ms step_avg:570.93ms +step:37121/57344 train_time:21192816ms step_avg:570.91ms +step:37122/57344 train_time:21193036ms step_avg:570.90ms +step:37123/57344 train_time:21193586ms step_avg:570.90ms +grad accum step:9281/14336 +step:37124/57344 train_time:21194867ms step_avg:570.92ms +step:37125/57344 train_time:21194884ms step_avg:570.91ms +step:37126/57344 train_time:21195129ms step_avg:570.90ms +step:37127/57344 train_time:21195674ms step_avg:570.90ms +grad accum step:9282/14336 +step:37128/57344 train_time:21196978ms step_avg:570.92ms +step:37129/57344 train_time:21196995ms step_avg:570.90ms +step:37130/57344 train_time:21197234ms step_avg:570.89ms +step:37131/57344 train_time:21197784ms step_avg:570.89ms +grad accum step:9283/14336 +step:37132/57344 train_time:21199076ms step_avg:570.91ms +step:37133/57344 train_time:21199093ms step_avg:570.90ms +step:37134/57344 train_time:21199340ms step_avg:570.89ms +step:37135/57344 train_time:21199881ms step_avg:570.89ms +grad accum step:9284/14336 +step:37136/57344 train_time:21201193ms step_avg:570.91ms +step:37137/57344 train_time:21201208ms step_avg:570.89ms +step:37138/57344 train_time:21201461ms step_avg:570.88ms +step:37139/57344 train_time:21202021ms step_avg:570.88ms +grad accum step:9285/14336 +step:37140/57344 train_time:21203349ms step_avg:570.90ms +step:37141/57344 train_time:21203366ms step_avg:570.89ms +step:37142/57344 train_time:21203614ms step_avg:570.88ms +step:37143/57344 train_time:21204166ms step_avg:570.88ms +grad accum step:9286/14336 +step:37144/57344 train_time:21205487ms step_avg:570.90ms +step:37145/57344 train_time:21205519ms step_avg:570.88ms +step:37146/57344 train_time:21205739ms step_avg:570.88ms +step:37147/57344 train_time:21206283ms step_avg:570.87ms +grad accum step:9287/14336 +step:37148/57344 train_time:21207610ms step_avg:570.90ms +step:37149/57344 train_time:21207625ms step_avg:570.88ms +step:37150/57344 train_time:21207872ms step_avg:570.87ms +step:37151/57344 train_time:21208424ms step_avg:570.87ms +grad accum step:9288/14336 +step:37152/57344 train_time:21209771ms step_avg:570.89ms +step:37153/57344 train_time:21209784ms step_avg:570.88ms +step:37154/57344 train_time:21210026ms step_avg:570.87ms +step:37155/57344 train_time:21210564ms step_avg:570.87ms +grad accum step:9289/14336 +step:37156/57344 train_time:21211861ms step_avg:570.89ms +step:37157/57344 train_time:21211878ms step_avg:570.87ms +step:37158/57344 train_time:21212123ms step_avg:570.86ms +step:37159/57344 train_time:21212664ms step_avg:570.86ms +grad accum step:9290/14336 +step:37160/57344 train_time:21213944ms step_avg:570.88ms +step:37161/57344 train_time:21213958ms step_avg:570.87ms +step:37162/57344 train_time:21214204ms step_avg:570.86ms +step:37163/57344 train_time:21214751ms step_avg:570.86ms +grad accum step:9291/14336 +step:37164/57344 train_time:21216047ms step_avg:570.88ms +step:37165/57344 train_time:21216065ms step_avg:570.86ms +step:37166/57344 train_time:21216318ms step_avg:570.85ms +step:37167/57344 train_time:21216892ms step_avg:570.85ms +grad accum step:9292/14336 +step:37168/57344 train_time:21218211ms step_avg:570.87ms +step:37169/57344 train_time:21218228ms step_avg:570.86ms +step:37170/57344 train_time:21218471ms step_avg:570.85ms +step:37171/57344 train_time:21219017ms step_avg:570.85ms +grad accum step:9293/14336 +step:37172/57344 train_time:21220335ms step_avg:570.87ms +step:37173/57344 train_time:21220350ms step_avg:570.85ms +step:37174/57344 train_time:21220587ms step_avg:570.84ms +step:37175/57344 train_time:21221136ms step_avg:570.84ms +grad accum step:9294/14336 +step:37176/57344 train_time:21222433ms step_avg:570.86ms +step:37177/57344 train_time:21222447ms step_avg:570.85ms +step:37178/57344 train_time:21222694ms step_avg:570.84ms +step:37179/57344 train_time:21223240ms step_avg:570.84ms +grad accum step:9295/14336 +step:37180/57344 train_time:21224553ms step_avg:570.86ms +step:37181/57344 train_time:21224567ms step_avg:570.84ms +step:37182/57344 train_time:21224807ms step_avg:570.84ms +step:37183/57344 train_time:21225347ms step_avg:570.83ms +grad accum step:9296/14336 +step:37184/57344 train_time:21226647ms step_avg:570.85ms +step:37184/57344 val_loss:6.071270 train_time:21226650ms step_avg:570.85ms +step:37185/57344 train_time:21226662ms step_avg:570.84ms +step:37186/57344 train_time:21226882ms step_avg:570.83ms +step:37187/57344 train_time:21227439ms step_avg:570.83ms +grad accum step:9297/14336 +step:37188/57344 train_time:21228754ms step_avg:570.85ms +step:37189/57344 train_time:21228772ms step_avg:570.83ms +step:37190/57344 train_time:21229018ms step_avg:570.83ms +step:37191/57344 train_time:21229572ms step_avg:570.83ms +grad accum step:9298/14336 +step:37192/57344 train_time:21230911ms step_avg:570.85ms +step:37193/57344 train_time:21230930ms step_avg:570.83ms +step:37194/57344 train_time:21231151ms step_avg:570.82ms +step:37195/57344 train_time:21231702ms step_avg:570.82ms +grad accum step:9299/14336 +step:37196/57344 train_time:21233019ms step_avg:570.84ms +step:37197/57344 train_time:21233032ms step_avg:570.83ms +step:37198/57344 train_time:21233299ms step_avg:570.82ms +step:37199/57344 train_time:21233906ms step_avg:570.82ms +grad accum step:9300/14336 +step:37200/57344 train_time:21235238ms step_avg:570.84ms +step:37201/57344 train_time:21235673ms step_avg:570.84ms +step:37202/57344 train_time:21235891ms step_avg:570.83ms +step:37203/57344 train_time:21236436ms step_avg:570.83ms +grad accum step:9301/14336 +step:37204/57344 train_time:21237768ms step_avg:570.85ms +step:37205/57344 train_time:21237788ms step_avg:570.83ms +step:37206/57344 train_time:21238005ms step_avg:570.82ms +step:37207/57344 train_time:21238549ms step_avg:570.82ms +grad accum step:9302/14336 +step:37208/57344 train_time:21239833ms step_avg:570.84ms +step:37209/57344 train_time:21239851ms step_avg:570.83ms +step:37210/57344 train_time:21240089ms step_avg:570.82ms +step:37211/57344 train_time:21240636ms step_avg:570.82ms +grad accum step:9303/14336 +step:37212/57344 train_time:21241985ms step_avg:570.84ms +step:37213/57344 train_time:21242003ms step_avg:570.82ms +step:37214/57344 train_time:21242247ms step_avg:570.81ms +step:37215/57344 train_time:21242819ms step_avg:570.81ms +grad accum step:9304/14336 +step:37216/57344 train_time:21244184ms step_avg:570.83ms +step:37217/57344 train_time:21244200ms step_avg:570.82ms +step:37218/57344 train_time:21244447ms step_avg:570.81ms +step:37219/57344 train_time:21244996ms step_avg:570.81ms +grad accum step:9305/14336 +step:37220/57344 train_time:21246334ms step_avg:570.83ms +step:37221/57344 train_time:21246351ms step_avg:570.82ms +step:37222/57344 train_time:21246594ms step_avg:570.81ms +step:37223/57344 train_time:21247148ms step_avg:570.81ms +grad accum step:9306/14336 +step:37224/57344 train_time:21248451ms step_avg:570.83ms +step:37225/57344 train_time:21248466ms step_avg:570.81ms +step:37226/57344 train_time:21248711ms step_avg:570.80ms +step:37227/57344 train_time:21249260ms step_avg:570.80ms +grad accum step:9307/14336 +step:37228/57344 train_time:21250587ms step_avg:570.82ms +step:37229/57344 train_time:21250612ms step_avg:570.81ms +step:37230/57344 train_time:21250844ms step_avg:570.80ms +step:37231/57344 train_time:21251396ms step_avg:570.80ms +grad accum step:9308/14336 +step:37232/57344 train_time:21252692ms step_avg:570.82ms +step:37233/57344 train_time:21252708ms step_avg:570.80ms +step:37234/57344 train_time:21252957ms step_avg:570.79ms +step:37235/57344 train_time:21253510ms step_avg:570.79ms +grad accum step:9309/14336 +step:37236/57344 train_time:21255302ms step_avg:570.83ms +step:37237/57344 train_time:21255317ms step_avg:570.81ms +step:37238/57344 train_time:21255524ms step_avg:570.80ms +step:37239/57344 train_time:21256074ms step_avg:570.80ms +grad accum step:9310/14336 +step:37240/57344 train_time:21257378ms step_avg:570.82ms +step:37241/57344 train_time:21257392ms step_avg:570.81ms +step:37242/57344 train_time:21257635ms step_avg:570.80ms +step:37243/57344 train_time:21258187ms step_avg:570.80ms +grad accum step:9311/14336 +step:37244/57344 train_time:21259535ms step_avg:570.82ms +step:37245/57344 train_time:21259551ms step_avg:570.80ms +step:37246/57344 train_time:21259796ms step_avg:570.79ms +step:37247/57344 train_time:21260352ms step_avg:570.79ms +grad accum step:9312/14336 +step:37248/57344 train_time:21261702ms step_avg:570.81ms +step:37248/57344 val_loss:6.083017 train_time:21261708ms step_avg:570.81ms +step:37249/57344 train_time:21262965ms step_avg:570.83ms +step:37250/57344 train_time:21263113ms step_avg:570.82ms +step:37251/57344 train_time:21263665ms step_avg:570.82ms +grad accum step:9313/14336 +step:37252/57344 train_time:21265192ms step_avg:570.85ms +step:37253/57344 train_time:21265211ms step_avg:570.83ms +step:37254/57344 train_time:21265427ms step_avg:570.82ms +step:37255/57344 train_time:21265969ms step_avg:570.82ms +grad accum step:9314/14336 +step:37256/57344 train_time:21267276ms step_avg:570.84ms +step:37257/57344 train_time:21267296ms step_avg:570.83ms +step:37258/57344 train_time:21267540ms step_avg:570.82ms +step:37259/57344 train_time:21268109ms step_avg:570.82ms +grad accum step:9315/14336 +step:37260/57344 train_time:21269442ms step_avg:570.84ms +step:37261/57344 train_time:21269455ms step_avg:570.82ms +step:37262/57344 train_time:21269691ms step_avg:570.81ms +step:37263/57344 train_time:21270234ms step_avg:570.81ms +grad accum step:9316/14336 +step:37264/57344 train_time:21271534ms step_avg:570.83ms +step:37265/57344 train_time:21271549ms step_avg:570.82ms +step:37266/57344 train_time:21271799ms step_avg:570.81ms +step:37267/57344 train_time:21272358ms step_avg:570.81ms +grad accum step:9317/14336 +step:37268/57344 train_time:21273666ms step_avg:570.83ms +step:37269/57344 train_time:21273683ms step_avg:570.81ms +step:37270/57344 train_time:21273928ms step_avg:570.81ms +step:37271/57344 train_time:21274484ms step_avg:570.81ms +grad accum step:9318/14336 +step:37272/57344 train_time:21275797ms step_avg:570.83ms +step:37273/57344 train_time:21275814ms step_avg:570.81ms +step:37274/57344 train_time:21276060ms step_avg:570.80ms +step:37275/57344 train_time:21276606ms step_avg:570.80ms +grad accum step:9319/14336 +step:37276/57344 train_time:21277961ms step_avg:570.82ms +step:37277/57344 train_time:21277977ms step_avg:570.81ms +step:37278/57344 train_time:21278219ms step_avg:570.80ms +step:37279/57344 train_time:21278764ms step_avg:570.80ms +grad accum step:9320/14336 +step:37280/57344 train_time:21280090ms step_avg:570.82ms +step:37281/57344 train_time:21280106ms step_avg:570.80ms +step:37282/57344 train_time:21280360ms step_avg:570.79ms +step:37283/57344 train_time:21280920ms step_avg:570.79ms +grad accum step:9321/14336 +step:37284/57344 train_time:21282197ms step_avg:570.81ms +step:37285/57344 train_time:21282216ms step_avg:570.80ms +step:37286/57344 train_time:21282462ms step_avg:570.79ms +step:37287/57344 train_time:21283032ms step_avg:570.79ms +grad accum step:9322/14336 +step:37288/57344 train_time:21284366ms step_avg:570.81ms +step:37289/57344 train_time:21284384ms step_avg:570.80ms +step:37290/57344 train_time:21284623ms step_avg:570.79ms +step:37291/57344 train_time:21285175ms step_avg:570.79ms +grad accum step:9323/14336 +step:37292/57344 train_time:21286497ms step_avg:570.81ms +step:37293/57344 train_time:21286517ms step_avg:570.79ms +step:37294/57344 train_time:21286757ms step_avg:570.78ms +step:37295/57344 train_time:21287310ms step_avg:570.78ms +grad accum step:9324/14336 +step:37296/57344 train_time:21288605ms step_avg:570.80ms +step:37297/57344 train_time:21288620ms step_avg:570.79ms +step:37298/57344 train_time:21288866ms step_avg:570.78ms +step:37299/57344 train_time:21289414ms step_avg:570.78ms +grad accum step:9325/14336 +step:37300/57344 train_time:21290742ms step_avg:570.80ms +step:37301/57344 train_time:21290757ms step_avg:570.78ms +step:37302/57344 train_time:21291004ms step_avg:570.77ms +step:37303/57344 train_time:21291551ms step_avg:570.77ms +grad accum step:9326/14336 +step:37304/57344 train_time:21292891ms step_avg:570.79ms +step:37305/57344 train_time:21292907ms step_avg:570.78ms +step:37306/57344 train_time:21293152ms step_avg:570.77ms +step:37307/57344 train_time:21293704ms step_avg:570.77ms +grad accum step:9327/14336 +step:37308/57344 train_time:21295014ms step_avg:570.79ms +step:37309/57344 train_time:21295028ms step_avg:570.77ms +step:37310/57344 train_time:21295275ms step_avg:570.77ms +step:37311/57344 train_time:21295823ms step_avg:570.77ms +grad accum step:9328/14336 +step:37312/57344 train_time:21297124ms step_avg:570.78ms +step:37312/57344 val_loss:6.074487 train_time:21297127ms step_avg:570.78ms +step:37313/57344 train_time:21297139ms step_avg:570.77ms +step:37314/57344 train_time:21297381ms step_avg:570.76ms +step:37315/57344 train_time:21297981ms step_avg:570.76ms +grad accum step:9329/14336 +step:37316/57344 train_time:21299284ms step_avg:570.78ms +step:37317/57344 train_time:21299300ms step_avg:570.77ms +step:37318/57344 train_time:21299550ms step_avg:570.76ms +step:37319/57344 train_time:21300108ms step_avg:570.76ms +grad accum step:9330/14336 +step:37320/57344 train_time:21301452ms step_avg:570.78ms +step:37321/57344 train_time:21301469ms step_avg:570.76ms +step:37322/57344 train_time:21301715ms step_avg:570.75ms +step:37323/57344 train_time:21302253ms step_avg:570.75ms +grad accum step:9331/14336 +step:37324/57344 train_time:21303558ms step_avg:570.77ms +step:37325/57344 train_time:21303574ms step_avg:570.76ms +step:37326/57344 train_time:21303821ms step_avg:570.75ms +step:37327/57344 train_time:21304360ms step_avg:570.75ms +grad accum step:9332/14336 +step:37328/57344 train_time:21305681ms step_avg:570.77ms +step:37329/57344 train_time:21305696ms step_avg:570.75ms +step:37330/57344 train_time:21305943ms step_avg:570.75ms +step:37331/57344 train_time:21306488ms step_avg:570.75ms +grad accum step:9333/14336 +step:37332/57344 train_time:21307818ms step_avg:570.77ms +step:37333/57344 train_time:21307834ms step_avg:570.75ms +step:37334/57344 train_time:21308079ms step_avg:570.74ms +step:37335/57344 train_time:21308632ms step_avg:570.74ms +grad accum step:9334/14336 +step:37336/57344 train_time:21309975ms step_avg:570.76ms +step:37337/57344 train_time:21310058ms step_avg:570.75ms +step:37338/57344 train_time:21310272ms step_avg:570.74ms +step:37339/57344 train_time:21310812ms step_avg:570.74ms +grad accum step:9335/14336 +step:37340/57344 train_time:21312114ms step_avg:570.76ms +step:37341/57344 train_time:21312129ms step_avg:570.74ms +step:37342/57344 train_time:21312377ms step_avg:570.73ms +step:37343/57344 train_time:21312923ms step_avg:570.73ms +grad accum step:9336/14336 +step:37344/57344 train_time:21314218ms step_avg:570.75ms +step:37345/57344 train_time:21314234ms step_avg:570.74ms +step:37346/57344 train_time:21314486ms step_avg:570.73ms +step:37347/57344 train_time:21315046ms step_avg:570.73ms +grad accum step:9337/14336 +step:37348/57344 train_time:21316364ms step_avg:570.75ms +step:37349/57344 train_time:21316383ms step_avg:570.74ms +step:37350/57344 train_time:21316617ms step_avg:570.73ms +step:37351/57344 train_time:21317172ms step_avg:570.73ms +grad accum step:9338/14336 +step:37352/57344 train_time:21318485ms step_avg:570.75ms +step:37353/57344 train_time:21318499ms step_avg:570.73ms +step:37354/57344 train_time:21318743ms step_avg:570.72ms +step:37355/57344 train_time:21319279ms step_avg:570.72ms +grad accum step:9339/14336 +step:37356/57344 train_time:21320602ms step_avg:570.74ms +step:37357/57344 train_time:21320617ms step_avg:570.73ms +step:37358/57344 train_time:21320860ms step_avg:570.72ms +step:37359/57344 train_time:21321406ms step_avg:570.72ms +grad accum step:9340/14336 +step:37360/57344 train_time:21322705ms step_avg:570.74ms +step:37361/57344 train_time:21322720ms step_avg:570.72ms +step:37362/57344 train_time:21322968ms step_avg:570.71ms +step:37363/57344 train_time:21323526ms step_avg:570.71ms +grad accum step:9341/14336 +step:37364/57344 train_time:21324869ms step_avg:570.73ms +step:37365/57344 train_time:21324885ms step_avg:570.72ms +step:37366/57344 train_time:21325132ms step_avg:570.71ms +step:37367/57344 train_time:21325678ms step_avg:570.71ms +grad accum step:9342/14336 +step:37368/57344 train_time:21327027ms step_avg:570.73ms +step:37369/57344 train_time:21327050ms step_avg:570.72ms +step:37370/57344 train_time:21327302ms step_avg:570.71ms +step:37371/57344 train_time:21327907ms step_avg:570.71ms +grad accum step:9343/14336 +step:37372/57344 train_time:21329269ms step_avg:570.73ms +step:37373/57344 train_time:21329285ms step_avg:570.71ms +step:37374/57344 train_time:21329527ms step_avg:570.70ms +step:37375/57344 train_time:21330071ms step_avg:570.70ms +grad accum step:9344/14336 +step:37376/57344 train_time:21331382ms step_avg:570.72ms +step:37376/57344 val_loss:6.100016 train_time:21331383ms step_avg:570.72ms +step:37377/57344 train_time:21331395ms step_avg:570.71ms +step:37378/57344 train_time:21331618ms step_avg:570.70ms +step:37379/57344 train_time:21332155ms step_avg:570.70ms +grad accum step:9345/14336 +step:37380/57344 train_time:21333455ms step_avg:570.72ms +step:37381/57344 train_time:21333472ms step_avg:570.70ms +step:37382/57344 train_time:21333718ms step_avg:570.69ms +step:37383/57344 train_time:21334277ms step_avg:570.69ms +grad accum step:9346/14336 +step:37384/57344 train_time:21335608ms step_avg:570.71ms +step:37385/57344 train_time:21335624ms step_avg:570.70ms +step:37386/57344 train_time:21335873ms step_avg:570.69ms +step:37387/57344 train_time:21336420ms step_avg:570.69ms +grad accum step:9347/14336 +step:37388/57344 train_time:21337714ms step_avg:570.71ms +step:37389/57344 train_time:21337731ms step_avg:570.70ms +step:37390/57344 train_time:21337974ms step_avg:570.69ms +step:37391/57344 train_time:21338510ms step_avg:570.69ms +grad accum step:9348/14336 +step:37392/57344 train_time:21339820ms step_avg:570.71ms +step:37393/57344 train_time:21339835ms step_avg:570.69ms +step:37394/57344 train_time:21340096ms step_avg:570.68ms +step:37395/57344 train_time:21340689ms step_avg:570.68ms +grad accum step:9349/14336 +step:37396/57344 train_time:21342010ms step_avg:570.70ms +step:37397/57344 train_time:21342027ms step_avg:570.69ms +step:37398/57344 train_time:21342269ms step_avg:570.68ms +step:37399/57344 train_time:21342814ms step_avg:570.68ms +grad accum step:9350/14336 +step:37400/57344 train_time:21344151ms step_avg:570.70ms +step:37401/57344 train_time:21344170ms step_avg:570.68ms +step:37402/57344 train_time:21344397ms step_avg:570.68ms +step:37403/57344 train_time:21344947ms step_avg:570.67ms +grad accum step:9351/14336 +step:37404/57344 train_time:21346267ms step_avg:570.69ms +step:37405/57344 train_time:21346282ms step_avg:570.68ms +step:37406/57344 train_time:21346528ms step_avg:570.67ms +step:37407/57344 train_time:21347082ms step_avg:570.67ms +grad accum step:9352/14336 +step:37408/57344 train_time:21348397ms step_avg:570.69ms +step:37409/57344 train_time:21348413ms step_avg:570.68ms +step:37410/57344 train_time:21348662ms step_avg:570.67ms +step:37411/57344 train_time:21349226ms step_avg:570.67ms +grad accum step:9353/14336 +step:37412/57344 train_time:21350558ms step_avg:570.69ms +step:37413/57344 train_time:21350574ms step_avg:570.67ms +step:37414/57344 train_time:21350824ms step_avg:570.66ms +step:37415/57344 train_time:21351380ms step_avg:570.66ms +grad accum step:9354/14336 +step:37416/57344 train_time:21352668ms step_avg:570.68ms +step:37417/57344 train_time:21352684ms step_avg:570.67ms +step:37418/57344 train_time:21352928ms step_avg:570.66ms +step:37419/57344 train_time:21353473ms step_avg:570.66ms +grad accum step:9355/14336 +step:37420/57344 train_time:21354767ms step_avg:570.68ms +step:37421/57344 train_time:21354784ms step_avg:570.66ms +step:37422/57344 train_time:21355033ms step_avg:570.65ms +step:37423/57344 train_time:21355583ms step_avg:570.65ms +grad accum step:9356/14336 +step:37424/57344 train_time:21356940ms step_avg:570.67ms +step:37425/57344 train_time:21356957ms step_avg:570.66ms +step:37426/57344 train_time:21357205ms step_avg:570.65ms +step:37427/57344 train_time:21357754ms step_avg:570.65ms +grad accum step:9357/14336 +step:37428/57344 train_time:21359059ms step_avg:570.67ms +step:37429/57344 train_time:21359076ms step_avg:570.66ms +step:37430/57344 train_time:21359320ms step_avg:570.65ms +step:37431/57344 train_time:21359860ms step_avg:570.65ms +grad accum step:9358/14336 +step:37432/57344 train_time:21361149ms step_avg:570.67ms +step:37433/57344 train_time:21361166ms step_avg:570.65ms +step:37434/57344 train_time:21361413ms step_avg:570.64ms +step:37435/57344 train_time:21361966ms step_avg:570.64ms +grad accum step:9359/14336 +step:37436/57344 train_time:21363269ms step_avg:570.66ms +step:37437/57344 train_time:21363286ms step_avg:570.65ms +step:37438/57344 train_time:21363534ms step_avg:570.64ms +step:37439/57344 train_time:21364078ms step_avg:570.64ms +grad accum step:9360/14336 +step:37440/57344 train_time:21365410ms step_avg:570.66ms +step:37440/57344 val_loss:6.075029 train_time:21365422ms step_avg:570.66ms +step:37441/57344 train_time:21365433ms step_avg:570.64ms +step:37442/57344 train_time:21365659ms step_avg:570.63ms +step:37443/57344 train_time:21366211ms step_avg:570.63ms +grad accum step:9361/14336 +step:37444/57344 train_time:21367511ms step_avg:570.65ms +step:37445/57344 train_time:21367525ms step_avg:570.64ms +step:37446/57344 train_time:21367777ms step_avg:570.63ms +step:37447/57344 train_time:21368340ms step_avg:570.63ms +grad accum step:9362/14336 +step:37448/57344 train_time:21369691ms step_avg:570.65ms +step:37449/57344 train_time:21369705ms step_avg:570.63ms +step:37450/57344 train_time:21369951ms step_avg:570.63ms +step:37451/57344 train_time:21370498ms step_avg:570.63ms +grad accum step:9363/14336 +step:37452/57344 train_time:21371821ms step_avg:570.65ms +step:37453/57344 train_time:21371838ms step_avg:570.63ms +step:37454/57344 train_time:21372057ms step_avg:570.62ms +step:37455/57344 train_time:21372609ms step_avg:570.62ms +grad accum step:9364/14336 +step:37456/57344 train_time:21373928ms step_avg:570.64ms +step:37457/57344 train_time:21373945ms step_avg:570.63ms +step:37458/57344 train_time:21374183ms step_avg:570.62ms +step:37459/57344 train_time:21374732ms step_avg:570.62ms +grad accum step:9365/14336 +step:37460/57344 train_time:21376053ms step_avg:570.64ms +step:37461/57344 train_time:21376067ms step_avg:570.62ms +step:37462/57344 train_time:21376315ms step_avg:570.61ms +step:37463/57344 train_time:21376856ms step_avg:570.61ms +grad accum step:9366/14336 +step:37464/57344 train_time:21378171ms step_avg:570.63ms +step:37465/57344 train_time:21378190ms step_avg:570.62ms +step:37466/57344 train_time:21378431ms step_avg:570.61ms +step:37467/57344 train_time:21378988ms step_avg:570.61ms +grad accum step:9367/14336 +step:37468/57344 train_time:21380330ms step_avg:570.63ms +step:37469/57344 train_time:21380345ms step_avg:570.61ms +step:37470/57344 train_time:21380590ms step_avg:570.61ms +step:37471/57344 train_time:21381134ms step_avg:570.60ms +grad accum step:9368/14336 +step:37472/57344 train_time:21382435ms step_avg:570.62ms +step:37473/57344 train_time:21382450ms step_avg:570.61ms +step:37474/57344 train_time:21382697ms step_avg:570.60ms +step:37475/57344 train_time:21383260ms step_avg:570.60ms +grad accum step:9369/14336 +step:37476/57344 train_time:21384563ms step_avg:570.62ms +step:37477/57344 train_time:21384587ms step_avg:570.61ms +step:37478/57344 train_time:21384821ms step_avg:570.60ms +step:37479/57344 train_time:21385362ms step_avg:570.60ms +grad accum step:9370/14336 +step:37480/57344 train_time:21386652ms step_avg:570.62ms +step:37481/57344 train_time:21386670ms step_avg:570.60ms +step:37482/57344 train_time:21386913ms step_avg:570.59ms +step:37483/57344 train_time:21387463ms step_avg:570.59ms +grad accum step:9371/14336 +step:37484/57344 train_time:21388759ms step_avg:570.61ms +step:37485/57344 train_time:21388776ms step_avg:570.60ms +step:37486/57344 train_time:21389021ms step_avg:570.59ms +step:37487/57344 train_time:21389572ms step_avg:570.59ms +grad accum step:9372/14336 +step:37488/57344 train_time:21390864ms step_avg:570.61ms +step:37489/57344 train_time:21390880ms step_avg:570.59ms +step:37490/57344 train_time:21391120ms step_avg:570.58ms +step:37491/57344 train_time:21391662ms step_avg:570.58ms +grad accum step:9373/14336 +step:37492/57344 train_time:21392978ms step_avg:570.60ms +step:37493/57344 train_time:21393000ms step_avg:570.59ms +step:37494/57344 train_time:21393237ms step_avg:570.58ms +step:37495/57344 train_time:21393787ms step_avg:570.58ms +grad accum step:9374/14336 +step:37496/57344 train_time:21395091ms step_avg:570.60ms +step:37497/57344 train_time:21395107ms step_avg:570.58ms +step:37498/57344 train_time:21395352ms step_avg:570.57ms +step:37499/57344 train_time:21395917ms step_avg:570.57ms +grad accum step:9375/14336 +step:37500/57344 train_time:21397269ms step_avg:570.59ms +step:37501/57344 train_time:21397288ms step_avg:570.58ms +step:37502/57344 train_time:21397517ms step_avg:570.57ms +step:37503/57344 train_time:21398064ms step_avg:570.57ms +grad accum step:9376/14336 +step:37504/57344 train_time:21399377ms step_avg:570.59ms +step:37504/57344 val_loss:6.075674 train_time:21399384ms step_avg:570.59ms +step:37505/57344 train_time:21399396ms step_avg:570.57ms +step:37506/57344 train_time:21399625ms step_avg:570.57ms +step:37507/57344 train_time:21400193ms step_avg:570.57ms +grad accum step:9377/14336 +step:37508/57344 train_time:21401493ms step_avg:570.58ms +step:37509/57344 train_time:21401510ms step_avg:570.57ms +step:37510/57344 train_time:21401760ms step_avg:570.56ms +step:37511/57344 train_time:21402311ms step_avg:570.56ms +grad accum step:9378/14336 +step:37512/57344 train_time:21403624ms step_avg:570.58ms +step:37513/57344 train_time:21403643ms step_avg:570.57ms +step:37514/57344 train_time:21403879ms step_avg:570.56ms +step:37515/57344 train_time:21404429ms step_avg:570.56ms +grad accum step:9379/14336 +step:37516/57344 train_time:21405739ms step_avg:570.58ms +step:37517/57344 train_time:21405756ms step_avg:570.56ms +step:37518/57344 train_time:21406003ms step_avg:570.55ms +step:37519/57344 train_time:21406568ms step_avg:570.55ms +grad accum step:9380/14336 +step:37520/57344 train_time:21407927ms step_avg:570.57ms +step:37521/57344 train_time:21408072ms step_avg:570.56ms +step:37522/57344 train_time:21408292ms step_avg:570.55ms +step:37523/57344 train_time:21408851ms step_avg:570.55ms +grad accum step:9381/14336 +step:37524/57344 train_time:21410184ms step_avg:570.57ms +step:37525/57344 train_time:21410199ms step_avg:570.56ms +step:37526/57344 train_time:21410444ms step_avg:570.55ms +step:37527/57344 train_time:21410988ms step_avg:570.55ms +grad accum step:9382/14336 +step:37528/57344 train_time:21412288ms step_avg:570.57ms +step:37529/57344 train_time:21412307ms step_avg:570.55ms +step:37530/57344 train_time:21412535ms step_avg:570.54ms +step:37531/57344 train_time:21413088ms step_avg:570.54ms +grad accum step:9383/14336 +step:37532/57344 train_time:21414387ms step_avg:570.56ms +step:37533/57344 train_time:21414404ms step_avg:570.55ms +step:37534/57344 train_time:21414648ms step_avg:570.54ms +step:37535/57344 train_time:21415202ms step_avg:570.54ms +grad accum step:9384/14336 +step:37536/57344 train_time:21416545ms step_avg:570.56ms +step:37537/57344 train_time:21416590ms step_avg:570.55ms +step:37538/57344 train_time:21416813ms step_avg:570.54ms +step:37539/57344 train_time:21417377ms step_avg:570.54ms +grad accum step:9385/14336 +step:37540/57344 train_time:21418739ms step_avg:570.56ms +step:37541/57344 train_time:21418756ms step_avg:570.54ms +step:37542/57344 train_time:21418973ms step_avg:570.53ms +step:37543/57344 train_time:21419514ms step_avg:570.53ms +grad accum step:9386/14336 +step:37544/57344 train_time:21420871ms step_avg:570.55ms +step:37545/57344 train_time:21420889ms step_avg:570.54ms +step:37546/57344 train_time:21421134ms step_avg:570.53ms +step:37547/57344 train_time:21421693ms step_avg:570.53ms +grad accum step:9387/14336 +step:37548/57344 train_time:21422973ms step_avg:570.55ms +step:37549/57344 train_time:21422989ms step_avg:570.53ms +step:37550/57344 train_time:21423237ms step_avg:570.53ms +step:37551/57344 train_time:21423782ms step_avg:570.52ms +grad accum step:9388/14336 +step:37552/57344 train_time:21425074ms step_avg:570.54ms +step:37553/57344 train_time:21425090ms step_avg:570.53ms +step:37554/57344 train_time:21425336ms step_avg:570.52ms +step:37555/57344 train_time:21425889ms step_avg:570.52ms +grad accum step:9389/14336 +step:37556/57344 train_time:21427208ms step_avg:570.54ms +step:37557/57344 train_time:21427232ms step_avg:570.53ms +step:37558/57344 train_time:21427472ms step_avg:570.52ms +step:37559/57344 train_time:21428022ms step_avg:570.52ms +grad accum step:9390/14336 +step:37560/57344 train_time:21429327ms step_avg:570.54ms +step:37561/57344 train_time:21429342ms step_avg:570.52ms +step:37562/57344 train_time:21429585ms step_avg:570.51ms +step:37563/57344 train_time:21430132ms step_avg:570.51ms +grad accum step:9391/14336 +step:37564/57344 train_time:21431413ms step_avg:570.53ms +step:37565/57344 train_time:21431434ms step_avg:570.52ms +step:37566/57344 train_time:21431676ms step_avg:570.51ms +step:37567/57344 train_time:21432236ms step_avg:570.51ms +grad accum step:9392/14336 +step:37568/57344 train_time:21433534ms step_avg:570.53ms +step:37568/57344 val_loss:6.078236 train_time:21433537ms step_avg:570.53ms +step:37569/57344 train_time:21433549ms step_avg:570.51ms +step:37570/57344 train_time:21433770ms step_avg:570.50ms +step:37571/57344 train_time:21434309ms step_avg:570.50ms +grad accum step:9393/14336 +step:37572/57344 train_time:21435671ms step_avg:570.52ms +step:37573/57344 train_time:21435686ms step_avg:570.51ms +step:37574/57344 train_time:21435949ms step_avg:570.50ms +step:37575/57344 train_time:21436547ms step_avg:570.50ms +grad accum step:9394/14336 +step:37576/57344 train_time:21437865ms step_avg:570.52ms +step:37577/57344 train_time:21437880ms step_avg:570.51ms +step:37578/57344 train_time:21438124ms step_avg:570.50ms +step:37579/57344 train_time:21438667ms step_avg:570.50ms +grad accum step:9395/14336 +step:37580/57344 train_time:21439960ms step_avg:570.52ms +step:37581/57344 train_time:21439978ms step_avg:570.50ms +step:37582/57344 train_time:21440222ms step_avg:570.49ms +step:37583/57344 train_time:21440769ms step_avg:570.49ms +grad accum step:9396/14336 +step:37584/57344 train_time:21442184ms step_avg:570.51ms +step:37585/57344 train_time:21442196ms step_avg:570.50ms +step:37586/57344 train_time:21442407ms step_avg:570.49ms +step:37587/57344 train_time:21442966ms step_avg:570.49ms +grad accum step:9397/14336 +step:37588/57344 train_time:21444283ms step_avg:570.51ms +step:37589/57344 train_time:21444298ms step_avg:570.49ms +step:37590/57344 train_time:21444547ms step_avg:570.49ms +step:37591/57344 train_time:21445093ms step_avg:570.48ms +grad accum step:9398/14336 +step:37592/57344 train_time:21446401ms step_avg:570.50ms +step:37593/57344 train_time:21446417ms step_avg:570.49ms +step:37594/57344 train_time:21446657ms step_avg:570.48ms +step:37595/57344 train_time:21447204ms step_avg:570.48ms +grad accum step:9399/14336 +step:37596/57344 train_time:21448507ms step_avg:570.50ms +step:37597/57344 train_time:21448524ms step_avg:570.48ms +step:37598/57344 train_time:21448770ms step_avg:570.48ms +step:37599/57344 train_time:21449319ms step_avg:570.48ms +grad accum step:9400/14336 +step:37600/57344 train_time:21450628ms step_avg:570.50ms +step:37601/57344 train_time:21450647ms step_avg:570.48ms +step:37602/57344 train_time:21450889ms step_avg:570.47ms +step:37603/57344 train_time:21451439ms step_avg:570.47ms +grad accum step:9401/14336 +step:37604/57344 train_time:21452745ms step_avg:570.49ms +step:37605/57344 train_time:21452757ms step_avg:570.48ms +step:37606/57344 train_time:21452994ms step_avg:570.47ms +step:37607/57344 train_time:21453542ms step_avg:570.47ms +grad accum step:9402/14336 +step:37608/57344 train_time:21454895ms step_avg:570.49ms +step:37609/57344 train_time:21454914ms step_avg:570.47ms +step:37610/57344 train_time:21455134ms step_avg:570.46ms +step:37611/57344 train_time:21455678ms step_avg:570.46ms +grad accum step:9403/14336 +step:37612/57344 train_time:21456985ms step_avg:570.48ms +step:37613/57344 train_time:21457003ms step_avg:570.47ms +step:37614/57344 train_time:21457243ms step_avg:570.46ms +step:37615/57344 train_time:21457792ms step_avg:570.46ms +grad accum step:9404/14336 +step:37616/57344 train_time:21459108ms step_avg:570.48ms +step:37617/57344 train_time:21459126ms step_avg:570.46ms +step:37618/57344 train_time:21459367ms step_avg:570.45ms +step:37619/57344 train_time:21459919ms step_avg:570.45ms +grad accum step:9405/14336 +step:37620/57344 train_time:21461218ms step_avg:570.47ms +step:37621/57344 train_time:21461233ms step_avg:570.46ms +step:37622/57344 train_time:21461480ms step_avg:570.45ms +step:37623/57344 train_time:21462022ms step_avg:570.45ms +grad accum step:9406/14336 +step:37624/57344 train_time:21463311ms step_avg:570.47ms +step:37625/57344 train_time:21463325ms step_avg:570.45ms +step:37626/57344 train_time:21463571ms step_avg:570.45ms +step:37627/57344 train_time:21464123ms step_avg:570.44ms +grad accum step:9407/14336 +step:37628/57344 train_time:21465444ms step_avg:570.46ms +step:37629/57344 train_time:21465461ms step_avg:570.45ms +step:37630/57344 train_time:21465708ms step_avg:570.44ms +step:37631/57344 train_time:21466273ms step_avg:570.44ms +grad accum step:9408/14336 +step:37632/57344 train_time:21467566ms step_avg:570.46ms +step:37632/57344 val_loss:6.075828 train_time:21467578ms step_avg:570.46ms +step:37633/57344 train_time:21467590ms step_avg:570.45ms +step:37634/57344 train_time:21467807ms step_avg:570.44ms +step:37635/57344 train_time:21468351ms step_avg:570.44ms +grad accum step:9409/14336 +step:37636/57344 train_time:21469685ms step_avg:570.46ms +step:37637/57344 train_time:21469700ms step_avg:570.44ms +step:37638/57344 train_time:21469943ms step_avg:570.43ms +step:37639/57344 train_time:21470489ms step_avg:570.43ms +grad accum step:9410/14336 +step:37640/57344 train_time:21471788ms step_avg:570.45ms +step:37641/57344 train_time:21471802ms step_avg:570.44ms +step:37642/57344 train_time:21472050ms step_avg:570.43ms +step:37643/57344 train_time:21472595ms step_avg:570.43ms +grad accum step:9411/14336 +step:37644/57344 train_time:21473872ms step_avg:570.45ms +step:37645/57344 train_time:21473887ms step_avg:570.43ms +step:37646/57344 train_time:21474133ms step_avg:570.42ms +step:37647/57344 train_time:21474688ms step_avg:570.42ms +grad accum step:9412/14336 +step:37648/57344 train_time:21476000ms step_avg:570.44ms +step:37649/57344 train_time:21476016ms step_avg:570.43ms +step:37650/57344 train_time:21476264ms step_avg:570.42ms +step:37651/57344 train_time:21476827ms step_avg:570.42ms +grad accum step:9413/14336 +step:37652/57344 train_time:21478190ms step_avg:570.44ms +step:37653/57344 train_time:21478206ms step_avg:570.42ms +step:37654/57344 train_time:21478453ms step_avg:570.42ms +step:37655/57344 train_time:21479002ms step_avg:570.42ms +grad accum step:9414/14336 +step:37656/57344 train_time:21480327ms step_avg:570.44ms +step:37657/57344 train_time:21480341ms step_avg:570.42ms +step:37658/57344 train_time:21480597ms step_avg:570.41ms +step:37659/57344 train_time:21481160ms step_avg:570.41ms +grad accum step:9415/14336 +step:37660/57344 train_time:21482475ms step_avg:570.43ms +step:37661/57344 train_time:21482489ms step_avg:570.42ms +step:37662/57344 train_time:21482735ms step_avg:570.41ms +step:37663/57344 train_time:21483287ms step_avg:570.41ms +grad accum step:9416/14336 +step:37664/57344 train_time:21484610ms step_avg:570.43ms +step:37665/57344 train_time:21484628ms step_avg:570.41ms +step:37666/57344 train_time:21484869ms step_avg:570.40ms +step:37667/57344 train_time:21485410ms step_avg:570.40ms +grad accum step:9417/14336 +step:37668/57344 train_time:21486723ms step_avg:570.42ms +step:37669/57344 train_time:21486743ms step_avg:570.41ms +step:37670/57344 train_time:21486984ms step_avg:570.40ms +step:37671/57344 train_time:21487539ms step_avg:570.40ms +grad accum step:9418/14336 +step:37672/57344 train_time:21488848ms step_avg:570.42ms +step:37673/57344 train_time:21488864ms step_avg:570.40ms +step:37674/57344 train_time:21489106ms step_avg:570.40ms +step:37675/57344 train_time:21489650ms step_avg:570.40ms +grad accum step:9419/14336 +step:37676/57344 train_time:21490958ms step_avg:570.42ms +step:37677/57344 train_time:21490974ms step_avg:570.40ms +step:37678/57344 train_time:21491214ms step_avg:570.39ms +step:37679/57344 train_time:21491759ms step_avg:570.39ms +grad accum step:9420/14336 +step:37680/57344 train_time:21493079ms step_avg:570.41ms +step:37681/57344 train_time:21493095ms step_avg:570.40ms +step:37682/57344 train_time:21493341ms step_avg:570.39ms +step:37683/57344 train_time:21493900ms step_avg:570.39ms +grad accum step:9421/14336 +step:37684/57344 train_time:21495246ms step_avg:570.41ms +step:37685/57344 train_time:21495267ms step_avg:570.39ms +step:37686/57344 train_time:21495504ms step_avg:570.38ms +step:37687/57344 train_time:21496063ms step_avg:570.38ms +grad accum step:9422/14336 +step:37688/57344 train_time:21497396ms step_avg:570.40ms +step:37689/57344 train_time:21497410ms step_avg:570.39ms +step:37690/57344 train_time:21497626ms step_avg:570.38ms +step:37691/57344 train_time:21498169ms step_avg:570.38ms +grad accum step:9423/14336 +step:37692/57344 train_time:21499524ms step_avg:570.40ms +step:37693/57344 train_time:21499540ms step_avg:570.39ms +step:37694/57344 train_time:21499792ms step_avg:570.38ms +step:37695/57344 train_time:21500354ms step_avg:570.38ms +grad accum step:9424/14336 +step:37696/57344 train_time:21501646ms step_avg:570.40ms +step:37696/57344 val_loss:6.077408 train_time:21501649ms step_avg:570.40ms +step:37697/57344 train_time:21501661ms step_avg:570.38ms +step:37698/57344 train_time:21501885ms step_avg:570.37ms +step:37699/57344 train_time:21502437ms step_avg:570.37ms +grad accum step:9425/14336 +step:37700/57344 train_time:21503772ms step_avg:570.39ms +step:37701/57344 train_time:21503801ms step_avg:570.38ms +step:37702/57344 train_time:21504023ms step_avg:570.37ms +step:37703/57344 train_time:21504571ms step_avg:570.37ms +grad accum step:9426/14336 +step:37704/57344 train_time:21505933ms step_avg:570.39ms +step:37705/57344 train_time:21505949ms step_avg:570.37ms +step:37706/57344 train_time:21506194ms step_avg:570.37ms +step:37707/57344 train_time:21506748ms step_avg:570.36ms +grad accum step:9427/14336 +step:37708/57344 train_time:21508086ms step_avg:570.39ms +step:37709/57344 train_time:21508100ms step_avg:570.37ms +step:37710/57344 train_time:21508331ms step_avg:570.36ms +step:37711/57344 train_time:21508886ms step_avg:570.36ms +grad accum step:9428/14336 +step:37712/57344 train_time:21510170ms step_avg:570.38ms +step:37713/57344 train_time:21510184ms step_avg:570.37ms +step:37714/57344 train_time:21510434ms step_avg:570.36ms +step:37715/57344 train_time:21510983ms step_avg:570.36ms +grad accum step:9429/14336 +step:37716/57344 train_time:21512307ms step_avg:570.38ms +step:37717/57344 train_time:21512322ms step_avg:570.36ms +step:37718/57344 train_time:21512564ms step_avg:570.35ms +step:37719/57344 train_time:21513106ms step_avg:570.35ms +grad accum step:9430/14336 +step:37720/57344 train_time:21514453ms step_avg:570.37ms +step:37721/57344 train_time:21514467ms step_avg:570.36ms +step:37722/57344 train_time:21514716ms step_avg:570.35ms +step:37723/57344 train_time:21515262ms step_avg:570.35ms +grad accum step:9431/14336 +step:37724/57344 train_time:21516538ms step_avg:570.37ms +step:37725/57344 train_time:21516555ms step_avg:570.35ms +step:37726/57344 train_time:21516798ms step_avg:570.34ms +step:37727/57344 train_time:21517349ms step_avg:570.34ms +grad accum step:9432/14336 +step:37728/57344 train_time:21518667ms step_avg:570.36ms +step:37729/57344 train_time:21518682ms step_avg:570.35ms +step:37730/57344 train_time:21518927ms step_avg:570.34ms +step:37731/57344 train_time:21519475ms step_avg:570.34ms +grad accum step:9433/14336 +step:37732/57344 train_time:21520781ms step_avg:570.36ms +step:37733/57344 train_time:21520796ms step_avg:570.34ms +step:37734/57344 train_time:21521044ms step_avg:570.34ms +step:37735/57344 train_time:21521589ms step_avg:570.33ms +grad accum step:9434/14336 +step:37736/57344 train_time:21522884ms step_avg:570.35ms +step:37737/57344 train_time:21522900ms step_avg:570.34ms +step:37738/57344 train_time:21523137ms step_avg:570.33ms +step:37739/57344 train_time:21523686ms step_avg:570.33ms +grad accum step:9435/14336 +step:37740/57344 train_time:21525006ms step_avg:570.35ms +step:37741/57344 train_time:21525021ms step_avg:570.34ms +step:37742/57344 train_time:21525280ms step_avg:570.33ms +step:37743/57344 train_time:21525857ms step_avg:570.33ms +grad accum step:9436/14336 +step:37744/57344 train_time:21527312ms step_avg:570.35ms +step:37745/57344 train_time:21527330ms step_avg:570.34ms +step:37746/57344 train_time:21527558ms step_avg:570.33ms +step:37747/57344 train_time:21528131ms step_avg:570.33ms +grad accum step:9437/14336 +step:37748/57344 train_time:21529455ms step_avg:570.35ms +step:37749/57344 train_time:21529472ms step_avg:570.33ms +step:37750/57344 train_time:21529715ms step_avg:570.32ms +step:37751/57344 train_time:21530260ms step_avg:570.32ms +grad accum step:9438/14336 +step:37752/57344 train_time:21531575ms step_avg:570.34ms +step:37753/57344 train_time:21531590ms step_avg:570.33ms +step:37754/57344 train_time:21531837ms step_avg:570.32ms +step:37755/57344 train_time:21532382ms step_avg:570.32ms +grad accum step:9439/14336 +step:37756/57344 train_time:21533680ms step_avg:570.34ms +step:37757/57344 train_time:21533696ms step_avg:570.32ms +step:37758/57344 train_time:21533948ms step_avg:570.31ms +step:37759/57344 train_time:21534517ms step_avg:570.31ms +grad accum step:9440/14336 +step:37760/57344 train_time:21535823ms step_avg:570.33ms +step:37760/57344 val_loss:6.082530 train_time:21535827ms step_avg:570.33ms +step:37761/57344 train_time:21535839ms step_avg:570.32ms +step:37762/57344 train_time:21536059ms step_avg:570.31ms +step:37763/57344 train_time:21536609ms step_avg:570.31ms +grad accum step:9441/14336 +step:37764/57344 train_time:21537959ms step_avg:570.33ms +step:37765/57344 train_time:21537971ms step_avg:570.32ms +step:37766/57344 train_time:21538214ms step_avg:570.31ms +step:37767/57344 train_time:21538760ms step_avg:570.31ms +grad accum step:9442/14336 +step:37768/57344 train_time:21540095ms step_avg:570.33ms +step:37769/57344 train_time:21540109ms step_avg:570.31ms +step:37770/57344 train_time:21540344ms step_avg:570.30ms +step:37771/57344 train_time:21540900ms step_avg:570.30ms +grad accum step:9443/14336 +step:37772/57344 train_time:21542215ms step_avg:570.32ms +step:37773/57344 train_time:21542228ms step_avg:570.31ms +step:37774/57344 train_time:21542473ms step_avg:570.30ms +step:37775/57344 train_time:21543028ms step_avg:570.30ms +grad accum step:9444/14336 +step:37776/57344 train_time:21544363ms step_avg:570.32ms +step:37777/57344 train_time:21544381ms step_avg:570.30ms +step:37778/57344 train_time:21544627ms step_avg:570.30ms +step:37779/57344 train_time:21545177ms step_avg:570.30ms +grad accum step:9445/14336 +step:37780/57344 train_time:21546494ms step_avg:570.31ms +step:37781/57344 train_time:21546512ms step_avg:570.30ms +step:37782/57344 train_time:21546755ms step_avg:570.29ms +step:37783/57344 train_time:21547317ms step_avg:570.29ms +grad accum step:9446/14336 +step:37784/57344 train_time:21548619ms step_avg:570.31ms +step:37785/57344 train_time:21548634ms step_avg:570.30ms +step:37786/57344 train_time:21548879ms step_avg:570.29ms +step:37787/57344 train_time:21549418ms step_avg:570.29ms +grad accum step:9447/14336 +step:37788/57344 train_time:21550746ms step_avg:570.31ms +step:37789/57344 train_time:21550762ms step_avg:570.29ms +step:37790/57344 train_time:21551014ms step_avg:570.28ms +step:37791/57344 train_time:21551585ms step_avg:570.28ms +grad accum step:9448/14336 +step:37792/57344 train_time:21552906ms step_avg:570.30ms +step:37793/57344 train_time:21552921ms step_avg:570.29ms +step:37794/57344 train_time:21553170ms step_avg:570.28ms +step:37795/57344 train_time:21553719ms step_avg:570.28ms +grad accum step:9449/14336 +step:37796/57344 train_time:21555020ms step_avg:570.30ms +step:37797/57344 train_time:21555035ms step_avg:570.28ms +step:37798/57344 train_time:21555300ms step_avg:570.28ms +step:37799/57344 train_time:21555897ms step_avg:570.28ms +grad accum step:9450/14336 +step:37800/57344 train_time:21557210ms step_avg:570.30ms +step:37801/57344 train_time:21557231ms step_avg:570.28ms +step:37802/57344 train_time:21557470ms step_avg:570.27ms +step:37803/57344 train_time:21558027ms step_avg:570.27ms +grad accum step:9451/14336 +step:37804/57344 train_time:21559351ms step_avg:570.29ms +step:37805/57344 train_time:21559368ms step_avg:570.28ms +step:37806/57344 train_time:21559620ms step_avg:570.27ms +step:37807/57344 train_time:21560183ms step_avg:570.27ms +grad accum step:9452/14336 +step:37808/57344 train_time:21561497ms step_avg:570.29ms +step:37809/57344 train_time:21561513ms step_avg:570.27ms +step:37810/57344 train_time:21561780ms step_avg:570.27ms +step:37811/57344 train_time:21562390ms step_avg:570.27ms +grad accum step:9453/14336 +step:37812/57344 train_time:21563773ms step_avg:570.29ms +step:37813/57344 train_time:21563789ms step_avg:570.27ms +step:37814/57344 train_time:21564036ms step_avg:570.27ms +step:37815/57344 train_time:21564582ms step_avg:570.27ms +grad accum step:9454/14336 +step:37816/57344 train_time:21565910ms step_avg:570.29ms +step:37817/57344 train_time:21565926ms step_avg:570.27ms +step:37818/57344 train_time:21566177ms step_avg:570.26ms +step:37819/57344 train_time:21566733ms step_avg:570.26ms +grad accum step:9455/14336 +step:37820/57344 train_time:21568052ms step_avg:570.28ms +step:37821/57344 train_time:21568069ms step_avg:570.27ms +step:37822/57344 train_time:21568315ms step_avg:570.26ms +step:37823/57344 train_time:21568863ms step_avg:570.26ms +grad accum step:9456/14336 +step:37824/57344 train_time:21570176ms step_avg:570.28ms +step:37824/57344 val_loss:6.092300 train_time:21570177ms step_avg:570.28ms +step:37825/57344 train_time:21570189ms step_avg:570.26ms +step:37826/57344 train_time:21570413ms step_avg:570.25ms +step:37827/57344 train_time:21570960ms step_avg:570.25ms +grad accum step:9457/14336 +step:37828/57344 train_time:21572266ms step_avg:570.27ms +step:37829/57344 train_time:21572283ms step_avg:570.26ms +step:37830/57344 train_time:21572530ms step_avg:570.25ms +step:37831/57344 train_time:21573069ms step_avg:570.25ms +grad accum step:9458/14336 +step:37832/57344 train_time:21574370ms step_avg:570.27ms +step:37833/57344 train_time:21574387ms step_avg:570.25ms +step:37834/57344 train_time:21574635ms step_avg:570.24ms +step:37835/57344 train_time:21575188ms step_avg:570.24ms +grad accum step:9459/14336 +step:37836/57344 train_time:21576495ms step_avg:570.26ms +step:37837/57344 train_time:21576512ms step_avg:570.25ms +step:37838/57344 train_time:21576757ms step_avg:570.24ms +step:37839/57344 train_time:21577308ms step_avg:570.24ms +grad accum step:9460/14336 +step:37840/57344 train_time:21578606ms step_avg:570.26ms +step:37841/57344 train_time:21578623ms step_avg:570.24ms +step:37842/57344 train_time:21578875ms step_avg:570.24ms +step:37843/57344 train_time:21579433ms step_avg:570.24ms +grad accum step:9461/14336 +step:37844/57344 train_time:21580731ms step_avg:570.26ms +step:37845/57344 train_time:21580748ms step_avg:570.24ms +step:37846/57344 train_time:21580997ms step_avg:570.23ms +step:37847/57344 train_time:21581556ms step_avg:570.23ms +grad accum step:9462/14336 +step:37848/57344 train_time:21582873ms step_avg:570.25ms +step:37849/57344 train_time:21582890ms step_avg:570.24ms +step:37850/57344 train_time:21583139ms step_avg:570.23ms +step:37851/57344 train_time:21583691ms step_avg:570.23ms +grad accum step:9463/14336 +step:37852/57344 train_time:21584995ms step_avg:570.25ms +step:37853/57344 train_time:21585012ms step_avg:570.23ms +step:37854/57344 train_time:21585255ms step_avg:570.22ms +step:37855/57344 train_time:21585809ms step_avg:570.22ms +grad accum step:9464/14336 +step:37856/57344 train_time:21587268ms step_avg:570.25ms +step:37857/57344 train_time:21587284ms step_avg:570.23ms +step:37858/57344 train_time:21587532ms step_avg:570.22ms +step:37859/57344 train_time:21588076ms step_avg:570.22ms +grad accum step:9465/14336 +step:37860/57344 train_time:21589388ms step_avg:570.24ms +step:37861/57344 train_time:21589404ms step_avg:570.23ms +step:37862/57344 train_time:21589649ms step_avg:570.22ms +step:37863/57344 train_time:21590191ms step_avg:570.22ms +grad accum step:9466/14336 +step:37864/57344 train_time:21591485ms step_avg:570.24ms +step:37865/57344 train_time:21591497ms step_avg:570.22ms +step:37866/57344 train_time:21591744ms step_avg:570.21ms +step:37867/57344 train_time:21592307ms step_avg:570.21ms +grad accum step:9467/14336 +step:37868/57344 train_time:21593633ms step_avg:570.23ms +step:37869/57344 train_time:21593650ms step_avg:570.22ms +step:37870/57344 train_time:21593897ms step_avg:570.21ms +step:37871/57344 train_time:21594449ms step_avg:570.21ms +grad accum step:9468/14336 +step:37872/57344 train_time:21595824ms step_avg:570.23ms +step:37873/57344 train_time:21595841ms step_avg:570.22ms +step:37874/57344 train_time:21596087ms step_avg:570.21ms +step:37875/57344 train_time:21596627ms step_avg:570.21ms +grad accum step:9469/14336 +step:37876/57344 train_time:21597970ms step_avg:570.23ms +step:37877/57344 train_time:21597991ms step_avg:570.21ms +step:37878/57344 train_time:21598216ms step_avg:570.20ms +step:37879/57344 train_time:21598774ms step_avg:570.20ms +grad accum step:9470/14336 +step:37880/57344 train_time:21600093ms step_avg:570.22ms +step:37881/57344 train_time:21600110ms step_avg:570.21ms +step:37882/57344 train_time:21600354ms step_avg:570.20ms +step:37883/57344 train_time:21600892ms step_avg:570.20ms +grad accum step:9471/14336 +step:37884/57344 train_time:21602193ms step_avg:570.22ms +step:37885/57344 train_time:21602209ms step_avg:570.20ms +step:37886/57344 train_time:21602459ms step_avg:570.20ms +step:37887/57344 train_time:21603028ms step_avg:570.20ms +grad accum step:9472/14336 +step:37888/57344 train_time:21604370ms step_avg:570.22ms +step:37888/57344 val_loss:6.086246 train_time:21604371ms step_avg:570.22ms +step:37889/57344 train_time:21604383ms step_avg:570.20ms +step:37890/57344 train_time:21604612ms step_avg:570.19ms +step:37891/57344 train_time:21605170ms step_avg:570.19ms +grad accum step:9473/14336 +step:37892/57344 train_time:21606523ms step_avg:570.21ms +step:37893/57344 train_time:21606540ms step_avg:570.20ms +step:37894/57344 train_time:21606787ms step_avg:570.19ms +step:37895/57344 train_time:21607337ms step_avg:570.19ms +grad accum step:9474/14336 +step:37896/57344 train_time:21608652ms step_avg:570.21ms +step:37897/57344 train_time:21608669ms step_avg:570.19ms +step:37898/57344 train_time:21608914ms step_avg:570.19ms +step:37899/57344 train_time:21609459ms step_avg:570.19ms +grad accum step:9475/14336 +step:37900/57344 train_time:21610782ms step_avg:570.21ms +step:37901/57344 train_time:21610799ms step_avg:570.19ms +step:37902/57344 train_time:21611047ms step_avg:570.18ms +step:37903/57344 train_time:21611590ms step_avg:570.18ms +grad accum step:9476/14336 +step:37904/57344 train_time:21612906ms step_avg:570.20ms +step:37905/57344 train_time:21612923ms step_avg:570.19ms +step:37906/57344 train_time:21613175ms step_avg:570.18ms +step:37907/57344 train_time:21613734ms step_avg:570.18ms +grad accum step:9477/14336 +step:37908/57344 train_time:21615086ms step_avg:570.20ms +step:37909/57344 train_time:21615098ms step_avg:570.18ms +step:37910/57344 train_time:21615328ms step_avg:570.17ms +step:37911/57344 train_time:21615873ms step_avg:570.17ms +grad accum step:9478/14336 +step:37912/57344 train_time:21617213ms step_avg:570.19ms +step:37913/57344 train_time:21617230ms step_avg:570.18ms +step:37914/57344 train_time:21617483ms step_avg:570.17ms +step:37915/57344 train_time:21618046ms step_avg:570.17ms +grad accum step:9479/14336 +step:37916/57344 train_time:21619358ms step_avg:570.19ms +step:37917/57344 train_time:21619373ms step_avg:570.18ms +step:37918/57344 train_time:21619620ms step_avg:570.17ms +step:37919/57344 train_time:21620176ms step_avg:570.17ms +grad accum step:9480/14336 +step:37920/57344 train_time:21621477ms step_avg:570.19ms +step:37921/57344 train_time:21621492ms step_avg:570.17ms +step:37922/57344 train_time:21621739ms step_avg:570.16ms +step:37923/57344 train_time:21622286ms step_avg:570.16ms +grad accum step:9481/14336 +step:37924/57344 train_time:21623565ms step_avg:570.18ms +step:37925/57344 train_time:21623582ms step_avg:570.17ms +step:37926/57344 train_time:21623834ms step_avg:570.16ms +step:37927/57344 train_time:21624393ms step_avg:570.16ms +grad accum step:9482/14336 +step:37928/57344 train_time:21625708ms step_avg:570.18ms +step:37929/57344 train_time:21625725ms step_avg:570.16ms +step:37930/57344 train_time:21625970ms step_avg:570.15ms +step:37931/57344 train_time:21626514ms step_avg:570.15ms +grad accum step:9483/14336 +step:37932/57344 train_time:21627827ms step_avg:570.17ms +step:37933/57344 train_time:21627838ms step_avg:570.16ms +step:37934/57344 train_time:21628077ms step_avg:570.15ms +step:37935/57344 train_time:21628620ms step_avg:570.15ms +grad accum step:9484/14336 +step:37936/57344 train_time:21629915ms step_avg:570.17ms +step:37937/57344 train_time:21629932ms step_avg:570.15ms +step:37938/57344 train_time:21630181ms step_avg:570.15ms +step:37939/57344 train_time:21630735ms step_avg:570.15ms +grad accum step:9485/14336 +step:37940/57344 train_time:21632030ms step_avg:570.16ms +step:37941/57344 train_time:21632047ms step_avg:570.15ms +step:37942/57344 train_time:21632293ms step_avg:570.14ms +step:37943/57344 train_time:21632850ms step_avg:570.14ms +grad accum step:9486/14336 +step:37944/57344 train_time:21634143ms step_avg:570.16ms +step:37945/57344 train_time:21634155ms step_avg:570.15ms +step:37946/57344 train_time:21634407ms step_avg:570.14ms +step:37947/57344 train_time:21634963ms step_avg:570.14ms +grad accum step:9487/14336 +step:37948/57344 train_time:21636289ms step_avg:570.16ms +step:37949/57344 train_time:21636306ms step_avg:570.14ms +step:37950/57344 train_time:21636569ms step_avg:570.13ms +step:37951/57344 train_time:21637157ms step_avg:570.13ms +grad accum step:9488/14336 +step:37952/57344 train_time:21638492ms step_avg:570.15ms +step:37952/57344 val_loss:6.090149 train_time:21638493ms step_avg:570.15ms +step:37953/57344 train_time:21638505ms step_avg:570.14ms +step:37954/57344 train_time:21638729ms step_avg:570.13ms +step:37955/57344 train_time:21639289ms step_avg:570.13ms +grad accum step:9489/14336 +step:37956/57344 train_time:21640616ms step_avg:570.15ms +step:37957/57344 train_time:21640632ms step_avg:570.14ms +step:37958/57344 train_time:21640876ms step_avg:570.13ms +step:37959/57344 train_time:21641430ms step_avg:570.13ms +grad accum step:9490/14336 +step:37960/57344 train_time:21642777ms step_avg:570.15ms +step:37961/57344 train_time:21642795ms step_avg:570.13ms +step:37962/57344 train_time:21643038ms step_avg:570.12ms +step:37963/57344 train_time:21643585ms step_avg:570.12ms +grad accum step:9491/14336 +step:37964/57344 train_time:21644860ms step_avg:570.14ms +step:37965/57344 train_time:21644877ms step_avg:570.13ms +step:37966/57344 train_time:21645122ms step_avg:570.12ms +step:37967/57344 train_time:21645665ms step_avg:570.12ms +grad accum step:9492/14336 +step:37968/57344 train_time:21646971ms step_avg:570.14ms +step:37969/57344 train_time:21646992ms step_avg:570.12ms +step:37970/57344 train_time:21647231ms step_avg:570.11ms +step:37971/57344 train_time:21647795ms step_avg:570.11ms +grad accum step:9493/14336 +step:37972/57344 train_time:21649112ms step_avg:570.13ms +step:37973/57344 train_time:21649130ms step_avg:570.12ms +step:37974/57344 train_time:21649368ms step_avg:570.11ms +step:37975/57344 train_time:21649914ms step_avg:570.11ms +grad accum step:9494/14336 +step:37976/57344 train_time:21651201ms step_avg:570.13ms +step:37977/57344 train_time:21651219ms step_avg:570.11ms +step:37978/57344 train_time:21651465ms step_avg:570.11ms +step:37979/57344 train_time:21652035ms step_avg:570.11ms +grad accum step:9495/14336 +step:37980/57344 train_time:21653352ms step_avg:570.13ms +step:37981/57344 train_time:21653369ms step_avg:570.11ms +step:37982/57344 train_time:21653620ms step_avg:570.10ms +step:37983/57344 train_time:21654184ms step_avg:570.10ms +grad accum step:9496/14336 +step:37984/57344 train_time:21655533ms step_avg:570.12ms +step:37985/57344 train_time:21655551ms step_avg:570.11ms +step:37986/57344 train_time:21655783ms step_avg:570.10ms +step:37987/57344 train_time:21656330ms step_avg:570.10ms +grad accum step:9497/14336 +step:37988/57344 train_time:21657647ms step_avg:570.12ms +step:37989/57344 train_time:21657660ms step_avg:570.10ms +step:37990/57344 train_time:21657896ms step_avg:570.09ms +step:37991/57344 train_time:21658439ms step_avg:570.09ms +grad accum step:9498/14336 +step:37992/57344 train_time:21659750ms step_avg:570.11ms +step:37993/57344 train_time:21659764ms step_avg:570.10ms +step:37994/57344 train_time:21660017ms step_avg:570.09ms +step:37995/57344 train_time:21660578ms step_avg:570.09ms +grad accum step:9499/14336 +step:37996/57344 train_time:21661946ms step_avg:570.11ms +step:37997/57344 train_time:21661961ms step_avg:570.10ms +step:37998/57344 train_time:21662213ms step_avg:570.09ms +step:37999/57344 train_time:21662768ms step_avg:570.09ms +grad accum step:9500/14336 +step:38000/57344 train_time:21664090ms step_avg:570.11ms +step:38001/57344 train_time:21664107ms step_avg:570.09ms +step:38002/57344 train_time:21664343ms step_avg:570.08ms +step:38003/57344 train_time:21664887ms step_avg:570.08ms +grad accum step:9501/14336 +step:38004/57344 train_time:21666241ms step_avg:570.10ms +step:38005/57344 train_time:21666254ms step_avg:570.09ms +step:38006/57344 train_time:21666505ms step_avg:570.08ms +step:38007/57344 train_time:21667062ms step_avg:570.08ms +grad accum step:9502/14336 +step:38008/57344 train_time:21668476ms step_avg:570.10ms +step:38009/57344 train_time:21668499ms step_avg:570.09ms +step:38010/57344 train_time:21668714ms step_avg:570.08ms +step:38011/57344 train_time:21669251ms step_avg:570.08ms +grad accum step:9503/14336 +step:38012/57344 train_time:21670552ms step_avg:570.10ms +step:38013/57344 train_time:21670570ms step_avg:570.08ms +step:38014/57344 train_time:21670805ms step_avg:570.07ms +step:38015/57344 train_time:21671351ms step_avg:570.07ms +grad accum step:9504/14336 +step:38016/57344 train_time:21672647ms step_avg:570.09ms +step:38016/57344 val_loss:6.094055 train_time:21672651ms step_avg:570.09ms +step:38017/57344 train_time:21672663ms step_avg:570.08ms +step:38018/57344 train_time:21672887ms step_avg:570.07ms +step:38019/57344 train_time:21673440ms step_avg:570.07ms +grad accum step:9505/14336 +step:38020/57344 train_time:21674764ms step_avg:570.09ms +step:38021/57344 train_time:21674782ms step_avg:570.07ms +step:38022/57344 train_time:21675030ms step_avg:570.07ms +step:38023/57344 train_time:21675594ms step_avg:570.07ms +grad accum step:9506/14336 +step:38024/57344 train_time:21676907ms step_avg:570.08ms +step:38025/57344 train_time:21676927ms step_avg:570.07ms +step:38026/57344 train_time:21677165ms step_avg:570.06ms +step:38027/57344 train_time:21677708ms step_avg:570.06ms +grad accum step:9507/14336 +step:38028/57344 train_time:21678999ms step_avg:570.08ms +step:38029/57344 train_time:21679019ms step_avg:570.07ms +step:38030/57344 train_time:21679258ms step_avg:570.06ms +step:38031/57344 train_time:21679813ms step_avg:570.06ms +grad accum step:9508/14336 +step:38032/57344 train_time:21681109ms step_avg:570.08ms +step:38033/57344 train_time:21681126ms step_avg:570.06ms +step:38034/57344 train_time:21681373ms step_avg:570.05ms +step:38035/57344 train_time:21681914ms step_avg:570.05ms +grad accum step:9509/14336 +step:38036/57344 train_time:21683220ms step_avg:570.07ms +step:38037/57344 train_time:21683240ms step_avg:570.06ms +step:38038/57344 train_time:21683475ms step_avg:570.05ms +step:38039/57344 train_time:21684028ms step_avg:570.05ms +grad accum step:9510/14336 +step:38040/57344 train_time:21685409ms step_avg:570.07ms +step:38041/57344 train_time:21685421ms step_avg:570.05ms +step:38042/57344 train_time:21685679ms step_avg:570.05ms +step:38043/57344 train_time:21686251ms step_avg:570.05ms +grad accum step:9511/14336 +step:38044/57344 train_time:21687579ms step_avg:570.07ms +step:38045/57344 train_time:21687597ms step_avg:570.05ms +step:38046/57344 train_time:21687837ms step_avg:570.04ms +step:38047/57344 train_time:21688384ms step_avg:570.04ms +grad accum step:9512/14336 +step:38048/57344 train_time:21689701ms step_avg:570.06ms +step:38049/57344 train_time:21689724ms step_avg:570.05ms +step:38050/57344 train_time:21689956ms step_avg:570.04ms +step:38051/57344 train_time:21690510ms step_avg:570.04ms +grad accum step:9513/14336 +step:38052/57344 train_time:21691841ms step_avg:570.06ms +step:38053/57344 train_time:21691859ms step_avg:570.04ms +step:38054/57344 train_time:21692086ms step_avg:570.03ms +step:38055/57344 train_time:21692628ms step_avg:570.03ms +grad accum step:9514/14336 +step:38056/57344 train_time:21693927ms step_avg:570.05ms +step:38057/57344 train_time:21693944ms step_avg:570.04ms +step:38058/57344 train_time:21694189ms step_avg:570.03ms +step:38059/57344 train_time:21694739ms step_avg:570.03ms +grad accum step:9515/14336 +step:38060/57344 train_time:21696081ms step_avg:570.05ms +step:38061/57344 train_time:21696095ms step_avg:570.03ms +step:38062/57344 train_time:21696337ms step_avg:570.03ms +step:38063/57344 train_time:21696874ms step_avg:570.03ms +grad accum step:9516/14336 +step:38064/57344 train_time:21698161ms step_avg:570.04ms +step:38065/57344 train_time:21698177ms step_avg:570.03ms +step:38066/57344 train_time:21698427ms step_avg:570.02ms +step:38067/57344 train_time:21698989ms step_avg:570.02ms +grad accum step:9517/14336 +step:38068/57344 train_time:21700325ms step_avg:570.04ms +step:38069/57344 train_time:21700342ms step_avg:570.03ms +step:38070/57344 train_time:21700572ms step_avg:570.02ms +step:38071/57344 train_time:21701123ms step_avg:570.02ms +grad accum step:9518/14336 +step:38072/57344 train_time:21702427ms step_avg:570.04ms +step:38073/57344 train_time:21702443ms step_avg:570.02ms +step:38074/57344 train_time:21702696ms step_avg:570.01ms +step:38075/57344 train_time:21703256ms step_avg:570.01ms +grad accum step:9519/14336 +step:38076/57344 train_time:21704569ms step_avg:570.03ms +step:38077/57344 train_time:21704589ms step_avg:570.02ms +step:38078/57344 train_time:21704827ms step_avg:570.01ms +step:38079/57344 train_time:21705379ms step_avg:570.01ms +grad accum step:9520/14336 +step:38080/57344 train_time:21706696ms step_avg:570.03ms +step:38080/57344 val_loss:6.081831 train_time:21706702ms step_avg:570.03ms +step:38081/57344 train_time:21706714ms step_avg:570.01ms +step:38082/57344 train_time:21706938ms step_avg:570.01ms +step:38083/57344 train_time:21707484ms step_avg:570.00ms +grad accum step:9521/14336 +step:38084/57344 train_time:21708899ms step_avg:570.03ms +step:38085/57344 train_time:21708919ms step_avg:570.01ms +step:38086/57344 train_time:21709136ms step_avg:570.00ms +step:38087/57344 train_time:21709682ms step_avg:570.00ms +grad accum step:9522/14336 +step:38088/57344 train_time:21710981ms step_avg:570.02ms +step:38089/57344 train_time:21711000ms step_avg:570.01ms +step:38090/57344 train_time:21711244ms step_avg:570.00ms +step:38091/57344 train_time:21711808ms step_avg:570.00ms +grad accum step:9523/14336 +step:38092/57344 train_time:21713117ms step_avg:570.02ms +step:38093/57344 train_time:21713137ms step_avg:570.00ms +step:38094/57344 train_time:21713370ms step_avg:569.99ms +step:38095/57344 train_time:21713911ms step_avg:569.99ms +grad accum step:9524/14336 +step:38096/57344 train_time:21715191ms step_avg:570.01ms +step:38097/57344 train_time:21715206ms step_avg:570.00ms +step:38098/57344 train_time:21715451ms step_avg:569.99ms +step:38099/57344 train_time:21715996ms step_avg:569.99ms +grad accum step:9525/14336 +step:38100/57344 train_time:21717324ms step_avg:570.01ms +step:38101/57344 train_time:21717340ms step_avg:569.99ms +step:38102/57344 train_time:21717588ms step_avg:569.99ms +step:38103/57344 train_time:21718150ms step_avg:569.99ms +grad accum step:9526/14336 +step:38104/57344 train_time:21719482ms step_avg:570.01ms +step:38105/57344 train_time:21719498ms step_avg:569.99ms +step:38106/57344 train_time:21719743ms step_avg:569.98ms +step:38107/57344 train_time:21720289ms step_avg:569.98ms +grad accum step:9527/14336 +step:38108/57344 train_time:21721613ms step_avg:570.00ms +step:38109/57344 train_time:21721630ms step_avg:569.99ms +step:38110/57344 train_time:21721875ms step_avg:569.98ms +step:38111/57344 train_time:21722429ms step_avg:569.98ms +grad accum step:9528/14336 +step:38112/57344 train_time:21723732ms step_avg:570.00ms +step:38113/57344 train_time:21723749ms step_avg:569.98ms +step:38114/57344 train_time:21723989ms step_avg:569.97ms +step:38115/57344 train_time:21724534ms step_avg:569.97ms +grad accum step:9529/14336 +step:38116/57344 train_time:21725835ms step_avg:569.99ms +step:38117/57344 train_time:21725852ms step_avg:569.98ms +step:38118/57344 train_time:21726103ms step_avg:569.97ms +step:38119/57344 train_time:21726694ms step_avg:569.97ms +grad accum step:9530/14336 +step:38120/57344 train_time:21728094ms step_avg:569.99ms +step:38121/57344 train_time:21728111ms step_avg:569.98ms +step:38122/57344 train_time:21728378ms step_avg:569.97ms +step:38123/57344 train_time:21728978ms step_avg:569.97ms +grad accum step:9531/14336 +step:38124/57344 train_time:21730302ms step_avg:569.99ms +step:38125/57344 train_time:21730319ms step_avg:569.98ms +step:38126/57344 train_time:21730571ms step_avg:569.97ms +step:38127/57344 train_time:21731145ms step_avg:569.97ms +grad accum step:9532/14336 +step:38128/57344 train_time:21732427ms step_avg:569.99ms +step:38129/57344 train_time:21732442ms step_avg:569.97ms +step:38130/57344 train_time:21732685ms step_avg:569.96ms +step:38131/57344 train_time:21733236ms step_avg:569.96ms +grad accum step:9533/14336 +step:38132/57344 train_time:21734564ms step_avg:569.98ms +step:38133/57344 train_time:21734582ms step_avg:569.97ms +step:38134/57344 train_time:21734828ms step_avg:569.96ms +step:38135/57344 train_time:21735382ms step_avg:569.96ms +grad accum step:9534/14336 +step:38136/57344 train_time:21736714ms step_avg:569.98ms +step:38137/57344 train_time:21736730ms step_avg:569.96ms +step:38138/57344 train_time:21736974ms step_avg:569.96ms +step:38139/57344 train_time:21737520ms step_avg:569.96ms +grad accum step:9535/14336 +step:38140/57344 train_time:21738867ms step_avg:569.98ms +step:38141/57344 train_time:21738882ms step_avg:569.96ms +step:38142/57344 train_time:21739130ms step_avg:569.95ms +step:38143/57344 train_time:21739695ms step_avg:569.95ms +grad accum step:9536/14336 +step:38144/57344 train_time:21741032ms step_avg:569.97ms +step:38144/57344 val_loss:6.084065 train_time:21741042ms step_avg:569.97ms +step:38145/57344 train_time:21741054ms step_avg:569.96ms +step:38146/57344 train_time:21741282ms step_avg:569.95ms +step:38147/57344 train_time:21741840ms step_avg:569.95ms +grad accum step:9537/14336 +step:38148/57344 train_time:21743150ms step_avg:569.97ms +step:38149/57344 train_time:21743164ms step_avg:569.95ms +step:38150/57344 train_time:21743402ms step_avg:569.95ms +step:38151/57344 train_time:21743944ms step_avg:569.94ms +grad accum step:9538/14336 +step:38152/57344 train_time:21745218ms step_avg:569.96ms +step:38153/57344 train_time:21745234ms step_avg:569.95ms +step:38154/57344 train_time:21745483ms step_avg:569.94ms +step:38155/57344 train_time:21746041ms step_avg:569.94ms +grad accum step:9539/14336 +step:38156/57344 train_time:21747361ms step_avg:569.96ms +step:38157/57344 train_time:21747374ms step_avg:569.94ms +step:38158/57344 train_time:21747620ms step_avg:569.94ms +step:38159/57344 train_time:21748173ms step_avg:569.94ms +grad accum step:9540/14336 +step:38160/57344 train_time:21749472ms step_avg:569.95ms +step:38161/57344 train_time:21749501ms step_avg:569.94ms +step:38162/57344 train_time:21749739ms step_avg:569.93ms +step:38163/57344 train_time:21750316ms step_avg:569.93ms +grad accum step:9541/14336 +step:38164/57344 train_time:21751631ms step_avg:569.95ms +step:38165/57344 train_time:21751644ms step_avg:569.94ms +step:38166/57344 train_time:21751889ms step_avg:569.93ms +step:38167/57344 train_time:21752435ms step_avg:569.93ms +grad accum step:9542/14336 +step:38168/57344 train_time:21753794ms step_avg:569.95ms +step:38169/57344 train_time:21753812ms step_avg:569.93ms +step:38170/57344 train_time:21754055ms step_avg:569.93ms +step:38171/57344 train_time:21754609ms step_avg:569.93ms +grad accum step:9543/14336 +step:38172/57344 train_time:21755960ms step_avg:569.95ms +step:38173/57344 train_time:21755976ms step_avg:569.93ms +step:38174/57344 train_time:21756218ms step_avg:569.92ms +step:38175/57344 train_time:21756764ms step_avg:569.92ms +grad accum step:9544/14336 +step:38176/57344 train_time:21758092ms step_avg:569.94ms +step:38177/57344 train_time:21758110ms step_avg:569.93ms +step:38178/57344 train_time:21758347ms step_avg:569.92ms +step:38179/57344 train_time:21758889ms step_avg:569.92ms +grad accum step:9545/14336 +step:38180/57344 train_time:21760181ms step_avg:569.94ms +step:38181/57344 train_time:21760197ms step_avg:569.92ms +step:38182/57344 train_time:21760445ms step_avg:569.91ms +step:38183/57344 train_time:21761002ms step_avg:569.91ms +grad accum step:9546/14336 +step:38184/57344 train_time:21762290ms step_avg:569.93ms +step:38185/57344 train_time:21762307ms step_avg:569.92ms +step:38186/57344 train_time:21762549ms step_avg:569.91ms +step:38187/57344 train_time:21763096ms step_avg:569.91ms +grad accum step:9547/14336 +step:38188/57344 train_time:21764376ms step_avg:569.93ms +step:38189/57344 train_time:21764390ms step_avg:569.91ms +step:38190/57344 train_time:21764637ms step_avg:569.90ms +step:38191/57344 train_time:21765181ms step_avg:569.90ms +grad accum step:9548/14336 +step:38192/57344 train_time:21766518ms step_avg:569.92ms +step:38193/57344 train_time:21766535ms step_avg:569.91ms +step:38194/57344 train_time:21766791ms step_avg:569.90ms +step:38195/57344 train_time:21767354ms step_avg:569.90ms +grad accum step:9549/14336 +step:38196/57344 train_time:21768669ms step_avg:569.92ms +step:38197/57344 train_time:21768685ms step_avg:569.91ms +step:38198/57344 train_time:21768933ms step_avg:569.90ms +step:38199/57344 train_time:21769480ms step_avg:569.90ms +grad accum step:9550/14336 +step:38200/57344 train_time:21770762ms step_avg:569.92ms +step:38201/57344 train_time:21770781ms step_avg:569.90ms +step:38202/57344 train_time:21771020ms step_avg:569.89ms +step:38203/57344 train_time:21771566ms step_avg:569.89ms +grad accum step:9551/14336 +step:38204/57344 train_time:21772863ms step_avg:569.91ms +step:38205/57344 train_time:21772879ms step_avg:569.90ms +step:38206/57344 train_time:21773134ms step_avg:569.89ms +step:38207/57344 train_time:21773698ms step_avg:569.89ms +grad accum step:9552/14336 +step:38208/57344 train_time:21774993ms step_avg:569.91ms +step:38208/57344 val_loss:6.071197 train_time:21774996ms step_avg:569.91ms +step:38209/57344 train_time:21775007ms step_avg:569.89ms +step:38210/57344 train_time:21775234ms step_avg:569.88ms +step:38211/57344 train_time:21775792ms step_avg:569.88ms +grad accum step:9553/14336 +step:38212/57344 train_time:21777097ms step_avg:569.90ms +step:38213/57344 train_time:21777114ms step_avg:569.89ms +step:38214/57344 train_time:21777363ms step_avg:569.88ms +step:38215/57344 train_time:21777916ms step_avg:569.88ms +grad accum step:9554/14336 +step:38216/57344 train_time:21779190ms step_avg:569.90ms +step:38217/57344 train_time:21779209ms step_avg:569.88ms +step:38218/57344 train_time:21779455ms step_avg:569.87ms +step:38219/57344 train_time:21780019ms step_avg:569.87ms +grad accum step:9555/14336 +step:38220/57344 train_time:21781350ms step_avg:569.89ms +step:38221/57344 train_time:21781367ms step_avg:569.88ms +step:38222/57344 train_time:21781603ms step_avg:569.87ms +step:38223/57344 train_time:21782160ms step_avg:569.87ms +grad accum step:9556/14336 +step:38224/57344 train_time:21783473ms step_avg:569.89ms +step:38225/57344 train_time:21783494ms step_avg:569.88ms +step:38226/57344 train_time:21783737ms step_avg:569.87ms +step:38227/57344 train_time:21784282ms step_avg:569.87ms +grad accum step:9557/14336 +step:38228/57344 train_time:21785604ms step_avg:569.89ms +step:38229/57344 train_time:21785621ms step_avg:569.87ms +step:38230/57344 train_time:21785875ms step_avg:569.86ms +step:38231/57344 train_time:21786445ms step_avg:569.86ms +grad accum step:9558/14336 +step:38232/57344 train_time:21787788ms step_avg:569.88ms +step:38233/57344 train_time:21788060ms step_avg:569.88ms +step:38234/57344 train_time:21788287ms step_avg:569.87ms +step:38235/57344 train_time:21788855ms step_avg:569.87ms +grad accum step:9559/14336 +step:38236/57344 train_time:21790166ms step_avg:569.89ms +step:38237/57344 train_time:21790185ms step_avg:569.87ms +step:38238/57344 train_time:21790424ms step_avg:569.86ms +step:38239/57344 train_time:21790979ms step_avg:569.86ms +grad accum step:9560/14336 +step:38240/57344 train_time:21792317ms step_avg:569.88ms +step:38241/57344 train_time:21792330ms step_avg:569.87ms +step:38242/57344 train_time:21792579ms step_avg:569.86ms +step:38243/57344 train_time:21793135ms step_avg:569.86ms +grad accum step:9561/14336 +step:38244/57344 train_time:21794420ms step_avg:569.88ms +step:38245/57344 train_time:21794443ms step_avg:569.86ms +step:38246/57344 train_time:21794677ms step_avg:569.86ms +step:38247/57344 train_time:21795230ms step_avg:569.85ms +grad accum step:9562/14336 +step:38248/57344 train_time:21796552ms step_avg:569.87ms +step:38249/57344 train_time:21796568ms step_avg:569.86ms +step:38250/57344 train_time:21796812ms step_avg:569.85ms +step:38251/57344 train_time:21797362ms step_avg:569.85ms +grad accum step:9563/14336 +step:38252/57344 train_time:21798673ms step_avg:569.87ms +step:38253/57344 train_time:21798688ms step_avg:569.86ms +step:38254/57344 train_time:21798933ms step_avg:569.85ms +step:38255/57344 train_time:21799480ms step_avg:569.85ms +grad accum step:9564/14336 +step:38256/57344 train_time:21800764ms step_avg:569.87ms +step:38257/57344 train_time:21800784ms step_avg:569.85ms +step:38258/57344 train_time:21801021ms step_avg:569.84ms +step:38259/57344 train_time:21801576ms step_avg:569.84ms +grad accum step:9565/14336 +step:38260/57344 train_time:21802932ms step_avg:569.86ms +step:38261/57344 train_time:21802950ms step_avg:569.85ms +step:38262/57344 train_time:21803192ms step_avg:569.84ms +step:38263/57344 train_time:21803754ms step_avg:569.84ms +grad accum step:9566/14336 +step:38264/57344 train_time:21805130ms step_avg:569.86ms +step:38265/57344 train_time:21805157ms step_avg:569.85ms +step:38266/57344 train_time:21805383ms step_avg:569.84ms +step:38267/57344 train_time:21805933ms step_avg:569.84ms +grad accum step:9567/14336 +step:38268/57344 train_time:21807251ms step_avg:569.86ms +step:38269/57344 train_time:21807276ms step_avg:569.84ms +step:38270/57344 train_time:21807509ms step_avg:569.83ms +step:38271/57344 train_time:21808050ms step_avg:569.83ms +grad accum step:9568/14336 +step:38272/57344 train_time:21809554ms step_avg:569.86ms +step:38272/57344 val_loss:6.063191 train_time:21809555ms step_avg:569.86ms +step:38273/57344 train_time:21809567ms step_avg:569.84ms +step:38274/57344 train_time:21809791ms step_avg:569.83ms +step:38275/57344 train_time:21810339ms step_avg:569.83ms +grad accum step:9569/14336 +step:38276/57344 train_time:21811633ms step_avg:569.85ms +step:38277/57344 train_time:21811652ms step_avg:569.84ms +step:38278/57344 train_time:21811898ms step_avg:569.83ms +step:38279/57344 train_time:21812447ms step_avg:569.83ms +grad accum step:9570/14336 +step:38280/57344 train_time:21813733ms step_avg:569.85ms +step:38281/57344 train_time:21813750ms step_avg:569.83ms +step:38282/57344 train_time:21813998ms step_avg:569.82ms +step:38283/57344 train_time:21814543ms step_avg:569.82ms +grad accum step:9571/14336 +step:38284/57344 train_time:21815839ms step_avg:569.84ms +step:38285/57344 train_time:21815855ms step_avg:569.83ms +step:38286/57344 train_time:21816108ms step_avg:569.82ms +step:38287/57344 train_time:21816668ms step_avg:569.82ms +grad accum step:9572/14336 +step:38288/57344 train_time:21817972ms step_avg:569.84ms +step:38289/57344 train_time:21817987ms step_avg:569.82ms +step:38290/57344 train_time:21818235ms step_avg:569.82ms +step:38291/57344 train_time:21818782ms step_avg:569.81ms +grad accum step:9573/14336 +step:38292/57344 train_time:21820084ms step_avg:569.83ms +step:38293/57344 train_time:21820101ms step_avg:569.82ms +step:38294/57344 train_time:21820345ms step_avg:569.81ms +step:38295/57344 train_time:21820897ms step_avg:569.81ms +grad accum step:9574/14336 +step:38296/57344 train_time:21822237ms step_avg:569.83ms +step:38297/57344 train_time:21822253ms step_avg:569.82ms +step:38298/57344 train_time:21822506ms step_avg:569.81ms +step:38299/57344 train_time:21823069ms step_avg:569.81ms +grad accum step:9575/14336 +step:38300/57344 train_time:21824386ms step_avg:569.83ms +step:38301/57344 train_time:21824402ms step_avg:569.81ms +step:38302/57344 train_time:21824657ms step_avg:569.80ms +step:38303/57344 train_time:21825222ms step_avg:569.80ms +grad accum step:9576/14336 +step:38304/57344 train_time:21826528ms step_avg:569.82ms +step:38305/57344 train_time:21826545ms step_avg:569.81ms +step:38306/57344 train_time:21826794ms step_avg:569.80ms +step:38307/57344 train_time:21827340ms step_avg:569.80ms +grad accum step:9577/14336 +step:38308/57344 train_time:21828615ms step_avg:569.82ms +step:38309/57344 train_time:21828632ms step_avg:569.80ms +step:38310/57344 train_time:21828880ms step_avg:569.80ms +step:38311/57344 train_time:21829428ms step_avg:569.80ms +grad accum step:9578/14336 +step:38312/57344 train_time:21830721ms step_avg:569.81ms +step:38313/57344 train_time:21830738ms step_avg:569.80ms +step:38314/57344 train_time:21830984ms step_avg:569.79ms +step:38315/57344 train_time:21831534ms step_avg:569.79ms +grad accum step:9579/14336 +step:38316/57344 train_time:21832853ms step_avg:569.81ms +step:38317/57344 train_time:21832870ms step_avg:569.80ms +step:38318/57344 train_time:21833118ms step_avg:569.79ms +step:38319/57344 train_time:21833672ms step_avg:569.79ms +grad accum step:9580/14336 +step:38320/57344 train_time:21834998ms step_avg:569.81ms +step:38321/57344 train_time:21835014ms step_avg:569.79ms +step:38322/57344 train_time:21835265ms step_avg:569.78ms +step:38323/57344 train_time:21835814ms step_avg:569.78ms +grad accum step:9581/14336 +step:38324/57344 train_time:21837161ms step_avg:569.80ms +step:38325/57344 train_time:21837178ms step_avg:569.79ms +step:38326/57344 train_time:21837430ms step_avg:569.78ms +step:38327/57344 train_time:21837990ms step_avg:569.78ms +grad accum step:9582/14336 +step:38328/57344 train_time:21839323ms step_avg:569.80ms +step:38329/57344 train_time:21839340ms step_avg:569.79ms +step:38330/57344 train_time:21839597ms step_avg:569.78ms +step:38331/57344 train_time:21840162ms step_avg:569.78ms +grad accum step:9583/14336 +step:38332/57344 train_time:21841478ms step_avg:569.80ms +step:38333/57344 train_time:21841495ms step_avg:569.78ms +step:38334/57344 train_time:21841743ms step_avg:569.77ms +step:38335/57344 train_time:21842292ms step_avg:569.77ms +grad accum step:9584/14336 +step:38336/57344 train_time:21843595ms step_avg:569.79ms +step:38336/57344 val_loss:6.052119 train_time:21843598ms step_avg:569.79ms +step:38337/57344 train_time:21843610ms step_avg:569.78ms +step:38338/57344 train_time:21843841ms step_avg:569.77ms +step:38339/57344 train_time:21844401ms step_avg:569.77ms +grad accum step:9585/14336 +step:38340/57344 train_time:21845704ms step_avg:569.79ms +step:38341/57344 train_time:21845721ms step_avg:569.77ms +step:38342/57344 train_time:21845971ms step_avg:569.77ms +step:38343/57344 train_time:21846517ms step_avg:569.77ms +grad accum step:9586/14336 +step:38344/57344 train_time:21847813ms step_avg:569.78ms +step:38345/57344 train_time:21847828ms step_avg:569.77ms +step:38346/57344 train_time:21848079ms step_avg:569.76ms +step:38347/57344 train_time:21848646ms step_avg:569.76ms +grad accum step:9587/14336 +step:38348/57344 train_time:21849975ms step_avg:569.78ms +step:38349/57344 train_time:21849991ms step_avg:569.77ms +step:38350/57344 train_time:21850240ms step_avg:569.76ms +step:38351/57344 train_time:21850796ms step_avg:569.76ms +grad accum step:9588/14336 +step:38352/57344 train_time:21852134ms step_avg:569.78ms +step:38353/57344 train_time:21852150ms step_avg:569.76ms +step:38354/57344 train_time:21852373ms step_avg:569.75ms +step:38355/57344 train_time:21852926ms step_avg:569.75ms +grad accum step:9589/14336 +step:38356/57344 train_time:21854200ms step_avg:569.77ms +step:38357/57344 train_time:21854217ms step_avg:569.76ms +step:38358/57344 train_time:21854467ms step_avg:569.75ms +step:38359/57344 train_time:21855016ms step_avg:569.75ms +grad accum step:9590/14336 +step:38360/57344 train_time:21856316ms step_avg:569.77ms +step:38361/57344 train_time:21856331ms step_avg:569.75ms +step:38362/57344 train_time:21856571ms step_avg:569.75ms +step:38363/57344 train_time:21857127ms step_avg:569.74ms +grad accum step:9591/14336 +step:38364/57344 train_time:21858440ms step_avg:569.76ms +step:38365/57344 train_time:21858455ms step_avg:569.75ms +step:38366/57344 train_time:21858703ms step_avg:569.74ms +step:38367/57344 train_time:21859249ms step_avg:569.74ms +grad accum step:9592/14336 +step:38368/57344 train_time:21860583ms step_avg:569.76ms +step:38369/57344 train_time:21860600ms step_avg:569.75ms +step:38370/57344 train_time:21860847ms step_avg:569.74ms +step:38371/57344 train_time:21861405ms step_avg:569.74ms +grad accum step:9593/14336 +step:38372/57344 train_time:21862748ms step_avg:569.76ms +step:38373/57344 train_time:21862763ms step_avg:569.74ms +step:38374/57344 train_time:21863005ms step_avg:569.73ms +step:38375/57344 train_time:21863566ms step_avg:569.73ms +grad accum step:9594/14336 +step:38376/57344 train_time:21864880ms step_avg:569.75ms +step:38377/57344 train_time:21864896ms step_avg:569.74ms +step:38378/57344 train_time:21865138ms step_avg:569.73ms +step:38379/57344 train_time:21865685ms step_avg:569.73ms +grad accum step:9595/14336 +step:38380/57344 train_time:21866985ms step_avg:569.75ms +step:38381/57344 train_time:21866999ms step_avg:569.74ms +step:38382/57344 train_time:21867249ms step_avg:569.73ms +step:38383/57344 train_time:21867804ms step_avg:569.73ms +grad accum step:9596/14336 +step:38384/57344 train_time:21869115ms step_avg:569.75ms +step:38385/57344 train_time:21869133ms step_avg:569.73ms +step:38386/57344 train_time:21869371ms step_avg:569.72ms +step:38387/57344 train_time:21869923ms step_avg:569.72ms +grad accum step:9597/14336 +step:38388/57344 train_time:21871229ms step_avg:569.74ms +step:38389/57344 train_time:21871244ms step_avg:569.73ms +step:38390/57344 train_time:21871490ms step_avg:569.72ms +step:38391/57344 train_time:21872036ms step_avg:569.72ms +grad accum step:9598/14336 +step:38392/57344 train_time:21873380ms step_avg:569.74ms +step:38393/57344 train_time:21873401ms step_avg:569.72ms +step:38394/57344 train_time:21873635ms step_avg:569.71ms +step:38395/57344 train_time:21874179ms step_avg:569.71ms +grad accum step:9599/14336 +step:38396/57344 train_time:21875477ms step_avg:569.73ms +step:38397/57344 train_time:21875494ms step_avg:569.72ms +step:38398/57344 train_time:21875745ms step_avg:569.71ms +step:38399/57344 train_time:21876304ms step_avg:569.71ms +grad accum step:9600/14336 +step:38400/57344 train_time:21877613ms step_avg:569.73ms +step:38400/57344 val_loss:6.033501 train_time:21877620ms step_avg:569.73ms +step:38401/57344 train_time:21877632ms step_avg:569.72ms +step:38402/57344 train_time:21877856ms step_avg:569.71ms +step:38403/57344 train_time:21878412ms step_avg:569.71ms +grad accum step:9601/14336 +step:38404/57344 train_time:21879753ms step_avg:569.73ms +step:38405/57344 train_time:21879774ms step_avg:569.71ms +step:38406/57344 train_time:21879993ms step_avg:569.70ms +step:38407/57344 train_time:21880535ms step_avg:569.70ms +grad accum step:9602/14336 +step:38408/57344 train_time:21881832ms step_avg:569.72ms +step:38409/57344 train_time:21881847ms step_avg:569.71ms +step:38410/57344 train_time:21882096ms step_avg:569.70ms +step:38411/57344 train_time:21882652ms step_avg:569.70ms +grad accum step:9603/14336 +step:38412/57344 train_time:21884021ms step_avg:569.72ms +step:38413/57344 train_time:21884039ms step_avg:569.70ms +step:38414/57344 train_time:21884261ms step_avg:569.69ms +step:38415/57344 train_time:21884821ms step_avg:569.69ms +grad accum step:9604/14336 +step:38416/57344 train_time:21886176ms step_avg:569.72ms +step:38417/57344 train_time:21886191ms step_avg:569.70ms +step:38418/57344 train_time:21886428ms step_avg:569.69ms +step:38419/57344 train_time:21886986ms step_avg:569.69ms +grad accum step:9605/14336 +step:38420/57344 train_time:21888293ms step_avg:569.71ms +step:38421/57344 train_time:21888310ms step_avg:569.70ms +step:38422/57344 train_time:21888560ms step_avg:569.69ms +step:38423/57344 train_time:21889120ms step_avg:569.69ms +grad accum step:9606/14336 +step:38424/57344 train_time:21890444ms step_avg:569.71ms +step:38425/57344 train_time:21890459ms step_avg:569.69ms +step:38426/57344 train_time:21890704ms step_avg:569.68ms +step:38427/57344 train_time:21891247ms step_avg:569.68ms +grad accum step:9607/14336 +step:38428/57344 train_time:21892557ms step_avg:569.70ms +step:38429/57344 train_time:21892572ms step_avg:569.69ms +step:38430/57344 train_time:21892819ms step_avg:569.68ms +step:38431/57344 train_time:21893367ms step_avg:569.68ms +grad accum step:9608/14336 +step:38432/57344 train_time:21894670ms step_avg:569.70ms +step:38433/57344 train_time:21894687ms step_avg:569.68ms +step:38434/57344 train_time:21894933ms step_avg:569.68ms +step:38435/57344 train_time:21895479ms step_avg:569.68ms +grad accum step:9609/14336 +step:38436/57344 train_time:21896798ms step_avg:569.70ms +step:38437/57344 train_time:21896815ms step_avg:569.68ms +step:38438/57344 train_time:21897064ms step_avg:569.67ms +step:38439/57344 train_time:21897610ms step_avg:569.67ms +grad accum step:9610/14336 +step:38440/57344 train_time:21898903ms step_avg:569.69ms +step:38441/57344 train_time:21898921ms step_avg:569.68ms +step:38442/57344 train_time:21899173ms step_avg:569.67ms +step:38443/57344 train_time:21899733ms step_avg:569.67ms +grad accum step:9611/14336 +step:38444/57344 train_time:21901071ms step_avg:569.69ms +step:38445/57344 train_time:21901088ms step_avg:569.67ms +step:38446/57344 train_time:21901347ms step_avg:569.67ms +step:38447/57344 train_time:21901920ms step_avg:569.67ms +grad accum step:9612/14336 +step:38448/57344 train_time:21903249ms step_avg:569.69ms +step:38449/57344 train_time:21903266ms step_avg:569.67ms +step:38450/57344 train_time:21903513ms step_avg:569.66ms +step:38451/57344 train_time:21904055ms step_avg:569.66ms +grad accum step:9613/14336 +step:38452/57344 train_time:21905340ms step_avg:569.68ms +step:38453/57344 train_time:21905352ms step_avg:569.67ms +step:38454/57344 train_time:21905593ms step_avg:569.66ms +step:38455/57344 train_time:21906145ms step_avg:569.66ms +grad accum step:9614/14336 +step:38456/57344 train_time:21907448ms step_avg:569.68ms +step:38457/57344 train_time:21907467ms step_avg:569.66ms +step:38458/57344 train_time:21907707ms step_avg:569.65ms +step:38459/57344 train_time:21908255ms step_avg:569.65ms +grad accum step:9615/14336 +step:38460/57344 train_time:21909553ms step_avg:569.67ms +step:38461/57344 train_time:21909570ms step_avg:569.66ms +step:38462/57344 train_time:21909815ms step_avg:569.65ms +step:38463/57344 train_time:21910361ms step_avg:569.65ms +grad accum step:9616/14336 +step:38464/57344 train_time:21911681ms step_avg:569.67ms +step:38464/57344 val_loss:6.012586 train_time:21911681ms step_avg:569.67ms +step:38465/57344 train_time:21911693ms step_avg:569.65ms +step:38466/57344 train_time:21911941ms step_avg:569.64ms +step:38467/57344 train_time:21912555ms step_avg:569.65ms +grad accum step:9617/14336 +step:38468/57344 train_time:21913875ms step_avg:569.67ms +step:38469/57344 train_time:21913891ms step_avg:569.65ms +step:38470/57344 train_time:21914138ms step_avg:569.64ms +step:38471/57344 train_time:21914686ms step_avg:569.64ms +grad accum step:9618/14336 +step:38472/57344 train_time:21916013ms step_avg:569.66ms +step:38473/57344 train_time:21916027ms step_avg:569.65ms +step:38474/57344 train_time:21916278ms step_avg:569.64ms +step:38475/57344 train_time:21916831ms step_avg:569.64ms +grad accum step:9619/14336 +step:38476/57344 train_time:21918187ms step_avg:569.66ms +step:38477/57344 train_time:21918203ms step_avg:569.64ms +step:38478/57344 train_time:21918452ms step_avg:569.64ms +step:38479/57344 train_time:21919003ms step_avg:569.64ms +grad accum step:9620/14336 +step:38480/57344 train_time:21920311ms step_avg:569.65ms +step:38481/57344 train_time:21920327ms step_avg:569.64ms +step:38482/57344 train_time:21920581ms step_avg:569.63ms +step:38483/57344 train_time:21921140ms step_avg:569.63ms +grad accum step:9621/14336 +step:38484/57344 train_time:21922475ms step_avg:569.65ms +step:38485/57344 train_time:21922492ms step_avg:569.64ms +step:38486/57344 train_time:21922741ms step_avg:569.63ms +step:38487/57344 train_time:21923289ms step_avg:569.63ms +grad accum step:9622/14336 +step:38488/57344 train_time:21924595ms step_avg:569.65ms +step:38489/57344 train_time:21924607ms step_avg:569.63ms +step:38490/57344 train_time:21924854ms step_avg:569.62ms +step:38491/57344 train_time:21925413ms step_avg:569.62ms +grad accum step:9623/14336 +step:38492/57344 train_time:21926729ms step_avg:569.64ms +step:38493/57344 train_time:21926746ms step_avg:569.63ms +step:38494/57344 train_time:21926999ms step_avg:569.62ms +step:38495/57344 train_time:21927565ms step_avg:569.62ms +grad accum step:9624/14336 +step:38496/57344 train_time:21928863ms step_avg:569.64ms +step:38497/57344 train_time:21928880ms step_avg:569.63ms +step:38498/57344 train_time:21929126ms step_avg:569.62ms +step:38499/57344 train_time:21929670ms step_avg:569.62ms +grad accum step:9625/14336 +step:38500/57344 train_time:21930975ms step_avg:569.64ms +step:38501/57344 train_time:21930987ms step_avg:569.62ms +step:38502/57344 train_time:21931217ms step_avg:569.61ms +step:38503/57344 train_time:21931761ms step_avg:569.61ms +grad accum step:9626/14336 +step:38504/57344 train_time:21933061ms step_avg:569.63ms +step:38505/57344 train_time:21933078ms step_avg:569.62ms +step:38506/57344 train_time:21933328ms step_avg:569.61ms +step:38507/57344 train_time:21933885ms step_avg:569.61ms +grad accum step:9627/14336 +step:38508/57344 train_time:21935170ms step_avg:569.63ms +step:38509/57344 train_time:21935187ms step_avg:569.61ms +step:38510/57344 train_time:21935441ms step_avg:569.60ms +step:38511/57344 train_time:21936005ms step_avg:569.60ms +grad accum step:9628/14336 +step:38512/57344 train_time:21937302ms step_avg:569.62ms +step:38513/57344 train_time:21937319ms step_avg:569.61ms +step:38514/57344 train_time:21937566ms step_avg:569.60ms +step:38515/57344 train_time:21938120ms step_avg:569.60ms +grad accum step:9629/14336 +step:38516/57344 train_time:21939417ms step_avg:569.62ms +step:38517/57344 train_time:21939434ms step_avg:569.60ms +step:38518/57344 train_time:21939680ms step_avg:569.60ms +step:38519/57344 train_time:21940224ms step_avg:569.59ms +grad accum step:9630/14336 +step:38520/57344 train_time:21941499ms step_avg:569.61ms +step:38521/57344 train_time:21941516ms step_avg:569.60ms +step:38522/57344 train_time:21941770ms step_avg:569.59ms +step:38523/57344 train_time:21942328ms step_avg:569.59ms +grad accum step:9631/14336 +step:38524/57344 train_time:21943690ms step_avg:569.61ms +step:38525/57344 train_time:21943708ms step_avg:569.60ms +step:38526/57344 train_time:21943953ms step_avg:569.59ms +step:38527/57344 train_time:21944512ms step_avg:569.59ms +grad accum step:9632/14336 +step:38528/57344 train_time:21945841ms step_avg:569.61ms +step:38528/57344 val_loss:6.000410 train_time:21945841ms step_avg:569.61ms +step:38529/57344 train_time:21945853ms step_avg:569.59ms +step:38530/57344 train_time:21946080ms step_avg:569.58ms +step:38531/57344 train_time:21946636ms step_avg:569.58ms +grad accum step:9633/14336 +step:38532/57344 train_time:21947939ms step_avg:569.60ms +step:38533/57344 train_time:21947954ms step_avg:569.59ms +step:38534/57344 train_time:21948206ms step_avg:569.58ms +step:38535/57344 train_time:21948768ms step_avg:569.58ms +grad accum step:9634/14336 +step:38536/57344 train_time:21950075ms step_avg:569.60ms +step:38537/57344 train_time:21950090ms step_avg:569.58ms +step:38538/57344 train_time:21950329ms step_avg:569.58ms +step:38539/57344 train_time:21950872ms step_avg:569.58ms +grad accum step:9635/14336 +step:38540/57344 train_time:21952173ms step_avg:569.59ms +step:38541/57344 train_time:21952193ms step_avg:569.58ms +step:38542/57344 train_time:21952425ms step_avg:569.57ms +step:38543/57344 train_time:21952974ms step_avg:569.57ms +grad accum step:9636/14336 +step:38544/57344 train_time:21954291ms step_avg:569.59ms +step:38545/57344 train_time:21954307ms step_avg:569.58ms +step:38546/57344 train_time:21954529ms step_avg:569.57ms +step:38547/57344 train_time:21955073ms step_avg:569.57ms +grad accum step:9637/14336 +step:38548/57344 train_time:21956424ms step_avg:569.59ms +step:38549/57344 train_time:21956441ms step_avg:569.57ms +step:38550/57344 train_time:21956684ms step_avg:569.56ms +step:38551/57344 train_time:21957247ms step_avg:569.56ms +grad accum step:9638/14336 +step:38552/57344 train_time:21958637ms step_avg:569.58ms +step:38553/57344 train_time:21958653ms step_avg:569.57ms +step:38554/57344 train_time:21958907ms step_avg:569.56ms +step:38555/57344 train_time:21959472ms step_avg:569.56ms +grad accum step:9639/14336 +step:38556/57344 train_time:21960775ms step_avg:569.58ms +step:38557/57344 train_time:21960800ms step_avg:569.57ms +step:38558/57344 train_time:21961026ms step_avg:569.56ms +step:38559/57344 train_time:21961598ms step_avg:569.56ms +grad accum step:9640/14336 +step:38560/57344 train_time:21962931ms step_avg:569.58ms +step:38561/57344 train_time:21962946ms step_avg:569.56ms +step:38562/57344 train_time:21963188ms step_avg:569.56ms +step:38563/57344 train_time:21963729ms step_avg:569.55ms +grad accum step:9641/14336 +step:38564/57344 train_time:21965016ms step_avg:569.57ms +step:38565/57344 train_time:21965037ms step_avg:569.56ms +step:38566/57344 train_time:21965278ms step_avg:569.55ms +step:38567/57344 train_time:21965839ms step_avg:569.55ms +grad accum step:9642/14336 +step:38568/57344 train_time:21967203ms step_avg:569.57ms +step:38569/57344 train_time:21967223ms step_avg:569.56ms +step:38570/57344 train_time:21967442ms step_avg:569.55ms +step:38571/57344 train_time:21967992ms step_avg:569.55ms +grad accum step:9643/14336 +step:38572/57344 train_time:21969310ms step_avg:569.57ms +step:38573/57344 train_time:21969325ms step_avg:569.55ms +step:38574/57344 train_time:21969577ms step_avg:569.54ms +step:38575/57344 train_time:21970133ms step_avg:569.54ms +grad accum step:9644/14336 +step:38576/57344 train_time:21971464ms step_avg:569.56ms +step:38577/57344 train_time:21971478ms step_avg:569.55ms +step:38578/57344 train_time:21971701ms step_avg:569.54ms +step:38579/57344 train_time:21972254ms step_avg:569.54ms +grad accum step:9645/14336 +step:38580/57344 train_time:21973581ms step_avg:569.56ms +step:38581/57344 train_time:21973601ms step_avg:569.54ms +step:38582/57344 train_time:21973829ms step_avg:569.54ms +step:38583/57344 train_time:21974390ms step_avg:569.54ms +grad accum step:9646/14336 +step:38584/57344 train_time:21975669ms step_avg:569.55ms +step:38585/57344 train_time:21975688ms step_avg:569.54ms +step:38586/57344 train_time:21975930ms step_avg:569.53ms +step:38587/57344 train_time:21976478ms step_avg:569.53ms +grad accum step:9647/14336 +step:38588/57344 train_time:21977793ms step_avg:569.55ms +step:38589/57344 train_time:21977810ms step_avg:569.54ms +step:38590/57344 train_time:21978056ms step_avg:569.53ms +step:38591/57344 train_time:21978616ms step_avg:569.53ms +grad accum step:9648/14336 +step:38592/57344 train_time:21979914ms step_avg:569.55ms +step:38592/57344 val_loss:5.979206 train_time:21979915ms step_avg:569.55ms +step:38593/57344 train_time:21979927ms step_avg:569.53ms +step:38594/57344 train_time:21980158ms step_avg:569.52ms +step:38595/57344 train_time:21980725ms step_avg:569.52ms +grad accum step:9649/14336 +step:38596/57344 train_time:21982028ms step_avg:569.54ms +step:38597/57344 train_time:21982045ms step_avg:569.53ms +step:38598/57344 train_time:21982295ms step_avg:569.52ms +step:38599/57344 train_time:21982850ms step_avg:569.52ms +grad accum step:9650/14336 +step:38600/57344 train_time:21984169ms step_avg:569.54ms +step:38601/57344 train_time:21984181ms step_avg:569.52ms +step:38602/57344 train_time:21984420ms step_avg:569.52ms +step:38603/57344 train_time:21984991ms step_avg:569.52ms +grad accum step:9651/14336 +step:38604/57344 train_time:21986297ms step_avg:569.53ms +step:38605/57344 train_time:21986315ms step_avg:569.52ms +step:38606/57344 train_time:21986567ms step_avg:569.51ms +step:38607/57344 train_time:21987142ms step_avg:569.51ms +grad accum step:9652/14336 +step:38608/57344 train_time:21988503ms step_avg:569.53ms +step:38609/57344 train_time:21988519ms step_avg:569.52ms +step:38610/57344 train_time:21988766ms step_avg:569.51ms +step:38611/57344 train_time:21989314ms step_avg:569.51ms +grad accum step:9653/14336 +step:38612/57344 train_time:21990632ms step_avg:569.53ms +step:38613/57344 train_time:21990646ms step_avg:569.51ms +step:38614/57344 train_time:21990896ms step_avg:569.51ms +step:38615/57344 train_time:21991442ms step_avg:569.51ms +grad accum step:9654/14336 +step:38616/57344 train_time:21992774ms step_avg:569.52ms +step:38617/57344 train_time:21992788ms step_avg:569.51ms +step:38618/57344 train_time:21993037ms step_avg:569.50ms +step:38619/57344 train_time:21993594ms step_avg:569.50ms +grad accum step:9655/14336 +step:38620/57344 train_time:21994912ms step_avg:569.52ms +step:38621/57344 train_time:21994929ms step_avg:569.51ms +step:38622/57344 train_time:21995179ms step_avg:569.50ms +step:38623/57344 train_time:21995734ms step_avg:569.50ms +grad accum step:9656/14336 +step:38624/57344 train_time:21997047ms step_avg:569.52ms +step:38625/57344 train_time:21997064ms step_avg:569.50ms +step:38626/57344 train_time:21997320ms step_avg:569.50ms +step:38627/57344 train_time:21997882ms step_avg:569.49ms +grad accum step:9657/14336 +step:38628/57344 train_time:21999229ms step_avg:569.52ms +step:38629/57344 train_time:21999241ms step_avg:569.50ms +step:38630/57344 train_time:21999487ms step_avg:569.49ms +step:38631/57344 train_time:22000049ms step_avg:569.49ms +grad accum step:9658/14336 +step:38632/57344 train_time:22001364ms step_avg:569.51ms +step:38633/57344 train_time:22001381ms step_avg:569.50ms +step:38634/57344 train_time:22001630ms step_avg:569.49ms +step:38635/57344 train_time:22002181ms step_avg:569.49ms +grad accum step:9659/14336 +step:38636/57344 train_time:22003502ms step_avg:569.51ms +step:38637/57344 train_time:22003514ms step_avg:569.49ms +step:38638/57344 train_time:22003755ms step_avg:569.48ms +step:38639/57344 train_time:22004300ms step_avg:569.48ms +grad accum step:9660/14336 +step:38640/57344 train_time:22005579ms step_avg:569.50ms +step:38641/57344 train_time:22005596ms step_avg:569.49ms +step:38642/57344 train_time:22005844ms step_avg:569.48ms +step:38643/57344 train_time:22006395ms step_avg:569.48ms +grad accum step:9661/14336 +step:38644/57344 train_time:22007694ms step_avg:569.50ms +step:38645/57344 train_time:22007712ms step_avg:569.48ms +step:38646/57344 train_time:22007969ms step_avg:569.48ms +step:38647/57344 train_time:22008532ms step_avg:569.48ms +grad accum step:9662/14336 +step:38648/57344 train_time:22009849ms step_avg:569.50ms +step:38649/57344 train_time:22009865ms step_avg:569.48ms +step:38650/57344 train_time:22010113ms step_avg:569.47ms +step:38651/57344 train_time:22010653ms step_avg:569.47ms +grad accum step:9663/14336 +step:38652/57344 train_time:22011931ms step_avg:569.49ms +step:38653/57344 train_time:22011947ms step_avg:569.48ms +step:38654/57344 train_time:22012190ms step_avg:569.47ms +step:38655/57344 train_time:22012737ms step_avg:569.47ms +grad accum step:9664/14336 +step:38656/57344 train_time:22014033ms step_avg:569.49ms +step:38656/57344 val_loss:5.959765 train_time:22014034ms step_avg:569.49ms +step:38657/57344 train_time:22014046ms step_avg:569.47ms +step:38658/57344 train_time:22014269ms step_avg:569.46ms +step:38659/57344 train_time:22014817ms step_avg:569.46ms +grad accum step:9665/14336 +step:38660/57344 train_time:22016254ms step_avg:569.48ms +step:38661/57344 train_time:22016282ms step_avg:569.47ms +step:38662/57344 train_time:22016497ms step_avg:569.46ms +step:38663/57344 train_time:22017048ms step_avg:569.46ms +grad accum step:9666/14336 +step:38664/57344 train_time:22018368ms step_avg:569.48ms +step:38665/57344 train_time:22018384ms step_avg:569.47ms +step:38666/57344 train_time:22018630ms step_avg:569.46ms +step:38667/57344 train_time:22019176ms step_avg:569.46ms +grad accum step:9667/14336 +step:38668/57344 train_time:22020463ms step_avg:569.48ms +step:38669/57344 train_time:22020480ms step_avg:569.46ms +step:38670/57344 train_time:22020727ms step_avg:569.45ms +step:38671/57344 train_time:22021273ms step_avg:569.45ms +grad accum step:9668/14336 +step:38672/57344 train_time:22022585ms step_avg:569.47ms +step:38673/57344 train_time:22022602ms step_avg:569.46ms +step:38674/57344 train_time:22022852ms step_avg:569.45ms +step:38675/57344 train_time:22023411ms step_avg:569.45ms +grad accum step:9669/14336 +step:38676/57344 train_time:22024743ms step_avg:569.47ms +step:38677/57344 train_time:22024759ms step_avg:569.45ms +step:38678/57344 train_time:22025003ms step_avg:569.45ms +step:38679/57344 train_time:22025545ms step_avg:569.44ms +grad accum step:9670/14336 +step:38680/57344 train_time:22026845ms step_avg:569.46ms +step:38681/57344 train_time:22026871ms step_avg:569.45ms +step:38682/57344 train_time:22027100ms step_avg:569.44ms +step:38683/57344 train_time:22027666ms step_avg:569.44ms +grad accum step:9671/14336 +step:38684/57344 train_time:22029014ms step_avg:569.46ms +step:38685/57344 train_time:22029030ms step_avg:569.45ms +step:38686/57344 train_time:22029281ms step_avg:569.44ms +step:38687/57344 train_time:22029828ms step_avg:569.44ms +grad accum step:9672/14336 +step:38688/57344 train_time:22031117ms step_avg:569.46ms +step:38689/57344 train_time:22031134ms step_avg:569.44ms +step:38690/57344 train_time:22031379ms step_avg:569.43ms +step:38691/57344 train_time:22031927ms step_avg:569.43ms +grad accum step:9673/14336 +step:38692/57344 train_time:22033262ms step_avg:569.45ms +step:38693/57344 train_time:22033279ms step_avg:569.44ms +step:38694/57344 train_time:22033526ms step_avg:569.43ms +step:38695/57344 train_time:22034069ms step_avg:569.43ms +grad accum step:9674/14336 +step:38696/57344 train_time:22035364ms step_avg:569.45ms +step:38697/57344 train_time:22035381ms step_avg:569.43ms +step:38698/57344 train_time:22035650ms step_avg:569.43ms +step:38699/57344 train_time:22036270ms step_avg:569.43ms +grad accum step:9675/14336 +step:38700/57344 train_time:22037657ms step_avg:569.45ms +step:38701/57344 train_time:22037669ms step_avg:569.43ms +step:38702/57344 train_time:22037899ms step_avg:569.43ms +step:38703/57344 train_time:22038442ms step_avg:569.42ms +grad accum step:9676/14336 +step:38704/57344 train_time:22039804ms step_avg:569.45ms +step:38705/57344 train_time:22039821ms step_avg:569.43ms +step:38706/57344 train_time:22040083ms step_avg:569.42ms +step:38707/57344 train_time:22040667ms step_avg:569.42ms +grad accum step:9677/14336 +step:38708/57344 train_time:22041963ms step_avg:569.44ms +step:38709/57344 train_time:22041979ms step_avg:569.43ms +step:38710/57344 train_time:22042227ms step_avg:569.42ms +step:38711/57344 train_time:22042770ms step_avg:569.42ms +grad accum step:9678/14336 +step:38712/57344 train_time:22044060ms step_avg:569.44ms +step:38713/57344 train_time:22044077ms step_avg:569.42ms +step:38714/57344 train_time:22044324ms step_avg:569.41ms +step:38715/57344 train_time:22044867ms step_avg:569.41ms +grad accum step:9679/14336 +step:38716/57344 train_time:22046166ms step_avg:569.43ms +step:38717/57344 train_time:22046180ms step_avg:569.42ms +step:38718/57344 train_time:22046429ms step_avg:569.41ms +step:38719/57344 train_time:22046981ms step_avg:569.41ms +grad accum step:9680/14336 +step:38720/57344 train_time:22048294ms step_avg:569.43ms +step:38720/57344 val_loss:5.957704 train_time:22048298ms step_avg:569.43ms +step:38721/57344 train_time:22048310ms step_avg:569.41ms +step:38722/57344 train_time:22048533ms step_avg:569.41ms +step:38723/57344 train_time:22049085ms step_avg:569.41ms +grad accum step:9681/14336 +step:38724/57344 train_time:22050408ms step_avg:569.42ms +step:38725/57344 train_time:22050421ms step_avg:569.41ms +step:38726/57344 train_time:22050657ms step_avg:569.40ms +step:38727/57344 train_time:22051217ms step_avg:569.40ms +grad accum step:9682/14336 +step:38728/57344 train_time:22052539ms step_avg:569.42ms +step:38729/57344 train_time:22052552ms step_avg:569.41ms +step:38730/57344 train_time:22052798ms step_avg:569.40ms +step:38731/57344 train_time:22053345ms step_avg:569.40ms +grad accum step:9683/14336 +step:38732/57344 train_time:22054650ms step_avg:569.42ms +step:38733/57344 train_time:22054666ms step_avg:569.40ms +step:38734/57344 train_time:22054909ms step_avg:569.39ms +step:38735/57344 train_time:22055464ms step_avg:569.39ms +grad accum step:9684/14336 +step:38736/57344 train_time:22056815ms step_avg:569.41ms +step:38737/57344 train_time:22056832ms step_avg:569.40ms +step:38738/57344 train_time:22057084ms step_avg:569.39ms +step:38739/57344 train_time:22057640ms step_avg:569.39ms +grad accum step:9685/14336 +step:38740/57344 train_time:22058948ms step_avg:569.41ms +step:38741/57344 train_time:22058966ms step_avg:569.40ms +step:38742/57344 train_time:22059205ms step_avg:569.39ms +step:38743/57344 train_time:22059746ms step_avg:569.39ms +grad accum step:9686/14336 +step:38744/57344 train_time:22061063ms step_avg:569.41ms +step:38745/57344 train_time:22061080ms step_avg:569.39ms +step:38746/57344 train_time:22061332ms step_avg:569.38ms +step:38747/57344 train_time:22061893ms step_avg:569.38ms +grad accum step:9687/14336 +step:38748/57344 train_time:22063198ms step_avg:569.40ms +step:38749/57344 train_time:22063214ms step_avg:569.39ms +step:38750/57344 train_time:22063465ms step_avg:569.38ms +step:38751/57344 train_time:22064020ms step_avg:569.38ms +grad accum step:9688/14336 +step:38752/57344 train_time:22065348ms step_avg:569.40ms +step:38753/57344 train_time:22065371ms step_avg:569.38ms +step:38754/57344 train_time:22065602ms step_avg:569.38ms +step:38755/57344 train_time:22066149ms step_avg:569.38ms +grad accum step:9689/14336 +step:38756/57344 train_time:22067438ms step_avg:569.39ms +step:38757/57344 train_time:22067453ms step_avg:569.38ms +step:38758/57344 train_time:22067700ms step_avg:569.37ms +step:38759/57344 train_time:22068262ms step_avg:569.37ms +grad accum step:9690/14336 +step:38760/57344 train_time:22069591ms step_avg:569.39ms +step:38761/57344 train_time:22069606ms step_avg:569.38ms +step:38762/57344 train_time:22069851ms step_avg:569.37ms +step:38763/57344 train_time:22070386ms step_avg:569.37ms +grad accum step:9691/14336 +step:38764/57344 train_time:22071669ms step_avg:569.39ms +step:38765/57344 train_time:22071689ms step_avg:569.37ms +step:38766/57344 train_time:22071933ms step_avg:569.36ms +step:38767/57344 train_time:22072489ms step_avg:569.36ms +grad accum step:9692/14336 +step:38768/57344 train_time:22073772ms step_avg:569.38ms +step:38769/57344 train_time:22073787ms step_avg:569.37ms +step:38770/57344 train_time:22074036ms step_avg:569.36ms +step:38771/57344 train_time:22074587ms step_avg:569.36ms +grad accum step:9693/14336 +step:38772/57344 train_time:22075904ms step_avg:569.38ms +step:38773/57344 train_time:22075917ms step_avg:569.36ms +step:38774/57344 train_time:22076159ms step_avg:569.35ms +step:38775/57344 train_time:22076718ms step_avg:569.35ms +grad accum step:9694/14336 +step:38776/57344 train_time:22078048ms step_avg:569.37ms +step:38777/57344 train_time:22078066ms step_avg:569.36ms +step:38778/57344 train_time:22078310ms step_avg:569.35ms +step:38779/57344 train_time:22078861ms step_avg:569.35ms +grad accum step:9695/14336 +step:38780/57344 train_time:22080159ms step_avg:569.37ms +step:38781/57344 train_time:22080175ms step_avg:569.36ms +step:38782/57344 train_time:22080425ms step_avg:569.35ms +step:38783/57344 train_time:22080978ms step_avg:569.35ms +grad accum step:9696/14336 +step:38784/57344 train_time:22082280ms step_avg:569.37ms +step:38784/57344 val_loss:5.924534 train_time:22082281ms step_avg:569.37ms +step:38785/57344 train_time:22082293ms step_avg:569.35ms +step:38786/57344 train_time:22082597ms step_avg:569.34ms +step:38787/57344 train_time:22083151ms step_avg:569.34ms +grad accum step:9697/14336 +step:38788/57344 train_time:22084501ms step_avg:569.36ms +step:38789/57344 train_time:22084518ms step_avg:569.35ms +step:38790/57344 train_time:22084768ms step_avg:569.34ms +step:38791/57344 train_time:22085326ms step_avg:569.34ms +grad accum step:9698/14336 +step:38792/57344 train_time:22086642ms step_avg:569.36ms +step:38793/57344 train_time:22086658ms step_avg:569.35ms +step:38794/57344 train_time:22086905ms step_avg:569.34ms +step:38795/57344 train_time:22087452ms step_avg:569.34ms +grad accum step:9699/14336 +step:38796/57344 train_time:22088756ms step_avg:569.36ms +step:38797/57344 train_time:22088773ms step_avg:569.34ms +step:38798/57344 train_time:22089021ms step_avg:569.33ms +step:38799/57344 train_time:22089576ms step_avg:569.33ms +grad accum step:9700/14336 +step:38800/57344 train_time:22090876ms step_avg:569.35ms +step:38801/57344 train_time:22090893ms step_avg:569.34ms +step:38802/57344 train_time:22091141ms step_avg:569.33ms +step:38803/57344 train_time:22091696ms step_avg:569.33ms +grad accum step:9701/14336 +step:38804/57344 train_time:22093033ms step_avg:569.35ms +step:38805/57344 train_time:22093049ms step_avg:569.34ms +step:38806/57344 train_time:22093298ms step_avg:569.33ms +step:38807/57344 train_time:22093857ms step_avg:569.33ms +grad accum step:9702/14336 +step:38808/57344 train_time:22095195ms step_avg:569.35ms +step:38809/57344 train_time:22095212ms step_avg:569.33ms +step:38810/57344 train_time:22095458ms step_avg:569.32ms +step:38811/57344 train_time:22096004ms step_avg:569.32ms +grad accum step:9703/14336 +step:38812/57344 train_time:22097343ms step_avg:569.34ms +step:38813/57344 train_time:22097360ms step_avg:569.33ms +step:38814/57344 train_time:22097609ms step_avg:569.32ms +step:38815/57344 train_time:22098149ms step_avg:569.32ms +grad accum step:9704/14336 +step:38816/57344 train_time:22099447ms step_avg:569.34ms +step:38817/57344 train_time:22099464ms step_avg:569.32ms +step:38818/57344 train_time:22099712ms step_avg:569.32ms +step:38819/57344 train_time:22100261ms step_avg:569.32ms +grad accum step:9705/14336 +step:38820/57344 train_time:22101582ms step_avg:569.33ms +step:38821/57344 train_time:22101599ms step_avg:569.32ms +step:38822/57344 train_time:22101846ms step_avg:569.31ms +step:38823/57344 train_time:22102400ms step_avg:569.31ms +grad accum step:9706/14336 +step:38824/57344 train_time:22103709ms step_avg:569.33ms +step:38825/57344 train_time:22103727ms step_avg:569.32ms +step:38826/57344 train_time:22103974ms step_avg:569.31ms +step:38827/57344 train_time:22104524ms step_avg:569.31ms +grad accum step:9707/14336 +step:38828/57344 train_time:22105836ms step_avg:569.33ms +step:38829/57344 train_time:22105853ms step_avg:569.31ms +step:38830/57344 train_time:22106108ms step_avg:569.30ms +step:38831/57344 train_time:22106680ms step_avg:569.30ms +grad accum step:9708/14336 +step:38832/57344 train_time:22107957ms step_avg:569.32ms +step:38833/57344 train_time:22107974ms step_avg:569.31ms +step:38834/57344 train_time:22108227ms step_avg:569.30ms +step:38835/57344 train_time:22108794ms step_avg:569.30ms +grad accum step:9709/14336 +step:38836/57344 train_time:22110111ms step_avg:569.32ms +step:38837/57344 train_time:22110128ms step_avg:569.31ms +step:38838/57344 train_time:22110377ms step_avg:569.30ms +step:38839/57344 train_time:22110928ms step_avg:569.30ms +grad accum step:9710/14336 +step:38840/57344 train_time:22112230ms step_avg:569.32ms +step:38841/57344 train_time:22112247ms step_avg:569.30ms +step:38842/57344 train_time:22112491ms step_avg:569.29ms +step:38843/57344 train_time:22113039ms step_avg:569.29ms +grad accum step:9711/14336 +step:38844/57344 train_time:22153157ms step_avg:570.31ms +step:38845/57344 train_time:22153172ms step_avg:570.30ms +step:38846/57344 train_time:22153415ms step_avg:570.29ms +step:38847/57344 train_time:22153957ms step_avg:570.29ms +grad accum step:9712/14336 +step:38848/57344 train_time:22155240ms step_avg:570.31ms +step:38848/57344 val_loss:5.911640 train_time:22155241ms step_avg:570.31ms +step:38849/57344 train_time:22155253ms step_avg:570.29ms +step:38850/57344 train_time:22155542ms step_avg:570.28ms +step:38851/57344 train_time:22156081ms step_avg:570.28ms +grad accum step:9713/14336 +step:38852/57344 train_time:22157371ms step_avg:570.30ms +step:38853/57344 train_time:22157388ms step_avg:570.29ms +step:38854/57344 train_time:22157641ms step_avg:570.28ms +step:38855/57344 train_time:22158202ms step_avg:570.28ms +grad accum step:9714/14336 +step:38856/57344 train_time:22159489ms step_avg:570.30ms +step:38857/57344 train_time:22159531ms step_avg:570.28ms +step:38858/57344 train_time:22159747ms step_avg:570.28ms +step:38859/57344 train_time:22160285ms step_avg:570.27ms +grad accum step:9715/14336 +step:38860/57344 train_time:22161591ms step_avg:570.29ms +step:38861/57344 train_time:22161607ms step_avg:570.28ms +step:38862/57344 train_time:22161854ms step_avg:570.27ms +step:38863/57344 train_time:22162409ms step_avg:570.27ms +grad accum step:9716/14336 +step:38864/57344 train_time:22163704ms step_avg:570.29ms +step:38865/57344 train_time:22163721ms step_avg:570.27ms +step:38866/57344 train_time:22163965ms step_avg:570.27ms +step:38867/57344 train_time:22164513ms step_avg:570.27ms +grad accum step:9717/14336 +step:38868/57344 train_time:22165787ms step_avg:570.28ms +step:38869/57344 train_time:22165804ms step_avg:570.27ms +step:38870/57344 train_time:22166054ms step_avg:570.26ms +step:38871/57344 train_time:22166615ms step_avg:570.26ms +grad accum step:9718/14336 +step:38872/57344 train_time:22167947ms step_avg:570.28ms +step:38873/57344 train_time:22167964ms step_avg:570.27ms +step:38874/57344 train_time:22168216ms step_avg:570.26ms +step:38875/57344 train_time:22168773ms step_avg:570.26ms +grad accum step:9719/14336 +step:38876/57344 train_time:22170071ms step_avg:570.28ms +step:38877/57344 train_time:22170088ms step_avg:570.26ms +step:38878/57344 train_time:22170341ms step_avg:570.25ms +step:38879/57344 train_time:22170909ms step_avg:570.25ms +grad accum step:9720/14336 +step:38880/57344 train_time:22172231ms step_avg:570.27ms +step:38881/57344 train_time:22172247ms step_avg:570.26ms +step:38882/57344 train_time:22172495ms step_avg:570.25ms +step:38883/57344 train_time:22173046ms step_avg:570.25ms +grad accum step:9721/14336 +step:38884/57344 train_time:22174370ms step_avg:570.27ms +step:38885/57344 train_time:22174390ms step_avg:570.26ms +step:38886/57344 train_time:22174621ms step_avg:570.25ms +step:38887/57344 train_time:22175168ms step_avg:570.25ms +grad accum step:9722/14336 +step:38888/57344 train_time:22176484ms step_avg:570.27ms +step:38889/57344 train_time:22176501ms step_avg:570.25ms +step:38890/57344 train_time:22176746ms step_avg:570.24ms +step:38891/57344 train_time:22177289ms step_avg:570.24ms +grad accum step:9723/14336 +step:38892/57344 train_time:22178577ms step_avg:570.26ms +step:38893/57344 train_time:22178595ms step_avg:570.25ms +step:38894/57344 train_time:22178830ms step_avg:570.24ms +step:38895/57344 train_time:22179378ms step_avg:570.24ms +grad accum step:9724/14336 +step:38896/57344 train_time:22180662ms step_avg:570.26ms +step:38897/57344 train_time:22180678ms step_avg:570.24ms +step:38898/57344 train_time:22180918ms step_avg:570.23ms +step:38899/57344 train_time:22181460ms step_avg:570.23ms +grad accum step:9725/14336 +step:38900/57344 train_time:22182789ms step_avg:570.25ms +step:38901/57344 train_time:22182805ms step_avg:570.24ms +step:38902/57344 train_time:22183044ms step_avg:570.23ms +step:38903/57344 train_time:22183588ms step_avg:570.23ms +grad accum step:9726/14336 +step:38904/57344 train_time:22184922ms step_avg:570.25ms +step:38905/57344 train_time:22184939ms step_avg:570.23ms +step:38906/57344 train_time:22185187ms step_avg:570.23ms +step:38907/57344 train_time:22185747ms step_avg:570.23ms +grad accum step:9727/14336 +step:38908/57344 train_time:22187072ms step_avg:570.24ms +step:38909/57344 train_time:22187094ms step_avg:570.23ms +step:38910/57344 train_time:22187335ms step_avg:570.22ms +step:38911/57344 train_time:22187891ms step_avg:570.22ms +grad accum step:9728/14336 +step:38912/57344 train_time:22189180ms step_avg:570.24ms +step:38912/57344 val_loss:5.893886 train_time:22189187ms step_avg:570.24ms +step:38913/57344 train_time:22189199ms step_avg:570.23ms +step:38914/57344 train_time:22189421ms step_avg:570.22ms +step:38915/57344 train_time:22189970ms step_avg:570.22ms +grad accum step:9729/14336 +step:38916/57344 train_time:22191287ms step_avg:570.24ms +step:38917/57344 train_time:22191304ms step_avg:570.22ms +step:38918/57344 train_time:22191542ms step_avg:570.21ms +step:38919/57344 train_time:22192085ms step_avg:570.21ms +grad accum step:9730/14336 +step:38920/57344 train_time:22193400ms step_avg:570.23ms +step:38921/57344 train_time:22193416ms step_avg:570.22ms +step:38922/57344 train_time:22193658ms step_avg:570.21ms +step:38923/57344 train_time:22194215ms step_avg:570.21ms +grad accum step:9731/14336 +step:38924/57344 train_time:22195568ms step_avg:570.23ms +step:38925/57344 train_time:22195586ms step_avg:570.21ms +step:38926/57344 train_time:22195809ms step_avg:570.21ms +step:38927/57344 train_time:22196372ms step_avg:570.21ms +grad accum step:9732/14336 +step:38928/57344 train_time:22197682ms step_avg:570.22ms +step:38929/57344 train_time:22197696ms step_avg:570.21ms +step:38930/57344 train_time:22197946ms step_avg:570.20ms +step:38931/57344 train_time:22198500ms step_avg:570.20ms +grad accum step:9733/14336 +step:38932/57344 train_time:22199874ms step_avg:570.22ms +step:38933/57344 train_time:22199895ms step_avg:570.21ms +step:38934/57344 train_time:22200128ms step_avg:570.20ms +step:38935/57344 train_time:22200670ms step_avg:570.20ms +grad accum step:9734/14336 +step:38936/57344 train_time:22202008ms step_avg:570.22ms +step:38937/57344 train_time:22202030ms step_avg:570.20ms +step:38938/57344 train_time:22202251ms step_avg:570.19ms +step:38939/57344 train_time:22202795ms step_avg:570.19ms +grad accum step:9735/14336 +step:38940/57344 train_time:22204152ms step_avg:570.21ms +step:38941/57344 train_time:22204168ms step_avg:570.20ms +step:38942/57344 train_time:22204412ms step_avg:570.19ms +step:38943/57344 train_time:22204953ms step_avg:570.19ms +grad accum step:9736/14336 +step:38944/57344 train_time:22206235ms step_avg:570.21ms +step:38945/57344 train_time:22206250ms step_avg:570.20ms +step:38946/57344 train_time:22206669ms step_avg:570.19ms +step:38947/57344 train_time:22207044ms step_avg:570.19ms +grad accum step:9737/14336 +step:38948/57344 train_time:22208402ms step_avg:570.21ms +step:38949/57344 train_time:22208419ms step_avg:570.19ms +step:38950/57344 train_time:22208670ms step_avg:570.18ms +step:38951/57344 train_time:22209228ms step_avg:570.18ms +grad accum step:9738/14336 +step:38952/57344 train_time:22210507ms step_avg:570.20ms +step:38953/57344 train_time:22210523ms step_avg:570.19ms +step:38954/57344 train_time:22210772ms step_avg:570.18ms +step:38955/57344 train_time:22211327ms step_avg:570.18ms +grad accum step:9739/14336 +step:38956/57344 train_time:22212649ms step_avg:570.20ms +step:38957/57344 train_time:22212665ms step_avg:570.18ms +step:38958/57344 train_time:22212912ms step_avg:570.18ms +step:38959/57344 train_time:22213466ms step_avg:570.18ms +grad accum step:9740/14336 +step:38960/57344 train_time:22214790ms step_avg:570.19ms +step:38961/57344 train_time:22214806ms step_avg:570.18ms +step:38962/57344 train_time:22215052ms step_avg:570.17ms +step:38963/57344 train_time:22215604ms step_avg:570.17ms +grad accum step:9741/14336 +step:38964/57344 train_time:22216901ms step_avg:570.19ms +step:38965/57344 train_time:22216917ms step_avg:570.18ms +step:38966/57344 train_time:22217167ms step_avg:570.17ms +step:38967/57344 train_time:22217724ms step_avg:570.17ms +grad accum step:9742/14336 +step:38968/57344 train_time:22219026ms step_avg:570.19ms +step:38969/57344 train_time:22219038ms step_avg:570.17ms +step:38970/57344 train_time:22219290ms step_avg:570.16ms +step:38971/57344 train_time:22219849ms step_avg:570.16ms +grad accum step:9743/14336 +step:38972/57344 train_time:22221126ms step_avg:570.18ms +step:38973/57344 train_time:22221142ms step_avg:570.17ms +step:38974/57344 train_time:22221391ms step_avg:570.16ms +step:38975/57344 train_time:22221955ms step_avg:570.16ms +grad accum step:9744/14336 +step:38976/57344 train_time:22223275ms step_avg:570.18ms +step:38976/57344 val_loss:5.880382 train_time:22223275ms step_avg:570.18ms +step:38977/57344 train_time:22301338ms step_avg:572.17ms +step:38978/57344 train_time:22301421ms step_avg:572.15ms +step:38979/57344 train_time:22301965ms step_avg:572.15ms +grad accum step:9745/14336 +step:38980/57344 train_time:22303475ms step_avg:572.18ms +step:38981/57344 train_time:22303489ms step_avg:572.16ms +step:38982/57344 train_time:22303703ms step_avg:572.15ms +step:38983/57344 train_time:22304234ms step_avg:572.15ms +grad accum step:9746/14336 +step:38984/57344 train_time:22305513ms step_avg:572.17ms +step:38985/57344 train_time:22305528ms step_avg:572.16ms +step:38986/57344 train_time:22305765ms step_avg:572.15ms +step:38987/57344 train_time:22306298ms step_avg:572.15ms +grad accum step:9747/14336 +step:38988/57344 train_time:22307584ms step_avg:572.17ms +step:38989/57344 train_time:22307601ms step_avg:572.15ms +step:38990/57344 train_time:22307841ms step_avg:572.14ms +step:38991/57344 train_time:22308379ms step_avg:572.14ms +grad accum step:9748/14336 +step:38992/57344 train_time:22309671ms step_avg:572.16ms +step:38993/57344 train_time:22309694ms step_avg:572.15ms +step:38994/57344 train_time:22309929ms step_avg:572.14ms +step:38995/57344 train_time:22310475ms step_avg:572.14ms +grad accum step:9749/14336 +step:38996/57344 train_time:22311750ms step_avg:572.15ms +step:38997/57344 train_time:22311766ms step_avg:572.14ms +step:38998/57344 train_time:22312011ms step_avg:572.13ms +step:38999/57344 train_time:22312560ms step_avg:572.13ms +grad accum step:9750/14336 +step:39000/57344 train_time:22313882ms step_avg:572.15ms +step:39001/57344 train_time:22313903ms step_avg:572.14ms +step:39002/57344 train_time:22314119ms step_avg:572.13ms +step:39003/57344 train_time:22314668ms step_avg:572.13ms +grad accum step:9751/14336 +step:39004/57344 train_time:22316027ms step_avg:572.15ms +step:39005/57344 train_time:22316043ms step_avg:572.13ms +step:39006/57344 train_time:22316287ms step_avg:572.12ms +step:39007/57344 train_time:22316834ms step_avg:572.12ms +grad accum step:9752/14336 +step:39008/57344 train_time:22318116ms step_avg:572.14ms +step:39009/57344 train_time:22318136ms step_avg:572.13ms +step:39010/57344 train_time:22318369ms step_avg:572.12ms +step:39011/57344 train_time:22318920ms step_avg:572.12ms +grad accum step:9753/14336 +step:39012/57344 train_time:22320221ms step_avg:572.14ms +step:39013/57344 train_time:22320237ms step_avg:572.12ms +step:39014/57344 train_time:22320455ms step_avg:572.11ms +step:39015/57344 train_time:22320998ms step_avg:572.11ms +grad accum step:9754/14336 +step:39016/57344 train_time:22322309ms step_avg:572.13ms +step:39017/57344 train_time:22322326ms step_avg:572.12ms +step:39018/57344 train_time:22322564ms step_avg:572.11ms +step:39019/57344 train_time:22323100ms step_avg:572.11ms +grad accum step:9755/14336 +step:39020/57344 train_time:22324428ms step_avg:572.13ms +step:39021/57344 train_time:22324448ms step_avg:572.11ms +step:39022/57344 train_time:22324678ms step_avg:572.10ms +step:39023/57344 train_time:22325218ms step_avg:572.10ms +grad accum step:9756/14336 +step:39024/57344 train_time:22326508ms step_avg:572.12ms +step:39025/57344 train_time:22326529ms step_avg:572.11ms +step:39026/57344 train_time:22326760ms step_avg:572.10ms +step:39027/57344 train_time:22327308ms step_avg:572.10ms +grad accum step:9757/14336 +step:39028/57344 train_time:22328598ms step_avg:572.12ms +step:39029/57344 train_time:22328615ms step_avg:572.10ms +step:39030/57344 train_time:22328864ms step_avg:572.09ms +step:39031/57344 train_time:22329431ms step_avg:572.09ms +grad accum step:9758/14336 +step:39032/57344 train_time:22330721ms step_avg:572.11ms +step:39033/57344 train_time:22330738ms step_avg:572.10ms +step:39034/57344 train_time:22330985ms step_avg:572.09ms +step:39035/57344 train_time:22331548ms step_avg:572.09ms +grad accum step:9759/14336 +step:39036/57344 train_time:22332815ms step_avg:572.11ms +step:39037/57344 train_time:22332832ms step_avg:572.09ms +step:39038/57344 train_time:22333078ms step_avg:572.09ms +step:39039/57344 train_time:22333635ms step_avg:572.09ms +grad accum step:9760/14336 +step:39040/57344 train_time:22334938ms step_avg:572.10ms +step:39040/57344 val_loss:5.867537 train_time:22334939ms step_avg:572.10ms +step:39041/57344 train_time:22334951ms step_avg:572.09ms +step:39042/57344 train_time:22335174ms step_avg:572.08ms +step:39043/57344 train_time:22335717ms step_avg:572.08ms +grad accum step:9761/14336 +step:39044/57344 train_time:22337000ms step_avg:572.10ms +step:39045/57344 train_time:22337019ms step_avg:572.08ms +step:39046/57344 train_time:22337257ms step_avg:572.08ms +step:39047/57344 train_time:22337797ms step_avg:572.07ms +grad accum step:9762/14336 +step:39048/57344 train_time:22339145ms step_avg:572.09ms +step:39049/57344 train_time:22339165ms step_avg:572.08ms +step:39050/57344 train_time:22339403ms step_avg:572.07ms +step:39051/57344 train_time:22339966ms step_avg:572.07ms +grad accum step:9763/14336 +step:39052/57344 train_time:22341318ms step_avg:572.09ms +step:39053/57344 train_time:22341332ms step_avg:572.08ms +step:39054/57344 train_time:22341580ms step_avg:572.07ms +step:39055/57344 train_time:22342145ms step_avg:572.07ms +grad accum step:9764/14336 +step:39056/57344 train_time:22343528ms step_avg:572.09ms +step:39057/57344 train_time:22343554ms step_avg:572.08ms +step:39058/57344 train_time:22343777ms step_avg:572.07ms +step:39059/57344 train_time:22344332ms step_avg:572.07ms +grad accum step:9765/14336 +step:39060/57344 train_time:22345643ms step_avg:572.09ms +step:39061/57344 train_time:22345663ms step_avg:572.07ms +step:39062/57344 train_time:22345896ms step_avg:572.06ms +step:39063/57344 train_time:22346445ms step_avg:572.06ms +grad accum step:9766/14336 +step:39064/57344 train_time:22347768ms step_avg:572.08ms +step:39065/57344 train_time:22347789ms step_avg:572.07ms +step:39066/57344 train_time:22348031ms step_avg:572.06ms +step:39067/57344 train_time:22348589ms step_avg:572.06ms +grad accum step:9767/14336 +step:39068/57344 train_time:22349898ms step_avg:572.08ms +step:39069/57344 train_time:22349915ms step_avg:572.06ms +step:39070/57344 train_time:22350154ms step_avg:572.05ms +step:39071/57344 train_time:22350698ms step_avg:572.05ms +grad accum step:9768/14336 +step:39072/57344 train_time:22352034ms step_avg:572.07ms +step:39073/57344 train_time:22352051ms step_avg:572.06ms +step:39074/57344 train_time:22352293ms step_avg:572.05ms +step:39075/57344 train_time:22352838ms step_avg:572.05ms +grad accum step:9769/14336 +step:39076/57344 train_time:22354234ms step_avg:572.07ms +step:39077/57344 train_time:22354255ms step_avg:572.06ms +step:39078/57344 train_time:22354494ms step_avg:572.05ms +step:39079/57344 train_time:22355046ms step_avg:572.05ms +grad accum step:9770/14336 +step:39080/57344 train_time:22356378ms step_avg:572.07ms +step:39081/57344 train_time:22356394ms step_avg:572.05ms +step:39082/57344 train_time:22356633ms step_avg:572.04ms +step:39083/57344 train_time:22357180ms step_avg:572.04ms +grad accum step:9771/14336 +step:39084/57344 train_time:22358506ms step_avg:572.06ms +step:39085/57344 train_time:22358520ms step_avg:572.05ms +step:39086/57344 train_time:22358763ms step_avg:572.04ms +step:39087/57344 train_time:22359308ms step_avg:572.04ms +grad accum step:9772/14336 +step:39088/57344 train_time:22360632ms step_avg:572.06ms +step:39089/57344 train_time:22360651ms step_avg:572.04ms +step:39090/57344 train_time:22360890ms step_avg:572.04ms +step:39091/57344 train_time:22361438ms step_avg:572.04ms +grad accum step:9773/14336 +step:39092/57344 train_time:22362784ms step_avg:572.06ms +step:39093/57344 train_time:22362800ms step_avg:572.04ms +step:39094/57344 train_time:22363041ms step_avg:572.03ms +step:39095/57344 train_time:22363588ms step_avg:572.03ms +grad accum step:9774/14336 +step:39096/57344 train_time:22364872ms step_avg:572.05ms +step:39097/57344 train_time:22364889ms step_avg:572.04ms +step:39098/57344 train_time:22365125ms step_avg:572.03ms +step:39099/57344 train_time:22365671ms step_avg:572.03ms +grad accum step:9775/14336 +step:39100/57344 train_time:22366961ms step_avg:572.05ms +step:39101/57344 train_time:22366984ms step_avg:572.03ms +step:39102/57344 train_time:22367216ms step_avg:572.02ms +step:39103/57344 train_time:22367760ms step_avg:572.02ms +grad accum step:9776/14336 +step:39104/57344 train_time:22369091ms step_avg:572.04ms +step:39104/57344 val_loss:5.857060 train_time:22369096ms step_avg:572.04ms +step:39105/57344 train_time:22369108ms step_avg:572.03ms +step:39106/57344 train_time:22369327ms step_avg:572.02ms +step:39107/57344 train_time:22369876ms step_avg:572.02ms +grad accum step:9777/14336 +step:39108/57344 train_time:22371194ms step_avg:572.04ms +step:39109/57344 train_time:22371211ms step_avg:572.02ms +step:39110/57344 train_time:22371457ms step_avg:572.01ms +step:39111/57344 train_time:22372003ms step_avg:572.01ms +grad accum step:9778/14336 +step:39112/57344 train_time:22373309ms step_avg:572.03ms +step:39113/57344 train_time:22373324ms step_avg:572.02ms +step:39114/57344 train_time:22373574ms step_avg:572.01ms +step:39115/57344 train_time:22374138ms step_avg:572.01ms +grad accum step:9779/14336 +step:39116/57344 train_time:22375522ms step_avg:572.03ms +step:39117/57344 train_time:22375537ms step_avg:572.02ms +step:39118/57344 train_time:22375772ms step_avg:572.01ms +step:39119/57344 train_time:22376348ms step_avg:572.01ms +grad accum step:9780/14336 +step:39120/57344 train_time:22377638ms step_avg:572.03ms +step:39121/57344 train_time:22377655ms step_avg:572.01ms +step:39122/57344 train_time:22377899ms step_avg:572.00ms +step:39123/57344 train_time:22378457ms step_avg:572.00ms +grad accum step:9781/14336 +step:39124/57344 train_time:22379825ms step_avg:572.02ms +step:39125/57344 train_time:22379841ms step_avg:572.01ms +step:39126/57344 train_time:22380062ms step_avg:572.00ms +step:39127/57344 train_time:22380615ms step_avg:572.00ms +grad accum step:9782/14336 +step:39128/57344 train_time:22381954ms step_avg:572.02ms +step:39129/57344 train_time:22381969ms step_avg:572.00ms +step:39130/57344 train_time:22382218ms step_avg:572.00ms +step:39131/57344 train_time:22382779ms step_avg:572.00ms +grad accum step:9783/14336 +step:39132/57344 train_time:22384126ms step_avg:572.02ms +step:39133/57344 train_time:22384147ms step_avg:572.00ms +step:39134/57344 train_time:22384387ms step_avg:571.99ms +step:39135/57344 train_time:22384949ms step_avg:571.99ms +grad accum step:9784/14336 +step:39136/57344 train_time:22386223ms step_avg:572.01ms +step:39137/57344 train_time:22386240ms step_avg:572.00ms +step:39138/57344 train_time:22386486ms step_avg:571.99ms +step:39139/57344 train_time:22387036ms step_avg:571.99ms +grad accum step:9785/14336 +step:39140/57344 train_time:22388332ms step_avg:572.01ms +step:39141/57344 train_time:22388353ms step_avg:571.99ms +step:39142/57344 train_time:22388597ms step_avg:571.98ms +step:39143/57344 train_time:22389162ms step_avg:571.98ms +grad accum step:9786/14336 +step:39144/57344 train_time:22390489ms step_avg:572.00ms +step:39145/57344 train_time:22390505ms step_avg:571.99ms +step:39146/57344 train_time:22390725ms step_avg:571.98ms +step:39147/57344 train_time:22391269ms step_avg:571.98ms +grad accum step:9787/14336 +step:39148/57344 train_time:22392571ms step_avg:572.00ms +step:39149/57344 train_time:22392588ms step_avg:571.98ms +step:39150/57344 train_time:22392839ms step_avg:571.98ms +step:39151/57344 train_time:22393392ms step_avg:571.97ms +grad accum step:9788/14336 +step:39152/57344 train_time:22394713ms step_avg:571.99ms +step:39153/57344 train_time:22394731ms step_avg:571.98ms +step:39154/57344 train_time:22394978ms step_avg:571.97ms +step:39155/57344 train_time:22395561ms step_avg:571.97ms +grad accum step:9789/14336 +step:39156/57344 train_time:22396898ms step_avg:571.99ms +step:39157/57344 train_time:22396912ms step_avg:571.98ms +step:39158/57344 train_time:22397162ms step_avg:571.97ms +step:39159/57344 train_time:22397707ms step_avg:571.97ms +grad accum step:9790/14336 +step:39160/57344 train_time:22399010ms step_avg:571.99ms +step:39161/57344 train_time:22399024ms step_avg:571.97ms +step:39162/57344 train_time:22399274ms step_avg:571.96ms +step:39163/57344 train_time:22399833ms step_avg:571.96ms +grad accum step:9791/14336 +step:39164/57344 train_time:22401148ms step_avg:571.98ms +step:39165/57344 train_time:22401162ms step_avg:571.97ms +step:39166/57344 train_time:22401397ms step_avg:571.96ms +step:39167/57344 train_time:22401943ms step_avg:571.96ms +grad accum step:9792/14336 +step:39168/57344 train_time:22403255ms step_avg:571.98ms +step:39168/57344 val_loss:5.843616 train_time:22403256ms step_avg:571.98ms +step:39169/57344 train_time:22403268ms step_avg:571.96ms +step:39170/57344 train_time:22403490ms step_avg:571.96ms +step:39171/57344 train_time:22404034ms step_avg:571.95ms +grad accum step:9793/14336 +step:39172/57344 train_time:22405365ms step_avg:571.97ms +step:39173/57344 train_time:22405383ms step_avg:571.96ms +step:39174/57344 train_time:22405635ms step_avg:571.95ms +step:39175/57344 train_time:22406188ms step_avg:571.95ms +grad accum step:9794/14336 +step:39176/57344 train_time:22407495ms step_avg:571.97ms +step:39177/57344 train_time:22407512ms step_avg:571.96ms +step:39178/57344 train_time:22407763ms step_avg:571.95ms +step:39179/57344 train_time:22408321ms step_avg:571.95ms +grad accum step:9795/14336 +step:39180/57344 train_time:22409643ms step_avg:571.97ms +step:39181/57344 train_time:22409658ms step_avg:571.95ms +step:39182/57344 train_time:22409908ms step_avg:571.94ms +step:39183/57344 train_time:22410467ms step_avg:571.94ms +grad accum step:9796/14336 +step:39184/57344 train_time:22411775ms step_avg:571.96ms +step:39185/57344 train_time:22411792ms step_avg:571.95ms +step:39186/57344 train_time:22412044ms step_avg:571.94ms +step:39187/57344 train_time:22412604ms step_avg:571.94ms +grad accum step:9797/14336 +step:39188/57344 train_time:22413934ms step_avg:571.96ms +step:39189/57344 train_time:22413951ms step_avg:571.94ms +step:39190/57344 train_time:22414200ms step_avg:571.94ms +step:39191/57344 train_time:22414760ms step_avg:571.94ms +grad accum step:9798/14336 +step:39192/57344 train_time:22416096ms step_avg:571.96ms +step:39193/57344 train_time:22416112ms step_avg:571.94ms +step:39194/57344 train_time:22416361ms step_avg:571.93ms +step:39195/57344 train_time:22416916ms step_avg:571.93ms +grad accum step:9799/14336 +step:39196/57344 train_time:22418217ms step_avg:571.95ms +step:39197/57344 train_time:22418234ms step_avg:571.94ms +step:39198/57344 train_time:22418487ms step_avg:571.93ms +step:39199/57344 train_time:22419051ms step_avg:571.93ms +grad accum step:9800/14336 +step:39200/57344 train_time:22420350ms step_avg:571.95ms +step:39201/57344 train_time:22420365ms step_avg:571.93ms +step:39202/57344 train_time:22420618ms step_avg:571.93ms +step:39203/57344 train_time:22421177ms step_avg:571.93ms +grad accum step:9801/14336 +step:39204/57344 train_time:22422482ms step_avg:571.94ms +step:39205/57344 train_time:22422499ms step_avg:571.93ms +step:39206/57344 train_time:22422747ms step_avg:571.92ms +step:39207/57344 train_time:22423297ms step_avg:571.92ms +grad accum step:9802/14336 +step:39208/57344 train_time:22424631ms step_avg:571.94ms +step:39209/57344 train_time:22424648ms step_avg:571.93ms +step:39210/57344 train_time:22424896ms step_avg:571.92ms +step:39211/57344 train_time:22425456ms step_avg:571.92ms +grad accum step:9803/14336 +step:39212/57344 train_time:22426816ms step_avg:571.94ms +step:39213/57344 train_time:22426833ms step_avg:571.92ms +step:39214/57344 train_time:22427088ms step_avg:571.92ms +step:39215/57344 train_time:22427651ms step_avg:571.92ms +grad accum step:9804/14336 +step:39216/57344 train_time:22428949ms step_avg:571.93ms +step:39217/57344 train_time:22428966ms step_avg:571.92ms +step:39218/57344 train_time:22429222ms step_avg:571.91ms +step:39219/57344 train_time:22429783ms step_avg:571.91ms +grad accum step:9805/14336 +step:39220/57344 train_time:22431094ms step_avg:571.93ms +step:39221/57344 train_time:22431114ms step_avg:571.92ms +step:39222/57344 train_time:22431341ms step_avg:571.91ms +step:39223/57344 train_time:22431900ms step_avg:571.91ms +grad accum step:9806/14336 +step:39224/57344 train_time:22433208ms step_avg:571.93ms +step:39225/57344 train_time:22433223ms step_avg:571.91ms +step:39226/57344 train_time:22433472ms step_avg:571.90ms +step:39227/57344 train_time:22434024ms step_avg:571.90ms +grad accum step:9807/14336 +step:39228/57344 train_time:22435336ms step_avg:571.92ms +step:39229/57344 train_time:22435353ms step_avg:571.91ms +step:39230/57344 train_time:22435604ms step_avg:571.90ms +step:39231/57344 train_time:22436161ms step_avg:571.90ms +grad accum step:9808/14336 +step:39232/57344 train_time:22437488ms step_avg:571.92ms +step:39232/57344 val_loss:5.834862 train_time:22437509ms step_avg:571.92ms +step:39233/57344 train_time:22437521ms step_avg:571.90ms +step:39234/57344 train_time:22437747ms step_avg:571.90ms +step:39235/57344 train_time:22438291ms step_avg:571.89ms +grad accum step:9809/14336 +step:39236/57344 train_time:22439588ms step_avg:571.91ms +step:39237/57344 train_time:22439604ms step_avg:571.90ms +step:39238/57344 train_time:22439854ms step_avg:571.89ms +step:39239/57344 train_time:22440414ms step_avg:571.89ms +grad accum step:9810/14336 +step:39240/57344 train_time:22441732ms step_avg:571.91ms +step:39241/57344 train_time:22441746ms step_avg:571.90ms +step:39242/57344 train_time:22441992ms step_avg:571.89ms +step:39243/57344 train_time:22442540ms step_avg:571.89ms +grad accum step:9811/14336 +step:39244/57344 train_time:22443833ms step_avg:571.90ms +step:39245/57344 train_time:22443849ms step_avg:571.89ms +step:39246/57344 train_time:22444094ms step_avg:571.88ms +step:39247/57344 train_time:22444642ms step_avg:571.88ms +grad accum step:9812/14336 +step:39248/57344 train_time:22445960ms step_avg:571.90ms +step:39249/57344 train_time:22445977ms step_avg:571.89ms +step:39250/57344 train_time:22446222ms step_avg:571.88ms +step:39251/57344 train_time:22446780ms step_avg:571.88ms +grad accum step:9813/14336 +step:39252/57344 train_time:22448122ms step_avg:571.90ms +step:39253/57344 train_time:22448137ms step_avg:571.88ms +step:39254/57344 train_time:22448386ms step_avg:571.88ms +step:39255/57344 train_time:22448943ms step_avg:571.87ms +grad accum step:9814/14336 +step:39256/57344 train_time:22450237ms step_avg:571.89ms +step:39257/57344 train_time:22450254ms step_avg:571.88ms +step:39258/57344 train_time:22450510ms step_avg:571.87ms +step:39259/57344 train_time:22451072ms step_avg:571.87ms +grad accum step:9815/14336 +step:39260/57344 train_time:22452374ms step_avg:571.89ms +step:39261/57344 train_time:22452395ms step_avg:571.88ms +step:39262/57344 train_time:22452621ms step_avg:571.87ms +step:39263/57344 train_time:22453178ms step_avg:571.87ms +grad accum step:9816/14336 +step:39264/57344 train_time:22454514ms step_avg:571.89ms +step:39265/57344 train_time:22454532ms step_avg:571.87ms +step:39266/57344 train_time:22454774ms step_avg:571.86ms +step:39267/57344 train_time:22455327ms step_avg:571.86ms +grad accum step:9817/14336 +step:39268/57344 train_time:22456627ms step_avg:571.88ms +step:39269/57344 train_time:22456650ms step_avg:571.87ms +step:39270/57344 train_time:22456885ms step_avg:571.86ms +step:39271/57344 train_time:22457432ms step_avg:571.86ms +grad accum step:9818/14336 +step:39272/57344 train_time:22458739ms step_avg:571.88ms +step:39273/57344 train_time:22458757ms step_avg:571.86ms +step:39274/57344 train_time:22458996ms step_avg:571.85ms +step:39275/57344 train_time:22459549ms step_avg:571.85ms +grad accum step:9819/14336 +step:39276/57344 train_time:22460871ms step_avg:571.87ms +step:39277/57344 train_time:22460890ms step_avg:571.86ms +step:39278/57344 train_time:22461128ms step_avg:571.85ms +step:39279/57344 train_time:22461680ms step_avg:571.85ms +grad accum step:9820/14336 +step:39280/57344 train_time:22462961ms step_avg:571.87ms +step:39281/57344 train_time:22462978ms step_avg:571.85ms +step:39282/57344 train_time:22463231ms step_avg:571.85ms +step:39283/57344 train_time:22463792ms step_avg:571.85ms +grad accum step:9821/14336 +step:39284/57344 train_time:22465104ms step_avg:571.86ms +step:39285/57344 train_time:22465123ms step_avg:571.85ms +step:39286/57344 train_time:22465376ms step_avg:571.84ms +step:39287/57344 train_time:22465948ms step_avg:571.84ms +grad accum step:9822/14336 +step:39288/57344 train_time:22467253ms step_avg:571.86ms +step:39289/57344 train_time:22467269ms step_avg:571.85ms +step:39290/57344 train_time:22467512ms step_avg:571.84ms +step:39291/57344 train_time:22468066ms step_avg:571.84ms +grad accum step:9823/14336 +step:39292/57344 train_time:22469425ms step_avg:571.86ms +step:39293/57344 train_time:22469439ms step_avg:571.84ms +step:39294/57344 train_time:22469689ms step_avg:571.84ms +step:39295/57344 train_time:22470246ms step_avg:571.83ms +grad accum step:9824/14336 +step:39296/57344 train_time:22471584ms step_avg:571.85ms +step:39296/57344 val_loss:5.820549 train_time:22471590ms step_avg:571.85ms +step:39297/57344 train_time:22471602ms step_avg:571.84ms +step:39298/57344 train_time:22471824ms step_avg:571.83ms +step:39299/57344 train_time:22472379ms step_avg:571.83ms +grad accum step:9825/14336 +step:39300/57344 train_time:22473713ms step_avg:571.85ms +step:39301/57344 train_time:22473730ms step_avg:571.84ms +step:39302/57344 train_time:22473972ms step_avg:571.83ms +step:39303/57344 train_time:22474519ms step_avg:571.83ms +grad accum step:9826/14336 +step:39304/57344 train_time:22475801ms step_avg:571.85ms +step:39305/57344 train_time:22475818ms step_avg:571.83ms +step:39306/57344 train_time:22476066ms step_avg:571.82ms +step:39307/57344 train_time:22476615ms step_avg:571.82ms +grad accum step:9827/14336 +step:39308/57344 train_time:22477910ms step_avg:571.84ms +step:39309/57344 train_time:22477926ms step_avg:571.83ms +step:39310/57344 train_time:22478172ms step_avg:571.82ms +step:39311/57344 train_time:22478722ms step_avg:571.82ms +grad accum step:9828/14336 +step:39312/57344 train_time:22480054ms step_avg:571.84ms +step:39313/57344 train_time:22480071ms step_avg:571.82ms +step:39314/57344 train_time:22480318ms step_avg:571.81ms +step:39315/57344 train_time:22480868ms step_avg:571.81ms +grad accum step:9829/14336 +step:39316/57344 train_time:22482149ms step_avg:571.83ms +step:39317/57344 train_time:22482166ms step_avg:571.82ms +step:39318/57344 train_time:22482414ms step_avg:571.81ms +step:39319/57344 train_time:22482962ms step_avg:571.81ms +grad accum step:9830/14336 +step:39320/57344 train_time:22484261ms step_avg:571.83ms +step:39321/57344 train_time:22484277ms step_avg:571.81ms +step:39322/57344 train_time:22484525ms step_avg:571.81ms +step:39323/57344 train_time:22485076ms step_avg:571.80ms +grad accum step:9831/14336 +step:39324/57344 train_time:22486382ms step_avg:571.82ms +step:39325/57344 train_time:22486400ms step_avg:571.81ms +step:39326/57344 train_time:22486645ms step_avg:571.80ms +step:39327/57344 train_time:22487182ms step_avg:571.80ms +grad accum step:9832/14336 +step:39328/57344 train_time:22488483ms step_avg:571.82ms +step:39329/57344 train_time:22488499ms step_avg:571.80ms +step:39330/57344 train_time:22488744ms step_avg:571.80ms +step:39331/57344 train_time:22489287ms step_avg:571.80ms +grad accum step:9833/14336 +step:39332/57344 train_time:22490567ms step_avg:571.81ms +step:39333/57344 train_time:22490583ms step_avg:571.80ms +step:39334/57344 train_time:22490835ms step_avg:571.79ms +step:39335/57344 train_time:22491397ms step_avg:571.79ms +grad accum step:9834/14336 +step:39336/57344 train_time:22492696ms step_avg:571.81ms +step:39337/57344 train_time:22492713ms step_avg:571.80ms +step:39338/57344 train_time:22492958ms step_avg:571.79ms +step:39339/57344 train_time:22493501ms step_avg:571.79ms +grad accum step:9835/14336 +step:39340/57344 train_time:22494786ms step_avg:571.80ms +step:39341/57344 train_time:22494811ms step_avg:571.79ms +step:39342/57344 train_time:22495036ms step_avg:571.78ms +step:39343/57344 train_time:22495582ms step_avg:571.78ms +grad accum step:9836/14336 +step:39344/57344 train_time:22496943ms step_avg:571.80ms +step:39345/57344 train_time:22496960ms step_avg:571.79ms +step:39346/57344 train_time:22497193ms step_avg:571.78ms +step:39347/57344 train_time:22497743ms step_avg:571.78ms +grad accum step:9837/14336 +step:39348/57344 train_time:22499039ms step_avg:571.80ms +step:39349/57344 train_time:22499054ms step_avg:571.78ms +step:39350/57344 train_time:22499305ms step_avg:571.77ms +step:39351/57344 train_time:22499863ms step_avg:571.77ms +grad accum step:9838/14336 +step:39352/57344 train_time:22501163ms step_avg:571.79ms +step:39353/57344 train_time:22501180ms step_avg:571.78ms +step:39354/57344 train_time:22501427ms step_avg:571.77ms +step:39355/57344 train_time:22501973ms step_avg:571.77ms +grad accum step:9839/14336 +step:39356/57344 train_time:22503269ms step_avg:571.79ms +step:39357/57344 train_time:22503285ms step_avg:571.77ms +step:39358/57344 train_time:22503535ms step_avg:571.77ms +step:39359/57344 train_time:22504085ms step_avg:571.76ms +grad accum step:9840/14336 +step:39360/57344 train_time:22505425ms step_avg:571.78ms +step:39360/57344 val_loss:5.809020 train_time:22505436ms step_avg:571.78ms +step:39361/57344 train_time:22505448ms step_avg:571.77ms +step:39362/57344 train_time:22505670ms step_avg:571.76ms +step:39363/57344 train_time:22506212ms step_avg:571.76ms +grad accum step:9841/14336 +step:39364/57344 train_time:22507512ms step_avg:571.78ms +step:39365/57344 train_time:22507529ms step_avg:571.76ms +step:39366/57344 train_time:22507771ms step_avg:571.76ms +step:39367/57344 train_time:22508318ms step_avg:571.76ms +grad accum step:9842/14336 +step:39368/57344 train_time:22509640ms step_avg:571.78ms +step:39369/57344 train_time:22509654ms step_avg:571.76ms +step:39370/57344 train_time:22509904ms step_avg:571.75ms +step:39371/57344 train_time:22510458ms step_avg:571.75ms +grad accum step:9843/14336 +step:39372/57344 train_time:22511805ms step_avg:571.77ms +step:39373/57344 train_time:22511818ms step_avg:571.76ms +step:39374/57344 train_time:22512061ms step_avg:571.75ms +step:39375/57344 train_time:22512602ms step_avg:571.75ms +grad accum step:9844/14336 +step:39376/57344 train_time:22513925ms step_avg:571.77ms +step:39377/57344 train_time:22513949ms step_avg:571.75ms +step:39378/57344 train_time:22514184ms step_avg:571.75ms +step:39379/57344 train_time:22514732ms step_avg:571.74ms +grad accum step:9845/14336 +step:39380/57344 train_time:22516036ms step_avg:571.76ms +step:39381/57344 train_time:22516050ms step_avg:571.75ms +step:39382/57344 train_time:22516296ms step_avg:571.74ms +step:39383/57344 train_time:22516839ms step_avg:571.74ms +grad accum step:9846/14336 +step:39384/57344 train_time:22518165ms step_avg:571.76ms +step:39385/57344 train_time:22518180ms step_avg:571.75ms +step:39386/57344 train_time:22518424ms step_avg:571.74ms +step:39387/57344 train_time:22518966ms step_avg:571.74ms +grad accum step:9847/14336 +step:39388/57344 train_time:22520309ms step_avg:571.76ms +step:39389/57344 train_time:22520331ms step_avg:571.74ms +step:39390/57344 train_time:22520560ms step_avg:571.73ms +step:39391/57344 train_time:22521118ms step_avg:571.73ms +grad accum step:9848/14336 +step:39392/57344 train_time:22522432ms step_avg:571.75ms +step:39393/57344 train_time:22522448ms step_avg:571.74ms +step:39394/57344 train_time:22522690ms step_avg:571.73ms +step:39395/57344 train_time:22523235ms step_avg:571.73ms +grad accum step:9849/14336 +step:39396/57344 train_time:22524585ms step_avg:571.75ms +step:39397/57344 train_time:22524599ms step_avg:571.73ms +step:39398/57344 train_time:22524822ms step_avg:571.73ms +step:39399/57344 train_time:22525383ms step_avg:571.72ms +grad accum step:9850/14336 +step:39400/57344 train_time:22526719ms step_avg:571.74ms +step:39401/57344 train_time:22526741ms step_avg:571.73ms +step:39402/57344 train_time:22526964ms step_avg:571.72ms +step:39403/57344 train_time:22527510ms step_avg:571.72ms +grad accum step:9851/14336 +step:39404/57344 train_time:22528831ms step_avg:571.74ms +step:39405/57344 train_time:22528851ms step_avg:571.73ms +step:39406/57344 train_time:22529088ms step_avg:571.72ms +step:39407/57344 train_time:22529642ms step_avg:571.72ms +grad accum step:9852/14336 +step:39408/57344 train_time:22530948ms step_avg:571.74ms +step:39409/57344 train_time:22530964ms step_avg:571.72ms +step:39410/57344 train_time:22531213ms step_avg:571.71ms +step:39411/57344 train_time:22531781ms step_avg:571.71ms +grad accum step:9853/14336 +step:39412/57344 train_time:22533438ms step_avg:571.74ms +step:39413/57344 train_time:22533450ms step_avg:571.73ms +step:39414/57344 train_time:22533667ms step_avg:571.72ms +step:39415/57344 train_time:22534202ms step_avg:571.72ms +grad accum step:9854/14336 +step:39416/57344 train_time:22535487ms step_avg:571.73ms +step:39417/57344 train_time:22535507ms step_avg:571.72ms +step:39418/57344 train_time:22535752ms step_avg:571.71ms +step:39419/57344 train_time:22536311ms step_avg:571.71ms +grad accum step:9855/14336 +step:39420/57344 train_time:22537614ms step_avg:571.73ms +step:39421/57344 train_time:22537630ms step_avg:571.72ms +step:39422/57344 train_time:22537861ms step_avg:571.71ms +step:39423/57344 train_time:22538409ms step_avg:571.71ms +grad accum step:9856/14336 +step:39424/57344 train_time:22539734ms step_avg:571.73ms +step:39424/57344 val_loss:5.801020 train_time:22539737ms step_avg:571.73ms +step:39425/57344 train_time:22539748ms step_avg:571.71ms +step:39426/57344 train_time:22539977ms step_avg:571.70ms +step:39427/57344 train_time:22540548ms step_avg:571.70ms +grad accum step:9857/14336 +step:39428/57344 train_time:22541911ms step_avg:571.72ms +step:39429/57344 train_time:22541928ms step_avg:571.71ms +step:39430/57344 train_time:22542173ms step_avg:571.70ms +step:39431/57344 train_time:22542740ms step_avg:571.70ms +grad accum step:9858/14336 +step:39432/57344 train_time:22544112ms step_avg:571.72ms +step:39433/57344 train_time:22544129ms step_avg:571.71ms +step:39434/57344 train_time:22544379ms step_avg:571.70ms +step:39435/57344 train_time:22544934ms step_avg:571.70ms +grad accum step:9859/14336 +step:39436/57344 train_time:22546253ms step_avg:571.72ms +step:39437/57344 train_time:22546273ms step_avg:571.70ms +step:39438/57344 train_time:22546512ms step_avg:571.70ms +step:39439/57344 train_time:22547064ms step_avg:571.69ms +grad accum step:9860/14336 +step:39440/57344 train_time:22548391ms step_avg:571.71ms +step:39441/57344 train_time:22548407ms step_avg:571.70ms +step:39442/57344 train_time:22548643ms step_avg:571.69ms +step:39443/57344 train_time:22549187ms step_avg:571.69ms +grad accum step:9861/14336 +step:39444/57344 train_time:22550463ms step_avg:571.71ms +step:39445/57344 train_time:22550479ms step_avg:571.69ms +step:39446/57344 train_time:22550732ms step_avg:571.69ms +step:39447/57344 train_time:22551294ms step_avg:571.69ms +grad accum step:9862/14336 +step:39448/57344 train_time:22553875ms step_avg:571.74ms +step:39449/57344 train_time:22554791ms step_avg:571.75ms +step:39450/57344 train_time:22554851ms step_avg:571.73ms +step:39451/57344 train_time:22555277ms step_avg:571.73ms +grad accum step:9863/14336 +step:39452/57344 train_time:22556638ms step_avg:571.75ms +step:39453/57344 train_time:22556653ms step_avg:571.73ms +step:39454/57344 train_time:22556895ms step_avg:571.73ms +step:39455/57344 train_time:22557444ms step_avg:571.73ms +grad accum step:9864/14336 +step:39456/57344 train_time:22558780ms step_avg:571.75ms +step:39457/57344 train_time:22558797ms step_avg:571.73ms +step:39458/57344 train_time:22559051ms step_avg:571.72ms +step:39459/57344 train_time:22559633ms step_avg:571.72ms +grad accum step:9865/14336 +step:39460/57344 train_time:22561018ms step_avg:571.74ms +step:39461/57344 train_time:22561034ms step_avg:571.73ms +step:39462/57344 train_time:22561273ms step_avg:571.72ms +step:39463/57344 train_time:22561819ms step_avg:571.72ms +grad accum step:9866/14336 +step:39464/57344 train_time:22563134ms step_avg:571.74ms +step:39465/57344 train_time:22563150ms step_avg:571.73ms +step:39466/57344 train_time:22563399ms step_avg:571.72ms +step:39467/57344 train_time:22563951ms step_avg:571.72ms +grad accum step:9867/14336 +step:39468/57344 train_time:22565230ms step_avg:571.73ms +step:39469/57344 train_time:22565247ms step_avg:571.72ms +step:39470/57344 train_time:22565496ms step_avg:571.71ms +step:39471/57344 train_time:22566041ms step_avg:571.71ms +grad accum step:9868/14336 +step:39472/57344 train_time:22567323ms step_avg:571.73ms +step:39473/57344 train_time:22567338ms step_avg:571.72ms +step:39474/57344 train_time:22567581ms step_avg:571.71ms +step:39475/57344 train_time:22568127ms step_avg:571.71ms +grad accum step:9869/14336 +step:39476/57344 train_time:22569625ms step_avg:571.73ms +step:39477/57344 train_time:22569638ms step_avg:571.72ms +step:39478/57344 train_time:22569868ms step_avg:571.71ms +step:39479/57344 train_time:22570438ms step_avg:571.71ms +grad accum step:9870/14336 +step:39480/57344 train_time:22571729ms step_avg:571.73ms +step:39481/57344 train_time:22571744ms step_avg:571.71ms +step:39482/57344 train_time:22571991ms step_avg:571.70ms +step:39483/57344 train_time:22572538ms step_avg:571.70ms +grad accum step:9871/14336 +step:39484/57344 train_time:22573870ms step_avg:571.72ms +step:39485/57344 train_time:22573885ms step_avg:571.71ms +step:39486/57344 train_time:22574138ms step_avg:571.70ms +step:39487/57344 train_time:22574695ms step_avg:571.70ms +grad accum step:9872/14336 +step:39488/57344 train_time:22575993ms step_avg:571.72ms +step:39488/57344 val_loss:5.795458 train_time:22576002ms step_avg:571.72ms +step:39489/57344 train_time:22576658ms step_avg:571.72ms +step:39490/57344 train_time:22576685ms step_avg:571.71ms +step:39491/57344 train_time:22577100ms step_avg:571.70ms +grad accum step:9873/14336 +step:39492/57344 train_time:22578418ms step_avg:571.72ms +step:39493/57344 train_time:22578435ms step_avg:571.71ms +step:39494/57344 train_time:22578677ms step_avg:571.70ms +step:39495/57344 train_time:22579226ms step_avg:571.70ms +grad accum step:9874/14336 +step:39496/57344 train_time:22580540ms step_avg:571.72ms +step:39497/57344 train_time:22580557ms step_avg:571.70ms +step:39498/57344 train_time:22580823ms step_avg:571.70ms +step:39499/57344 train_time:22581431ms step_avg:571.70ms +grad accum step:9875/14336 +step:39500/57344 train_time:22582768ms step_avg:571.72ms +step:39501/57344 train_time:22582784ms step_avg:571.70ms +step:39502/57344 train_time:22583033ms step_avg:571.69ms +step:39503/57344 train_time:22583594ms step_avg:571.69ms +grad accum step:9876/14336 +step:39504/57344 train_time:22584942ms step_avg:571.71ms +step:39505/57344 train_time:22584958ms step_avg:571.70ms +step:39506/57344 train_time:22585207ms step_avg:571.69ms +step:39507/57344 train_time:22585756ms step_avg:571.69ms +grad accum step:9877/14336 +step:39508/57344 train_time:22587070ms step_avg:571.71ms +step:39509/57344 train_time:22587087ms step_avg:571.69ms +step:39510/57344 train_time:22587317ms step_avg:571.69ms +step:39511/57344 train_time:22587886ms step_avg:571.69ms +grad accum step:9878/14336 +step:39512/57344 train_time:22589217ms step_avg:571.71ms +step:39513/57344 train_time:22589233ms step_avg:571.69ms +step:39514/57344 train_time:22589475ms step_avg:571.68ms +step:39515/57344 train_time:22590009ms step_avg:571.68ms +grad accum step:9879/14336 +step:39516/57344 train_time:22591316ms step_avg:571.70ms +step:39517/57344 train_time:22591333ms step_avg:571.69ms +step:39518/57344 train_time:22591568ms step_avg:571.68ms +step:39519/57344 train_time:22592123ms step_avg:571.68ms +grad accum step:9880/14336 +step:39520/57344 train_time:22593476ms step_avg:571.70ms +step:39521/57344 train_time:22593495ms step_avg:571.68ms +step:39522/57344 train_time:22593723ms step_avg:571.67ms +step:39523/57344 train_time:22594269ms step_avg:571.67ms +grad accum step:9881/14336 +step:39524/57344 train_time:22595556ms step_avg:571.69ms +step:39525/57344 train_time:22595573ms step_avg:571.68ms +step:39526/57344 train_time:22595942ms step_avg:571.67ms +step:39527/57344 train_time:22596357ms step_avg:571.67ms +grad accum step:9882/14336 +step:39528/57344 train_time:22597681ms step_avg:571.69ms +step:39529/57344 train_time:22597698ms step_avg:571.67ms +step:39530/57344 train_time:22597946ms step_avg:571.67ms +step:39531/57344 train_time:22598519ms step_avg:571.67ms +grad accum step:9883/14336 +step:39532/57344 train_time:22599884ms step_avg:571.69ms +step:39533/57344 train_time:22599900ms step_avg:571.67ms +step:39534/57344 train_time:22600144ms step_avg:571.66ms +step:39535/57344 train_time:22600688ms step_avg:571.66ms +grad accum step:9884/14336 +step:39536/57344 train_time:22601976ms step_avg:571.68ms +step:39537/57344 train_time:22601992ms step_avg:571.67ms +step:39538/57344 train_time:22602239ms step_avg:571.66ms +step:39539/57344 train_time:22602794ms step_avg:571.66ms +grad accum step:9885/14336 +step:39540/57344 train_time:22604118ms step_avg:571.68ms +step:39541/57344 train_time:22604135ms step_avg:571.66ms +step:39542/57344 train_time:22604375ms step_avg:571.65ms +step:39543/57344 train_time:22604914ms step_avg:571.65ms +grad accum step:9886/14336 +step:39544/57344 train_time:22606231ms step_avg:571.67ms +step:39545/57344 train_time:22606243ms step_avg:571.66ms +step:39546/57344 train_time:22606464ms step_avg:571.65ms +step:39547/57344 train_time:22607009ms step_avg:571.65ms +grad accum step:9887/14336 +step:39548/57344 train_time:22608293ms step_avg:571.67ms +step:39549/57344 train_time:22608312ms step_avg:571.65ms +step:39550/57344 train_time:22608551ms step_avg:571.64ms +step:39551/57344 train_time:22609098ms step_avg:571.64ms +grad accum step:9888/14336 +step:39552/57344 train_time:22610593ms step_avg:571.67ms +step:39552/57344 val_loss:5.780212 train_time:22610594ms step_avg:571.67ms +step:39553/57344 train_time:22610605ms step_avg:571.65ms +step:39554/57344 train_time:22610827ms step_avg:571.64ms +step:39555/57344 train_time:22611372ms step_avg:571.64ms +grad accum step:9889/14336 +step:39556/57344 train_time:22612688ms step_avg:571.66ms +step:39557/57344 train_time:22612703ms step_avg:571.65ms +step:39558/57344 train_time:22612948ms step_avg:571.64ms +step:39559/57344 train_time:22613494ms step_avg:571.64ms +grad accum step:9890/14336 +step:39560/57344 train_time:22614816ms step_avg:571.66ms +step:39561/57344 train_time:22614832ms step_avg:571.64ms +step:39562/57344 train_time:22615076ms step_avg:571.64ms +step:39563/57344 train_time:22615620ms step_avg:571.64ms +grad accum step:9891/14336 +step:39564/57344 train_time:22616913ms step_avg:571.65ms +step:39565/57344 train_time:22616929ms step_avg:571.64ms +step:39566/57344 train_time:22617187ms step_avg:571.63ms +step:39567/57344 train_time:22617765ms step_avg:571.63ms +grad accum step:9892/14336 +step:39568/57344 train_time:22619093ms step_avg:571.65ms +step:39569/57344 train_time:22619116ms step_avg:571.64ms +step:39570/57344 train_time:22619358ms step_avg:571.63ms +step:39571/57344 train_time:22619932ms step_avg:571.63ms +grad accum step:9893/14336 +step:39572/57344 train_time:22621288ms step_avg:571.65ms +step:39573/57344 train_time:22621307ms step_avg:571.63ms +step:39574/57344 train_time:22621533ms step_avg:571.63ms +step:39575/57344 train_time:22622091ms step_avg:571.63ms +grad accum step:9894/14336 +step:39576/57344 train_time:22623388ms step_avg:571.64ms +step:39577/57344 train_time:22623407ms step_avg:571.63ms +step:39578/57344 train_time:22623649ms step_avg:571.62ms +step:39579/57344 train_time:22624199ms step_avg:571.62ms +grad accum step:9895/14336 +step:39580/57344 train_time:22625513ms step_avg:571.64ms +step:39581/57344 train_time:22625532ms step_avg:571.63ms +step:39582/57344 train_time:22625775ms step_avg:571.62ms +step:39583/57344 train_time:22626318ms step_avg:571.62ms +grad accum step:9896/14336 +step:39584/57344 train_time:22627641ms step_avg:571.64ms +step:39585/57344 train_time:22627655ms step_avg:571.62ms +step:39586/57344 train_time:22627902ms step_avg:571.61ms +step:39587/57344 train_time:22628450ms step_avg:571.61ms +grad accum step:9897/14336 +step:39588/57344 train_time:22629758ms step_avg:571.63ms +step:39589/57344 train_time:22629776ms step_avg:571.62ms +step:39590/57344 train_time:22630494ms step_avg:571.62ms +step:39591/57344 train_time:22630745ms step_avg:571.61ms +grad accum step:9898/14336 +step:39592/57344 train_time:22632133ms step_avg:571.63ms +step:39593/57344 train_time:22632145ms step_avg:571.62ms +step:39594/57344 train_time:22632365ms step_avg:571.61ms +step:39595/57344 train_time:22632912ms step_avg:571.61ms +grad accum step:9899/14336 +step:39596/57344 train_time:22634257ms step_avg:571.63ms +step:39597/57344 train_time:22634293ms step_avg:571.62ms +step:39598/57344 train_time:22634515ms step_avg:571.61ms +step:39599/57344 train_time:22635058ms step_avg:571.61ms +grad accum step:9900/14336 +step:39600/57344 train_time:22636345ms step_avg:571.62ms +step:39601/57344 train_time:22636363ms step_avg:571.61ms +step:39602/57344 train_time:22636606ms step_avg:571.60ms +step:39603/57344 train_time:22637152ms step_avg:571.60ms +grad accum step:9901/14336 +step:39604/57344 train_time:22638438ms step_avg:571.62ms +step:39605/57344 train_time:22638464ms step_avg:571.61ms +step:39606/57344 train_time:22638696ms step_avg:571.60ms +step:39607/57344 train_time:22639258ms step_avg:571.60ms +grad accum step:9902/14336 +step:39608/57344 train_time:22640662ms step_avg:571.62ms +step:39609/57344 train_time:22640678ms step_avg:571.60ms +step:39610/57344 train_time:22640917ms step_avg:571.60ms +step:39611/57344 train_time:22641470ms step_avg:571.60ms +grad accum step:9903/14336 +step:39612/57344 train_time:22642766ms step_avg:571.61ms +step:39613/57344 train_time:22642782ms step_avg:571.60ms +step:39614/57344 train_time:22643030ms step_avg:571.59ms +step:39615/57344 train_time:22643584ms step_avg:571.59ms +grad accum step:9904/14336 +step:39616/57344 train_time:22644942ms step_avg:571.61ms +step:39616/57344 val_loss:5.776663 train_time:22644944ms step_avg:571.61ms +step:39617/57344 train_time:22644956ms step_avg:571.60ms +step:39618/57344 train_time:22645184ms step_avg:571.59ms +step:39619/57344 train_time:22645752ms step_avg:571.59ms +grad accum step:9905/14336 +step:39620/57344 train_time:22647036ms step_avg:571.61ms +step:39621/57344 train_time:22647056ms step_avg:571.59ms +step:39622/57344 train_time:22647295ms step_avg:571.58ms +step:39623/57344 train_time:22647846ms step_avg:571.58ms +grad accum step:9906/14336 +step:39624/57344 train_time:22649200ms step_avg:571.60ms +step:39625/57344 train_time:22649218ms step_avg:571.59ms +step:39626/57344 train_time:22649455ms step_avg:571.58ms +step:39627/57344 train_time:22649998ms step_avg:571.58ms +grad accum step:9907/14336 +step:39628/57344 train_time:22651314ms step_avg:571.60ms +step:39629/57344 train_time:22651331ms step_avg:571.58ms +step:39630/57344 train_time:22651585ms step_avg:571.58ms +step:39631/57344 train_time:22652153ms step_avg:571.58ms +grad accum step:9908/14336 +step:39632/57344 train_time:22653503ms step_avg:571.60ms +step:39633/57344 train_time:22653520ms step_avg:571.58ms +step:39634/57344 train_time:22653765ms step_avg:571.57ms +step:39635/57344 train_time:22654307ms step_avg:571.57ms +grad accum step:9909/14336 +step:39636/57344 train_time:22655623ms step_avg:571.59ms +step:39637/57344 train_time:22655639ms step_avg:571.58ms +step:39638/57344 train_time:22655886ms step_avg:571.57ms +step:39639/57344 train_time:22656430ms step_avg:571.57ms +grad accum step:9910/14336 +step:39640/57344 train_time:22657729ms step_avg:571.59ms +step:39641/57344 train_time:22657744ms step_avg:571.57ms +step:39642/57344 train_time:22657991ms step_avg:571.57ms +step:39643/57344 train_time:22658536ms step_avg:571.56ms +grad accum step:9911/14336 +step:39644/57344 train_time:22659833ms step_avg:571.58ms +step:39645/57344 train_time:22659849ms step_avg:571.57ms +step:39646/57344 train_time:22660095ms step_avg:571.56ms +step:39647/57344 train_time:22660657ms step_avg:571.56ms +grad accum step:9912/14336 +step:39648/57344 train_time:22662001ms step_avg:571.58ms +step:39649/57344 train_time:22662016ms step_avg:571.57ms +step:39650/57344 train_time:22662265ms step_avg:571.56ms +step:39651/57344 train_time:22662832ms step_avg:571.56ms +grad accum step:9913/14336 +step:39652/57344 train_time:22664176ms step_avg:571.58ms +step:39653/57344 train_time:22664193ms step_avg:571.56ms +step:39654/57344 train_time:22664441ms step_avg:571.55ms +step:39655/57344 train_time:22664993ms step_avg:571.55ms +grad accum step:9914/14336 +step:39656/57344 train_time:22666320ms step_avg:571.57ms +step:39657/57344 train_time:22666337ms step_avg:571.56ms +step:39658/57344 train_time:22666588ms step_avg:571.55ms +step:39659/57344 train_time:22667134ms step_avg:571.55ms +grad accum step:9915/14336 +step:39660/57344 train_time:22668429ms step_avg:571.57ms +step:39661/57344 train_time:22668445ms step_avg:571.56ms +step:39662/57344 train_time:22668692ms step_avg:571.55ms +step:39663/57344 train_time:22669250ms step_avg:571.55ms +grad accum step:9916/14336 +step:39664/57344 train_time:22670563ms step_avg:571.57ms +step:39665/57344 train_time:22670580ms step_avg:571.55ms +step:39666/57344 train_time:22670827ms step_avg:571.54ms +step:39667/57344 train_time:22671374ms step_avg:571.54ms +grad accum step:9917/14336 +step:39668/57344 train_time:22672691ms step_avg:571.56ms +step:39669/57344 train_time:22672708ms step_avg:571.55ms +step:39670/57344 train_time:22672953ms step_avg:571.54ms +step:39671/57344 train_time:22673498ms step_avg:571.54ms +grad accum step:9918/14336 +step:39672/57344 train_time:22674801ms step_avg:571.56ms +step:39673/57344 train_time:22674815ms step_avg:571.54ms +step:39674/57344 train_time:22675058ms step_avg:571.53ms +step:39675/57344 train_time:22675607ms step_avg:571.53ms +grad accum step:9919/14336 +step:39676/57344 train_time:22676922ms step_avg:571.55ms +step:39677/57344 train_time:22676939ms step_avg:571.54ms +step:39678/57344 train_time:22677188ms step_avg:571.53ms +step:39679/57344 train_time:22677738ms step_avg:571.53ms +grad accum step:9920/14336 +step:39680/57344 train_time:22679027ms step_avg:571.55ms +step:39680/57344 val_loss:5.763973 train_time:22679028ms step_avg:571.55ms +step:39681/57344 train_time:22679040ms step_avg:571.53ms +step:39682/57344 train_time:22679268ms step_avg:571.53ms +step:39683/57344 train_time:22679822ms step_avg:571.52ms +grad accum step:9921/14336 +step:39684/57344 train_time:22681096ms step_avg:571.54ms +step:39685/57344 train_time:22681112ms step_avg:571.53ms +step:39686/57344 train_time:22681356ms step_avg:571.52ms +step:39687/57344 train_time:22681902ms step_avg:571.52ms +grad accum step:9922/14336 +step:39688/57344 train_time:22683204ms step_avg:571.54ms +step:39689/57344 train_time:22683221ms step_avg:571.52ms +step:39690/57344 train_time:22683469ms step_avg:571.52ms +step:39691/57344 train_time:22684020ms step_avg:571.52ms +grad accum step:9923/14336 +step:39692/57344 train_time:22685347ms step_avg:571.53ms +step:39693/57344 train_time:22685364ms step_avg:571.52ms +step:39694/57344 train_time:22685613ms step_avg:571.51ms +step:39695/57344 train_time:22686161ms step_avg:571.51ms +grad accum step:9924/14336 +step:39696/57344 train_time:22687510ms step_avg:571.53ms +step:39697/57344 train_time:22687527ms step_avg:571.52ms +step:39698/57344 train_time:22687774ms step_avg:571.51ms +step:39699/57344 train_time:22688319ms step_avg:571.51ms +grad accum step:9925/14336 +step:39700/57344 train_time:22689654ms step_avg:571.53ms +step:39701/57344 train_time:22689670ms step_avg:571.51ms +step:39702/57344 train_time:22689920ms step_avg:571.51ms +step:39703/57344 train_time:22690479ms step_avg:571.51ms +grad accum step:9926/14336 +step:39704/57344 train_time:22691776ms step_avg:571.52ms +step:39705/57344 train_time:22691792ms step_avg:571.51ms +step:39706/57344 train_time:22692041ms step_avg:571.50ms +step:39707/57344 train_time:22692592ms step_avg:571.50ms +grad accum step:9927/14336 +step:39708/57344 train_time:22693902ms step_avg:571.52ms +step:39709/57344 train_time:22693922ms step_avg:571.51ms +step:39710/57344 train_time:22694163ms step_avg:571.50ms +step:39711/57344 train_time:22694707ms step_avg:571.50ms +grad accum step:9928/14336 +step:39712/57344 train_time:22696038ms step_avg:571.52ms +step:39713/57344 train_time:22696052ms step_avg:571.50ms +step:39714/57344 train_time:22696271ms step_avg:571.49ms +step:39715/57344 train_time:22696811ms step_avg:571.49ms +grad accum step:9929/14336 +step:39716/57344 train_time:22698148ms step_avg:571.51ms +step:39717/57344 train_time:22698164ms step_avg:571.50ms +step:39718/57344 train_time:22698416ms step_avg:571.49ms +step:39719/57344 train_time:22698980ms step_avg:571.49ms +grad accum step:9930/14336 +step:39720/57344 train_time:22700270ms step_avg:571.51ms +step:39721/57344 train_time:22700287ms step_avg:571.49ms +step:39722/57344 train_time:22700536ms step_avg:571.49ms +step:39723/57344 train_time:22701092ms step_avg:571.48ms +grad accum step:9931/14336 +step:39724/57344 train_time:22702395ms step_avg:571.50ms +step:39725/57344 train_time:22702412ms step_avg:571.49ms +step:39726/57344 train_time:22702656ms step_avg:571.48ms +step:39727/57344 train_time:22703200ms step_avg:571.48ms +grad accum step:9932/14336 +step:39728/57344 train_time:22704478ms step_avg:571.50ms +step:39729/57344 train_time:22704493ms step_avg:571.48ms +step:39730/57344 train_time:22704742ms step_avg:571.48ms +step:39731/57344 train_time:22705289ms step_avg:571.48ms +grad accum step:9933/14336 +step:39732/57344 train_time:22706591ms step_avg:571.49ms +step:39733/57344 train_time:22706604ms step_avg:571.48ms +step:39734/57344 train_time:22706849ms step_avg:571.47ms +step:39735/57344 train_time:22707394ms step_avg:571.47ms +grad accum step:9934/14336 +step:39736/57344 train_time:22708690ms step_avg:571.49ms +step:39737/57344 train_time:22708707ms step_avg:571.48ms +step:39738/57344 train_time:22708952ms step_avg:571.47ms +step:39739/57344 train_time:22709500ms step_avg:571.47ms +grad accum step:9935/14336 +step:39740/57344 train_time:22718701ms step_avg:571.68ms +step:39741/57344 train_time:22718714ms step_avg:571.67ms +step:39742/57344 train_time:22719005ms step_avg:571.66ms +step:39743/57344 train_time:22719569ms step_avg:571.66ms +grad accum step:9936/14336 +step:39744/57344 train_time:22720948ms step_avg:571.68ms +step:39744/57344 val_loss:5.769980 train_time:22720949ms step_avg:571.68ms +step:39745/57344 train_time:22720961ms step_avg:571.67ms +step:39746/57344 train_time:22737611ms step_avg:572.07ms +step:39747/57344 train_time:22737910ms step_avg:572.07ms +grad accum step:9937/14336 +step:39748/57344 train_time:22739217ms step_avg:572.08ms +step:39749/57344 train_time:22739232ms step_avg:572.07ms +step:39750/57344 train_time:22739469ms step_avg:572.06ms +step:39751/57344 train_time:22739997ms step_avg:572.06ms +grad accum step:9938/14336 +step:39752/57344 train_time:22741320ms step_avg:572.08ms +step:39753/57344 train_time:22741337ms step_avg:572.07ms +step:39754/57344 train_time:22741581ms step_avg:572.06ms +step:39755/57344 train_time:22742126ms step_avg:572.06ms +grad accum step:9939/14336 +step:39756/57344 train_time:22743412ms step_avg:572.07ms +step:39757/57344 train_time:22743429ms step_avg:572.06ms +step:39758/57344 train_time:22743671ms step_avg:572.05ms +step:39759/57344 train_time:22744218ms step_avg:572.05ms +grad accum step:9940/14336 +step:39760/57344 train_time:22745563ms step_avg:572.07ms +step:39761/57344 train_time:22745579ms step_avg:572.06ms +step:39762/57344 train_time:22745828ms step_avg:572.05ms +step:39763/57344 train_time:22746380ms step_avg:572.05ms +grad accum step:9941/14336 +step:39764/57344 train_time:22747657ms step_avg:572.07ms +step:39765/57344 train_time:22747674ms step_avg:572.05ms +step:39766/57344 train_time:22747918ms step_avg:572.04ms +step:39767/57344 train_time:22748464ms step_avg:572.04ms +grad accum step:9942/14336 +step:39768/57344 train_time:22774394ms step_avg:572.68ms +step:39769/57344 train_time:22797040ms step_avg:573.24ms +step:39770/57344 train_time:22797304ms step_avg:573.23ms +step:39771/57344 train_time:22797848ms step_avg:573.23ms +grad accum step:9943/14336 +step:39772/57344 train_time:22799127ms step_avg:573.25ms +step:39773/57344 train_time:22799142ms step_avg:573.23ms +step:39774/57344 train_time:22799395ms step_avg:573.22ms +step:39775/57344 train_time:22799956ms step_avg:573.22ms +grad accum step:9944/14336 +step:39776/57344 train_time:22801229ms step_avg:573.24ms +step:39777/57344 train_time:22801246ms step_avg:573.23ms +step:39778/57344 train_time:22801496ms step_avg:573.22ms +step:39779/57344 train_time:22802056ms step_avg:573.22ms +grad accum step:9945/14336 +step:39780/57344 train_time:22803350ms step_avg:573.24ms +step:39781/57344 train_time:22803367ms step_avg:573.22ms +step:39782/57344 train_time:22803608ms step_avg:573.21ms +step:39783/57344 train_time:22804140ms step_avg:573.21ms +grad accum step:9946/14336 +step:39784/57344 train_time:22809320ms step_avg:573.33ms +step:39785/57344 train_time:22809332ms step_avg:573.31ms +step:39786/57344 train_time:22809613ms step_avg:573.31ms +step:39787/57344 train_time:22810165ms step_avg:573.31ms +grad accum step:9947/14336 +step:39788/57344 train_time:22811444ms step_avg:573.32ms +step:39789/57344 train_time:22811460ms step_avg:573.31ms +step:39790/57344 train_time:22811705ms step_avg:573.30ms +step:39791/57344 train_time:22812267ms step_avg:573.30ms +grad accum step:9948/14336 +step:39792/57344 train_time:22813632ms step_avg:573.32ms +step:39793/57344 train_time:22813649ms step_avg:573.31ms +step:39794/57344 train_time:22813890ms step_avg:573.30ms +step:39795/57344 train_time:22814431ms step_avg:573.30ms +grad accum step:9949/14336 +step:39796/57344 train_time:22815727ms step_avg:573.32ms +step:39797/57344 train_time:22815743ms step_avg:573.30ms +step:39798/57344 train_time:22815985ms step_avg:573.29ms +step:39799/57344 train_time:22816545ms step_avg:573.29ms +grad accum step:9950/14336 +step:39800/57344 train_time:22817859ms step_avg:573.31ms +step:39801/57344 train_time:22817878ms step_avg:573.30ms +step:39802/57344 train_time:22818094ms step_avg:573.29ms +step:39803/57344 train_time:22818630ms step_avg:573.29ms +grad accum step:9951/14336 +step:39804/57344 train_time:22819917ms step_avg:573.31ms +step:39805/57344 train_time:22819936ms step_avg:573.29ms +step:39806/57344 train_time:22820175ms step_avg:573.28ms +step:39807/57344 train_time:22820720ms step_avg:573.28ms +grad accum step:9952/14336 +step:39808/57344 train_time:22822022ms step_avg:573.30ms +step:39808/57344 val_loss:5.750341 train_time:22822029ms step_avg:573.30ms +step:39809/57344 train_time:22822041ms step_avg:573.29ms +step:39810/57344 train_time:22822265ms step_avg:573.28ms +step:39811/57344 train_time:22822813ms step_avg:573.28ms +grad accum step:9953/14336 +step:39812/57344 train_time:22824221ms step_avg:573.30ms +step:39813/57344 train_time:22824243ms step_avg:573.29ms +step:39814/57344 train_time:22824458ms step_avg:573.28ms +step:39815/57344 train_time:22825001ms step_avg:573.28ms +grad accum step:9954/14336 +step:39816/57344 train_time:22826317ms step_avg:573.30ms +step:39817/57344 train_time:22826334ms step_avg:573.28ms +step:39818/57344 train_time:22826577ms step_avg:573.27ms +step:39819/57344 train_time:22827114ms step_avg:573.27ms +grad accum step:9955/14336 +step:39820/57344 train_time:22828960ms step_avg:573.30ms +step:39821/57344 train_time:22828987ms step_avg:573.29ms +step:39822/57344 train_time:22829207ms step_avg:573.28ms +step:39823/57344 train_time:22829764ms step_avg:573.28ms +grad accum step:9956/14336 +step:39824/57344 train_time:22831071ms step_avg:573.30ms +step:39825/57344 train_time:22831087ms step_avg:573.29ms +step:39826/57344 train_time:22831340ms step_avg:573.28ms +step:39827/57344 train_time:22831899ms step_avg:573.28ms +grad accum step:9957/14336 +step:39828/57344 train_time:22833181ms step_avg:573.29ms +step:39829/57344 train_time:22833198ms step_avg:573.28ms +step:39830/57344 train_time:22833443ms step_avg:573.27ms +step:39831/57344 train_time:22834004ms step_avg:573.27ms +grad accum step:9958/14336 +step:39832/57344 train_time:22835305ms step_avg:573.29ms +step:39833/57344 train_time:22835319ms step_avg:573.28ms +step:39834/57344 train_time:22835566ms step_avg:573.27ms +step:39835/57344 train_time:22836120ms step_avg:573.27ms +grad accum step:9959/14336 +step:39836/57344 train_time:22837459ms step_avg:573.29ms +step:39837/57344 train_time:22837471ms step_avg:573.27ms +step:39838/57344 train_time:22837691ms step_avg:573.26ms +step:39839/57344 train_time:22838238ms step_avg:573.26ms +grad accum step:9960/14336 +step:39840/57344 train_time:22839558ms step_avg:573.28ms +step:39841/57344 train_time:22839574ms step_avg:573.27ms +step:39842/57344 train_time:22839822ms step_avg:573.26ms +step:39843/57344 train_time:22840384ms step_avg:573.26ms +grad accum step:9961/14336 +step:39844/57344 train_time:22841673ms step_avg:573.28ms +step:39845/57344 train_time:22841688ms step_avg:573.26ms +step:39846/57344 train_time:22841935ms step_avg:573.26ms +step:39847/57344 train_time:22842478ms step_avg:573.25ms +grad accum step:9962/14336 +step:39848/57344 train_time:22843821ms step_avg:573.27ms +step:39849/57344 train_time:22843838ms step_avg:573.26ms +step:39850/57344 train_time:22844076ms step_avg:573.25ms +step:39851/57344 train_time:22844618ms step_avg:573.25ms +grad accum step:9963/14336 +step:39852/57344 train_time:22845913ms step_avg:573.27ms +step:39853/57344 train_time:22845929ms step_avg:573.25ms +step:39854/57344 train_time:22846173ms step_avg:573.25ms +step:39855/57344 train_time:22846712ms step_avg:573.25ms +grad accum step:9964/14336 +step:39856/57344 train_time:22848084ms step_avg:573.27ms +step:39857/57344 train_time:22848107ms step_avg:573.25ms +step:39858/57344 train_time:22848328ms step_avg:573.24ms +step:39859/57344 train_time:22848880ms step_avg:573.24ms +grad accum step:9965/14336 +step:39860/57344 train_time:22850207ms step_avg:573.26ms +step:39861/57344 train_time:22850221ms step_avg:573.25ms +step:39862/57344 train_time:22850464ms step_avg:573.24ms +step:39863/57344 train_time:22851016ms step_avg:573.24ms +grad accum step:9966/14336 +step:39864/57344 train_time:22852341ms step_avg:573.26ms +step:39865/57344 train_time:22852356ms step_avg:573.24ms +step:39866/57344 train_time:22852606ms step_avg:573.24ms +step:39867/57344 train_time:22853159ms step_avg:573.23ms +grad accum step:9967/14336 +step:39868/57344 train_time:22854506ms step_avg:573.25ms +step:39869/57344 train_time:22854520ms step_avg:573.24ms +step:39870/57344 train_time:22854761ms step_avg:573.23ms +step:39871/57344 train_time:22855309ms step_avg:573.23ms +grad accum step:9968/14336 +step:39872/57344 train_time:22856611ms step_avg:573.25ms +step:39872/57344 val_loss:5.738113 train_time:22856612ms step_avg:573.25ms +step:39873/57344 train_time:22856624ms step_avg:573.24ms +step:39874/57344 train_time:22856850ms step_avg:573.23ms +step:39875/57344 train_time:22857397ms step_avg:573.23ms +grad accum step:9969/14336 +step:39876/57344 train_time:22858677ms step_avg:573.24ms +step:39877/57344 train_time:22858692ms step_avg:573.23ms +step:39878/57344 train_time:22858935ms step_avg:573.22ms +step:39879/57344 train_time:22859486ms step_avg:573.22ms +grad accum step:9970/14336 +step:39880/57344 train_time:22860818ms step_avg:573.24ms +step:39881/57344 train_time:22860853ms step_avg:573.23ms +step:39882/57344 train_time:22861084ms step_avg:573.22ms +step:39883/57344 train_time:22861642ms step_avg:573.22ms +grad accum step:9971/14336 +step:39884/57344 train_time:22862946ms step_avg:573.24ms +step:39885/57344 train_time:22862962ms step_avg:573.22ms +step:39886/57344 train_time:22863210ms step_avg:573.21ms +step:39887/57344 train_time:22863756ms step_avg:573.21ms +grad accum step:9972/14336 +step:39888/57344 train_time:22865053ms step_avg:573.23ms +step:39889/57344 train_time:22865071ms step_avg:573.22ms +step:39890/57344 train_time:22865314ms step_avg:573.21ms +step:39891/57344 train_time:22865859ms step_avg:573.21ms +grad accum step:9973/14336 +step:39892/57344 train_time:22867178ms step_avg:573.23ms +step:39893/57344 train_time:22867195ms step_avg:573.21ms +step:39894/57344 train_time:22867453ms step_avg:573.21ms +step:39895/57344 train_time:22868026ms step_avg:573.21ms +grad accum step:9974/14336 +step:39896/57344 train_time:22869359ms step_avg:573.22ms +step:39897/57344 train_time:22869377ms step_avg:573.21ms +step:39898/57344 train_time:22869610ms step_avg:573.20ms +step:39899/57344 train_time:22870157ms step_avg:573.20ms +grad accum step:9975/14336 +step:39900/57344 train_time:22871596ms step_avg:573.22ms +step:39901/57344 train_time:22871611ms step_avg:573.21ms +step:39902/57344 train_time:22871863ms step_avg:573.20ms +step:39903/57344 train_time:22872430ms step_avg:573.20ms +grad accum step:9976/14336 +step:39904/57344 train_time:22873728ms step_avg:573.22ms +step:39905/57344 train_time:22873746ms step_avg:573.21ms +step:39906/57344 train_time:22873985ms step_avg:573.20ms +step:39907/57344 train_time:22874533ms step_avg:573.20ms +grad accum step:9977/14336 +step:39908/57344 train_time:22875854ms step_avg:573.21ms +step:39909/57344 train_time:22875870ms step_avg:573.20ms +step:39910/57344 train_time:22876116ms step_avg:573.19ms +step:39911/57344 train_time:22876661ms step_avg:573.19ms +grad accum step:9978/14336 +step:39912/57344 train_time:22877945ms step_avg:573.21ms +step:39913/57344 train_time:22877959ms step_avg:573.20ms +step:39914/57344 train_time:22878203ms step_avg:573.19ms +step:39915/57344 train_time:22878751ms step_avg:573.19ms +grad accum step:9979/14336 +step:39916/57344 train_time:22880073ms step_avg:573.21ms +step:39917/57344 train_time:22880089ms step_avg:573.19ms +step:39918/57344 train_time:22880340ms step_avg:573.18ms +step:39919/57344 train_time:22880912ms step_avg:573.18ms +grad accum step:9980/14336 +step:39920/57344 train_time:22882250ms step_avg:573.20ms +step:39921/57344 train_time:22882265ms step_avg:573.19ms +step:39922/57344 train_time:22882512ms step_avg:573.18ms +step:39923/57344 train_time:22883058ms step_avg:573.18ms +grad accum step:9981/14336 +step:39924/57344 train_time:22884502ms step_avg:573.20ms +step:39925/57344 train_time:22884516ms step_avg:573.19ms +step:39926/57344 train_time:22884738ms step_avg:573.18ms +step:39927/57344 train_time:22885292ms step_avg:573.18ms +grad accum step:9982/14336 +step:39928/57344 train_time:22886599ms step_avg:573.20ms +step:39929/57344 train_time:22886614ms step_avg:573.18ms +step:39930/57344 train_time:22886859ms step_avg:573.17ms +step:39931/57344 train_time:22887396ms step_avg:573.17ms +grad accum step:9983/14336 +step:39932/57344 train_time:22888714ms step_avg:573.19ms +step:39933/57344 train_time:22888731ms step_avg:573.18ms +step:39934/57344 train_time:22888966ms step_avg:573.17ms +step:39935/57344 train_time:22889525ms step_avg:573.17ms +grad accum step:9984/14336 +step:39936/57344 train_time:22890826ms step_avg:573.19ms +step:39936/57344 val_loss:5.732807 train_time:22890831ms step_avg:573.19ms +step:39937/57344 train_time:22890843ms step_avg:573.17ms +step:39938/57344 train_time:22891067ms step_avg:573.17ms +step:39939/57344 train_time:22891625ms step_avg:573.16ms +grad accum step:9985/14336 +step:39940/57344 train_time:22892930ms step_avg:573.18ms +step:39941/57344 train_time:22892944ms step_avg:573.17ms +step:39942/57344 train_time:22893190ms step_avg:573.16ms +step:39943/57344 train_time:22893741ms step_avg:573.16ms +grad accum step:9986/14336 +step:39944/57344 train_time:22895060ms step_avg:573.18ms +step:39945/57344 train_time:22896150ms step_avg:573.19ms +step:39946/57344 train_time:22896250ms step_avg:573.18ms +step:39947/57344 train_time:22896801ms step_avg:573.18ms +grad accum step:9987/14336 +step:39948/57344 train_time:22898199ms step_avg:573.20ms +step:39949/57344 train_time:22898223ms step_avg:573.19ms +step:39950/57344 train_time:22898442ms step_avg:573.18ms +step:39951/57344 train_time:22898993ms step_avg:573.18ms +grad accum step:9988/14336 +step:39952/57344 train_time:22900333ms step_avg:573.20ms +step:39953/57344 train_time:22900349ms step_avg:573.18ms +step:39954/57344 train_time:22900591ms step_avg:573.17ms +step:39955/57344 train_time:22901142ms step_avg:573.17ms +grad accum step:9989/14336 +step:39956/57344 train_time:22902440ms step_avg:573.19ms +step:39957/57344 train_time:22902460ms step_avg:573.18ms +step:39958/57344 train_time:22902697ms step_avg:573.17ms +step:39959/57344 train_time:22903245ms step_avg:573.17ms +grad accum step:9990/14336 +step:39960/57344 train_time:22904547ms step_avg:573.19ms +step:39961/57344 train_time:22904563ms step_avg:573.17ms +step:39962/57344 train_time:22904814ms step_avg:573.16ms +step:39963/57344 train_time:22905372ms step_avg:573.16ms +grad accum step:9991/14336 +step:39964/57344 train_time:22906712ms step_avg:573.18ms +step:39965/57344 train_time:22906736ms step_avg:573.17ms +step:39966/57344 train_time:22906965ms step_avg:573.16ms +step:39967/57344 train_time:22907537ms step_avg:573.16ms +grad accum step:9992/14336 +step:39968/57344 train_time:22908889ms step_avg:573.18ms +step:39969/57344 train_time:22908904ms step_avg:573.17ms +step:39970/57344 train_time:22909148ms step_avg:573.16ms +step:39971/57344 train_time:22909697ms step_avg:573.16ms +grad accum step:9993/14336 +step:39972/57344 train_time:22911022ms step_avg:573.18ms +step:39973/57344 train_time:22911040ms step_avg:573.16ms +step:39974/57344 train_time:22911285ms step_avg:573.15ms +step:39975/57344 train_time:22911843ms step_avg:573.15ms +grad accum step:9994/14336 +step:39976/57344 train_time:22913154ms step_avg:573.17ms +step:39977/57344 train_time:22913172ms step_avg:573.16ms +step:39978/57344 train_time:22913411ms step_avg:573.15ms +step:39979/57344 train_time:22913968ms step_avg:573.15ms +grad accum step:9995/14336 +step:39980/57344 train_time:22915311ms step_avg:573.17ms +step:39981/57344 train_time:22915326ms step_avg:573.16ms +step:39982/57344 train_time:22915580ms step_avg:573.15ms +step:39983/57344 train_time:22916156ms step_avg:573.15ms +grad accum step:9996/14336 +step:39984/57344 train_time:22917523ms step_avg:573.17ms +step:39985/57344 train_time:22917540ms step_avg:573.15ms +step:39986/57344 train_time:22917779ms step_avg:573.15ms +step:39987/57344 train_time:22918318ms step_avg:573.14ms +grad accum step:9997/14336 +step:39988/57344 train_time:22919691ms step_avg:573.16ms +step:39989/57344 train_time:22919706ms step_avg:573.15ms +step:39990/57344 train_time:22919922ms step_avg:573.14ms +step:39991/57344 train_time:22920460ms step_avg:573.14ms +grad accum step:9998/14336 +step:39992/57344 train_time:22921800ms step_avg:573.16ms +step:39993/57344 train_time:22921818ms step_avg:573.15ms +step:39994/57344 train_time:22922062ms step_avg:573.14ms +step:39995/57344 train_time:22922614ms step_avg:573.14ms +grad accum step:9999/14336 +step:39996/57344 train_time:22923928ms step_avg:573.16ms +step:39997/57344 train_time:22923944ms step_avg:573.14ms +step:39998/57344 train_time:22924181ms step_avg:573.13ms +step:39999/57344 train_time:22924727ms step_avg:573.13ms +grad accum step:10000/14336 +step:40000/57344 train_time:22926050ms step_avg:573.15ms +step:40000/57344 val_loss:5.725449 train_time:22926057ms step_avg:573.15ms +step:40001/57344 train_time:22926069ms step_avg:573.14ms +step:40002/57344 train_time:22926293ms step_avg:573.13ms +step:40003/57344 train_time:22926836ms step_avg:573.13ms +grad accum step:10001/14336 +step:40004/57344 train_time:22928169ms step_avg:573.15ms +step:40005/57344 train_time:22928188ms step_avg:573.13ms +step:40006/57344 train_time:22928433ms step_avg:573.12ms +step:40007/57344 train_time:22928995ms step_avg:573.12ms +grad accum step:10002/14336 +step:40008/57344 train_time:22930294ms step_avg:573.14ms +step:40009/57344 train_time:22930310ms step_avg:573.13ms +step:40010/57344 train_time:22930558ms step_avg:573.12ms +step:40011/57344 train_time:22931103ms step_avg:573.12ms +grad accum step:10003/14336 +step:40012/57344 train_time:22932383ms step_avg:573.14ms +step:40013/57344 train_time:22932400ms step_avg:573.12ms +step:40014/57344 train_time:22932647ms step_avg:573.12ms +step:40015/57344 train_time:22933202ms step_avg:573.12ms +grad accum step:10004/14336 +step:40016/57344 train_time:22934545ms step_avg:573.13ms +step:40017/57344 train_time:22934564ms step_avg:573.12ms +step:40018/57344 train_time:22934780ms step_avg:573.11ms +step:40019/57344 train_time:22935323ms step_avg:573.11ms +grad accum step:10005/14336 +step:40020/57344 train_time:22936669ms step_avg:573.13ms +step:40021/57344 train_time:22936684ms step_avg:573.12ms +step:40022/57344 train_time:22936905ms step_avg:573.11ms +step:40023/57344 train_time:22937446ms step_avg:573.11ms +grad accum step:10006/14336 +step:40024/57344 train_time:22938755ms step_avg:573.12ms +step:40025/57344 train_time:22938770ms step_avg:573.11ms +step:40026/57344 train_time:22939072ms step_avg:573.10ms +step:40027/57344 train_time:22939577ms step_avg:573.10ms +grad accum step:10007/14336 +step:40028/57344 train_time:22940932ms step_avg:573.12ms +step:40029/57344 train_time:22940948ms step_avg:573.11ms +step:40030/57344 train_time:22941169ms step_avg:573.10ms +step:40031/57344 train_time:22941718ms step_avg:573.10ms +grad accum step:10008/14336 +step:40032/57344 train_time:22943049ms step_avg:573.12ms +step:40033/57344 train_time:22943064ms step_avg:573.10ms +step:40034/57344 train_time:22943306ms step_avg:573.10ms +step:40035/57344 train_time:22943856ms step_avg:573.09ms +grad accum step:10009/14336 +step:40036/57344 train_time:22945159ms step_avg:573.11ms +step:40037/57344 train_time:22945175ms step_avg:573.10ms +step:40038/57344 train_time:22945424ms step_avg:573.09ms +step:40039/57344 train_time:22945984ms step_avg:573.09ms +grad accum step:10010/14336 +step:40040/57344 train_time:22947291ms step_avg:573.11ms +step:40041/57344 train_time:22947310ms step_avg:573.10ms +step:40042/57344 train_time:22947549ms step_avg:573.09ms +step:40043/57344 train_time:22948096ms step_avg:573.09ms +grad accum step:10011/14336 +step:40044/57344 train_time:22949402ms step_avg:573.10ms +step:40045/57344 train_time:22949420ms step_avg:573.09ms +step:40046/57344 train_time:22949666ms step_avg:573.08ms +step:40047/57344 train_time:22950222ms step_avg:573.08ms +grad accum step:10012/14336 +step:40048/57344 train_time:22951520ms step_avg:573.10ms +step:40049/57344 train_time:22951536ms step_avg:573.09ms +step:40050/57344 train_time:22951782ms step_avg:573.08ms +step:40051/57344 train_time:22952325ms step_avg:573.08ms +grad accum step:10013/14336 +step:40052/57344 train_time:22953668ms step_avg:573.10ms +step:40053/57344 train_time:22953697ms step_avg:573.08ms +step:40054/57344 train_time:22953930ms step_avg:573.07ms +step:40055/57344 train_time:22954476ms step_avg:573.07ms +grad accum step:10014/14336 +step:40056/57344 train_time:22955831ms step_avg:573.09ms +step:40057/57344 train_time:22955848ms step_avg:573.08ms +step:40058/57344 train_time:22956095ms step_avg:573.07ms +step:40059/57344 train_time:22956642ms step_avg:573.07ms +grad accum step:10015/14336 +step:40060/57344 train_time:22957986ms step_avg:573.09ms +step:40061/57344 train_time:22958016ms step_avg:573.08ms +step:40062/57344 train_time:22958247ms step_avg:573.07ms +step:40063/57344 train_time:22958808ms step_avg:573.07ms +grad accum step:10016/14336 +step:40064/57344 train_time:22960132ms step_avg:573.09ms +step:40064/57344 val_loss:5.717214 train_time:22960136ms step_avg:573.09ms +step:40065/57344 train_time:22960148ms step_avg:573.07ms +step:40066/57344 train_time:22960370ms step_avg:573.06ms +step:40067/57344 train_time:22960923ms step_avg:573.06ms +grad accum step:10017/14336 +step:40068/57344 train_time:22962233ms step_avg:573.08ms +step:40069/57344 train_time:22962249ms step_avg:573.07ms +step:40070/57344 train_time:22962492ms step_avg:573.06ms +step:40071/57344 train_time:22963029ms step_avg:573.06ms +grad accum step:10018/14336 +step:40072/57344 train_time:22964333ms step_avg:573.08ms +step:40073/57344 train_time:22964352ms step_avg:573.06ms +step:40074/57344 train_time:22964595ms step_avg:573.05ms +step:40075/57344 train_time:22965145ms step_avg:573.05ms +grad accum step:10019/14336 +step:40076/57344 train_time:22966463ms step_avg:573.07ms +step:40077/57344 train_time:22966498ms step_avg:573.06ms +step:40078/57344 train_time:22966724ms step_avg:573.05ms +step:40079/57344 train_time:22967281ms step_avg:573.05ms +grad accum step:10020/14336 +step:40080/57344 train_time:22968627ms step_avg:573.07ms +step:40081/57344 train_time:22968643ms step_avg:573.06ms +step:40082/57344 train_time:22968889ms step_avg:573.05ms +step:40083/57344 train_time:22969446ms step_avg:573.05ms +grad accum step:10021/14336 +step:40084/57344 train_time:22970785ms step_avg:573.07ms +step:40085/57344 train_time:22970800ms step_avg:573.05ms +step:40086/57344 train_time:22971053ms step_avg:573.04ms +step:40087/57344 train_time:22971611ms step_avg:573.04ms +grad accum step:10022/14336 +step:40088/57344 train_time:22972916ms step_avg:573.06ms +step:40089/57344 train_time:22972930ms step_avg:573.05ms +step:40090/57344 train_time:22973175ms step_avg:573.04ms +step:40091/57344 train_time:22973715ms step_avg:573.04ms +grad accum step:10023/14336 +step:40092/57344 train_time:22975030ms step_avg:573.06ms +step:40093/57344 train_time:22975049ms step_avg:573.04ms +step:40094/57344 train_time:22975290ms step_avg:573.04ms +step:40095/57344 train_time:22975843ms step_avg:573.04ms +grad accum step:10024/14336 +step:40096/57344 train_time:22977161ms step_avg:573.05ms +step:40097/57344 train_time:22977177ms step_avg:573.04ms +step:40098/57344 train_time:22977420ms step_avg:573.03ms +step:40099/57344 train_time:22977969ms step_avg:573.03ms +grad accum step:10025/14336 +step:40100/57344 train_time:22979301ms step_avg:573.05ms +step:40101/57344 train_time:22979330ms step_avg:573.04ms +step:40102/57344 train_time:22979550ms step_avg:573.03ms +step:40103/57344 train_time:22980094ms step_avg:573.03ms +grad accum step:10026/14336 +step:40104/57344 train_time:22981458ms step_avg:573.05ms +step:40105/57344 train_time:22981484ms step_avg:573.03ms +step:40106/57344 train_time:22981708ms step_avg:573.02ms +step:40107/57344 train_time:22982274ms step_avg:573.02ms +grad accum step:10027/14336 +step:40108/57344 train_time:22983579ms step_avg:573.04ms +step:40109/57344 train_time:22983596ms step_avg:573.03ms +step:40110/57344 train_time:22983840ms step_avg:573.02ms +step:40111/57344 train_time:22984393ms step_avg:573.02ms +grad accum step:10028/14336 +step:40112/57344 train_time:22985862ms step_avg:573.04ms +step:40113/57344 train_time:22985879ms step_avg:573.03ms +step:40114/57344 train_time:22986094ms step_avg:573.02ms +step:40115/57344 train_time:22986642ms step_avg:573.02ms +grad accum step:10029/14336 +step:40116/57344 train_time:22987939ms step_avg:573.04ms +step:40117/57344 train_time:22987955ms step_avg:573.02ms +step:40118/57344 train_time:22988200ms step_avg:573.01ms +step:40119/57344 train_time:22988745ms step_avg:573.01ms +grad accum step:10030/14336 +step:40120/57344 train_time:22990047ms step_avg:573.03ms +step:40121/57344 train_time:22990061ms step_avg:573.02ms +step:40122/57344 train_time:22990306ms step_avg:573.01ms +step:40123/57344 train_time:22990848ms step_avg:573.01ms +grad accum step:10031/14336 +step:40124/57344 train_time:22992166ms step_avg:573.03ms +step:40125/57344 train_time:22992182ms step_avg:573.01ms +step:40126/57344 train_time:22992432ms step_avg:573.01ms +step:40127/57344 train_time:22992996ms step_avg:573.01ms +grad accum step:10032/14336 +step:40128/57344 train_time:22994395ms step_avg:573.03ms +step:40128/57344 val_loss:5.710665 train_time:22994398ms step_avg:573.03ms +step:40129/57344 train_time:22994410ms step_avg:573.01ms +step:40130/57344 train_time:22994646ms step_avg:573.00ms +step:40131/57344 train_time:22995226ms step_avg:573.00ms +grad accum step:10033/14336 +step:40132/57344 train_time:22996589ms step_avg:573.02ms +step:40133/57344 train_time:22996606ms step_avg:573.01ms +step:40134/57344 train_time:22996845ms step_avg:573.00ms +step:40135/57344 train_time:22997391ms step_avg:573.00ms +grad accum step:10034/14336 +step:40136/57344 train_time:22998713ms step_avg:573.02ms +step:40137/57344 train_time:22998735ms step_avg:573.01ms +step:40138/57344 train_time:22998975ms step_avg:573.00ms +step:40139/57344 train_time:22999521ms step_avg:573.00ms +grad accum step:10035/14336 +step:40140/57344 train_time:23000939ms step_avg:573.02ms +step:40141/57344 train_time:23000954ms step_avg:573.00ms +step:40142/57344 train_time:23001178ms step_avg:573.00ms +step:40143/57344 train_time:23001741ms step_avg:573.00ms +grad accum step:10036/14336 +step:40144/57344 train_time:23003055ms step_avg:573.01ms +step:40145/57344 train_time:23003071ms step_avg:573.00ms +step:40146/57344 train_time:23003320ms step_avg:572.99ms +step:40147/57344 train_time:23003867ms step_avg:572.99ms +grad accum step:10037/14336 +step:40148/57344 train_time:23005162ms step_avg:573.01ms +step:40149/57344 train_time:23005176ms step_avg:572.99ms +step:40150/57344 train_time:23005424ms step_avg:572.99ms +step:40151/57344 train_time:23005973ms step_avg:572.99ms +grad accum step:10038/14336 +step:40152/57344 train_time:23007276ms step_avg:573.00ms +step:40153/57344 train_time:23007290ms step_avg:572.99ms +step:40154/57344 train_time:23007534ms step_avg:572.98ms +step:40155/57344 train_time:23008074ms step_avg:572.98ms +grad accum step:10039/14336 +step:40156/57344 train_time:23009355ms step_avg:573.00ms +step:40157/57344 train_time:23009371ms step_avg:572.99ms +step:40158/57344 train_time:23009615ms step_avg:572.98ms +step:40159/57344 train_time:23010150ms step_avg:572.98ms +grad accum step:10040/14336 +step:40160/57344 train_time:23011495ms step_avg:573.00ms +step:40161/57344 train_time:23011511ms step_avg:572.98ms +step:40162/57344 train_time:23011758ms step_avg:572.97ms +step:40163/57344 train_time:23012296ms step_avg:572.97ms +grad accum step:10041/14336 +step:40164/57344 train_time:23013574ms step_avg:572.99ms +step:40165/57344 train_time:23013590ms step_avg:572.98ms +step:40166/57344 train_time:23013839ms step_avg:572.97ms +step:40167/57344 train_time:23014390ms step_avg:572.97ms +grad accum step:10042/14336 +step:40168/57344 train_time:23015706ms step_avg:572.99ms +step:40169/57344 train_time:23015723ms step_avg:572.97ms +step:40170/57344 train_time:23015971ms step_avg:572.96ms +step:40171/57344 train_time:23016517ms step_avg:572.96ms +grad accum step:10043/14336 +step:40172/57344 train_time:23017837ms step_avg:572.98ms +step:40173/57344 train_time:23017852ms step_avg:572.97ms +step:40174/57344 train_time:23018106ms step_avg:572.96ms +step:40175/57344 train_time:23018672ms step_avg:572.96ms +grad accum step:10044/14336 +step:40176/57344 train_time:23020025ms step_avg:572.98ms +step:40177/57344 train_time:23020041ms step_avg:572.97ms +step:40178/57344 train_time:23020291ms step_avg:572.96ms +step:40179/57344 train_time:23020851ms step_avg:572.96ms +grad accum step:10045/14336 +step:40180/57344 train_time:23022178ms step_avg:572.98ms +step:40181/57344 train_time:23022194ms step_avg:572.96ms +step:40182/57344 train_time:23022440ms step_avg:572.95ms +step:40183/57344 train_time:23022982ms step_avg:572.95ms +grad accum step:10046/14336 +step:40184/57344 train_time:23024298ms step_avg:572.97ms +step:40185/57344 train_time:23024315ms step_avg:572.96ms +step:40186/57344 train_time:23024565ms step_avg:572.95ms +step:40187/57344 train_time:23025118ms step_avg:572.95ms +grad accum step:10047/14336 +step:40188/57344 train_time:23026397ms step_avg:572.97ms +step:40189/57344 train_time:23026414ms step_avg:572.95ms +step:40190/57344 train_time:23026659ms step_avg:572.94ms +step:40191/57344 train_time:23027214ms step_avg:572.94ms +grad accum step:10048/14336 +step:40192/57344 train_time:23028520ms step_avg:572.96ms +step:40192/57344 val_loss:5.701880 train_time:23028521ms step_avg:572.96ms +step:40193/57344 train_time:23028533ms step_avg:572.95ms +step:40194/57344 train_time:23028757ms step_avg:572.94ms +step:40195/57344 train_time:23029295ms step_avg:572.94ms +grad accum step:10049/14336 +step:40196/57344 train_time:23030626ms step_avg:572.96ms +step:40197/57344 train_time:23030638ms step_avg:572.94ms +step:40198/57344 train_time:23030880ms step_avg:572.94ms +step:40199/57344 train_time:23031429ms step_avg:572.94ms +grad accum step:10050/14336 +step:40200/57344 train_time:23032776ms step_avg:572.95ms +step:40201/57344 train_time:23032788ms step_avg:572.94ms +step:40202/57344 train_time:23033034ms step_avg:572.93ms +step:40203/57344 train_time:23033575ms step_avg:572.93ms +grad accum step:10051/14336 +step:40204/57344 train_time:23034899ms step_avg:572.95ms +step:40205/57344 train_time:23034916ms step_avg:572.94ms +step:40206/57344 train_time:23035163ms step_avg:572.93ms +step:40207/57344 train_time:23035711ms step_avg:572.93ms +grad accum step:10052/14336 +step:40208/57344 train_time:23036994ms step_avg:572.95ms +step:40209/57344 train_time:23037011ms step_avg:572.93ms +step:40210/57344 train_time:23037259ms step_avg:572.92ms +step:40211/57344 train_time:23037803ms step_avg:572.92ms +grad accum step:10053/14336 +step:40212/57344 train_time:23039077ms step_avg:572.94ms +step:40213/57344 train_time:23039094ms step_avg:572.93ms +step:40214/57344 train_time:23039340ms step_avg:572.92ms +step:40215/57344 train_time:23039882ms step_avg:572.92ms +grad accum step:10054/14336 +step:40216/57344 train_time:23041194ms step_avg:572.94ms +step:40217/57344 train_time:23041211ms step_avg:572.92ms +step:40218/57344 train_time:23041458ms step_avg:572.91ms +step:40219/57344 train_time:23042011ms step_avg:572.91ms +grad accum step:10055/14336 +step:40220/57344 train_time:23043331ms step_avg:572.93ms +step:40221/57344 train_time:23043348ms step_avg:572.92ms +step:40222/57344 train_time:23043600ms step_avg:572.91ms +step:40223/57344 train_time:23044160ms step_avg:572.91ms +grad accum step:10056/14336 +step:40224/57344 train_time:23045469ms step_avg:572.93ms +step:40225/57344 train_time:23045486ms step_avg:572.91ms +step:40226/57344 train_time:23045742ms step_avg:572.91ms +step:40227/57344 train_time:23046335ms step_avg:572.91ms +grad accum step:10057/14336 +step:40228/57344 train_time:23047697ms step_avg:572.93ms +step:40229/57344 train_time:23047714ms step_avg:572.91ms +step:40230/57344 train_time:23047963ms step_avg:572.90ms +step:40231/57344 train_time:23048511ms step_avg:572.90ms +grad accum step:10058/14336 +step:40232/57344 train_time:23049809ms step_avg:572.92ms +step:40233/57344 train_time:23049825ms step_avg:572.91ms +step:40234/57344 train_time:23050071ms step_avg:572.90ms +step:40235/57344 train_time:23050609ms step_avg:572.90ms +grad accum step:10059/14336 +step:40236/57344 train_time:23051959ms step_avg:572.92ms +step:40237/57344 train_time:23051983ms step_avg:572.91ms +step:40238/57344 train_time:23052205ms step_avg:572.90ms +step:40239/57344 train_time:23052756ms step_avg:572.90ms +grad accum step:10060/14336 +step:40240/57344 train_time:23054087ms step_avg:572.91ms +step:40241/57344 train_time:23054103ms step_avg:572.90ms +step:40242/57344 train_time:23054350ms step_avg:572.89ms +step:40243/57344 train_time:23054893ms step_avg:572.89ms +grad accum step:10061/14336 +step:40244/57344 train_time:23056190ms step_avg:572.91ms +step:40245/57344 train_time:23056207ms step_avg:572.90ms +step:40246/57344 train_time:23056457ms step_avg:572.89ms +step:40247/57344 train_time:23057004ms step_avg:572.89ms +grad accum step:10062/14336 +step:40248/57344 train_time:23058384ms step_avg:572.91ms +step:40249/57344 train_time:23058401ms step_avg:572.89ms +step:40250/57344 train_time:23058648ms step_avg:572.89ms +step:40251/57344 train_time:23059196ms step_avg:572.89ms +grad accum step:10063/14336 +step:40252/57344 train_time:23060501ms step_avg:572.90ms +step:40253/57344 train_time:23060517ms step_avg:572.89ms +step:40254/57344 train_time:23060765ms step_avg:572.88ms +step:40255/57344 train_time:23071513ms step_avg:573.13ms +grad accum step:10064/14336 +step:40256/57344 train_time:23072630ms step_avg:573.15ms +step:40256/57344 val_loss:5.703946 train_time:23072631ms step_avg:573.15ms +step:40257/57344 train_time:23072643ms step_avg:573.13ms +step:40258/57344 train_time:23072871ms step_avg:573.13ms +step:40259/57344 train_time:23073421ms step_avg:573.12ms +grad accum step:10065/14336 +step:40260/57344 train_time:23074701ms step_avg:573.14ms +step:40261/57344 train_time:23074718ms step_avg:573.13ms +step:40262/57344 train_time:23074966ms step_avg:573.12ms +step:40263/57344 train_time:23075513ms step_avg:573.12ms +grad accum step:10066/14336 +step:40264/57344 train_time:23076826ms step_avg:573.14ms +step:40265/57344 train_time:23076841ms step_avg:573.12ms +step:40266/57344 train_time:23077088ms step_avg:573.12ms +step:40267/57344 train_time:23077630ms step_avg:573.12ms +grad accum step:10067/14336 +step:40268/57344 train_time:23078921ms step_avg:573.13ms +step:40269/57344 train_time:23078938ms step_avg:573.12ms +step:40270/57344 train_time:23079183ms step_avg:573.11ms +step:40271/57344 train_time:23079731ms step_avg:573.11ms +grad accum step:10068/14336 +step:40272/57344 train_time:23081175ms step_avg:573.13ms +step:40273/57344 train_time:23081192ms step_avg:573.12ms +step:40274/57344 train_time:23081451ms step_avg:573.11ms +step:40275/57344 train_time:23082032ms step_avg:573.11ms +grad accum step:10069/14336 +step:40276/57344 train_time:23083359ms step_avg:573.13ms +step:40277/57344 train_time:23083376ms step_avg:573.12ms +step:40278/57344 train_time:23083627ms step_avg:573.11ms +step:40279/57344 train_time:23084183ms step_avg:573.11ms +grad accum step:10070/14336 +step:40280/57344 train_time:23085502ms step_avg:573.13ms +step:40281/57344 train_time:23085538ms step_avg:573.11ms +step:40282/57344 train_time:23085763ms step_avg:573.10ms +step:40283/57344 train_time:23086309ms step_avg:573.10ms +grad accum step:10071/14336 +step:40284/57344 train_time:23087627ms step_avg:573.12ms +step:40285/57344 train_time:23087644ms step_avg:573.11ms +step:40286/57344 train_time:23087892ms step_avg:573.10ms +step:40287/57344 train_time:23088447ms step_avg:573.10ms +grad accum step:10072/14336 +step:40288/57344 train_time:23089773ms step_avg:573.12ms +step:40289/57344 train_time:23089787ms step_avg:573.10ms +step:40290/57344 train_time:23090035ms step_avg:573.10ms +step:40291/57344 train_time:23090579ms step_avg:573.10ms +grad accum step:10073/14336 +step:40292/57344 train_time:23091878ms step_avg:573.11ms +step:40293/57344 train_time:23091896ms step_avg:573.10ms +step:40294/57344 train_time:23092134ms step_avg:573.09ms +step:40295/57344 train_time:23092687ms step_avg:573.09ms +grad accum step:10074/14336 +step:40296/57344 train_time:23094020ms step_avg:573.11ms +step:40297/57344 train_time:23094035ms step_avg:573.10ms +step:40298/57344 train_time:23094282ms step_avg:573.09ms +step:40299/57344 train_time:23094826ms step_avg:573.09ms +grad accum step:10075/14336 +step:40300/57344 train_time:23096113ms step_avg:573.10ms +step:40301/57344 train_time:23096130ms step_avg:573.09ms +step:40302/57344 train_time:23096383ms step_avg:573.08ms +step:40303/57344 train_time:23096948ms step_avg:573.08ms +grad accum step:10076/14336 +step:40304/57344 train_time:23098317ms step_avg:573.10ms +step:40305/57344 train_time:23098329ms step_avg:573.09ms +step:40306/57344 train_time:23098577ms step_avg:573.08ms +step:40307/57344 train_time:23099134ms step_avg:573.08ms +grad accum step:10077/14336 +step:40308/57344 train_time:23100447ms step_avg:573.10ms +step:40309/57344 train_time:23100461ms step_avg:573.08ms +step:40310/57344 train_time:23100692ms step_avg:573.08ms +step:40311/57344 train_time:23101238ms step_avg:573.08ms +grad accum step:10078/14336 +step:40312/57344 train_time:23102545ms step_avg:573.09ms +step:40313/57344 train_time:23102558ms step_avg:573.08ms +step:40314/57344 train_time:23102804ms step_avg:573.07ms +step:40315/57344 train_time:23103368ms step_avg:573.07ms +grad accum step:10079/14336 +step:40316/57344 train_time:23111375ms step_avg:573.26ms +step:40317/57344 train_time:23111388ms step_avg:573.24ms +step:40318/57344 train_time:23111649ms step_avg:573.23ms +step:40319/57344 train_time:23112200ms step_avg:573.23ms +grad accum step:10080/14336 +step:40320/57344 train_time:23113548ms step_avg:573.25ms +step:40320/57344 val_loss:5.691270 train_time:23113549ms step_avg:573.25ms +step:40321/57344 train_time:23113561ms step_avg:573.24ms +step:40322/57344 train_time:23113860ms step_avg:573.23ms +step:40323/57344 train_time:23114406ms step_avg:573.23ms +grad accum step:10081/14336 +step:40324/57344 train_time:23115676ms step_avg:573.25ms +step:40325/57344 train_time:23115692ms step_avg:573.23ms +step:40326/57344 train_time:23115933ms step_avg:573.23ms +step:40327/57344 train_time:23116464ms step_avg:573.23ms +grad accum step:10082/14336 +step:40328/57344 train_time:23117753ms step_avg:573.24ms +step:40329/57344 train_time:23117769ms step_avg:573.23ms +step:40330/57344 train_time:23118012ms step_avg:573.22ms +step:40331/57344 train_time:23118552ms step_avg:573.22ms +grad accum step:10083/14336 +step:40332/57344 train_time:23119841ms step_avg:573.24ms +step:40333/57344 train_time:23119857ms step_avg:573.22ms +step:40334/57344 train_time:23120100ms step_avg:573.22ms +step:40335/57344 train_time:23120641ms step_avg:573.22ms +grad accum step:10084/14336 +step:40336/57344 train_time:23121987ms step_avg:573.23ms +step:40337/57344 train_time:23122006ms step_avg:573.22ms +step:40338/57344 train_time:23122226ms step_avg:573.21ms +step:40339/57344 train_time:23122780ms step_avg:573.21ms +grad accum step:10085/14336 +step:40340/57344 train_time:23124097ms step_avg:573.23ms +step:40341/57344 train_time:23124114ms step_avg:573.22ms +step:40342/57344 train_time:23124362ms step_avg:573.21ms +step:40343/57344 train_time:23124912ms step_avg:573.21ms +grad accum step:10086/14336 +step:40344/57344 train_time:23126211ms step_avg:573.23ms +step:40345/57344 train_time:23126227ms step_avg:573.21ms +step:40346/57344 train_time:23126475ms step_avg:573.20ms +step:40347/57344 train_time:23127027ms step_avg:573.20ms +grad accum step:10087/14336 +step:40348/57344 train_time:23128310ms step_avg:573.22ms +step:40349/57344 train_time:23128327ms step_avg:573.21ms +step:40350/57344 train_time:23128571ms step_avg:573.20ms +step:40351/57344 train_time:23129119ms step_avg:573.20ms +grad accum step:10088/14336 +step:40352/57344 train_time:23130456ms step_avg:573.22ms +step:40353/57344 train_time:23130471ms step_avg:573.20ms +step:40354/57344 train_time:23130709ms step_avg:573.19ms +step:40355/57344 train_time:23189255ms step_avg:574.63ms +grad accum step:10089/14336 +step:40356/57344 train_time:23190329ms step_avg:574.64ms +step:40357/57344 train_time:23190345ms step_avg:574.63ms +step:40358/57344 train_time:23190584ms step_avg:574.62ms +step:40359/57344 train_time:23191119ms step_avg:574.62ms +grad accum step:10090/14336 +step:40360/57344 train_time:23192426ms step_avg:574.64ms +step:40361/57344 train_time:23192442ms step_avg:574.63ms +step:40362/57344 train_time:23192690ms step_avg:574.62ms +step:40363/57344 train_time:23193245ms step_avg:574.62ms +grad accum step:10091/14336 +step:40364/57344 train_time:23194536ms step_avg:574.63ms +step:40365/57344 train_time:23194560ms step_avg:574.62ms +step:40366/57344 train_time:23194784ms step_avg:574.61ms +step:40367/57344 train_time:23195318ms step_avg:574.61ms +grad accum step:10092/14336 +step:40368/57344 train_time:23196598ms step_avg:574.63ms +step:40369/57344 train_time:23196614ms step_avg:574.61ms +step:40370/57344 train_time:23196866ms step_avg:574.61ms +step:40371/57344 train_time:23197421ms step_avg:574.61ms +grad accum step:10093/14336 +step:40372/57344 train_time:23198723ms step_avg:574.62ms +step:40373/57344 train_time:23198739ms step_avg:574.61ms +step:40374/57344 train_time:23198984ms step_avg:574.60ms +step:40375/57344 train_time:23199539ms step_avg:574.60ms +grad accum step:10094/14336 +step:40376/57344 train_time:23200916ms step_avg:574.62ms +step:40377/57344 train_time:23200932ms step_avg:574.61ms +step:40378/57344 train_time:23201181ms step_avg:574.60ms +step:40379/57344 train_time:23201737ms step_avg:574.60ms +grad accum step:10095/14336 +step:40380/57344 train_time:23203525ms step_avg:574.63ms +step:40381/57344 train_time:23203643ms step_avg:574.62ms +step:40382/57344 train_time:23203852ms step_avg:574.61ms +step:40383/57344 train_time:23204399ms step_avg:574.61ms +grad accum step:10096/14336 +step:40384/57344 train_time:23215669ms step_avg:574.87ms +step:40384/57344 val_loss:5.681849 train_time:23215672ms step_avg:574.87ms +step:40385/57344 train_time:23215684ms step_avg:574.86ms +step:40386/57344 train_time:23215903ms step_avg:574.85ms +step:40387/57344 train_time:23216447ms step_avg:574.85ms +grad accum step:10097/14336 +step:40388/57344 train_time:23217749ms step_avg:574.87ms +step:40389/57344 train_time:23217765ms step_avg:574.85ms +step:40390/57344 train_time:23218012ms step_avg:574.85ms +step:40391/57344 train_time:23218566ms step_avg:574.85ms +grad accum step:10098/14336 +step:40392/57344 train_time:23219896ms step_avg:574.86ms +step:40393/57344 train_time:23219913ms step_avg:574.85ms +step:40394/57344 train_time:23220166ms step_avg:574.84ms +step:40395/57344 train_time:23220724ms step_avg:574.84ms +grad accum step:10099/14336 +step:40396/57344 train_time:23222037ms step_avg:574.86ms +step:40397/57344 train_time:23222054ms step_avg:574.85ms +step:40398/57344 train_time:23222305ms step_avg:574.84ms +step:40399/57344 train_time:23222867ms step_avg:574.84ms +grad accum step:10100/14336 +step:40400/57344 train_time:23224194ms step_avg:574.86ms +step:40401/57344 train_time:23224206ms step_avg:574.84ms +step:40402/57344 train_time:23224439ms step_avg:574.83ms +step:40403/57344 train_time:23224974ms step_avg:574.83ms +grad accum step:10101/14336 +step:40404/57344 train_time:23226256ms step_avg:574.85ms +step:40405/57344 train_time:23226270ms step_avg:574.84ms +step:40406/57344 train_time:23226529ms step_avg:574.83ms +step:40407/57344 train_time:23227110ms step_avg:574.83ms +grad accum step:10102/14336 +step:40408/57344 train_time:23228396ms step_avg:574.85ms +step:40409/57344 train_time:23228413ms step_avg:574.83ms +step:40410/57344 train_time:23228668ms step_avg:574.82ms +step:40411/57344 train_time:23229234ms step_avg:574.82ms +grad accum step:10103/14336 +step:40412/57344 train_time:23230545ms step_avg:574.84ms +step:40413/57344 train_time:23230562ms step_avg:574.83ms +step:40414/57344 train_time:23230807ms step_avg:574.82ms +step:40415/57344 train_time:23231350ms step_avg:574.82ms +grad accum step:10104/14336 +step:40416/57344 train_time:23232628ms step_avg:574.84ms +step:40417/57344 train_time:23232645ms step_avg:574.82ms +step:40418/57344 train_time:23232897ms step_avg:574.82ms +step:40419/57344 train_time:23233462ms step_avg:574.82ms +grad accum step:10105/14336 +step:40420/57344 train_time:23234793ms step_avg:574.83ms +step:40421/57344 train_time:23234806ms step_avg:574.82ms +step:40422/57344 train_time:23235059ms step_avg:574.81ms +step:40423/57344 train_time:23235627ms step_avg:574.81ms +grad accum step:10106/14336 +step:40424/57344 train_time:23237001ms step_avg:574.83ms +step:40425/57344 train_time:23237017ms step_avg:574.82ms +step:40426/57344 train_time:23237267ms step_avg:574.81ms +step:40427/57344 train_time:23237840ms step_avg:574.81ms +grad accum step:10107/14336 +step:40428/57344 train_time:23239165ms step_avg:574.83ms +step:40429/57344 train_time:23239189ms step_avg:574.81ms +step:40430/57344 train_time:23239416ms step_avg:574.81ms +step:40431/57344 train_time:23239969ms step_avg:574.81ms +grad accum step:10108/14336 +step:40432/57344 train_time:23241320ms step_avg:574.82ms +step:40433/57344 train_time:23241353ms step_avg:574.81ms +step:40434/57344 train_time:23241571ms step_avg:574.80ms +step:40435/57344 train_time:23242127ms step_avg:574.80ms +grad accum step:10109/14336 +step:40436/57344 train_time:23243459ms step_avg:574.82ms +step:40437/57344 train_time:23243474ms step_avg:574.81ms +step:40438/57344 train_time:23243717ms step_avg:574.80ms +step:40439/57344 train_time:23244256ms step_avg:574.80ms +grad accum step:10110/14336 +step:40440/57344 train_time:23245596ms step_avg:574.82ms +step:40441/57344 train_time:23245648ms step_avg:574.80ms +step:40442/57344 train_time:23245866ms step_avg:574.80ms +step:40443/57344 train_time:23246413ms step_avg:574.79ms +grad accum step:10111/14336 +step:40444/57344 train_time:23247719ms step_avg:574.81ms +step:40445/57344 train_time:23247738ms step_avg:574.80ms +step:40446/57344 train_time:23247983ms step_avg:574.79ms +step:40447/57344 train_time:23248550ms step_avg:574.79ms +grad accum step:10112/14336 +step:40448/57344 train_time:23249981ms step_avg:574.81ms +step:40448/57344 val_loss:5.675029 train_time:23249992ms step_avg:574.81ms +step:40449/57344 train_time:23250004ms step_avg:574.80ms +step:40450/57344 train_time:23250229ms step_avg:574.79ms +step:40451/57344 train_time:23250792ms step_avg:574.79ms +grad accum step:10113/14336 +step:40452/57344 train_time:23252135ms step_avg:574.81ms +step:40453/57344 train_time:23252147ms step_avg:574.79ms +step:40454/57344 train_time:23252399ms step_avg:574.79ms +step:40455/57344 train_time:23252955ms step_avg:574.79ms +grad accum step:10114/14336 +step:40456/57344 train_time:23254253ms step_avg:574.80ms +step:40457/57344 train_time:23254267ms step_avg:574.79ms +step:40458/57344 train_time:23254511ms step_avg:574.78ms +step:40459/57344 train_time:23255054ms step_avg:574.78ms +grad accum step:10115/14336 +step:40460/57344 train_time:23256404ms step_avg:574.80ms +step:40461/57344 train_time:23256417ms step_avg:574.79ms +step:40462/57344 train_time:23256662ms step_avg:574.78ms +step:40463/57344 train_time:23257208ms step_avg:574.78ms +grad accum step:10116/14336 +step:40464/57344 train_time:23258571ms step_avg:574.80ms +step:40465/57344 train_time:23258583ms step_avg:574.78ms +step:40466/57344 train_time:23258833ms step_avg:574.77ms +step:40467/57344 train_time:23259391ms step_avg:574.77ms +grad accum step:10117/14336 +step:40468/57344 train_time:23260753ms step_avg:574.79ms +step:40469/57344 train_time:23260770ms step_avg:574.78ms +step:40470/57344 train_time:23260996ms step_avg:574.77ms +step:40471/57344 train_time:23261543ms step_avg:574.77ms +grad accum step:10118/14336 +step:40472/57344 train_time:23262863ms step_avg:574.79ms +step:40473/57344 train_time:23262879ms step_avg:574.78ms +step:40474/57344 train_time:23263126ms step_avg:574.77ms +step:40475/57344 train_time:23263685ms step_avg:574.77ms +grad accum step:10119/14336 +step:40476/57344 train_time:23264985ms step_avg:574.78ms +step:40477/57344 train_time:23265000ms step_avg:574.77ms +step:40478/57344 train_time:23265251ms step_avg:574.76ms +step:40479/57344 train_time:23265807ms step_avg:574.76ms +grad accum step:10120/14336 +step:40480/57344 train_time:23267121ms step_avg:574.78ms +step:40481/57344 train_time:23267138ms step_avg:574.77ms +step:40482/57344 train_time:23267386ms step_avg:574.76ms +step:40483/57344 train_time:23267935ms step_avg:574.76ms +grad accum step:10121/14336 +step:40484/57344 train_time:23269228ms step_avg:574.78ms +step:40485/57344 train_time:23269243ms step_avg:574.76ms +step:40486/57344 train_time:23269503ms step_avg:574.75ms +step:40487/57344 train_time:23270087ms step_avg:574.75ms +grad accum step:10122/14336 +step:40488/57344 train_time:23271425ms step_avg:574.77ms +step:40489/57344 train_time:23271440ms step_avg:574.76ms +step:40490/57344 train_time:23271689ms step_avg:574.75ms +step:40491/57344 train_time:23272243ms step_avg:574.75ms +grad accum step:10123/14336 +step:40492/57344 train_time:23273577ms step_avg:574.77ms +step:40493/57344 train_time:23273594ms step_avg:574.76ms +step:40494/57344 train_time:23273824ms step_avg:574.75ms +step:40495/57344 train_time:23274361ms step_avg:574.75ms +grad accum step:10124/14336 +step:40496/57344 train_time:23275680ms step_avg:574.76ms +step:40497/57344 train_time:23275694ms step_avg:574.75ms +step:40498/57344 train_time:23275946ms step_avg:574.74ms +step:40499/57344 train_time:23276505ms step_avg:574.74ms +grad accum step:10125/14336 +step:40500/57344 train_time:23277809ms step_avg:574.76ms +step:40501/57344 train_time:23277823ms step_avg:574.75ms +step:40502/57344 train_time:23278066ms step_avg:574.74ms +step:40503/57344 train_time:23278620ms step_avg:574.74ms +grad accum step:10126/14336 +step:40504/57344 train_time:23279954ms step_avg:574.76ms +step:40505/57344 train_time:23279976ms step_avg:574.74ms +step:40506/57344 train_time:23280206ms step_avg:574.73ms +step:40507/57344 train_time:23280758ms step_avg:574.73ms +grad accum step:10127/14336 +step:40508/57344 train_time:23282122ms step_avg:574.75ms +step:40509/57344 train_time:23282137ms step_avg:574.74ms +step:40510/57344 train_time:23282370ms step_avg:574.73ms +step:40511/57344 train_time:23282918ms step_avg:574.73ms +grad accum step:10128/14336 +step:40512/57344 train_time:23284237ms step_avg:574.75ms +step:40512/57344 val_loss:5.666840 train_time:23284243ms step_avg:574.75ms +step:40513/57344 train_time:23284255ms step_avg:574.74ms +step:40514/57344 train_time:23284478ms step_avg:574.73ms +step:40515/57344 train_time:23285022ms step_avg:574.73ms +grad accum step:10129/14336 +step:40516/57344 train_time:23286354ms step_avg:574.74ms +step:40517/57344 train_time:23286370ms step_avg:574.73ms +step:40518/57344 train_time:23286619ms step_avg:574.72ms +step:40519/57344 train_time:23287169ms step_avg:574.72ms +grad accum step:10130/14336 +step:40520/57344 train_time:23288539ms step_avg:574.74ms +step:40521/57344 train_time:23288554ms step_avg:574.73ms +step:40522/57344 train_time:23288800ms step_avg:574.72ms +step:40523/57344 train_time:23289354ms step_avg:574.72ms +grad accum step:10131/14336 +step:40524/57344 train_time:23290667ms step_avg:574.74ms +step:40525/57344 train_time:23290686ms step_avg:574.72ms +step:40526/57344 train_time:23290928ms step_avg:574.72ms +step:40527/57344 train_time:23291476ms step_avg:574.72ms +grad accum step:10132/14336 +step:40528/57344 train_time:23292767ms step_avg:574.73ms +step:40529/57344 train_time:23292784ms step_avg:574.72ms +step:40530/57344 train_time:23293028ms step_avg:574.71ms +step:40531/57344 train_time:23293580ms step_avg:574.71ms +grad accum step:10133/14336 +step:40532/57344 train_time:23294901ms step_avg:574.73ms +step:40533/57344 train_time:23294927ms step_avg:574.72ms +step:40534/57344 train_time:23295165ms step_avg:574.71ms +step:40535/57344 train_time:23295711ms step_avg:574.71ms +grad accum step:10134/14336 +step:40536/57344 train_time:23297053ms step_avg:574.73ms +step:40537/57344 train_time:23297070ms step_avg:574.71ms +step:40538/57344 train_time:23297324ms step_avg:574.70ms +step:40539/57344 train_time:23297894ms step_avg:574.70ms +grad accum step:10135/14336 +step:40540/57344 train_time:23299238ms step_avg:574.72ms +step:40541/57344 train_time:23299250ms step_avg:574.71ms +step:40542/57344 train_time:23299505ms step_avg:574.70ms +step:40543/57344 train_time:23300068ms step_avg:574.70ms +grad accum step:10136/14336 +step:40544/57344 train_time:23301415ms step_avg:574.72ms +step:40545/57344 train_time:23301429ms step_avg:574.71ms +step:40546/57344 train_time:23301672ms step_avg:574.70ms +step:40547/57344 train_time:23302232ms step_avg:574.70ms +grad accum step:10137/14336 +step:40548/57344 train_time:23303632ms step_avg:574.72ms +step:40549/57344 train_time:23303649ms step_avg:574.70ms +step:40550/57344 train_time:23303878ms step_avg:574.69ms +step:40551/57344 train_time:23304420ms step_avg:574.69ms +grad accum step:10138/14336 +step:40552/57344 train_time:23305697ms step_avg:574.71ms +step:40553/57344 train_time:23305711ms step_avg:574.70ms +step:40554/57344 train_time:23305957ms step_avg:574.69ms +step:40555/57344 train_time:23306502ms step_avg:574.69ms +grad accum step:10139/14336 +step:40556/57344 train_time:23307793ms step_avg:574.71ms +step:40557/57344 train_time:23307815ms step_avg:574.69ms +step:40558/57344 train_time:23308046ms step_avg:574.68ms +step:40559/57344 train_time:23308606ms step_avg:574.68ms +grad accum step:10140/14336 +step:40560/57344 train_time:23309963ms step_avg:574.70ms +step:40561/57344 train_time:23309979ms step_avg:574.69ms +step:40562/57344 train_time:23310230ms step_avg:574.68ms +step:40563/57344 train_time:23310791ms step_avg:574.68ms +grad accum step:10141/14336 +step:40564/57344 train_time:23312109ms step_avg:574.70ms +step:40565/57344 train_time:23312123ms step_avg:574.69ms +step:40566/57344 train_time:23312375ms step_avg:574.68ms +step:40567/57344 train_time:23312928ms step_avg:574.68ms +grad accum step:10142/14336 +step:40568/57344 train_time:23314223ms step_avg:574.69ms +step:40569/57344 train_time:23314245ms step_avg:574.68ms +step:40570/57344 train_time:23314493ms step_avg:574.67ms +step:40571/57344 train_time:23315074ms step_avg:574.67ms +grad accum step:10143/14336 +step:40572/57344 train_time:23316420ms step_avg:574.69ms +step:40573/57344 train_time:23316438ms step_avg:574.68ms +step:40574/57344 train_time:23316663ms step_avg:574.67ms +step:40575/57344 train_time:23317205ms step_avg:574.67ms +grad accum step:10144/14336 +step:40576/57344 train_time:23318501ms step_avg:574.69ms +step:40576/57344 val_loss:5.659797 train_time:23318538ms step_avg:574.69ms +step:40577/57344 train_time:23318550ms step_avg:574.67ms +step:40578/57344 train_time:23318779ms step_avg:574.67ms +step:40579/57344 train_time:23319344ms step_avg:574.67ms +grad accum step:10145/14336 +step:40580/57344 train_time:23320704ms step_avg:574.68ms +step:40581/57344 train_time:23320738ms step_avg:574.67ms +step:40582/57344 train_time:23320954ms step_avg:574.66ms +step:40583/57344 train_time:23321488ms step_avg:574.66ms +grad accum step:10146/14336 +step:40584/57344 train_time:23322789ms step_avg:574.68ms +step:40585/57344 train_time:23322824ms step_avg:574.67ms +step:40586/57344 train_time:23323049ms step_avg:574.66ms +step:40587/57344 train_time:23323596ms step_avg:574.66ms +grad accum step:10147/14336 +step:40588/57344 train_time:23324879ms step_avg:574.67ms +step:40589/57344 train_time:23324895ms step_avg:574.66ms +step:40590/57344 train_time:23325141ms step_avg:574.65ms +step:40591/57344 train_time:23325694ms step_avg:574.65ms +grad accum step:10148/14336 +step:40592/57344 train_time:23327024ms step_avg:574.67ms +step:40593/57344 train_time:23327040ms step_avg:574.66ms +step:40594/57344 train_time:23327294ms step_avg:574.65ms +step:40595/57344 train_time:23327864ms step_avg:574.65ms +grad accum step:10149/14336 +step:40596/57344 train_time:23329160ms step_avg:574.67ms +step:40597/57344 train_time:23329175ms step_avg:574.65ms +step:40598/57344 train_time:23329416ms step_avg:574.64ms +step:40599/57344 train_time:23329966ms step_avg:574.64ms +grad accum step:10150/14336 +step:40600/57344 train_time:23331272ms step_avg:574.66ms +step:40601/57344 train_time:23331290ms step_avg:574.65ms +step:40602/57344 train_time:23331512ms step_avg:574.64ms +step:40603/57344 train_time:23332057ms step_avg:574.64ms +grad accum step:10151/14336 +step:40604/57344 train_time:23333395ms step_avg:574.66ms +step:40605/57344 train_time:23333412ms step_avg:574.64ms +step:40606/57344 train_time:23333654ms step_avg:574.64ms +step:40607/57344 train_time:23334193ms step_avg:574.63ms +grad accum step:10152/14336 +step:40608/57344 train_time:23335537ms step_avg:574.65ms +step:40609/57344 train_time:23335553ms step_avg:574.64ms +step:40610/57344 train_time:23335798ms step_avg:574.63ms +step:40611/57344 train_time:23336343ms step_avg:574.63ms +grad accum step:10153/14336 +step:40612/57344 train_time:23337675ms step_avg:574.65ms +step:40613/57344 train_time:23337691ms step_avg:574.64ms +step:40614/57344 train_time:23337941ms step_avg:574.63ms +step:40615/57344 train_time:23338500ms step_avg:574.63ms +grad accum step:10154/14336 +step:40616/57344 train_time:23339946ms step_avg:574.65ms +step:40617/57344 train_time:23340000ms step_avg:574.64ms +step:40618/57344 train_time:23340213ms step_avg:574.63ms +step:40619/57344 train_time:23340757ms step_avg:574.63ms +grad accum step:10155/14336 +step:40620/57344 train_time:23342067ms step_avg:574.64ms +step:40621/57344 train_time:23342082ms step_avg:574.63ms +step:40622/57344 train_time:23342328ms step_avg:574.62ms +step:40623/57344 train_time:23342879ms step_avg:574.62ms +grad accum step:10156/14336 +step:40624/57344 train_time:23344200ms step_avg:574.64ms +step:40625/57344 train_time:23344215ms step_avg:574.63ms +step:40626/57344 train_time:23344450ms step_avg:574.62ms +step:40627/57344 train_time:23344999ms step_avg:574.62ms +grad accum step:10157/14336 +step:40628/57344 train_time:23346293ms step_avg:574.64ms +step:40629/57344 train_time:23346307ms step_avg:574.62ms +step:40630/57344 train_time:23346551ms step_avg:574.61ms +step:40631/57344 train_time:23347094ms step_avg:574.61ms +grad accum step:10158/14336 +step:40632/57344 train_time:23348451ms step_avg:574.63ms +step:40633/57344 train_time:23348468ms step_avg:574.62ms +step:40634/57344 train_time:23348716ms step_avg:574.61ms +step:40635/57344 train_time:23349259ms step_avg:574.61ms +grad accum step:10159/14336 +step:40636/57344 train_time:23350549ms step_avg:574.63ms +step:40637/57344 train_time:23350565ms step_avg:574.61ms +step:40638/57344 train_time:23350814ms step_avg:574.61ms +step:40639/57344 train_time:23351368ms step_avg:574.60ms +grad accum step:10160/14336 +step:40640/57344 train_time:23352686ms step_avg:574.62ms +step:40640/57344 val_loss:5.650706 train_time:23352688ms step_avg:574.62ms +step:40641/57344 train_time:23352700ms step_avg:574.61ms +step:40642/57344 train_time:23352919ms step_avg:574.60ms +step:40643/57344 train_time:23353467ms step_avg:574.60ms +grad accum step:10161/14336 +step:40644/57344 train_time:23354814ms step_avg:574.62ms +step:40645/57344 train_time:23354839ms step_avg:574.61ms +step:40646/57344 train_time:23355057ms step_avg:574.60ms +step:40647/57344 train_time:23355608ms step_avg:574.60ms +grad accum step:10162/14336 +step:40648/57344 train_time:23356923ms step_avg:574.61ms +step:40649/57344 train_time:23356938ms step_avg:574.60ms +step:40650/57344 train_time:23357187ms step_avg:574.59ms +step:40651/57344 train_time:23357737ms step_avg:574.59ms +grad accum step:10163/14336 +step:40652/57344 train_time:23359063ms step_avg:574.61ms +step:40653/57344 train_time:23359080ms step_avg:574.60ms +step:40654/57344 train_time:23359331ms step_avg:574.59ms +step:40655/57344 train_time:23359886ms step_avg:574.59ms +grad accum step:10164/14336 +step:40656/57344 train_time:23361200ms step_avg:574.61ms +step:40657/57344 train_time:23361214ms step_avg:574.59ms +step:40658/57344 train_time:23361461ms step_avg:574.58ms +step:40659/57344 train_time:23362002ms step_avg:574.58ms +grad accum step:10165/14336 +step:40660/57344 train_time:23363304ms step_avg:574.60ms +step:40661/57344 train_time:23363320ms step_avg:574.59ms +step:40662/57344 train_time:23363557ms step_avg:574.58ms +step:40663/57344 train_time:23364102ms step_avg:574.58ms +grad accum step:10166/14336 +step:40664/57344 train_time:23365378ms step_avg:574.60ms +step:40665/57344 train_time:23365393ms step_avg:574.58ms +step:40666/57344 train_time:23365646ms step_avg:574.57ms +step:40667/57344 train_time:23366203ms step_avg:574.57ms +grad accum step:10167/14336 +step:40668/57344 train_time:23367504ms step_avg:574.59ms +step:40669/57344 train_time:23367518ms step_avg:574.58ms +step:40670/57344 train_time:23367759ms step_avg:574.57ms +step:40671/57344 train_time:23368310ms step_avg:574.57ms +grad accum step:10168/14336 +step:40672/57344 train_time:23369594ms step_avg:574.59ms +step:40673/57344 train_time:23369607ms step_avg:574.57ms +step:40674/57344 train_time:23369864ms step_avg:574.57ms +step:40675/57344 train_time:23370446ms step_avg:574.57ms +grad accum step:10169/14336 +step:40676/57344 train_time:23371833ms step_avg:574.59ms +step:40677/57344 train_time:23371856ms step_avg:574.57ms +step:40678/57344 train_time:23372079ms step_avg:574.56ms +step:40679/57344 train_time:23372623ms step_avg:574.56ms +grad accum step:10170/14336 +step:40680/57344 train_time:23373935ms step_avg:574.58ms +step:40681/57344 train_time:23373947ms step_avg:574.57ms +step:40682/57344 train_time:23374189ms step_avg:574.56ms +step:40683/57344 train_time:23374729ms step_avg:574.56ms +grad accum step:10171/14336 +step:40684/57344 train_time:23376017ms step_avg:574.58ms +step:40685/57344 train_time:23376034ms step_avg:574.56ms +step:40686/57344 train_time:23376285ms step_avg:574.55ms +step:40687/57344 train_time:23376847ms step_avg:574.55ms +grad accum step:10172/14336 +step:40688/57344 train_time:23378154ms step_avg:574.57ms +step:40689/57344 train_time:23378168ms step_avg:574.56ms +step:40690/57344 train_time:23378415ms step_avg:574.55ms +step:40691/57344 train_time:23378972ms step_avg:574.55ms +grad accum step:10173/14336 +step:40692/57344 train_time:23380285ms step_avg:574.57ms +step:40693/57344 train_time:23380300ms step_avg:574.55ms +step:40694/57344 train_time:23380546ms step_avg:574.55ms +step:40695/57344 train_time:23381082ms step_avg:574.54ms +grad accum step:10174/14336 +step:40696/57344 train_time:23382380ms step_avg:574.56ms +step:40697/57344 train_time:23382396ms step_avg:574.55ms +step:40698/57344 train_time:23382646ms step_avg:574.54ms +step:40699/57344 train_time:23383201ms step_avg:574.54ms +grad accum step:10175/14336 +step:40700/57344 train_time:23400209ms step_avg:574.94ms +step:40701/57344 train_time:23400221ms step_avg:574.93ms +step:40702/57344 train_time:23400514ms step_avg:574.92ms +step:40703/57344 train_time:23401073ms step_avg:574.92ms +grad accum step:10176/14336 +step:40704/57344 train_time:23402364ms step_avg:574.94ms +step:40704/57344 val_loss:5.643932 train_time:23402365ms step_avg:574.94ms +step:40705/57344 train_time:23402377ms step_avg:574.93ms +step:40706/57344 train_time:23402606ms step_avg:574.92ms +step:40707/57344 train_time:23403177ms step_avg:574.92ms +grad accum step:10177/14336 +step:40708/57344 train_time:23404494ms step_avg:574.94ms +step:40709/57344 train_time:23404511ms step_avg:574.92ms +step:40710/57344 train_time:23404752ms step_avg:574.91ms +step:40711/57344 train_time:23405300ms step_avg:574.91ms +grad accum step:10178/14336 +step:40712/57344 train_time:23406594ms step_avg:574.93ms +step:40713/57344 train_time:23406607ms step_avg:574.92ms +step:40714/57344 train_time:23406853ms step_avg:574.91ms +step:40715/57344 train_time:23407409ms step_avg:574.91ms +grad accum step:10179/14336 +step:40716/57344 train_time:23408742ms step_avg:574.93ms +step:40717/57344 train_time:23408759ms step_avg:574.91ms +step:40718/57344 train_time:23409003ms step_avg:574.91ms +step:40719/57344 train_time:23409551ms step_avg:574.90ms +grad accum step:10180/14336 +step:40720/57344 train_time:23410864ms step_avg:574.92ms +step:40721/57344 train_time:23410881ms step_avg:574.91ms +step:40722/57344 train_time:23411135ms step_avg:574.90ms +step:40723/57344 train_time:23411704ms step_avg:574.90ms +grad accum step:10181/14336 +step:40724/57344 train_time:23413011ms step_avg:574.92ms +step:40725/57344 train_time:23413026ms step_avg:574.91ms +step:40726/57344 train_time:23413270ms step_avg:574.90ms +step:40727/57344 train_time:23413814ms step_avg:574.90ms +grad accum step:10182/14336 +step:40728/57344 train_time:23415094ms step_avg:574.91ms +step:40729/57344 train_time:23415110ms step_avg:574.90ms +step:40730/57344 train_time:23415355ms step_avg:574.89ms +step:40731/57344 train_time:23415904ms step_avg:574.89ms +grad accum step:10183/14336 +step:40732/57344 train_time:23417205ms step_avg:574.91ms +step:40733/57344 train_time:23417222ms step_avg:574.90ms +step:40734/57344 train_time:23417467ms step_avg:574.89ms +step:40735/57344 train_time:23418009ms step_avg:574.89ms +grad accum step:10184/14336 +step:40736/57344 train_time:23419290ms step_avg:574.90ms +step:40737/57344 train_time:23419304ms step_avg:574.89ms +step:40738/57344 train_time:23419552ms step_avg:574.88ms +step:40739/57344 train_time:23420097ms step_avg:574.88ms +grad accum step:10185/14336 +step:40740/57344 train_time:23421403ms step_avg:574.90ms +step:40741/57344 train_time:23421419ms step_avg:574.89ms +step:40742/57344 train_time:23421674ms step_avg:574.88ms +step:40743/57344 train_time:23422241ms step_avg:574.88ms +grad accum step:10186/14336 +step:40744/57344 train_time:23423540ms step_avg:574.90ms +step:40745/57344 train_time:23423557ms step_avg:574.88ms +step:40746/57344 train_time:23423813ms step_avg:574.87ms +step:40747/57344 train_time:23424399ms step_avg:574.87ms +grad accum step:10187/14336 +step:40748/57344 train_time:23425760ms step_avg:574.89ms +step:40749/57344 train_time:23425777ms step_avg:574.88ms +step:40750/57344 train_time:23426030ms step_avg:574.87ms +step:40751/57344 train_time:23426591ms step_avg:574.87ms +grad accum step:10188/14336 +step:40752/57344 train_time:23427883ms step_avg:574.89ms +step:40753/57344 train_time:23427900ms step_avg:574.88ms +step:40754/57344 train_time:23428145ms step_avg:574.87ms +step:40755/57344 train_time:23428688ms step_avg:574.87ms +grad accum step:10189/14336 +step:40756/57344 train_time:23429968ms step_avg:574.88ms +step:40757/57344 train_time:23429985ms step_avg:574.87ms +step:40758/57344 train_time:23430233ms step_avg:574.86ms +step:40759/57344 train_time:23430778ms step_avg:574.86ms +grad accum step:10190/14336 +step:40760/57344 train_time:23432074ms step_avg:574.88ms +step:40761/57344 train_time:23432091ms step_avg:574.87ms +step:40762/57344 train_time:23432339ms step_avg:574.86ms +step:40763/57344 train_time:23432893ms step_avg:574.86ms +grad accum step:10191/14336 +step:40764/57344 train_time:23434207ms step_avg:574.88ms +step:40765/57344 train_time:23434225ms step_avg:574.86ms +step:40766/57344 train_time:23434478ms step_avg:574.85ms +step:40767/57344 train_time:23435040ms step_avg:574.85ms +grad accum step:10192/14336 +step:40768/57344 train_time:23436342ms step_avg:574.87ms +step:40768/57344 val_loss:5.638920 train_time:23436351ms step_avg:574.87ms +step:40769/57344 train_time:23436362ms step_avg:574.86ms +step:40770/57344 train_time:23436587ms step_avg:574.85ms +step:40771/57344 train_time:23437146ms step_avg:574.85ms +grad accum step:10193/14336 +step:40772/57344 train_time:23438435ms step_avg:574.87ms +step:40773/57344 train_time:23438467ms step_avg:574.85ms +step:40774/57344 train_time:23438687ms step_avg:574.84ms +step:40775/57344 train_time:23439231ms step_avg:574.84ms +grad accum step:10194/14336 +step:40776/57344 train_time:23440527ms step_avg:574.86ms +step:40777/57344 train_time:23440544ms step_avg:574.85ms +step:40778/57344 train_time:23440796ms step_avg:574.84ms +step:40779/57344 train_time:23441355ms step_avg:574.84ms +grad accum step:10195/14336 +step:40780/57344 train_time:23442648ms step_avg:574.86ms +step:40781/57344 train_time:23442665ms step_avg:574.84ms +step:40782/57344 train_time:23442915ms step_avg:574.83ms +step:40783/57344 train_time:23443463ms step_avg:574.83ms +grad accum step:10196/14336 +step:40784/57344 train_time:23444761ms step_avg:574.85ms +step:40785/57344 train_time:23444778ms step_avg:574.84ms +step:40786/57344 train_time:23445029ms step_avg:574.83ms +step:40787/57344 train_time:23445588ms step_avg:574.83ms +grad accum step:10197/14336 +step:40788/57344 train_time:23446908ms step_avg:574.85ms +step:40789/57344 train_time:23446926ms step_avg:574.83ms +step:40790/57344 train_time:23447177ms step_avg:574.83ms +step:40791/57344 train_time:23447729ms step_avg:574.83ms +grad accum step:10198/14336 +step:40792/57344 train_time:23449059ms step_avg:574.84ms +step:40793/57344 train_time:23449074ms step_avg:574.83ms +step:40794/57344 train_time:23449321ms step_avg:574.82ms +step:40795/57344 train_time:23449868ms step_avg:574.82ms +grad accum step:10199/14336 +step:40796/57344 train_time:23451205ms step_avg:574.84ms +step:40797/57344 train_time:23451222ms step_avg:574.83ms +step:40798/57344 train_time:23451476ms step_avg:574.82ms +step:40799/57344 train_time:23452045ms step_avg:574.82ms +grad accum step:10200/14336 +step:40800/57344 train_time:23453389ms step_avg:574.84ms +step:40801/57344 train_time:23453406ms step_avg:574.82ms +step:40802/57344 train_time:23453650ms step_avg:574.82ms +step:40803/57344 train_time:23454199ms step_avg:574.82ms +grad accum step:10201/14336 +step:40804/57344 train_time:23455519ms step_avg:574.83ms +step:40805/57344 train_time:23455540ms step_avg:574.82ms +step:40806/57344 train_time:23455778ms step_avg:574.81ms +step:40807/57344 train_time:23456326ms step_avg:574.81ms +grad accum step:10202/14336 +step:40808/57344 train_time:23457632ms step_avg:574.83ms +step:40809/57344 train_time:23457676ms step_avg:574.82ms +step:40810/57344 train_time:23457894ms step_avg:574.81ms +step:40811/57344 train_time:23458435ms step_avg:574.81ms +grad accum step:10203/14336 +step:40812/57344 train_time:23459750ms step_avg:574.82ms +step:40813/57344 train_time:23459767ms step_avg:574.81ms +step:40814/57344 train_time:23460012ms step_avg:574.80ms +step:40815/57344 train_time:23460566ms step_avg:574.80ms +grad accum step:10204/14336 +step:40816/57344 train_time:23461894ms step_avg:574.82ms +step:40817/57344 train_time:23461906ms step_avg:574.81ms +step:40818/57344 train_time:23462149ms step_avg:574.80ms +step:40819/57344 train_time:23462712ms step_avg:574.80ms +grad accum step:10205/14336 +step:40820/57344 train_time:23464060ms step_avg:574.82ms +step:40821/57344 train_time:23464081ms step_avg:574.80ms +step:40822/57344 train_time:23464300ms step_avg:574.80ms +step:40823/57344 train_time:23464843ms step_avg:574.79ms +grad accum step:10206/14336 +step:40824/57344 train_time:23466168ms step_avg:574.81ms +step:40825/57344 train_time:23466185ms step_avg:574.80ms +step:40826/57344 train_time:23466430ms step_avg:574.79ms +step:40827/57344 train_time:23466971ms step_avg:574.79ms +grad accum step:10207/14336 +step:40828/57344 train_time:23468285ms step_avg:574.81ms +step:40829/57344 train_time:23468302ms step_avg:574.79ms +step:40830/57344 train_time:23468551ms step_avg:574.79ms +step:40831/57344 train_time:23469107ms step_avg:574.79ms +grad accum step:10208/14336 +step:40832/57344 train_time:23470416ms step_avg:574.80ms +step:40832/57344 val_loss:5.637269 train_time:23470417ms step_avg:574.80ms +step:40833/57344 train_time:23470429ms step_avg:574.79ms +step:40834/57344 train_time:23470659ms step_avg:574.78ms +step:40835/57344 train_time:23471236ms step_avg:574.78ms +grad accum step:10209/14336 +step:40836/57344 train_time:23472536ms step_avg:574.80ms +step:40837/57344 train_time:23472553ms step_avg:574.79ms +step:40838/57344 train_time:23472806ms step_avg:574.78ms +step:40839/57344 train_time:23473368ms step_avg:574.78ms +grad accum step:10210/14336 +step:40840/57344 train_time:23474658ms step_avg:574.80ms +step:40841/57344 train_time:23474675ms step_avg:574.78ms +step:40842/57344 train_time:23474923ms step_avg:574.77ms +step:40843/57344 train_time:23475480ms step_avg:574.77ms +grad accum step:10211/14336 +step:40844/57344 train_time:23476818ms step_avg:574.79ms +step:40845/57344 train_time:23476835ms step_avg:574.78ms +step:40846/57344 train_time:23477080ms step_avg:574.77ms +step:40847/57344 train_time:23477622ms step_avg:574.77ms +grad accum step:10212/14336 +step:40848/57344 train_time:23478936ms step_avg:574.79ms +step:40849/57344 train_time:23478953ms step_avg:574.77ms +step:40850/57344 train_time:23479199ms step_avg:574.77ms +step:40851/57344 train_time:23479746ms step_avg:574.77ms +grad accum step:10213/14336 +step:40852/57344 train_time:23481056ms step_avg:574.78ms +step:40853/57344 train_time:23481072ms step_avg:574.77ms +step:40854/57344 train_time:23481319ms step_avg:574.76ms +step:40855/57344 train_time:23481868ms step_avg:574.76ms +grad accum step:10214/14336 +step:40856/57344 train_time:23483169ms step_avg:574.78ms +step:40857/57344 train_time:23483186ms step_avg:574.77ms +step:40858/57344 train_time:23483434ms step_avg:574.76ms +step:40859/57344 train_time:23483990ms step_avg:574.76ms +grad accum step:10215/14336 +step:40860/57344 train_time:23485297ms step_avg:574.77ms +step:40861/57344 train_time:23485314ms step_avg:574.76ms +step:40862/57344 train_time:23485563ms step_avg:574.75ms +step:40863/57344 train_time:23486118ms step_avg:574.75ms +grad accum step:10216/14336 +step:40864/57344 train_time:23487450ms step_avg:574.77ms +step:40865/57344 train_time:23487467ms step_avg:574.76ms +step:40866/57344 train_time:23487710ms step_avg:574.75ms +step:40867/57344 train_time:23488259ms step_avg:574.75ms +grad accum step:10217/14336 +step:40868/57344 train_time:23489576ms step_avg:574.77ms +step:40869/57344 train_time:23489591ms step_avg:574.75ms +step:40870/57344 train_time:23489838ms step_avg:574.75ms +step:40871/57344 train_time:23490399ms step_avg:574.74ms +grad accum step:10218/14336 +step:40872/57344 train_time:23491724ms step_avg:574.76ms +step:40873/57344 train_time:23491736ms step_avg:574.75ms +step:40874/57344 train_time:23491974ms step_avg:574.74ms +step:40875/57344 train_time:23492519ms step_avg:574.74ms +grad accum step:10219/14336 +step:40876/57344 train_time:23493862ms step_avg:574.76ms +step:40877/57344 train_time:23493877ms step_avg:574.75ms +step:40878/57344 train_time:23494127ms step_avg:574.74ms +step:40879/57344 train_time:23494678ms step_avg:574.74ms +grad accum step:10220/14336 +step:40880/57344 train_time:23496002ms step_avg:574.76ms +step:40881/57344 train_time:23496021ms step_avg:574.74ms +step:40882/57344 train_time:23496266ms step_avg:574.73ms +step:40883/57344 train_time:23496834ms step_avg:574.73ms +grad accum step:10221/14336 +step:40884/57344 train_time:23498165ms step_avg:574.75ms +step:40885/57344 train_time:23498181ms step_avg:574.74ms +step:40886/57344 train_time:23498431ms step_avg:574.73ms +step:40887/57344 train_time:23498996ms step_avg:574.73ms +grad accum step:10222/14336 +step:40888/57344 train_time:23500323ms step_avg:574.75ms +step:40889/57344 train_time:23500338ms step_avg:574.73ms +step:40890/57344 train_time:23500584ms step_avg:574.73ms +step:40891/57344 train_time:23501140ms step_avg:574.73ms +grad accum step:10223/14336 +step:40892/57344 train_time:23502489ms step_avg:574.75ms +step:40893/57344 train_time:23502504ms step_avg:574.73ms +step:40894/57344 train_time:23502738ms step_avg:574.72ms +step:40895/57344 train_time:23503298ms step_avg:574.72ms +grad accum step:10224/14336 +step:40896/57344 train_time:23504613ms step_avg:574.74ms +step:40896/57344 val_loss:5.642417 train_time:23504619ms step_avg:574.74ms +step:40897/57344 train_time:23504630ms step_avg:574.73ms +step:40898/57344 train_time:23504851ms step_avg:574.72ms +step:40899/57344 train_time:23505396ms step_avg:574.72ms +grad accum step:10225/14336 +step:40900/57344 train_time:23506725ms step_avg:574.74ms +step:40901/57344 train_time:23506743ms step_avg:574.72ms +step:40902/57344 train_time:23506979ms step_avg:574.71ms +step:40903/57344 train_time:23507526ms step_avg:574.71ms +grad accum step:10226/14336 +step:40904/57344 train_time:23508841ms step_avg:574.73ms +step:40905/57344 train_time:23508856ms step_avg:574.72ms +step:40906/57344 train_time:23509102ms step_avg:574.71ms +step:40907/57344 train_time:23509645ms step_avg:574.71ms +grad accum step:10227/14336 +step:40908/57344 train_time:23511163ms step_avg:574.73ms +step:40909/57344 train_time:23511193ms step_avg:574.72ms +step:40910/57344 train_time:23511409ms step_avg:574.71ms +step:40911/57344 train_time:23511965ms step_avg:574.71ms +grad accum step:10228/14336 +step:40912/57344 train_time:23513258ms step_avg:574.73ms +step:40913/57344 train_time:23513274ms step_avg:574.71ms +step:40914/57344 train_time:23513521ms step_avg:574.71ms +step:40915/57344 train_time:23514070ms step_avg:574.71ms +grad accum step:10229/14336 +step:40916/57344 train_time:23515449ms step_avg:574.73ms +step:40917/57344 train_time:23515462ms step_avg:574.71ms +step:40918/57344 train_time:23515686ms step_avg:574.70ms +step:40919/57344 train_time:23516240ms step_avg:574.70ms +grad accum step:10230/14336 +step:40920/57344 train_time:23517553ms step_avg:574.72ms +step:40921/57344 train_time:23517567ms step_avg:574.71ms +step:40922/57344 train_time:23517815ms step_avg:574.70ms +step:40923/57344 train_time:23518375ms step_avg:574.70ms +grad accum step:10231/14336 +step:40924/57344 train_time:23519703ms step_avg:574.72ms +step:40925/57344 train_time:23519717ms step_avg:574.70ms +step:40926/57344 train_time:23519965ms step_avg:574.69ms +step:40927/57344 train_time:23520521ms step_avg:574.69ms +grad accum step:10232/14336 +step:40928/57344 train_time:23521835ms step_avg:574.71ms +step:40929/57344 train_time:23521857ms step_avg:574.70ms +step:40930/57344 train_time:23522086ms step_avg:574.69ms +step:40931/57344 train_time:23522647ms step_avg:574.69ms +grad accum step:10233/14336 +step:40932/57344 train_time:23523951ms step_avg:574.71ms +step:40933/57344 train_time:23523972ms step_avg:574.69ms +step:40934/57344 train_time:23524212ms step_avg:574.69ms +step:40935/57344 train_time:23524756ms step_avg:574.69ms +grad accum step:10234/14336 +step:40936/57344 train_time:23526033ms step_avg:574.70ms +step:40937/57344 train_time:23526047ms step_avg:574.69ms +step:40938/57344 train_time:23526289ms step_avg:574.68ms +step:40939/57344 train_time:23526838ms step_avg:574.68ms +grad accum step:10235/14336 +step:40940/57344 train_time:23528126ms step_avg:574.70ms +step:40941/57344 train_time:23528144ms step_avg:574.68ms +step:40942/57344 train_time:23528392ms step_avg:574.68ms +step:40943/57344 train_time:23528956ms step_avg:574.68ms +grad accum step:10236/14336 +step:40944/57344 train_time:23530276ms step_avg:574.69ms +step:40945/57344 train_time:23530292ms step_avg:574.68ms +step:40946/57344 train_time:23530541ms step_avg:574.67ms +step:40947/57344 train_time:23531100ms step_avg:574.67ms +grad accum step:10237/14336 +step:40948/57344 train_time:23532435ms step_avg:574.69ms +step:40949/57344 train_time:23532449ms step_avg:574.68ms +step:40950/57344 train_time:23532696ms step_avg:574.67ms +step:40951/57344 train_time:23533242ms step_avg:574.67ms +grad accum step:10238/14336 +step:40952/57344 train_time:23534600ms step_avg:574.69ms +step:40953/57344 train_time:23534618ms step_avg:574.67ms +step:40954/57344 train_time:23534860ms step_avg:574.67ms +step:40955/57344 train_time:23535410ms step_avg:574.67ms +grad accum step:10239/14336 +step:40956/57344 train_time:23536724ms step_avg:574.68ms +step:40957/57344 train_time:23536737ms step_avg:574.67ms +step:40958/57344 train_time:23536986ms step_avg:574.66ms +step:40959/57344 train_time:23537539ms step_avg:574.66ms +grad accum step:10240/14336 +step:40960/57344 train_time:23538850ms step_avg:574.68ms +step:40960/57344 val_loss:5.645013 train_time:23538855ms step_avg:574.68ms +step:40961/57344 train_time:23538867ms step_avg:574.67ms +step:40962/57344 train_time:23539086ms step_avg:574.66ms +step:40963/57344 train_time:23539632ms step_avg:574.66ms +grad accum step:10241/14336 +step:40964/57344 train_time:23540915ms step_avg:574.67ms +step:40965/57344 train_time:23540932ms step_avg:574.66ms +step:40966/57344 train_time:23541183ms step_avg:574.65ms +step:40967/57344 train_time:23541740ms step_avg:574.65ms +grad accum step:10242/14336 +step:40968/57344 train_time:23543045ms step_avg:574.67ms +step:40969/57344 train_time:23543062ms step_avg:574.66ms +step:40970/57344 train_time:23543312ms step_avg:574.65ms +step:40971/57344 train_time:23543873ms step_avg:574.65ms +grad accum step:10243/14336 +step:40972/57344 train_time:23545206ms step_avg:574.67ms +step:40973/57344 train_time:23545231ms step_avg:574.65ms +step:40974/57344 train_time:23545460ms step_avg:574.64ms +step:40975/57344 train_time:23546005ms step_avg:574.64ms +grad accum step:10244/14336 +step:40976/57344 train_time:23547322ms step_avg:574.66ms +step:40977/57344 train_time:23547335ms step_avg:574.65ms +step:40978/57344 train_time:23547587ms step_avg:574.64ms +step:40979/57344 train_time:23548161ms step_avg:574.64ms +grad accum step:10245/14336 +step:40980/57344 train_time:23549498ms step_avg:574.66ms +step:40981/57344 train_time:23549513ms step_avg:574.64ms +step:40982/57344 train_time:23549758ms step_avg:574.64ms +step:40983/57344 train_time:23550315ms step_avg:574.64ms +grad accum step:10246/14336 +step:40984/57344 train_time:23551693ms step_avg:574.66ms +step:40985/57344 train_time:23551710ms step_avg:574.64ms +step:40986/57344 train_time:23551962ms step_avg:574.63ms +step:40987/57344 train_time:23552521ms step_avg:574.63ms +grad accum step:10247/14336 +step:40988/57344 train_time:23553812ms step_avg:574.65ms +step:40989/57344 train_time:23553829ms step_avg:574.64ms +step:40990/57344 train_time:23554073ms step_avg:574.63ms +step:40991/57344 train_time:23554624ms step_avg:574.63ms +grad accum step:10248/14336 +step:40992/57344 train_time:23555928ms step_avg:574.65ms +step:40993/57344 train_time:23555970ms step_avg:574.63ms +step:40994/57344 train_time:23556204ms step_avg:574.63ms +step:40995/57344 train_time:23556780ms step_avg:574.63ms +grad accum step:10249/14336 +step:40996/57344 train_time:23558115ms step_avg:574.64ms +step:40997/57344 train_time:23558135ms step_avg:574.63ms +step:40998/57344 train_time:23558359ms step_avg:574.62ms +step:40999/57344 train_time:23558902ms step_avg:574.62ms +grad accum step:10250/14336 +step:41000/57344 train_time:23560220ms step_avg:574.64ms +step:41001/57344 train_time:23560239ms step_avg:574.63ms +step:41002/57344 train_time:23560482ms step_avg:574.62ms +step:41003/57344 train_time:23561030ms step_avg:574.62ms +grad accum step:10251/14336 +step:41004/57344 train_time:23562334ms step_avg:574.64ms +step:41005/57344 train_time:23562349ms step_avg:574.62ms +step:41006/57344 train_time:23562584ms step_avg:574.61ms +step:41007/57344 train_time:23563126ms step_avg:574.61ms +grad accum step:10252/14336 +step:41008/57344 train_time:23564486ms step_avg:574.63ms +step:41009/57344 train_time:23564502ms step_avg:574.62ms +step:41010/57344 train_time:23564730ms step_avg:574.61ms +step:41011/57344 train_time:23565297ms step_avg:574.61ms +grad accum step:10253/14336 +step:41012/57344 train_time:23566616ms step_avg:574.63ms +step:41013/57344 train_time:23566633ms step_avg:574.61ms +step:41014/57344 train_time:23566872ms step_avg:574.61ms +step:41015/57344 train_time:23567418ms step_avg:574.60ms +grad accum step:10254/14336 +step:41016/57344 train_time:23568714ms step_avg:574.62ms +step:41017/57344 train_time:23568731ms step_avg:574.61ms +step:41018/57344 train_time:23568975ms step_avg:574.60ms +step:41019/57344 train_time:23569522ms step_avg:574.60ms +grad accum step:10255/14336 +step:41020/57344 train_time:23570841ms step_avg:574.62ms +step:41021/57344 train_time:23570858ms step_avg:574.60ms +step:41022/57344 train_time:23571098ms step_avg:574.60ms +step:41023/57344 train_time:23571641ms step_avg:574.60ms +grad accum step:10256/14336 +step:41024/57344 train_time:23572940ms step_avg:574.61ms +step:41024/57344 val_loss:5.651797 train_time:23572951ms step_avg:574.61ms +step:41025/57344 train_time:23572963ms step_avg:574.60ms +step:41026/57344 train_time:23573190ms step_avg:574.59ms +step:41027/57344 train_time:23573754ms step_avg:574.59ms +grad accum step:10257/14336 +step:41028/57344 train_time:23575075ms step_avg:574.61ms +step:41029/57344 train_time:23575091ms step_avg:574.60ms +step:41030/57344 train_time:23575341ms step_avg:574.59ms +step:41031/57344 train_time:23575892ms step_avg:574.59ms +grad accum step:10258/14336 +step:41032/57344 train_time:23577215ms step_avg:574.61ms +step:41033/57344 train_time:23577227ms step_avg:574.59ms +step:41034/57344 train_time:23577473ms step_avg:574.58ms +step:41035/57344 train_time:23578026ms step_avg:574.58ms +grad accum step:10259/14336 +step:41036/57344 train_time:23579381ms step_avg:574.60ms +step:41037/57344 train_time:23579396ms step_avg:574.59ms +step:41038/57344 train_time:23579647ms step_avg:574.58ms +step:41039/57344 train_time:23580206ms step_avg:574.58ms +grad accum step:10260/14336 +step:41040/57344 train_time:23581510ms step_avg:574.60ms +step:41041/57344 train_time:23581528ms step_avg:574.58ms +step:41042/57344 train_time:23581769ms step_avg:574.58ms +step:41043/57344 train_time:23582317ms step_avg:574.58ms +grad accum step:10261/14336 +step:41044/57344 train_time:23583615ms step_avg:574.59ms +step:41045/57344 train_time:23583632ms step_avg:574.58ms +step:41046/57344 train_time:23583888ms step_avg:574.57ms +step:41047/57344 train_time:23584460ms step_avg:574.57ms +grad accum step:10262/14336 +step:41048/57344 train_time:23585812ms step_avg:574.59ms +step:41049/57344 train_time:23585833ms step_avg:574.58ms +step:41050/57344 train_time:23586053ms step_avg:574.57ms +step:41051/57344 train_time:23586593ms step_avg:574.57ms +grad accum step:10263/14336 +step:41052/57344 train_time:23587880ms step_avg:574.59ms +step:41053/57344 train_time:23587897ms step_avg:574.57ms +step:41054/57344 train_time:23588147ms step_avg:574.56ms +step:41055/57344 train_time:23588704ms step_avg:574.56ms +grad accum step:10264/14336 +step:41056/57344 train_time:23590018ms step_avg:574.58ms +step:41057/57344 train_time:23590034ms step_avg:574.57ms +step:41058/57344 train_time:23590290ms step_avg:574.56ms +step:41059/57344 train_time:23590863ms step_avg:574.56ms +grad accum step:10265/14336 +step:41060/57344 train_time:23592181ms step_avg:574.58ms +step:41061/57344 train_time:23592196ms step_avg:574.56ms +step:41062/57344 train_time:23592447ms step_avg:574.56ms +step:41063/57344 train_time:23593015ms step_avg:574.56ms +grad accum step:10266/14336 +step:41064/57344 train_time:23594343ms step_avg:574.57ms +step:41065/57344 train_time:23594360ms step_avg:574.56ms +step:41066/57344 train_time:23594606ms step_avg:574.55ms +step:41067/57344 train_time:23595146ms step_avg:574.55ms +grad accum step:10267/14336 +step:41068/57344 train_time:23596429ms step_avg:574.57ms +step:41069/57344 train_time:23596441ms step_avg:574.56ms +step:41070/57344 train_time:23596698ms step_avg:574.55ms +step:41071/57344 train_time:23597274ms step_avg:574.55ms +grad accum step:10268/14336 +step:41072/57344 train_time:23598606ms step_avg:574.57ms +step:41073/57344 train_time:23598621ms step_avg:574.55ms +step:41074/57344 train_time:23598867ms step_avg:574.55ms +step:41075/57344 train_time:23599417ms step_avg:574.54ms +grad accum step:10269/14336 +step:41076/57344 train_time:23600708ms step_avg:574.56ms +step:41077/57344 train_time:23600725ms step_avg:574.55ms +step:41078/57344 train_time:23600975ms step_avg:574.54ms +step:41079/57344 train_time:23601525ms step_avg:574.54ms +grad accum step:10270/14336 +step:41080/57344 train_time:23602844ms step_avg:574.56ms +step:41081/57344 train_time:23602861ms step_avg:574.54ms +step:41082/57344 train_time:23603115ms step_avg:574.54ms +step:41083/57344 train_time:23603683ms step_avg:574.54ms +grad accum step:10271/14336 +step:41084/57344 train_time:23604981ms step_avg:574.55ms +step:41085/57344 train_time:23604995ms step_avg:574.54ms +step:41086/57344 train_time:23605240ms step_avg:574.53ms +step:41087/57344 train_time:23605788ms step_avg:574.53ms +grad accum step:10272/14336 +step:41088/57344 train_time:23607123ms step_avg:574.55ms +step:41088/57344 val_loss:5.661096 train_time:23607124ms step_avg:574.55ms +step:41089/57344 train_time:23607136ms step_avg:574.54ms +step:41090/57344 train_time:23607359ms step_avg:574.53ms +step:41091/57344 train_time:23607897ms step_avg:574.53ms +grad accum step:10273/14336 +step:41092/57344 train_time:23609207ms step_avg:574.55ms +step:41093/57344 train_time:23609223ms step_avg:574.53ms +step:41094/57344 train_time:23609462ms step_avg:574.52ms +step:41095/57344 train_time:23610018ms step_avg:574.52ms +grad accum step:10274/14336 +step:41096/57344 train_time:23611352ms step_avg:574.54ms +step:41097/57344 train_time:23611366ms step_avg:574.53ms +step:41098/57344 train_time:23611610ms step_avg:574.52ms +step:41099/57344 train_time:23612155ms step_avg:574.52ms +grad accum step:10275/14336 +step:41100/57344 train_time:23613535ms step_avg:574.54ms +step:41101/57344 train_time:23613553ms step_avg:574.53ms +step:41102/57344 train_time:23613796ms step_avg:574.52ms +step:41103/57344 train_time:23614358ms step_avg:574.52ms +grad accum step:10276/14336 +step:41104/57344 train_time:23615664ms step_avg:574.53ms +step:41105/57344 train_time:23615679ms step_avg:574.52ms +step:41106/57344 train_time:23615928ms step_avg:574.51ms +step:41107/57344 train_time:23616482ms step_avg:574.51ms +grad accum step:10277/14336 +step:41108/57344 train_time:23617825ms step_avg:574.53ms +step:41109/57344 train_time:23617840ms step_avg:574.52ms +step:41110/57344 train_time:23618085ms step_avg:574.51ms +step:41111/57344 train_time:23618635ms step_avg:574.51ms +grad accum step:10278/14336 +step:41112/57344 train_time:23619930ms step_avg:574.53ms +step:41113/57344 train_time:23619945ms step_avg:574.51ms +step:41114/57344 train_time:23620194ms step_avg:574.50ms +step:41115/57344 train_time:23620746ms step_avg:574.50ms +grad accum step:10279/14336 +step:41116/57344 train_time:23622070ms step_avg:574.52ms +step:41117/57344 train_time:23622093ms step_avg:574.51ms +step:41118/57344 train_time:23622325ms step_avg:574.50ms +step:41119/57344 train_time:23622877ms step_avg:574.50ms +grad accum step:10280/14336 +step:41120/57344 train_time:23624205ms step_avg:574.52ms +step:41121/57344 train_time:23624218ms step_avg:574.50ms +step:41122/57344 train_time:23624467ms step_avg:574.50ms +step:41123/57344 train_time:23625017ms step_avg:574.50ms +grad accum step:10281/14336 +step:41124/57344 train_time:23626329ms step_avg:574.51ms +step:41125/57344 train_time:23626346ms step_avg:574.50ms +step:41126/57344 train_time:23626587ms step_avg:574.49ms +step:41127/57344 train_time:23627137ms step_avg:574.49ms +grad accum step:10282/14336 +step:41128/57344 train_time:23628473ms step_avg:574.51ms +step:41129/57344 train_time:23628488ms step_avg:574.50ms +step:41130/57344 train_time:23628728ms step_avg:574.49ms +step:41131/57344 train_time:23629278ms step_avg:574.49ms +grad accum step:10283/14336 +step:41132/57344 train_time:23630599ms step_avg:574.51ms +step:41133/57344 train_time:23630615ms step_avg:574.49ms +step:41134/57344 train_time:23630859ms step_avg:574.48ms +step:41135/57344 train_time:23631410ms step_avg:574.48ms +grad accum step:10284/14336 +step:41136/57344 train_time:23632753ms step_avg:574.50ms +step:41137/57344 train_time:23632772ms step_avg:574.49ms +step:41138/57344 train_time:23633002ms step_avg:574.48ms +step:41139/57344 train_time:23633544ms step_avg:574.48ms +grad accum step:10285/14336 +step:41140/57344 train_time:23634867ms step_avg:574.50ms +step:41141/57344 train_time:23634886ms step_avg:574.48ms +step:41142/57344 train_time:23635131ms step_avg:574.48ms +step:41143/57344 train_time:23635727ms step_avg:574.48ms +grad accum step:10286/14336 +step:41144/57344 train_time:23637066ms step_avg:574.50ms +step:41145/57344 train_time:23637079ms step_avg:574.48ms +step:41146/57344 train_time:23637324ms step_avg:574.47ms +step:41147/57344 train_time:23637872ms step_avg:574.47ms +grad accum step:10287/14336 +step:41148/57344 train_time:23639239ms step_avg:574.49ms +step:41149/57344 train_time:23639254ms step_avg:574.48ms +step:41150/57344 train_time:23639474ms step_avg:574.47ms +step:41151/57344 train_time:23640021ms step_avg:574.47ms +grad accum step:10288/14336 +step:41152/57344 train_time:23641350ms step_avg:574.49ms +step:41152/57344 val_loss:5.672447 train_time:23641352ms step_avg:574.49ms +step:41153/57344 train_time:23641364ms step_avg:574.47ms +step:41154/57344 train_time:23641596ms step_avg:574.47ms +step:41155/57344 train_time:23642162ms step_avg:574.47ms +grad accum step:10289/14336 +step:41156/57344 train_time:23643464ms step_avg:574.48ms +step:41157/57344 train_time:23643480ms step_avg:574.47ms +step:41158/57344 train_time:23643727ms step_avg:574.46ms +step:41159/57344 train_time:23644279ms step_avg:574.46ms +grad accum step:10290/14336 +step:41160/57344 train_time:23645599ms step_avg:574.48ms +step:41161/57344 train_time:23645614ms step_avg:574.47ms +step:41162/57344 train_time:23645855ms step_avg:574.46ms +step:41163/57344 train_time:23646402ms step_avg:574.46ms +grad accum step:10291/14336 +step:41164/57344 train_time:23647757ms step_avg:574.48ms +step:41165/57344 train_time:23647823ms step_avg:574.46ms +step:41166/57344 train_time:23648057ms step_avg:574.46ms +step:41167/57344 train_time:23648651ms step_avg:574.46ms +grad accum step:10292/14336 +step:41168/57344 train_time:23650044ms step_avg:574.48ms +step:41169/57344 train_time:23650059ms step_avg:574.46ms +step:41170/57344 train_time:23650277ms step_avg:574.45ms +step:41171/57344 train_time:23650815ms step_avg:574.45ms +grad accum step:10293/14336 +step:41172/57344 train_time:23652117ms step_avg:574.47ms +step:41173/57344 train_time:23652131ms step_avg:574.46ms +step:41174/57344 train_time:23652375ms step_avg:574.45ms +step:41175/57344 train_time:23652929ms step_avg:574.45ms +grad accum step:10294/14336 +step:41176/57344 train_time:23654256ms step_avg:574.47ms +step:41177/57344 train_time:23654272ms step_avg:574.45ms +step:41178/57344 train_time:23654516ms step_avg:574.45ms +step:41179/57344 train_time:23655062ms step_avg:574.44ms +grad accum step:10295/14336 +step:41180/57344 train_time:23656372ms step_avg:574.46ms +step:41181/57344 train_time:23656384ms step_avg:574.45ms +step:41182/57344 train_time:23656620ms step_avg:574.44ms +step:41183/57344 train_time:23657162ms step_avg:574.44ms +grad accum step:10296/14336 +step:41184/57344 train_time:23658482ms step_avg:574.46ms +step:41185/57344 train_time:23658500ms step_avg:574.44ms +step:41186/57344 train_time:23658744ms step_avg:574.44ms +step:41187/57344 train_time:23659301ms step_avg:574.44ms +grad accum step:10297/14336 +step:41188/57344 train_time:23660650ms step_avg:574.45ms +step:41189/57344 train_time:23660664ms step_avg:574.44ms +step:41190/57344 train_time:23660911ms step_avg:574.43ms +step:41191/57344 train_time:23661452ms step_avg:574.43ms +grad accum step:10298/14336 +step:41192/57344 train_time:23662785ms step_avg:574.45ms +step:41193/57344 train_time:23662798ms step_avg:574.44ms +step:41194/57344 train_time:23663041ms step_avg:574.43ms +step:41195/57344 train_time:23663599ms step_avg:574.43ms +grad accum step:10299/14336 +step:41196/57344 train_time:23664915ms step_avg:574.45ms +step:41197/57344 train_time:23664931ms step_avg:574.43ms +step:41198/57344 train_time:23665177ms step_avg:574.43ms +step:41199/57344 train_time:23665723ms step_avg:574.42ms +grad accum step:10300/14336 +step:41200/57344 train_time:23667042ms step_avg:574.44ms +step:41201/57344 train_time:23667059ms step_avg:574.43ms +step:41202/57344 train_time:23667282ms step_avg:574.42ms +step:41203/57344 train_time:23667824ms step_avg:574.42ms +grad accum step:10301/14336 +step:41204/57344 train_time:23669137ms step_avg:574.44ms +step:41205/57344 train_time:23669159ms step_avg:574.42ms +step:41206/57344 train_time:23669378ms step_avg:574.42ms +step:41207/57344 train_time:23669928ms step_avg:574.42ms +grad accum step:10302/14336 +step:41208/57344 train_time:23671268ms step_avg:574.43ms +step:41209/57344 train_time:23671285ms step_avg:574.42ms +step:41210/57344 train_time:23671526ms step_avg:574.41ms +step:41211/57344 train_time:23672068ms step_avg:574.41ms +grad accum step:10303/14336 +step:41212/57344 train_time:23673345ms step_avg:574.43ms +step:41213/57344 train_time:23673361ms step_avg:574.41ms +step:41214/57344 train_time:23673605ms step_avg:574.41ms +step:41215/57344 train_time:23674146ms step_avg:574.41ms +grad accum step:10304/14336 +step:41216/57344 train_time:23675455ms step_avg:574.42ms +step:41216/57344 val_loss:5.681271 train_time:23675456ms step_avg:574.42ms +step:41217/57344 train_time:23675467ms step_avg:574.41ms +step:41218/57344 train_time:23675695ms step_avg:574.40ms +step:41219/57344 train_time:23676255ms step_avg:574.40ms +grad accum step:10305/14336 +step:41220/57344 train_time:23677594ms step_avg:574.42ms +step:41221/57344 train_time:23677614ms step_avg:574.41ms +step:41222/57344 train_time:23677833ms step_avg:574.40ms +step:41223/57344 train_time:23678369ms step_avg:574.40ms +grad accum step:10306/14336 +step:41224/57344 train_time:23679674ms step_avg:574.41ms +step:41225/57344 train_time:23679690ms step_avg:574.40ms +step:41226/57344 train_time:23679939ms step_avg:574.39ms +step:41227/57344 train_time:23680476ms step_avg:574.39ms +grad accum step:10307/14336 +step:41228/57344 train_time:23681780ms step_avg:574.41ms +step:41229/57344 train_time:23681801ms step_avg:574.40ms +step:41230/57344 train_time:23682036ms step_avg:574.39ms +step:41231/57344 train_time:23682577ms step_avg:574.39ms +grad accum step:10308/14336 +step:41232/57344 train_time:23683924ms step_avg:574.41ms +step:41233/57344 train_time:23683940ms step_avg:574.39ms +step:41234/57344 train_time:23684188ms step_avg:574.38ms +step:41235/57344 train_time:23684746ms step_avg:574.38ms +grad accum step:10309/14336 +step:41236/57344 train_time:23686066ms step_avg:574.40ms +step:41237/57344 train_time:23686081ms step_avg:574.39ms +step:41238/57344 train_time:23686317ms step_avg:574.38ms +step:41239/57344 train_time:23686864ms step_avg:574.38ms +grad accum step:10310/14336 +step:41240/57344 train_time:23688191ms step_avg:574.40ms +step:41241/57344 train_time:23688205ms step_avg:574.38ms +step:41242/57344 train_time:23688444ms step_avg:574.38ms +step:41243/57344 train_time:23688998ms step_avg:574.38ms +grad accum step:10311/14336 +step:41244/57344 train_time:23690314ms step_avg:574.39ms +step:41245/57344 train_time:23690331ms step_avg:574.38ms +step:41246/57344 train_time:23690580ms step_avg:574.37ms +step:41247/57344 train_time:23691138ms step_avg:574.37ms +grad accum step:10312/14336 +step:41248/57344 train_time:23692468ms step_avg:574.39ms +step:41249/57344 train_time:23692485ms step_avg:574.38ms +step:41250/57344 train_time:23692736ms step_avg:574.37ms +step:41251/57344 train_time:23693292ms step_avg:574.37ms +grad accum step:10313/14336 +step:41252/57344 train_time:23694597ms step_avg:574.39ms +step:41253/57344 train_time:23694613ms step_avg:574.37ms +step:41254/57344 train_time:23694859ms step_avg:574.37ms +step:41255/57344 train_time:23695400ms step_avg:574.36ms +grad accum step:10314/14336 +step:41256/57344 train_time:23696703ms step_avg:574.38ms +step:41257/57344 train_time:23696718ms step_avg:574.37ms +step:41258/57344 train_time:23696969ms step_avg:574.36ms +step:41259/57344 train_time:23697528ms step_avg:574.36ms +grad accum step:10315/14336 +step:41260/57344 train_time:23698846ms step_avg:574.38ms +step:41261/57344 train_time:23698865ms step_avg:574.36ms +step:41262/57344 train_time:23699100ms step_avg:574.36ms +step:41263/57344 train_time:23699647ms step_avg:574.36ms +grad accum step:10316/14336 +step:41264/57344 train_time:23700960ms step_avg:574.37ms +step:41265/57344 train_time:23700976ms step_avg:574.36ms +step:41266/57344 train_time:23701208ms step_avg:574.35ms +step:41267/57344 train_time:23701749ms step_avg:574.35ms +grad accum step:10317/14336 +step:41268/57344 train_time:23703071ms step_avg:574.37ms +step:41269/57344 train_time:23703112ms step_avg:574.36ms +step:41270/57344 train_time:23703331ms step_avg:574.35ms +step:41271/57344 train_time:23703879ms step_avg:574.35ms +grad accum step:10318/14336 +step:41272/57344 train_time:23705191ms step_avg:574.36ms +step:41273/57344 train_time:23705208ms step_avg:574.35ms +step:41274/57344 train_time:23705459ms step_avg:574.34ms +step:41275/57344 train_time:23706016ms step_avg:574.34ms +grad accum step:10319/14336 +step:41276/57344 train_time:23707302ms step_avg:574.36ms +step:41277/57344 train_time:23707318ms step_avg:574.35ms +step:41278/57344 train_time:23707564ms step_avg:574.34ms +step:41279/57344 train_time:23708108ms step_avg:574.34ms +grad accum step:10320/14336 +step:41280/57344 train_time:23709420ms step_avg:574.36ms +step:41280/57344 val_loss:5.690168 train_time:23709425ms step_avg:574.36ms +step:41281/57344 train_time:23709437ms step_avg:574.34ms +step:41282/57344 train_time:23709661ms step_avg:574.33ms +step:41283/57344 train_time:23710207ms step_avg:574.33ms +grad accum step:10321/14336 +step:41284/57344 train_time:23711517ms step_avg:574.35ms +step:41285/57344 train_time:23711533ms step_avg:574.34ms +step:41286/57344 train_time:23711781ms step_avg:574.33ms +step:41287/57344 train_time:23712338ms step_avg:574.33ms +grad accum step:10322/14336 +step:41288/57344 train_time:23713670ms step_avg:574.35ms +step:41289/57344 train_time:23713687ms step_avg:574.33ms +step:41290/57344 train_time:23713936ms step_avg:574.33ms +step:41291/57344 train_time:23714493ms step_avg:574.33ms +grad accum step:10323/14336 +step:41292/57344 train_time:23715838ms step_avg:574.34ms +step:41293/57344 train_time:23715855ms step_avg:574.33ms +step:41294/57344 train_time:23716087ms step_avg:574.32ms +step:41295/57344 train_time:23716638ms step_avg:574.32ms +grad accum step:10324/14336 +step:41296/57344 train_time:23717955ms step_avg:574.34ms +step:41297/57344 train_time:23717972ms step_avg:574.33ms +step:41298/57344 train_time:23718221ms step_avg:574.32ms +step:41299/57344 train_time:23718768ms step_avg:574.32ms +grad accum step:10325/14336 +step:41300/57344 train_time:23720110ms step_avg:574.34ms +step:41301/57344 train_time:23720125ms step_avg:574.32ms +step:41302/57344 train_time:23720373ms step_avg:574.32ms +step:41303/57344 train_time:23720916ms step_avg:574.31ms +grad accum step:10326/14336 +step:41304/57344 train_time:23722246ms step_avg:574.33ms +step:41305/57344 train_time:23722276ms step_avg:574.32ms +step:41306/57344 train_time:23722500ms step_avg:574.31ms +step:41307/57344 train_time:23723042ms step_avg:574.31ms +grad accum step:10327/14336 +step:41308/57344 train_time:23724425ms step_avg:574.33ms +step:41309/57344 train_time:23724443ms step_avg:574.32ms +step:41310/57344 train_time:23724687ms step_avg:574.31ms +step:41311/57344 train_time:23725235ms step_avg:574.31ms +grad accum step:10328/14336 +step:41312/57344 train_time:23726554ms step_avg:574.33ms +step:41313/57344 train_time:23726573ms step_avg:574.31ms +step:41314/57344 train_time:23726804ms step_avg:574.30ms +step:41315/57344 train_time:23727356ms step_avg:574.30ms +grad accum step:10329/14336 +step:41316/57344 train_time:23728678ms step_avg:574.32ms +step:41317/57344 train_time:23728695ms step_avg:574.31ms +step:41318/57344 train_time:23728936ms step_avg:574.30ms +step:41319/57344 train_time:23729469ms step_avg:574.30ms +grad accum step:10330/14336 +step:41320/57344 train_time:23730778ms step_avg:574.32ms +step:41321/57344 train_time:23730794ms step_avg:574.30ms +step:41322/57344 train_time:23731039ms step_avg:574.30ms +step:41323/57344 train_time:23731603ms step_avg:574.30ms +grad accum step:10331/14336 +step:41324/57344 train_time:23732983ms step_avg:574.31ms +step:41325/57344 train_time:23732998ms step_avg:574.30ms +step:41326/57344 train_time:23733248ms step_avg:574.29ms +step:41327/57344 train_time:23733823ms step_avg:574.29ms +grad accum step:10332/14336 +step:41328/57344 train_time:23735271ms step_avg:574.31ms +step:41329/57344 train_time:23735284ms step_avg:574.30ms +step:41330/57344 train_time:23735547ms step_avg:574.29ms +step:41331/57344 train_time:23736137ms step_avg:574.29ms +grad accum step:10333/14336 +step:41332/57344 train_time:23737467ms step_avg:574.31ms +step:41333/57344 train_time:23737487ms step_avg:574.30ms +step:41334/57344 train_time:23737727ms step_avg:574.29ms +step:41335/57344 train_time:23738279ms step_avg:574.29ms +grad accum step:10334/14336 +step:41336/57344 train_time:23739600ms step_avg:574.31ms +step:41337/57344 train_time:23739619ms step_avg:574.29ms +step:41338/57344 train_time:23739859ms step_avg:574.29ms +step:41339/57344 train_time:23740417ms step_avg:574.29ms +grad accum step:10335/14336 +step:41340/57344 train_time:23741739ms step_avg:574.30ms +step:41341/57344 train_time:23741755ms step_avg:574.29ms +step:41342/57344 train_time:23742008ms step_avg:574.28ms +step:41343/57344 train_time:23742573ms step_avg:574.28ms +grad accum step:10336/14336 +step:41344/57344 train_time:23743957ms step_avg:574.30ms +step:41344/57344 val_loss:5.714119 train_time:23743967ms step_avg:574.30ms +step:41345/57344 train_time:23743979ms step_avg:574.29ms +step:41346/57344 train_time:23744204ms step_avg:574.28ms +step:41347/57344 train_time:23744758ms step_avg:574.28ms +grad accum step:10337/14336 +step:41348/57344 train_time:23746143ms step_avg:574.30ms +step:41349/57344 train_time:23746157ms step_avg:574.29ms +step:41350/57344 train_time:23746375ms step_avg:574.28ms +step:41351/57344 train_time:23746939ms step_avg:574.28ms +grad accum step:10338/14336 +step:41352/57344 train_time:23748249ms step_avg:574.30ms +step:41353/57344 train_time:23748265ms step_avg:574.28ms +step:41354/57344 train_time:23748517ms step_avg:574.27ms +step:41355/57344 train_time:23749079ms step_avg:574.27ms +grad accum step:10339/14336 +step:41356/57344 train_time:23750383ms step_avg:574.29ms +step:41357/57344 train_time:23750403ms step_avg:574.28ms +step:41358/57344 train_time:23750641ms step_avg:574.27ms +step:41359/57344 train_time:23751179ms step_avg:574.27ms +grad accum step:10340/14336 +step:41360/57344 train_time:23752573ms step_avg:574.29ms +step:41361/57344 train_time:23752588ms step_avg:574.27ms +step:41362/57344 train_time:23752808ms step_avg:574.27ms +step:41363/57344 train_time:23753350ms step_avg:574.27ms +grad accum step:10341/14336 +step:41364/57344 train_time:23754663ms step_avg:574.28ms +step:41365/57344 train_time:23754676ms step_avg:574.27ms +step:41366/57344 train_time:23754928ms step_avg:574.26ms +step:41367/57344 train_time:23755490ms step_avg:574.26ms +grad accum step:10342/14336 +step:41368/57344 train_time:23756813ms step_avg:574.28ms +step:41369/57344 train_time:23756836ms step_avg:574.27ms +step:41370/57344 train_time:23757065ms step_avg:574.26ms +step:41371/57344 train_time:23757609ms step_avg:574.26ms +grad accum step:10343/14336 +step:41372/57344 train_time:23758918ms step_avg:574.28ms +step:41373/57344 train_time:23758950ms step_avg:574.26ms +step:41374/57344 train_time:23759172ms step_avg:574.25ms +step:41375/57344 train_time:23759717ms step_avg:574.25ms +grad accum step:10344/14336 +step:41376/57344 train_time:23761037ms step_avg:574.27ms +step:41377/57344 train_time:23761054ms step_avg:574.26ms +step:41378/57344 train_time:23761298ms step_avg:574.25ms +step:41379/57344 train_time:23761844ms step_avg:574.25ms +grad accum step:10345/14336 +step:41380/57344 train_time:23763181ms step_avg:574.27ms +step:41381/57344 train_time:23763201ms step_avg:574.25ms +step:41382/57344 train_time:23763441ms step_avg:574.25ms +step:41383/57344 train_time:23764000ms step_avg:574.25ms +grad accum step:10346/14336 +step:41384/57344 train_time:23765329ms step_avg:574.26ms +step:41385/57344 train_time:23765341ms step_avg:574.25ms +step:41386/57344 train_time:23765585ms step_avg:574.24ms +step:41387/57344 train_time:23766134ms step_avg:574.24ms +grad accum step:10347/14336 +step:41388/57344 train_time:23767472ms step_avg:574.26ms +step:41389/57344 train_time:23767488ms step_avg:574.25ms +step:41390/57344 train_time:23767731ms step_avg:574.24ms +step:41391/57344 train_time:23768279ms step_avg:574.24ms +grad accum step:10348/14336 +step:41392/57344 train_time:23769568ms step_avg:574.26ms +step:41393/57344 train_time:23769584ms step_avg:574.24ms +step:41394/57344 train_time:23769837ms step_avg:574.23ms +step:41395/57344 train_time:23770401ms step_avg:574.23ms +grad accum step:10349/14336 +step:41396/57344 train_time:23771743ms step_avg:574.25ms +step:41397/57344 train_time:23771763ms step_avg:574.24ms +step:41398/57344 train_time:23772000ms step_avg:574.23ms +step:41399/57344 train_time:23772548ms step_avg:574.23ms +grad accum step:10350/14336 +step:41400/57344 train_time:23773857ms step_avg:574.25ms +step:41401/57344 train_time:23773878ms step_avg:574.23ms +step:41402/57344 train_time:23774118ms step_avg:574.23ms +step:41403/57344 train_time:23774675ms step_avg:574.23ms +grad accum step:10351/14336 +step:41404/57344 train_time:23775991ms step_avg:574.24ms +step:41405/57344 train_time:23776007ms step_avg:574.23ms +step:41406/57344 train_time:23776260ms step_avg:574.22ms +step:41407/57344 train_time:23776826ms step_avg:574.22ms +grad accum step:10352/14336 +step:41408/57344 train_time:23778124ms step_avg:574.24ms +step:41408/57344 val_loss:5.713656 train_time:23778128ms step_avg:574.24ms +step:41409/57344 train_time:23778140ms step_avg:574.23ms +step:41410/57344 train_time:23778365ms step_avg:574.22ms +step:41411/57344 train_time:23778913ms step_avg:574.22ms +grad accum step:10353/14336 +step:41412/57344 train_time:23780259ms step_avg:574.24ms +step:41413/57344 train_time:23780273ms step_avg:574.22ms +step:41414/57344 train_time:23780528ms step_avg:574.21ms +step:41415/57344 train_time:23781111ms step_avg:574.21ms +grad accum step:10354/14336 +step:41416/57344 train_time:23782444ms step_avg:574.23ms +step:41417/57344 train_time:23782464ms step_avg:574.22ms +step:41418/57344 train_time:23782687ms step_avg:574.21ms +step:41419/57344 train_time:23783218ms step_avg:574.21ms +grad accum step:10355/14336 +step:41420/57344 train_time:23784535ms step_avg:574.23ms +step:41421/57344 train_time:23784556ms step_avg:574.21ms +step:41422/57344 train_time:23784783ms step_avg:574.21ms +step:41423/57344 train_time:23785326ms step_avg:574.21ms +grad accum step:10356/14336 +step:41424/57344 train_time:23786638ms step_avg:574.22ms +step:41425/57344 train_time:23786658ms step_avg:574.21ms +step:41426/57344 train_time:23786890ms step_avg:574.20ms +step:41427/57344 train_time:23787449ms step_avg:574.20ms +grad accum step:10357/14336 +step:41428/57344 train_time:23788749ms step_avg:574.22ms +step:41429/57344 train_time:23788765ms step_avg:574.21ms +step:41430/57344 train_time:23789011ms step_avg:574.20ms +step:41431/57344 train_time:23789553ms step_avg:574.20ms +grad accum step:10358/14336 +step:41432/57344 train_time:23790874ms step_avg:574.21ms +step:41433/57344 train_time:23790889ms step_avg:574.20ms +step:41434/57344 train_time:23791136ms step_avg:574.19ms +step:41435/57344 train_time:23791696ms step_avg:574.19ms +grad accum step:10359/14336 +step:41436/57344 train_time:23793020ms step_avg:574.21ms +step:41437/57344 train_time:23793033ms step_avg:574.20ms +step:41438/57344 train_time:23793256ms step_avg:574.19ms +step:41439/57344 train_time:23793798ms step_avg:574.19ms +grad accum step:10360/14336 +step:41440/57344 train_time:23795125ms step_avg:574.21ms +step:41441/57344 train_time:23795138ms step_avg:574.19ms +step:41442/57344 train_time:23795390ms step_avg:574.19ms +step:41443/57344 train_time:23795953ms step_avg:574.19ms +grad accum step:10361/14336 +step:41444/57344 train_time:23797273ms step_avg:574.20ms +step:41445/57344 train_time:23797289ms step_avg:574.19ms +step:41446/57344 train_time:23797533ms step_avg:574.18ms +step:41447/57344 train_time:23798080ms step_avg:574.18ms +grad accum step:10362/14336 +step:41448/57344 train_time:23799386ms step_avg:574.20ms +step:41449/57344 train_time:23799400ms step_avg:574.19ms +step:41450/57344 train_time:23799651ms step_avg:574.18ms +step:41451/57344 train_time:23800210ms step_avg:574.18ms +grad accum step:10363/14336 +step:41452/57344 train_time:23801513ms step_avg:574.19ms +step:41453/57344 train_time:23801530ms step_avg:574.18ms +step:41454/57344 train_time:23801783ms step_avg:574.17ms +step:41455/57344 train_time:23802340ms step_avg:574.17ms +grad accum step:10364/14336 +step:41456/57344 train_time:23803666ms step_avg:574.19ms +step:41457/57344 train_time:23803685ms step_avg:574.18ms +step:41458/57344 train_time:23803923ms step_avg:574.17ms +step:41459/57344 train_time:23804463ms step_avg:574.17ms +grad accum step:10365/14336 +step:41460/57344 train_time:23805800ms step_avg:574.19ms +step:41461/57344 train_time:23805816ms step_avg:574.17ms +step:41462/57344 train_time:23806039ms step_avg:574.17ms +step:41463/57344 train_time:23806592ms step_avg:574.16ms +grad accum step:10366/14336 +step:41464/57344 train_time:23807944ms step_avg:574.18ms +step:41465/57344 train_time:23807957ms step_avg:574.17ms +step:41466/57344 train_time:23808193ms step_avg:574.16ms +step:41467/57344 train_time:23808741ms step_avg:574.16ms +grad accum step:10367/14336 +step:41468/57344 train_time:23810124ms step_avg:574.18ms +step:41469/57344 train_time:23810165ms step_avg:574.17ms +step:41470/57344 train_time:23810384ms step_avg:574.16ms +step:41471/57344 train_time:23810930ms step_avg:574.16ms +grad accum step:10368/14336 +step:41472/57344 train_time:23812232ms step_avg:574.18ms +step:41472/57344 val_loss:5.725800 train_time:23812238ms step_avg:574.18ms +step:41473/57344 train_time:23812250ms step_avg:574.16ms +step:41474/57344 train_time:23812483ms step_avg:574.15ms +step:41475/57344 train_time:23813054ms step_avg:574.15ms +grad accum step:10369/14336 +step:41476/57344 train_time:23814385ms step_avg:574.17ms +step:41477/57344 train_time:23814401ms step_avg:574.16ms +step:41478/57344 train_time:23814649ms step_avg:574.15ms +step:41479/57344 train_time:23815195ms step_avg:574.15ms +grad accum step:10370/14336 +step:41480/57344 train_time:23816483ms step_avg:574.17ms +step:41481/57344 train_time:23816498ms step_avg:574.15ms +step:41482/57344 train_time:23816747ms step_avg:574.15ms +step:41483/57344 train_time:23817309ms step_avg:574.15ms +grad accum step:10371/14336 +step:41484/57344 train_time:23818635ms step_avg:574.16ms +step:41485/57344 train_time:23818651ms step_avg:574.15ms +step:41486/57344 train_time:23818898ms step_avg:574.14ms +step:41487/57344 train_time:23819443ms step_avg:574.14ms +grad accum step:10372/14336 +step:41488/57344 train_time:23820785ms step_avg:574.16ms +step:41489/57344 train_time:23820802ms step_avg:574.15ms +step:41490/57344 train_time:23821052ms step_avg:574.14ms +step:41491/57344 train_time:23821612ms step_avg:574.14ms +grad accum step:10373/14336 +step:41492/57344 train_time:23822916ms step_avg:574.16ms +step:41493/57344 train_time:23822933ms step_avg:574.14ms +step:41494/57344 train_time:23823185ms step_avg:574.14ms +step:41495/57344 train_time:23823748ms step_avg:574.14ms +grad accum step:10374/14336 +step:41496/57344 train_time:23825077ms step_avg:574.15ms +step:41497/57344 train_time:23825093ms step_avg:574.14ms +step:41498/57344 train_time:23825333ms step_avg:574.13ms +step:41499/57344 train_time:23825903ms step_avg:574.13ms +grad accum step:10375/14336 +step:41500/57344 train_time:23827224ms step_avg:574.15ms +step:41501/57344 train_time:23827241ms step_avg:574.14ms +step:41502/57344 train_time:23827494ms step_avg:574.13ms +step:41503/57344 train_time:23828056ms step_avg:574.13ms +grad accum step:10376/14336 +step:41504/57344 train_time:23829396ms step_avg:574.15ms +step:41505/57344 train_time:23829412ms step_avg:574.13ms +step:41506/57344 train_time:23829675ms step_avg:574.13ms +step:41507/57344 train_time:23830283ms step_avg:574.13ms +grad accum step:10377/14336 +step:41508/57344 train_time:23831666ms step_avg:574.15ms +step:41509/57344 train_time:23831683ms step_avg:574.13ms +step:41510/57344 train_time:23831936ms step_avg:574.13ms +step:41511/57344 train_time:23832503ms step_avg:574.12ms +grad accum step:10378/14336 +step:41512/57344 train_time:23833855ms step_avg:574.14ms +step:41513/57344 train_time:23833870ms step_avg:574.13ms +step:41514/57344 train_time:23834116ms step_avg:574.12ms +step:41515/57344 train_time:23834668ms step_avg:574.12ms +grad accum step:10379/14336 +step:41516/57344 train_time:23835995ms step_avg:574.14ms +step:41517/57344 train_time:23836012ms step_avg:574.13ms +step:41518/57344 train_time:23836259ms step_avg:574.12ms +step:41519/57344 train_time:23836807ms step_avg:574.12ms +grad accum step:10380/14336 +step:41520/57344 train_time:23838111ms step_avg:574.14ms +step:41521/57344 train_time:23838129ms step_avg:574.12ms +step:41522/57344 train_time:23838376ms step_avg:574.11ms +step:41523/57344 train_time:23838956ms step_avg:574.11ms +grad accum step:10381/14336 +step:41524/57344 train_time:23840273ms step_avg:574.13ms +step:41525/57344 train_time:23840290ms step_avg:574.12ms +step:41526/57344 train_time:23840541ms step_avg:574.11ms +step:41527/57344 train_time:23841103ms step_avg:574.11ms +grad accum step:10382/14336 +step:41528/57344 train_time:23842461ms step_avg:574.13ms +step:41529/57344 train_time:23842475ms step_avg:574.12ms +step:41530/57344 train_time:23842728ms step_avg:574.11ms +step:41531/57344 train_time:23843294ms step_avg:574.11ms +grad accum step:10383/14336 +step:41532/57344 train_time:23844641ms step_avg:574.13ms +step:41533/57344 train_time:23844660ms step_avg:574.11ms +step:41534/57344 train_time:23844887ms step_avg:574.11ms +step:41535/57344 train_time:23845439ms step_avg:574.10ms +grad accum step:10384/14336 +step:41536/57344 train_time:23846756ms step_avg:574.12ms +step:41536/57344 val_loss:5.742362 train_time:23846757ms step_avg:574.12ms +step:41537/57344 train_time:23846769ms step_avg:574.11ms +step:41538/57344 train_time:23847004ms step_avg:574.10ms +step:41539/57344 train_time:23847581ms step_avg:574.10ms +grad accum step:10385/14336 +step:41540/57344 train_time:23848917ms step_avg:574.12ms +step:41541/57344 train_time:23848933ms step_avg:574.11ms +step:41542/57344 train_time:23849180ms step_avg:574.10ms +step:41543/57344 train_time:23849725ms step_avg:574.10ms +grad accum step:10386/14336 +step:41544/57344 train_time:23851038ms step_avg:574.12ms +step:41545/57344 train_time:23851066ms step_avg:574.10ms +step:41546/57344 train_time:23851287ms step_avg:574.09ms +step:41547/57344 train_time:23851826ms step_avg:574.09ms +grad accum step:10387/14336 +step:41548/57344 train_time:23853110ms step_avg:574.11ms +step:41549/57344 train_time:23853132ms step_avg:574.10ms +step:41550/57344 train_time:23853368ms step_avg:574.09ms +step:41551/57344 train_time:23853913ms step_avg:574.09ms +grad accum step:10388/14336 +step:41552/57344 train_time:23855228ms step_avg:574.11ms +step:41553/57344 train_time:23855242ms step_avg:574.09ms +step:41554/57344 train_time:23855490ms step_avg:574.08ms +step:41555/57344 train_time:23856039ms step_avg:574.08ms +grad accum step:10389/14336 +step:41556/57344 train_time:23857359ms step_avg:574.10ms +step:41557/57344 train_time:23857376ms step_avg:574.09ms +step:41558/57344 train_time:23857623ms step_avg:574.08ms +step:41559/57344 train_time:23858185ms step_avg:574.08ms +grad accum step:10390/14336 +step:41560/57344 train_time:23859490ms step_avg:574.10ms +step:41561/57344 train_time:23859506ms step_avg:574.08ms +step:41562/57344 train_time:23859748ms step_avg:574.08ms +step:41563/57344 train_time:23860303ms step_avg:574.08ms +grad accum step:10391/14336 +step:41564/57344 train_time:23861608ms step_avg:574.09ms +step:41565/57344 train_time:23861625ms step_avg:574.08ms +step:41566/57344 train_time:23861870ms step_avg:574.07ms +step:41567/57344 train_time:23862420ms step_avg:574.07ms +grad accum step:10392/14336 +step:41568/57344 train_time:23863773ms step_avg:574.09ms +step:41569/57344 train_time:23863786ms step_avg:574.08ms +step:41570/57344 train_time:23864024ms step_avg:574.07ms +step:41571/57344 train_time:23864562ms step_avg:574.07ms +grad accum step:10393/14336 +step:41572/57344 train_time:23865878ms step_avg:574.09ms +step:41573/57344 train_time:23865893ms step_avg:574.07ms +step:41574/57344 train_time:23866139ms step_avg:574.06ms +step:41575/57344 train_time:23866689ms step_avg:574.06ms +grad accum step:10394/14336 +step:41576/57344 train_time:23867965ms step_avg:574.08ms +step:41577/57344 train_time:23867980ms step_avg:574.07ms +step:41578/57344 train_time:23868224ms step_avg:574.06ms +step:41579/57344 train_time:23868772ms step_avg:574.06ms +grad accum step:10395/14336 +step:41580/57344 train_time:23870086ms step_avg:574.08ms +step:41581/57344 train_time:23870115ms step_avg:574.06ms +step:41582/57344 train_time:23870335ms step_avg:574.05ms +step:41583/57344 train_time:23870883ms step_avg:574.05ms +grad accum step:10396/14336 +step:41584/57344 train_time:23872205ms step_avg:574.07ms +step:41585/57344 train_time:23872222ms step_avg:574.06ms +step:41586/57344 train_time:23872460ms step_avg:574.05ms +step:41587/57344 train_time:23873021ms step_avg:574.05ms +grad accum step:10397/14336 +step:41588/57344 train_time:23874328ms step_avg:574.07ms +step:41589/57344 train_time:23874346ms step_avg:574.05ms +step:41590/57344 train_time:23874591ms step_avg:574.05ms +step:41591/57344 train_time:23875141ms step_avg:574.05ms +grad accum step:10398/14336 +step:41592/57344 train_time:23876452ms step_avg:574.06ms +step:41593/57344 train_time:23876469ms step_avg:574.05ms +step:41594/57344 train_time:23876717ms step_avg:574.04ms +step:41595/57344 train_time:23877274ms step_avg:574.04ms +grad accum step:10399/14336 +step:41596/57344 train_time:23878611ms step_avg:574.06ms +step:41597/57344 train_time:23878624ms step_avg:574.05ms +step:41598/57344 train_time:23878875ms step_avg:574.04ms +step:41599/57344 train_time:23879436ms step_avg:574.04ms +grad accum step:10400/14336 +step:41600/57344 train_time:23880740ms step_avg:574.06ms +step:41600/57344 val_loss:5.748781 train_time:23880749ms step_avg:574.06ms +step:41601/57344 train_time:23880761ms step_avg:574.04ms +step:41602/57344 train_time:23880993ms step_avg:574.03ms +step:41603/57344 train_time:23881554ms step_avg:574.03ms +grad accum step:10401/14336 +step:41604/57344 train_time:23882858ms step_avg:574.05ms +step:41605/57344 train_time:23882875ms step_avg:574.04ms +step:41606/57344 train_time:23883137ms step_avg:574.03ms +step:41607/57344 train_time:23883758ms step_avg:574.03ms +grad accum step:10402/14336 +step:41608/57344 train_time:23885194ms step_avg:574.05ms +step:41609/57344 train_time:23885214ms step_avg:574.04ms +step:41610/57344 train_time:23885447ms step_avg:574.03ms +step:41611/57344 train_time:23885992ms step_avg:574.03ms +grad accum step:10403/14336 +step:41612/57344 train_time:23887305ms step_avg:574.05ms +step:41613/57344 train_time:23887321ms step_avg:574.04ms +step:41614/57344 train_time:23887570ms step_avg:574.03ms +step:41615/57344 train_time:23888121ms step_avg:574.03ms +grad accum step:10404/14336 +step:41616/57344 train_time:23889414ms step_avg:574.04ms +step:41617/57344 train_time:23889432ms step_avg:574.03ms +step:41618/57344 train_time:23889676ms step_avg:574.02ms +step:41619/57344 train_time:23890236ms step_avg:574.02ms +grad accum step:10405/14336 +step:41620/57344 train_time:23891594ms step_avg:574.04ms +step:41621/57344 train_time:23891609ms step_avg:574.03ms +step:41622/57344 train_time:23891854ms step_avg:574.02ms +step:41623/57344 train_time:23892407ms step_avg:574.02ms +grad accum step:10406/14336 +step:41624/57344 train_time:23893725ms step_avg:574.04ms +step:41625/57344 train_time:23893738ms step_avg:574.02ms +step:41626/57344 train_time:23893979ms step_avg:574.02ms +step:41627/57344 train_time:23894510ms step_avg:574.01ms +grad accum step:10407/14336 +step:41628/57344 train_time:23895845ms step_avg:574.03ms +step:41629/57344 train_time:23895860ms step_avg:574.02ms +step:41630/57344 train_time:23896110ms step_avg:574.01ms +step:41631/57344 train_time:23896661ms step_avg:574.01ms +grad accum step:10408/14336 +step:41632/57344 train_time:23897961ms step_avg:574.03ms +step:41633/57344 train_time:23897977ms step_avg:574.02ms +step:41634/57344 train_time:23898226ms step_avg:574.01ms +step:41635/57344 train_time:23898771ms step_avg:574.01ms +grad accum step:10409/14336 +step:41636/57344 train_time:23900068ms step_avg:574.02ms +step:41637/57344 train_time:23900084ms step_avg:574.01ms +step:41638/57344 train_time:23900326ms step_avg:574.00ms +step:41639/57344 train_time:23900873ms step_avg:574.00ms +grad accum step:10410/14336 +step:41640/57344 train_time:23902201ms step_avg:574.02ms +step:41641/57344 train_time:23902217ms step_avg:574.01ms +step:41642/57344 train_time:23902464ms step_avg:574.00ms +step:41643/57344 train_time:23903009ms step_avg:574.00ms +grad accum step:10411/14336 +step:41644/57344 train_time:23904293ms step_avg:574.02ms +step:41645/57344 train_time:23904311ms step_avg:574.00ms +step:41646/57344 train_time:23904553ms step_avg:573.99ms +step:41647/57344 train_time:23905089ms step_avg:573.99ms +grad accum step:10412/14336 +step:41648/57344 train_time:23906373ms step_avg:574.01ms +step:41649/57344 train_time:23906390ms step_avg:574.00ms +step:41650/57344 train_time:23906634ms step_avg:573.99ms +step:41651/57344 train_time:23907177ms step_avg:573.99ms +grad accum step:10413/14336 +step:41652/57344 train_time:23908469ms step_avg:574.01ms +step:41653/57344 train_time:23908486ms step_avg:573.99ms +step:41654/57344 train_time:23908739ms step_avg:573.98ms +step:41655/57344 train_time:23909299ms step_avg:573.98ms +grad accum step:10414/14336 +step:41656/57344 train_time:23910591ms step_avg:574.00ms +step:41657/57344 train_time:23910608ms step_avg:573.99ms +step:41658/57344 train_time:23910854ms step_avg:573.98ms +step:41659/57344 train_time:23911405ms step_avg:573.98ms +grad accum step:10415/14336 +step:41660/57344 train_time:23912722ms step_avg:574.00ms +step:41661/57344 train_time:23912739ms step_avg:573.98ms +step:41662/57344 train_time:23912997ms step_avg:573.98ms +step:41663/57344 train_time:23913576ms step_avg:573.98ms +grad accum step:10416/14336 +step:41664/57344 train_time:23915947ms step_avg:574.02ms +step:41664/57344 val_loss:5.762106 train_time:23915949ms step_avg:574.02ms +step:41665/57344 train_time:23915961ms step_avg:574.01ms +step:41666/57344 train_time:23916184ms step_avg:574.00ms +step:41667/57344 train_time:23916738ms step_avg:574.00ms +grad accum step:10417/14336 +step:41668/57344 train_time:23918088ms step_avg:574.02ms +step:41669/57344 train_time:23918113ms step_avg:574.00ms +step:41670/57344 train_time:23918351ms step_avg:573.99ms +step:41671/57344 train_time:23918918ms step_avg:573.99ms +grad accum step:10418/14336 +step:41672/57344 train_time:23920273ms step_avg:574.01ms +step:41673/57344 train_time:23920291ms step_avg:574.00ms +step:41674/57344 train_time:23920534ms step_avg:573.99ms +step:41675/57344 train_time:23921070ms step_avg:573.99ms +grad accum step:10419/14336 +step:41676/57344 train_time:23922401ms step_avg:574.01ms +step:41677/57344 train_time:23922428ms step_avg:574.00ms +step:41678/57344 train_time:23922656ms step_avg:573.99ms +step:41679/57344 train_time:23923208ms step_avg:573.99ms +grad accum step:10420/14336 +step:41680/57344 train_time:23924522ms step_avg:574.00ms +step:41681/57344 train_time:23924536ms step_avg:573.99ms +step:41682/57344 train_time:23924786ms step_avg:573.98ms +step:41683/57344 train_time:23925336ms step_avg:573.98ms +grad accum step:10421/14336 +step:41684/57344 train_time:23926675ms step_avg:574.00ms +step:41685/57344 train_time:23926691ms step_avg:573.99ms +step:41686/57344 train_time:23926933ms step_avg:573.98ms +step:41687/57344 train_time:23927479ms step_avg:573.98ms +grad accum step:10422/14336 +step:41688/57344 train_time:23928839ms step_avg:574.00ms +step:41689/57344 train_time:23928857ms step_avg:573.98ms +step:41690/57344 train_time:23929105ms step_avg:573.98ms +step:41691/57344 train_time:23929670ms step_avg:573.98ms +grad accum step:10423/14336 +step:41692/57344 train_time:23930968ms step_avg:573.99ms +step:41693/57344 train_time:23930983ms step_avg:573.98ms +step:41694/57344 train_time:23931228ms step_avg:573.97ms +step:41695/57344 train_time:23931777ms step_avg:573.97ms +grad accum step:10424/14336 +step:41696/57344 train_time:23933334ms step_avg:574.00ms +step:41697/57344 train_time:23933365ms step_avg:573.98ms +step:41698/57344 train_time:23933581ms step_avg:573.97ms +step:41699/57344 train_time:23934128ms step_avg:573.97ms +grad accum step:10425/14336 +step:41700/57344 train_time:23935506ms step_avg:573.99ms +step:41701/57344 train_time:23935518ms step_avg:573.98ms +step:41702/57344 train_time:23935754ms step_avg:573.97ms +step:41703/57344 train_time:23936318ms step_avg:573.97ms +grad accum step:10426/14336 +step:41704/57344 train_time:23937693ms step_avg:573.99ms +step:41705/57344 train_time:23937709ms step_avg:573.98ms +step:41706/57344 train_time:23937934ms step_avg:573.97ms +step:41707/57344 train_time:23938498ms step_avg:573.97ms +grad accum step:10427/14336 +step:41708/57344 train_time:23939814ms step_avg:573.99ms +step:41709/57344 train_time:23939831ms step_avg:573.97ms +step:41710/57344 train_time:23940085ms step_avg:573.97ms +step:41711/57344 train_time:23940653ms step_avg:573.96ms +grad accum step:10428/14336 +step:41712/57344 train_time:23941978ms step_avg:573.98ms +step:41713/57344 train_time:23941997ms step_avg:573.97ms +step:41714/57344 train_time:23942236ms step_avg:573.96ms +step:41715/57344 train_time:23942790ms step_avg:573.96ms +grad accum step:10429/14336 +step:41716/57344 train_time:23944162ms step_avg:573.98ms +step:41717/57344 train_time:23944177ms step_avg:573.97ms +step:41718/57344 train_time:23944426ms step_avg:573.96ms +step:41719/57344 train_time:23944982ms step_avg:573.96ms +grad accum step:10430/14336 +step:41720/57344 train_time:23946293ms step_avg:573.98ms +step:41721/57344 train_time:23946312ms step_avg:573.96ms +step:41722/57344 train_time:23946547ms step_avg:573.95ms +step:41723/57344 train_time:23947093ms step_avg:573.95ms +grad accum step:10431/14336 +step:41724/57344 train_time:23948392ms step_avg:573.97ms +step:41725/57344 train_time:23948407ms step_avg:573.96ms +step:41726/57344 train_time:23948656ms step_avg:573.95ms +step:41727/57344 train_time:23949215ms step_avg:573.95ms +grad accum step:10432/14336 +step:41728/57344 train_time:23950491ms step_avg:573.97ms +step:41728/57344 val_loss:5.774612 train_time:23950493ms step_avg:573.97ms +step:41729/57344 train_time:23950505ms step_avg:573.95ms +step:41730/57344 train_time:23950729ms step_avg:573.95ms +step:41731/57344 train_time:23951283ms step_avg:573.94ms +grad accum step:10433/14336 +step:41732/57344 train_time:23952605ms step_avg:573.96ms +step:41733/57344 train_time:23952621ms step_avg:573.95ms +step:41734/57344 train_time:23952867ms step_avg:573.94ms +step:41735/57344 train_time:23953428ms step_avg:573.94ms +grad accum step:10434/14336 +step:41736/57344 train_time:23954776ms step_avg:573.96ms +step:41737/57344 train_time:23954791ms step_avg:573.95ms +step:41738/57344 train_time:23955044ms step_avg:573.94ms +step:41739/57344 train_time:23955598ms step_avg:573.94ms +grad accum step:10435/14336 +step:41740/57344 train_time:23956959ms step_avg:573.96ms +step:41741/57344 train_time:23956982ms step_avg:573.94ms +step:41742/57344 train_time:23957202ms step_avg:573.94ms +step:41743/57344 train_time:23957757ms step_avg:573.93ms +grad accum step:10436/14336 +step:41744/57344 train_time:23959074ms step_avg:573.95ms +step:41745/57344 train_time:23959091ms step_avg:573.94ms +step:41746/57344 train_time:23959344ms step_avg:573.93ms +step:41747/57344 train_time:23959907ms step_avg:573.93ms +grad accum step:10437/14336 +step:41748/57344 train_time:23961207ms step_avg:573.95ms +step:41749/57344 train_time:23961225ms step_avg:573.94ms +step:41750/57344 train_time:23961469ms step_avg:573.93ms +step:41751/57344 train_time:23962018ms step_avg:573.93ms +grad accum step:10438/14336 +step:41752/57344 train_time:23963335ms step_avg:573.94ms +step:41753/57344 train_time:23963352ms step_avg:573.93ms +step:41754/57344 train_time:23963599ms step_avg:573.92ms +step:41755/57344 train_time:23964151ms step_avg:573.92ms +grad accum step:10439/14336 +step:41756/57344 train_time:23965463ms step_avg:573.94ms +step:41757/57344 train_time:23965479ms step_avg:573.93ms +step:41758/57344 train_time:23965725ms step_avg:573.92ms +step:41759/57344 train_time:23966274ms step_avg:573.92ms +grad accum step:10440/14336 +step:41760/57344 train_time:23967586ms step_avg:573.94ms +step:41761/57344 train_time:23967598ms step_avg:573.92ms +step:41762/57344 train_time:23967837ms step_avg:573.91ms +step:41763/57344 train_time:23968381ms step_avg:573.91ms +grad accum step:10441/14336 +step:41764/57344 train_time:23969676ms step_avg:573.93ms +step:41765/57344 train_time:23969696ms step_avg:573.92ms +step:41766/57344 train_time:23969935ms step_avg:573.91ms +step:41767/57344 train_time:23970507ms step_avg:573.91ms +grad accum step:10442/14336 +step:41768/57344 train_time:23971840ms step_avg:573.93ms +step:41769/57344 train_time:23971856ms step_avg:573.92ms +step:41770/57344 train_time:23972106ms step_avg:573.91ms +step:41771/57344 train_time:23972663ms step_avg:573.91ms +grad accum step:10443/14336 +step:41772/57344 train_time:23973971ms step_avg:573.92ms +step:41773/57344 train_time:23973993ms step_avg:573.91ms +step:41774/57344 train_time:23974228ms step_avg:573.90ms +step:41775/57344 train_time:23974787ms step_avg:573.90ms +grad accum step:10444/14336 +step:41776/57344 train_time:23976122ms step_avg:573.92ms +step:41777/57344 train_time:23976146ms step_avg:573.91ms +step:41778/57344 train_time:23976377ms step_avg:573.90ms +step:41779/57344 train_time:23976923ms step_avg:573.90ms +grad accum step:10445/14336 +step:41780/57344 train_time:23978239ms step_avg:573.92ms +step:41781/57344 train_time:23978255ms step_avg:573.90ms +step:41782/57344 train_time:23978508ms step_avg:573.90ms +step:41783/57344 train_time:23979070ms step_avg:573.90ms +grad accum step:10446/14336 +step:41784/57344 train_time:23980405ms step_avg:573.91ms +step:41785/57344 train_time:23980421ms step_avg:573.90ms +step:41786/57344 train_time:23980666ms step_avg:573.89ms +step:41787/57344 train_time:23981208ms step_avg:573.89ms +grad accum step:10447/14336 +step:41788/57344 train_time:23982491ms step_avg:573.91ms +step:41789/57344 train_time:23982505ms step_avg:573.90ms +step:41790/57344 train_time:23982756ms step_avg:573.89ms +step:41791/57344 train_time:23983309ms step_avg:573.89ms +grad accum step:10448/14336 +step:41792/57344 train_time:23984605ms step_avg:573.90ms +step:41792/57344 val_loss:5.786279 train_time:23984608ms step_avg:573.90ms +step:41793/57344 train_time:23984620ms step_avg:573.89ms +step:41794/57344 train_time:23984889ms step_avg:573.88ms +step:41795/57344 train_time:23985434ms step_avg:573.88ms +grad accum step:10449/14336 +step:41796/57344 train_time:23986755ms step_avg:573.90ms +step:41797/57344 train_time:23986766ms step_avg:573.89ms +step:41798/57344 train_time:23986997ms step_avg:573.88ms +step:41799/57344 train_time:23987537ms step_avg:573.88ms +grad accum step:10450/14336 +step:41800/57344 train_time:23988815ms step_avg:573.90ms +step:41801/57344 train_time:23988832ms step_avg:573.88ms +step:41802/57344 train_time:23989073ms step_avg:573.87ms +step:41803/57344 train_time:23989612ms step_avg:573.87ms +grad accum step:10451/14336 +step:41804/57344 train_time:23990931ms step_avg:573.89ms +step:41805/57344 train_time:23990946ms step_avg:573.88ms +step:41806/57344 train_time:23991189ms step_avg:573.87ms +step:41807/57344 train_time:23991729ms step_avg:573.87ms +grad accum step:10452/14336 +step:41808/57344 train_time:23993002ms step_avg:573.89ms +step:41809/57344 train_time:23993019ms step_avg:573.87ms +step:41810/57344 train_time:23993264ms step_avg:573.86ms +step:41811/57344 train_time:23993809ms step_avg:573.86ms +grad accum step:10453/14336 +step:41812/57344 train_time:23995127ms step_avg:573.88ms +step:41813/57344 train_time:23995144ms step_avg:573.87ms +step:41814/57344 train_time:23995387ms step_avg:573.86ms +step:41815/57344 train_time:23995931ms step_avg:573.86ms +grad accum step:10454/14336 +step:41816/57344 train_time:23997218ms step_avg:573.88ms +step:41817/57344 train_time:23997230ms step_avg:573.86ms +step:41818/57344 train_time:23997470ms step_avg:573.86ms +step:41819/57344 train_time:23998019ms step_avg:573.85ms +grad accum step:10455/14336 +step:41820/57344 train_time:23999352ms step_avg:573.87ms +step:41821/57344 train_time:23999369ms step_avg:573.86ms +step:41822/57344 train_time:23999621ms step_avg:573.85ms +step:41823/57344 train_time:24000180ms step_avg:573.85ms +grad accum step:10456/14336 +step:41824/57344 train_time:24001528ms step_avg:573.87ms +step:41825/57344 train_time:24001540ms step_avg:573.86ms +step:41826/57344 train_time:24001789ms step_avg:573.85ms +step:41827/57344 train_time:24002357ms step_avg:573.85ms +grad accum step:10457/14336 +step:41828/57344 train_time:24003713ms step_avg:573.87ms +step:41829/57344 train_time:24003730ms step_avg:573.85ms +step:41830/57344 train_time:24003980ms step_avg:573.85ms +step:41831/57344 train_time:24004539ms step_avg:573.85ms +grad accum step:10458/14336 +step:41832/57344 train_time:24005842ms step_avg:573.86ms +step:41833/57344 train_time:24005858ms step_avg:573.85ms +step:41834/57344 train_time:24006113ms step_avg:573.84ms +step:41835/57344 train_time:24006688ms step_avg:573.84ms +grad accum step:10459/14336 +step:41836/57344 train_time:24008009ms step_avg:573.86ms +step:41837/57344 train_time:24008028ms step_avg:573.85ms +step:41838/57344 train_time:24008246ms step_avg:573.84ms +step:41839/57344 train_time:24008796ms step_avg:573.84ms +grad accum step:10460/14336 +step:41840/57344 train_time:24010131ms step_avg:573.86ms +step:41841/57344 train_time:24010148ms step_avg:573.84ms +step:41842/57344 train_time:24010401ms step_avg:573.83ms +step:41843/57344 train_time:24010961ms step_avg:573.83ms +grad accum step:10461/14336 +step:41844/57344 train_time:24012272ms step_avg:573.85ms +step:41845/57344 train_time:24012284ms step_avg:573.84ms +step:41846/57344 train_time:24012523ms step_avg:573.83ms +step:41847/57344 train_time:24013063ms step_avg:573.83ms +grad accum step:10462/14336 +step:41848/57344 train_time:24014371ms step_avg:573.85ms +step:41849/57344 train_time:24014386ms step_avg:573.83ms +step:41850/57344 train_time:24014626ms step_avg:573.83ms +step:41851/57344 train_time:24015167ms step_avg:573.83ms +grad accum step:10463/14336 +step:41852/57344 train_time:24016480ms step_avg:573.84ms +step:41853/57344 train_time:24016497ms step_avg:573.83ms +step:41854/57344 train_time:24016738ms step_avg:573.82ms +step:41855/57344 train_time:24017274ms step_avg:573.82ms +grad accum step:10464/14336 +step:41856/57344 train_time:24018673ms step_avg:573.84ms +step:41856/57344 val_loss:5.804955 train_time:24018674ms step_avg:573.84ms +step:41857/57344 train_time:24018686ms step_avg:573.83ms +step:41858/57344 train_time:24018908ms step_avg:573.82ms +step:41859/57344 train_time:24019454ms step_avg:573.82ms +grad accum step:10465/14336 +step:41860/57344 train_time:24020762ms step_avg:573.84ms +step:41861/57344 train_time:24020779ms step_avg:573.82ms +step:41862/57344 train_time:24021024ms step_avg:573.81ms +step:41863/57344 train_time:24021572ms step_avg:573.81ms +grad accum step:10466/14336 +step:41864/57344 train_time:24022889ms step_avg:573.83ms +step:41865/57344 train_time:24022906ms step_avg:573.82ms +step:41866/57344 train_time:24023156ms step_avg:573.81ms +step:41867/57344 train_time:24023714ms step_avg:573.81ms +grad accum step:10467/14336 +step:41868/57344 train_time:24025076ms step_avg:573.83ms +step:41869/57344 train_time:24025093ms step_avg:573.82ms +step:41870/57344 train_time:24025340ms step_avg:573.81ms +step:41871/57344 train_time:24025883ms step_avg:573.81ms +grad accum step:10468/14336 +step:41872/57344 train_time:24027215ms step_avg:573.83ms +step:41873/57344 train_time:24027242ms step_avg:573.81ms +step:41874/57344 train_time:24027460ms step_avg:573.80ms +step:41875/57344 train_time:24028015ms step_avg:573.80ms +grad accum step:10469/14336 +step:41876/57344 train_time:24029400ms step_avg:573.82ms +step:41877/57344 train_time:24029416ms step_avg:573.81ms +step:41878/57344 train_time:24029680ms step_avg:573.80ms +step:41879/57344 train_time:24030269ms step_avg:573.80ms +grad accum step:10470/14336 +step:41880/57344 train_time:24031591ms step_avg:573.82ms +step:41881/57344 train_time:24031608ms step_avg:573.81ms +step:41882/57344 train_time:24031833ms step_avg:573.80ms +step:41883/57344 train_time:24032378ms step_avg:573.80ms +grad accum step:10471/14336 +step:41884/57344 train_time:24033673ms step_avg:573.82ms +step:41885/57344 train_time:24033690ms step_avg:573.80ms +step:41886/57344 train_time:24033944ms step_avg:573.79ms +step:41887/57344 train_time:24034523ms step_avg:573.79ms +grad accum step:10472/14336 +step:41888/57344 train_time:24035888ms step_avg:573.81ms +step:41889/57344 train_time:24035899ms step_avg:573.80ms +step:41890/57344 train_time:24036140ms step_avg:573.79ms +step:41891/57344 train_time:24036685ms step_avg:573.79ms +grad accum step:10473/14336 +step:41892/57344 train_time:24037995ms step_avg:573.81ms +step:41893/57344 train_time:24038012ms step_avg:573.80ms +step:41894/57344 train_time:24038261ms step_avg:573.79ms +step:41895/57344 train_time:24038813ms step_avg:573.79ms +grad accum step:10474/14336 +step:41896/57344 train_time:24040125ms step_avg:573.80ms +step:41897/57344 train_time:24040142ms step_avg:573.79ms +step:41898/57344 train_time:24040387ms step_avg:573.78ms +step:41899/57344 train_time:24040936ms step_avg:573.78ms +grad accum step:10475/14336 +step:41900/57344 train_time:24042320ms step_avg:573.80ms +step:41901/57344 train_time:24042337ms step_avg:573.79ms +step:41902/57344 train_time:24042585ms step_avg:573.78ms +step:41903/57344 train_time:24043148ms step_avg:573.78ms +grad accum step:10476/14336 +step:41904/57344 train_time:24044505ms step_avg:573.80ms +step:41905/57344 train_time:24044555ms step_avg:573.79ms +step:41906/57344 train_time:24044773ms step_avg:573.78ms +step:41907/57344 train_time:24045323ms step_avg:573.78ms +grad accum step:10477/14336 +step:41908/57344 train_time:24046669ms step_avg:573.80ms +step:41909/57344 train_time:24046707ms step_avg:573.78ms +step:41910/57344 train_time:24046928ms step_avg:573.78ms +step:41911/57344 train_time:24047475ms step_avg:573.77ms +grad accum step:10478/14336 +step:41912/57344 train_time:24048784ms step_avg:573.79ms +step:41913/57344 train_time:24048796ms step_avg:573.78ms +step:41914/57344 train_time:24049020ms step_avg:573.77ms +step:41915/57344 train_time:24049582ms step_avg:573.77ms +grad accum step:10479/14336 +step:41916/57344 train_time:24050899ms step_avg:573.79ms +step:41917/57344 train_time:24050915ms step_avg:573.77ms +step:41918/57344 train_time:24051163ms step_avg:573.77ms +step:41919/57344 train_time:24051735ms step_avg:573.77ms +grad accum step:10480/14336 +step:41920/57344 train_time:24116685ms step_avg:575.30ms +step:41920/57344 val_loss:5.815269 train_time:24116686ms step_avg:575.30ms +step:41921/57344 train_time:24116698ms step_avg:575.29ms +step:41922/57344 train_time:24116922ms step_avg:575.28ms +step:41923/57344 train_time:24117468ms step_avg:575.28ms +grad accum step:10481/14336 +step:41924/57344 train_time:24118758ms step_avg:575.30ms +step:41925/57344 train_time:24118773ms step_avg:575.28ms +step:41926/57344 train_time:24119027ms step_avg:575.28ms +step:41927/57344 train_time:24119588ms step_avg:575.28ms +grad accum step:10482/14336 +step:41928/57344 train_time:24120915ms step_avg:575.29ms +step:41929/57344 train_time:24120933ms step_avg:575.28ms +step:41930/57344 train_time:24121171ms step_avg:575.27ms +step:41931/57344 train_time:24121728ms step_avg:575.27ms +grad accum step:10483/14336 +step:41932/57344 train_time:24123006ms step_avg:575.29ms +step:41933/57344 train_time:24123022ms step_avg:575.28ms +step:41934/57344 train_time:24123269ms step_avg:575.27ms +step:41935/57344 train_time:24123816ms step_avg:575.27ms +grad accum step:10484/14336 +step:41936/57344 train_time:24125134ms step_avg:575.28ms +step:41937/57344 train_time:24125152ms step_avg:575.27ms +step:41938/57344 train_time:24125393ms step_avg:575.26ms +step:41939/57344 train_time:24125930ms step_avg:575.26ms +grad accum step:10485/14336 +step:41940/57344 train_time:24127242ms step_avg:575.28ms +step:41941/57344 train_time:24127258ms step_avg:575.27ms +step:41942/57344 train_time:24127527ms step_avg:575.26ms +step:41943/57344 train_time:24128136ms step_avg:575.26ms +grad accum step:10486/14336 +step:41944/57344 train_time:24129462ms step_avg:575.28ms +step:41945/57344 train_time:24129479ms step_avg:575.26ms +step:41946/57344 train_time:24129728ms step_avg:575.26ms +step:41947/57344 train_time:24130277ms step_avg:575.26ms +grad accum step:10487/14336 +step:41948/57344 train_time:24131591ms step_avg:575.27ms +step:41949/57344 train_time:24131608ms step_avg:575.26ms +step:41950/57344 train_time:24131854ms step_avg:575.25ms +step:41951/57344 train_time:24132405ms step_avg:575.25ms +grad accum step:10488/14336 +step:41952/57344 train_time:24133687ms step_avg:575.27ms +step:41953/57344 train_time:24133703ms step_avg:575.26ms +step:41954/57344 train_time:24133950ms step_avg:575.25ms +step:41955/57344 train_time:24134498ms step_avg:575.25ms +grad accum step:10489/14336 +step:41956/57344 train_time:24135815ms step_avg:575.26ms +step:41957/57344 train_time:24135831ms step_avg:575.25ms +step:41958/57344 train_time:24136076ms step_avg:575.24ms +step:41959/57344 train_time:24136613ms step_avg:575.24ms +grad accum step:10490/14336 +step:41960/57344 train_time:24137909ms step_avg:575.26ms +step:41961/57344 train_time:24137925ms step_avg:575.25ms +step:41962/57344 train_time:24138163ms step_avg:575.24ms +step:41963/57344 train_time:24138698ms step_avg:575.24ms +grad accum step:10491/14336 +step:41964/57344 train_time:24139983ms step_avg:575.25ms +step:41965/57344 train_time:24139999ms step_avg:575.24ms +step:41966/57344 train_time:24140262ms step_avg:575.23ms +step:41967/57344 train_time:24140854ms step_avg:575.23ms +grad accum step:10492/14336 +step:41968/57344 train_time:24142165ms step_avg:575.25ms +step:41969/57344 train_time:24142182ms step_avg:575.24ms +step:41970/57344 train_time:24142430ms step_avg:575.23ms +step:41971/57344 train_time:24142982ms step_avg:575.23ms +grad accum step:10493/14336 +step:41972/57344 train_time:24144309ms step_avg:575.25ms +step:41973/57344 train_time:24144325ms step_avg:575.23ms +step:41974/57344 train_time:24144570ms step_avg:575.23ms +step:41975/57344 train_time:24145108ms step_avg:575.23ms +grad accum step:10494/14336 +step:41976/57344 train_time:24146431ms step_avg:575.24ms +step:41977/57344 train_time:24146447ms step_avg:575.23ms +step:41978/57344 train_time:24146695ms step_avg:575.22ms +step:41979/57344 train_time:24147243ms step_avg:575.22ms +grad accum step:10495/14336 +step:41980/57344 train_time:24148531ms step_avg:575.24ms +step:41981/57344 train_time:24148548ms step_avg:575.23ms +step:41982/57344 train_time:24148795ms step_avg:575.22ms +step:41983/57344 train_time:24149346ms step_avg:575.22ms +grad accum step:10496/14336 +step:41984/57344 train_time:24150664ms step_avg:575.23ms +step:41984/57344 val_loss:5.823739 train_time:24150664ms step_avg:575.23ms +step:41985/57344 train_time:24150676ms step_avg:575.22ms +step:41986/57344 train_time:24150893ms step_avg:575.21ms +step:41987/57344 train_time:24151444ms step_avg:575.21ms +grad accum step:10497/14336 +step:41988/57344 train_time:24152774ms step_avg:575.23ms +step:41989/57344 train_time:24152792ms step_avg:575.22ms +step:41990/57344 train_time:24153011ms step_avg:575.21ms +step:41991/57344 train_time:24153549ms step_avg:575.21ms +grad accum step:10498/14336 +step:41992/57344 train_time:24154861ms step_avg:575.23ms +step:41993/57344 train_time:24154882ms step_avg:575.21ms +step:41994/57344 train_time:24155107ms step_avg:575.20ms +step:41995/57344 train_time:24155662ms step_avg:575.20ms +grad accum step:10499/14336 +step:41996/57344 train_time:24156949ms step_avg:575.22ms +step:41997/57344 train_time:24156966ms step_avg:575.21ms +step:41998/57344 train_time:24157217ms step_avg:575.20ms +step:41999/57344 train_time:24157777ms step_avg:575.20ms +grad accum step:10500/14336 +step:42000/57344 train_time:24159125ms step_avg:575.22ms +step:42001/57344 train_time:24159158ms step_avg:575.20ms +step:42002/57344 train_time:24159375ms step_avg:575.20ms +step:42003/57344 train_time:24159921ms step_avg:575.20ms +grad accum step:10501/14336 +step:42004/57344 train_time:24161239ms step_avg:575.21ms +step:42005/57344 train_time:24161256ms step_avg:575.20ms +step:42006/57344 train_time:24161507ms step_avg:575.19ms +step:42007/57344 train_time:24162066ms step_avg:575.19ms +grad accum step:10502/14336 +step:42008/57344 train_time:24163402ms step_avg:575.21ms +step:42009/57344 train_time:24163418ms step_avg:575.20ms +step:42010/57344 train_time:24163667ms step_avg:575.19ms +step:42011/57344 train_time:24164218ms step_avg:575.19ms +grad accum step:10503/14336 +step:42012/57344 train_time:24165528ms step_avg:575.21ms +step:42013/57344 train_time:24165545ms step_avg:575.19ms +step:42014/57344 train_time:24165796ms step_avg:575.18ms +step:42015/57344 train_time:24166348ms step_avg:575.18ms +grad accum step:10504/14336 +step:42016/57344 train_time:24167630ms step_avg:575.20ms +step:42017/57344 train_time:24167647ms step_avg:575.19ms +step:42018/57344 train_time:24167895ms step_avg:575.18ms +step:42019/57344 train_time:24168442ms step_avg:575.18ms +grad accum step:10505/14336 +step:42020/57344 train_time:24169740ms step_avg:575.20ms +step:42021/57344 train_time:24169766ms step_avg:575.18ms +step:42022/57344 train_time:24169999ms step_avg:575.17ms +step:42023/57344 train_time:24170545ms step_avg:575.17ms +grad accum step:10506/14336 +step:42024/57344 train_time:24171911ms step_avg:575.19ms +step:42025/57344 train_time:24171928ms step_avg:575.18ms +step:42026/57344 train_time:24172185ms step_avg:575.17ms +step:42027/57344 train_time:24172755ms step_avg:575.17ms +grad accum step:10507/14336 +step:42028/57344 train_time:24174055ms step_avg:575.19ms +step:42029/57344 train_time:24174072ms step_avg:575.18ms +step:42030/57344 train_time:24174318ms step_avg:575.17ms +step:42031/57344 train_time:24174855ms step_avg:575.17ms +grad accum step:10508/14336 +step:42032/57344 train_time:24176191ms step_avg:575.19ms +step:42033/57344 train_time:24176208ms step_avg:575.17ms +step:42034/57344 train_time:24176462ms step_avg:575.16ms +step:42035/57344 train_time:24177019ms step_avg:575.16ms +grad accum step:10509/14336 +step:42036/57344 train_time:24178346ms step_avg:575.18ms +step:42037/57344 train_time:24178362ms step_avg:575.17ms +step:42038/57344 train_time:24178610ms step_avg:575.16ms +step:42039/57344 train_time:24179156ms step_avg:575.16ms +grad accum step:10510/14336 +step:42040/57344 train_time:24180466ms step_avg:575.18ms +step:42041/57344 train_time:24180483ms step_avg:575.16ms +step:42042/57344 train_time:24180732ms step_avg:575.16ms +step:42043/57344 train_time:24181280ms step_avg:575.16ms +grad accum step:10511/14336 +step:42044/57344 train_time:24182587ms step_avg:575.17ms +step:42045/57344 train_time:24182604ms step_avg:575.16ms +step:42046/57344 train_time:24182849ms step_avg:575.15ms +step:42047/57344 train_time:24183388ms step_avg:575.15ms +grad accum step:10512/14336 +step:42048/57344 train_time:24184723ms step_avg:575.17ms +step:42048/57344 val_loss:5.834544 train_time:24184724ms step_avg:575.17ms +step:42049/57344 train_time:24184735ms step_avg:575.16ms +step:42050/57344 train_time:24184957ms step_avg:575.15ms +step:42051/57344 train_time:24185508ms step_avg:575.15ms +grad accum step:10513/14336 +step:42052/57344 train_time:24186872ms step_avg:575.17ms +step:42053/57344 train_time:24186889ms step_avg:575.15ms +step:42054/57344 train_time:24187137ms step_avg:575.14ms +step:42055/57344 train_time:24187678ms step_avg:575.14ms +grad accum step:10514/14336 +step:42056/57344 train_time:24188971ms step_avg:575.16ms +step:42057/57344 train_time:24188987ms step_avg:575.15ms +step:42058/57344 train_time:24189222ms step_avg:575.14ms +step:42059/57344 train_time:24189787ms step_avg:575.14ms +grad accum step:10515/14336 +step:42060/57344 train_time:24191097ms step_avg:575.16ms +step:42061/57344 train_time:24191114ms step_avg:575.14ms +step:42062/57344 train_time:24191362ms step_avg:575.14ms +step:42063/57344 train_time:24191906ms step_avg:575.14ms +grad accum step:10516/14336 +step:42064/57344 train_time:24193205ms step_avg:575.15ms +step:42065/57344 train_time:24193222ms step_avg:575.14ms +step:42066/57344 train_time:24193472ms step_avg:575.13ms +step:42067/57344 train_time:24194012ms step_avg:575.13ms +grad accum step:10517/14336 +step:42068/57344 train_time:24195343ms step_avg:575.15ms +step:42069/57344 train_time:24195360ms step_avg:575.14ms +step:42070/57344 train_time:24195610ms step_avg:575.13ms +step:42071/57344 train_time:24196155ms step_avg:575.13ms +grad accum step:10518/14336 +step:42072/57344 train_time:24197492ms step_avg:575.14ms +step:42073/57344 train_time:24197508ms step_avg:575.13ms +step:42074/57344 train_time:24197762ms step_avg:575.12ms +step:42075/57344 train_time:24198338ms step_avg:575.12ms +grad accum step:10519/14336 +step:42076/57344 train_time:24199685ms step_avg:575.14ms +step:42077/57344 train_time:24199697ms step_avg:575.13ms +step:42078/57344 train_time:24199940ms step_avg:575.12ms +step:42079/57344 train_time:24200481ms step_avg:575.12ms +grad accum step:10520/14336 +step:42080/57344 train_time:24201780ms step_avg:575.14ms +step:42081/57344 train_time:24201796ms step_avg:575.12ms +step:42082/57344 train_time:24202047ms step_avg:575.12ms +step:42083/57344 train_time:24202596ms step_avg:575.12ms +grad accum step:10521/14336 +step:42084/57344 train_time:24203923ms step_avg:575.13ms +step:42085/57344 train_time:24203938ms step_avg:575.12ms +step:42086/57344 train_time:24204188ms step_avg:575.11ms +step:42087/57344 train_time:24204744ms step_avg:575.11ms +grad accum step:10522/14336 +step:42088/57344 train_time:24206051ms step_avg:575.13ms +step:42089/57344 train_time:24206068ms step_avg:575.12ms +step:42090/57344 train_time:24206313ms step_avg:575.11ms +step:42091/57344 train_time:24206864ms step_avg:575.11ms +grad accum step:10523/14336 +step:42092/57344 train_time:24208182ms step_avg:575.13ms +step:42093/57344 train_time:24208199ms step_avg:575.11ms +step:42094/57344 train_time:24208450ms step_avg:575.10ms +step:42095/57344 train_time:24209010ms step_avg:575.10ms +grad accum step:10524/14336 +step:42096/57344 train_time:24210323ms step_avg:575.12ms +step:42097/57344 train_time:24210340ms step_avg:575.11ms +step:42098/57344 train_time:24210590ms step_avg:575.10ms +step:42099/57344 train_time:24211138ms step_avg:575.10ms +grad accum step:10525/14336 +step:42100/57344 train_time:24212477ms step_avg:575.12ms +step:42101/57344 train_time:24212489ms step_avg:575.10ms +step:42102/57344 train_time:24212716ms step_avg:575.10ms +step:42103/57344 train_time:24213262ms step_avg:575.10ms +grad accum step:10526/14336 +step:42104/57344 train_time:24214619ms step_avg:575.11ms +step:42105/57344 train_time:24214636ms step_avg:575.10ms +step:42106/57344 train_time:24214888ms step_avg:575.09ms +step:42107/57344 train_time:24215449ms step_avg:575.09ms +grad accum step:10527/14336 +step:42108/57344 train_time:24216750ms step_avg:575.11ms +step:42109/57344 train_time:24216767ms step_avg:575.10ms +step:42110/57344 train_time:24217017ms step_avg:575.09ms +step:42111/57344 train_time:24217566ms step_avg:575.09ms +grad accum step:10528/14336 +step:42112/57344 train_time:24218896ms step_avg:575.11ms +step:42112/57344 val_loss:5.846921 train_time:24218896ms step_avg:575.11ms +step:42113/57344 train_time:24218908ms step_avg:575.09ms +step:42114/57344 train_time:24219134ms step_avg:575.09ms +step:42115/57344 train_time:24219685ms step_avg:575.08ms +grad accum step:10529/14336 +step:42116/57344 train_time:24221016ms step_avg:575.10ms +step:42117/57344 train_time:24221033ms step_avg:575.09ms +step:42118/57344 train_time:24221281ms step_avg:575.08ms +step:42119/57344 train_time:24221837ms step_avg:575.08ms +grad accum step:10530/14336 +step:42120/57344 train_time:24223154ms step_avg:575.10ms +step:42121/57344 train_time:24223171ms step_avg:575.09ms +step:42122/57344 train_time:24223418ms step_avg:575.08ms +step:42123/57344 train_time:24223964ms step_avg:575.08ms +grad accum step:10531/14336 +step:42124/57344 train_time:24225277ms step_avg:575.09ms +step:42125/57344 train_time:24225289ms step_avg:575.08ms +step:42126/57344 train_time:24225526ms step_avg:575.07ms +step:42127/57344 train_time:24226067ms step_avg:575.07ms +grad accum step:10532/14336 +step:42128/57344 train_time:24227452ms step_avg:575.09ms +step:42129/57344 train_time:24227469ms step_avg:575.08ms +step:42130/57344 train_time:24227739ms step_avg:575.07ms +step:42131/57344 train_time:24228353ms step_avg:575.07ms +grad accum step:10533/14336 +step:42132/57344 train_time:24229674ms step_avg:575.09ms +step:42133/57344 train_time:24229692ms step_avg:575.08ms +step:42134/57344 train_time:24229939ms step_avg:575.07ms +step:42135/57344 train_time:24230486ms step_avg:575.07ms +grad accum step:10534/14336 +step:42136/57344 train_time:24231775ms step_avg:575.08ms +step:42137/57344 train_time:24231792ms step_avg:575.07ms +step:42138/57344 train_time:24232051ms step_avg:575.06ms +step:42139/57344 train_time:24232639ms step_avg:575.06ms +grad accum step:10535/14336 +step:42140/57344 train_time:24233974ms step_avg:575.08ms +step:42141/57344 train_time:24233986ms step_avg:575.07ms +step:42142/57344 train_time:24234222ms step_avg:575.06ms +step:42143/57344 train_time:24234770ms step_avg:575.06ms +grad accum step:10536/14336 +step:42144/57344 train_time:24236139ms step_avg:575.08ms +step:42145/57344 train_time:24236151ms step_avg:575.07ms +step:42146/57344 train_time:24236411ms step_avg:575.06ms +step:42147/57344 train_time:24236995ms step_avg:575.06ms +grad accum step:10537/14336 +step:42148/57344 train_time:24238274ms step_avg:575.08ms +step:42149/57344 train_time:24238291ms step_avg:575.06ms +step:42150/57344 train_time:24238541ms step_avg:575.05ms +step:42151/57344 train_time:24239098ms step_avg:575.05ms +grad accum step:10538/14336 +step:42152/57344 train_time:24240439ms step_avg:575.07ms +step:42153/57344 train_time:24240450ms step_avg:575.06ms +step:42154/57344 train_time:24240698ms step_avg:575.05ms +step:42155/57344 train_time:24241253ms step_avg:575.05ms +grad accum step:10539/14336 +step:42156/57344 train_time:24242573ms step_avg:575.07ms +step:42157/57344 train_time:24242590ms step_avg:575.05ms +step:42158/57344 train_time:24242838ms step_avg:575.05ms +step:42159/57344 train_time:24243382ms step_avg:575.05ms +grad accum step:10540/14336 +step:42160/57344 train_time:24244701ms step_avg:575.06ms +step:42161/57344 train_time:24244714ms step_avg:575.05ms +step:42162/57344 train_time:24244958ms step_avg:575.04ms +step:42163/57344 train_time:24245506ms step_avg:575.04ms +grad accum step:10541/14336 +step:42164/57344 train_time:24246816ms step_avg:575.06ms +step:42165/57344 train_time:24246831ms step_avg:575.05ms +step:42166/57344 train_time:24247075ms step_avg:575.04ms +step:42167/57344 train_time:24247611ms step_avg:575.04ms +grad accum step:10542/14336 +step:42168/57344 train_time:24248932ms step_avg:575.06ms +step:42169/57344 train_time:24248948ms step_avg:575.04ms +step:42170/57344 train_time:24249173ms step_avg:575.03ms +step:42171/57344 train_time:24249722ms step_avg:575.03ms +grad accum step:10543/14336 +step:42172/57344 train_time:24251021ms step_avg:575.05ms +step:42173/57344 train_time:24251038ms step_avg:575.04ms +step:42174/57344 train_time:24251285ms step_avg:575.03ms +step:42175/57344 train_time:24251834ms step_avg:575.03ms +grad accum step:10544/14336 +step:42176/57344 train_time:24253155ms step_avg:575.05ms +step:42176/57344 val_loss:5.855831 train_time:24253156ms step_avg:575.05ms +step:42177/57344 train_time:24253762ms step_avg:575.05ms +step:42178/57344 train_time:24253811ms step_avg:575.03ms +step:42179/57344 train_time:24254343ms step_avg:575.03ms +grad accum step:10545/14336 +step:42180/57344 train_time:24255967ms step_avg:575.06ms +step:42181/57344 train_time:24255988ms step_avg:575.05ms +step:42182/57344 train_time:24256205ms step_avg:575.04ms +step:42183/57344 train_time:24256756ms step_avg:575.04ms +grad accum step:10546/14336 +step:42184/57344 train_time:24258097ms step_avg:575.05ms +step:42185/57344 train_time:24258114ms step_avg:575.04ms +step:42186/57344 train_time:24258338ms step_avg:575.03ms +step:42187/57344 train_time:24258885ms step_avg:575.03ms +grad accum step:10547/14336 +step:42188/57344 train_time:24260252ms step_avg:575.05ms +step:42189/57344 train_time:24260278ms step_avg:575.04ms +step:42190/57344 train_time:24260516ms step_avg:575.03ms +step:42191/57344 train_time:24261093ms step_avg:575.03ms +grad accum step:10548/14336 +step:42192/57344 train_time:24262473ms step_avg:575.05ms +step:42193/57344 train_time:24262488ms step_avg:575.04ms +step:42194/57344 train_time:24262707ms step_avg:575.03ms +step:42195/57344 train_time:24263260ms step_avg:575.03ms +grad accum step:10549/14336 +step:42196/57344 train_time:24264615ms step_avg:575.05ms +step:42197/57344 train_time:24264634ms step_avg:575.03ms +step:42198/57344 train_time:24264873ms step_avg:575.02ms +step:42199/57344 train_time:24265422ms step_avg:575.02ms +grad accum step:10550/14336 +step:42200/57344 train_time:24266746ms step_avg:575.04ms +step:42201/57344 train_time:24266771ms step_avg:575.03ms +step:42202/57344 train_time:24267001ms step_avg:575.02ms +step:42203/57344 train_time:24267548ms step_avg:575.02ms +grad accum step:10551/14336 +step:42204/57344 train_time:24268850ms step_avg:575.04ms +step:42205/57344 train_time:24268874ms step_avg:575.02ms +step:42206/57344 train_time:24269112ms step_avg:575.02ms +step:42207/57344 train_time:24269674ms step_avg:575.02ms +grad accum step:10552/14336 +step:42208/57344 train_time:24270998ms step_avg:575.03ms +step:42209/57344 train_time:24271020ms step_avg:575.02ms +step:42210/57344 train_time:24271253ms step_avg:575.01ms +step:42211/57344 train_time:24271798ms step_avg:575.01ms +grad accum step:10553/14336 +step:42212/57344 train_time:24273095ms step_avg:575.03ms +step:42213/57344 train_time:24273108ms step_avg:575.01ms +step:42214/57344 train_time:24273351ms step_avg:575.01ms +step:42215/57344 train_time:24273887ms step_avg:575.01ms +grad accum step:10554/14336 +step:42216/57344 train_time:24275189ms step_avg:575.02ms +step:42217/57344 train_time:24275206ms step_avg:575.01ms +step:42218/57344 train_time:24275451ms step_avg:575.00ms +step:42219/57344 train_time:24276014ms step_avg:575.00ms +grad accum step:10555/14336 +step:42220/57344 train_time:24277315ms step_avg:575.02ms +step:42221/57344 train_time:24277333ms step_avg:575.01ms +step:42222/57344 train_time:24277576ms step_avg:575.00ms +step:42223/57344 train_time:24278122ms step_avg:575.00ms +grad accum step:10556/14336 +step:42224/57344 train_time:24279424ms step_avg:575.01ms +step:42225/57344 train_time:24279442ms step_avg:575.00ms +step:42226/57344 train_time:24279697ms step_avg:574.99ms +step:42227/57344 train_time:24280274ms step_avg:574.99ms +grad accum step:10557/14336 +step:42228/57344 train_time:24281622ms step_avg:575.01ms +step:42229/57344 train_time:24281646ms step_avg:575.00ms +step:42230/57344 train_time:24281861ms step_avg:574.99ms +step:42231/57344 train_time:24282404ms step_avg:574.99ms +grad accum step:10558/14336 +step:42232/57344 train_time:24283762ms step_avg:575.01ms +step:42233/57344 train_time:24283779ms step_avg:575.00ms +step:42234/57344 train_time:24284020ms step_avg:574.99ms +step:42235/57344 train_time:24284573ms step_avg:574.99ms +grad accum step:10559/14336 +step:42236/57344 train_time:24285880ms step_avg:575.00ms +step:42237/57344 train_time:24285910ms step_avg:574.99ms +step:42238/57344 train_time:24286136ms step_avg:574.98ms +step:42239/57344 train_time:24286713ms step_avg:574.98ms +grad accum step:10560/14336 +step:42240/57344 train_time:24288126ms step_avg:575.00ms +step:42240/57344 val_loss:5.874364 train_time:24288127ms step_avg:575.00ms +step:42241/57344 train_time:24288138ms step_avg:574.99ms +step:42242/57344 train_time:24288359ms step_avg:574.98ms +step:42243/57344 train_time:24288907ms step_avg:574.98ms +grad accum step:10561/14336 +step:42244/57344 train_time:24290236ms step_avg:575.00ms +step:42245/57344 train_time:24290291ms step_avg:574.99ms +step:42246/57344 train_time:24290514ms step_avg:574.98ms +step:42247/57344 train_time:24291072ms step_avg:574.98ms +grad accum step:10562/14336 +step:42248/57344 train_time:24292390ms step_avg:575.00ms +step:42249/57344 train_time:24292425ms step_avg:574.98ms +step:42250/57344 train_time:24292649ms step_avg:574.97ms +step:42251/57344 train_time:24293209ms step_avg:574.97ms +grad accum step:10563/14336 +step:42252/57344 train_time:24294781ms step_avg:575.00ms +step:42253/57344 train_time:24294794ms step_avg:574.98ms +step:42254/57344 train_time:24295016ms step_avg:574.98ms +step:42255/57344 train_time:24295573ms step_avg:574.98ms +grad accum step:10564/14336 +step:42256/57344 train_time:24297011ms step_avg:575.00ms +step:42257/57344 train_time:24297026ms step_avg:574.98ms +step:42258/57344 train_time:24297546ms step_avg:574.98ms +step:42259/57344 train_time:24297800ms step_avg:574.97ms +grad accum step:10565/14336 +step:42260/57344 train_time:24299172ms step_avg:574.99ms +step:42261/57344 train_time:24299205ms step_avg:574.98ms +step:42262/57344 train_time:24299415ms step_avg:574.97ms +step:42263/57344 train_time:24299978ms step_avg:574.97ms +grad accum step:10566/14336 +step:42264/57344 train_time:24301308ms step_avg:574.99ms +step:42265/57344 train_time:24301324ms step_avg:574.98ms +step:42266/57344 train_time:24301573ms step_avg:574.97ms +step:42267/57344 train_time:24302123ms step_avg:574.97ms +grad accum step:10567/14336 +step:42268/57344 train_time:24303471ms step_avg:574.99ms +step:42269/57344 train_time:24303487ms step_avg:574.97ms +step:42270/57344 train_time:24303706ms step_avg:574.96ms +step:42271/57344 train_time:24304251ms step_avg:574.96ms +grad accum step:10568/14336 +step:42272/57344 train_time:24305640ms step_avg:574.98ms +step:42273/57344 train_time:24305655ms step_avg:574.97ms +step:42274/57344 train_time:24305898ms step_avg:574.96ms +step:42275/57344 train_time:24306430ms step_avg:574.96ms +grad accum step:10569/14336 +step:42276/57344 train_time:24307799ms step_avg:574.98ms +step:42277/57344 train_time:24307814ms step_avg:574.97ms +step:42278/57344 train_time:24308034ms step_avg:574.96ms +step:42279/57344 train_time:24308587ms step_avg:574.96ms +grad accum step:10570/14336 +step:42280/57344 train_time:24309890ms step_avg:574.97ms +step:42281/57344 train_time:24309910ms step_avg:574.96ms +step:42282/57344 train_time:24310151ms step_avg:574.95ms +step:42283/57344 train_time:24310704ms step_avg:574.95ms +grad accum step:10571/14336 +step:42284/57344 train_time:24312088ms step_avg:574.97ms +step:42285/57344 train_time:24312122ms step_avg:574.96ms +step:42286/57344 train_time:24312345ms step_avg:574.95ms +step:42287/57344 train_time:24312900ms step_avg:574.95ms +grad accum step:10572/14336 +step:42288/57344 train_time:24314205ms step_avg:574.97ms +step:42289/57344 train_time:24314220ms step_avg:574.95ms +step:42290/57344 train_time:24314468ms step_avg:574.95ms +step:42291/57344 train_time:24315016ms step_avg:574.95ms +grad accum step:10573/14336 +step:42292/57344 train_time:24316334ms step_avg:574.96ms +step:42293/57344 train_time:24316351ms step_avg:574.95ms +step:42294/57344 train_time:24316583ms step_avg:574.94ms +step:42295/57344 train_time:24317129ms step_avg:574.94ms +grad accum step:10574/14336 +step:42296/57344 train_time:24318429ms step_avg:574.96ms +step:42297/57344 train_time:24318456ms step_avg:574.95ms +step:42298/57344 train_time:24318691ms step_avg:574.94ms +step:42299/57344 train_time:24319237ms step_avg:574.94ms +grad accum step:10575/14336 +step:42300/57344 train_time:24320544ms step_avg:574.95ms +step:42301/57344 train_time:24320562ms step_avg:574.94ms +step:42302/57344 train_time:24320802ms step_avg:574.93ms +step:42303/57344 train_time:24321343ms step_avg:574.93ms +grad accum step:10576/14336 +step:42304/57344 train_time:24322677ms step_avg:574.95ms +step:42304/57344 val_loss:5.877427 train_time:24322683ms step_avg:574.95ms +step:42305/57344 train_time:24322695ms step_avg:574.94ms +step:42306/57344 train_time:24322914ms step_avg:574.93ms +step:42307/57344 train_time:24323463ms step_avg:574.93ms +grad accum step:10577/14336 +step:42308/57344 train_time:24324774ms step_avg:574.95ms +step:42309/57344 train_time:24324795ms step_avg:574.93ms +step:42310/57344 train_time:24325026ms step_avg:574.92ms +step:42311/57344 train_time:24325579ms step_avg:574.92ms +grad accum step:10578/14336 +step:42312/57344 train_time:24326904ms step_avg:574.94ms +step:42313/57344 train_time:24326926ms step_avg:574.93ms +step:42314/57344 train_time:24327167ms step_avg:574.92ms +step:42315/57344 train_time:24327719ms step_avg:574.92ms +grad accum step:10579/14336 +step:42316/57344 train_time:24329109ms step_avg:574.94ms +step:42317/57344 train_time:24329128ms step_avg:574.93ms +step:42318/57344 train_time:24329370ms step_avg:574.92ms +step:42319/57344 train_time:24329927ms step_avg:574.92ms +grad accum step:10580/14336 +step:42320/57344 train_time:24331319ms step_avg:574.94ms +step:42321/57344 train_time:24331338ms step_avg:574.92ms +step:42322/57344 train_time:24331562ms step_avg:574.92ms +step:42323/57344 train_time:24332118ms step_avg:574.91ms +grad accum step:10581/14336 +step:42324/57344 train_time:24333457ms step_avg:574.93ms +step:42325/57344 train_time:24333475ms step_avg:574.92ms +step:42326/57344 train_time:24333725ms step_avg:574.91ms +step:42327/57344 train_time:24334312ms step_avg:574.91ms +grad accum step:10582/14336 +step:42328/57344 train_time:24335620ms step_avg:574.93ms +step:42329/57344 train_time:24335636ms step_avg:574.92ms +step:42330/57344 train_time:24335881ms step_avg:574.91ms +step:42331/57344 train_time:24336434ms step_avg:574.91ms +grad accum step:10583/14336 +step:42332/57344 train_time:24337749ms step_avg:574.93ms +step:42333/57344 train_time:24337764ms step_avg:574.91ms +step:42334/57344 train_time:24338018ms step_avg:574.90ms +step:42335/57344 train_time:24338584ms step_avg:574.90ms +grad accum step:10584/14336 +step:42336/57344 train_time:24339922ms step_avg:574.92ms +step:42337/57344 train_time:24339937ms step_avg:574.91ms +step:42338/57344 train_time:24340179ms step_avg:574.90ms +step:42339/57344 train_time:24340732ms step_avg:574.90ms +grad accum step:10585/14336 +step:42340/57344 train_time:24342109ms step_avg:574.92ms +step:42341/57344 train_time:24342125ms step_avg:574.91ms +step:42342/57344 train_time:24342374ms step_avg:574.90ms +step:42343/57344 train_time:24342928ms step_avg:574.90ms +grad accum step:10586/14336 +step:42344/57344 train_time:24344260ms step_avg:574.92ms +step:42345/57344 train_time:24344279ms step_avg:574.90ms +step:42346/57344 train_time:24344522ms step_avg:574.90ms +step:42347/57344 train_time:24345084ms step_avg:574.90ms +grad accum step:10587/14336 +step:42348/57344 train_time:24346450ms step_avg:574.91ms +step:42349/57344 train_time:24346464ms step_avg:574.90ms +step:42350/57344 train_time:24346687ms step_avg:574.89ms +step:42351/57344 train_time:24347234ms step_avg:574.89ms +grad accum step:10588/14336 +step:42352/57344 train_time:24348669ms step_avg:574.91ms +step:42353/57344 train_time:24348685ms step_avg:574.90ms +step:42354/57344 train_time:24348901ms step_avg:574.89ms +step:42355/57344 train_time:24349455ms step_avg:574.89ms +grad accum step:10589/14336 +step:42356/57344 train_time:24350928ms step_avg:574.91ms +step:42357/57344 train_time:24350952ms step_avg:574.90ms +step:42358/57344 train_time:24351174ms step_avg:574.89ms +step:42359/57344 train_time:24351720ms step_avg:574.89ms +grad accum step:10590/14336 +step:42360/57344 train_time:24353018ms step_avg:574.91ms +step:42361/57344 train_time:24353036ms step_avg:574.89ms +step:42362/57344 train_time:24353278ms step_avg:574.88ms +step:42363/57344 train_time:24353830ms step_avg:574.88ms +grad accum step:10591/14336 +step:42364/57344 train_time:24355159ms step_avg:574.90ms +step:42365/57344 train_time:24355193ms step_avg:574.89ms +step:42366/57344 train_time:24355425ms step_avg:574.88ms +step:42367/57344 train_time:24355998ms step_avg:574.88ms +grad accum step:10592/14336 +step:42368/57344 train_time:24357328ms step_avg:574.90ms +step:42368/57344 val_loss:5.891029 train_time:24357337ms step_avg:574.90ms +step:42369/57344 train_time:24357349ms step_avg:574.89ms +step:42370/57344 train_time:24357578ms step_avg:574.88ms +step:42371/57344 train_time:24358134ms step_avg:574.88ms +grad accum step:10593/14336 +step:42372/57344 train_time:24359475ms step_avg:574.90ms +step:42373/57344 train_time:24359515ms step_avg:574.88ms +step:42374/57344 train_time:24359733ms step_avg:574.87ms +step:42375/57344 train_time:24360271ms step_avg:574.87ms +grad accum step:10594/14336 +step:42376/57344 train_time:24361593ms step_avg:574.89ms +step:42377/57344 train_time:24361609ms step_avg:574.88ms +step:42378/57344 train_time:24361851ms step_avg:574.87ms +step:42379/57344 train_time:24362402ms step_avg:574.87ms +grad accum step:10595/14336 +step:42380/57344 train_time:24363739ms step_avg:574.89ms +step:42381/57344 train_time:24363766ms step_avg:574.87ms +step:42382/57344 train_time:24364000ms step_avg:574.87ms +step:42383/57344 train_time:24364551ms step_avg:574.87ms +grad accum step:10596/14336 +step:42384/57344 train_time:24365886ms step_avg:574.88ms +step:42385/57344 train_time:24365901ms step_avg:574.87ms +step:42386/57344 train_time:24366150ms step_avg:574.86ms +step:42387/57344 train_time:24366696ms step_avg:574.86ms +grad accum step:10597/14336 +step:42388/57344 train_time:24368209ms step_avg:574.88ms +step:42389/57344 train_time:24368232ms step_avg:574.87ms +step:42390/57344 train_time:24368451ms step_avg:574.86ms +step:42391/57344 train_time:24369008ms step_avg:574.86ms +grad accum step:10598/14336 +step:42392/57344 train_time:24370357ms step_avg:574.88ms +step:42393/57344 train_time:24370373ms step_avg:574.87ms +step:42394/57344 train_time:24370640ms step_avg:574.86ms +step:42395/57344 train_time:24371150ms step_avg:574.86ms +grad accum step:10599/14336 +step:42396/57344 train_time:24372447ms step_avg:574.88ms +step:42397/57344 train_time:24372467ms step_avg:574.86ms +step:42398/57344 train_time:24372703ms step_avg:574.86ms +step:42399/57344 train_time:24373256ms step_avg:574.85ms +grad accum step:10600/14336 +step:42400/57344 train_time:24374544ms step_avg:574.87ms +step:42401/57344 train_time:24374559ms step_avg:574.86ms +step:42402/57344 train_time:24374804ms step_avg:574.85ms +step:42403/57344 train_time:24375359ms step_avg:574.85ms +grad accum step:10601/14336 +step:42404/57344 train_time:24376676ms step_avg:574.87ms +step:42405/57344 train_time:24376690ms step_avg:574.85ms +step:42406/57344 train_time:24376937ms step_avg:574.85ms +step:42407/57344 train_time:24377484ms step_avg:574.85ms +grad accum step:10602/14336 +step:42408/57344 train_time:24378766ms step_avg:574.86ms +step:42409/57344 train_time:24378781ms step_avg:574.85ms +step:42410/57344 train_time:24379021ms step_avg:574.84ms +step:42411/57344 train_time:24379564ms step_avg:574.84ms +grad accum step:10603/14336 +step:42412/57344 train_time:24381298ms step_avg:574.87ms +step:42413/57344 train_time:24381320ms step_avg:574.85ms +step:42414/57344 train_time:24381539ms step_avg:574.85ms +step:42415/57344 train_time:24382087ms step_avg:574.85ms +grad accum step:10604/14336 +step:42416/57344 train_time:24383380ms step_avg:574.86ms +step:42417/57344 train_time:24383394ms step_avg:574.85ms +step:42418/57344 train_time:24383644ms step_avg:574.84ms +step:42419/57344 train_time:24384203ms step_avg:574.84ms +grad accum step:10605/14336 +step:42420/57344 train_time:24385509ms step_avg:574.86ms +step:42421/57344 train_time:24385544ms step_avg:574.85ms +step:42422/57344 train_time:24385769ms step_avg:574.84ms +step:42423/57344 train_time:24386313ms step_avg:574.84ms +grad accum step:10606/14336 +step:42424/57344 train_time:24387680ms step_avg:574.86ms +step:42425/57344 train_time:24387696ms step_avg:574.84ms +step:42426/57344 train_time:24387917ms step_avg:574.83ms +step:42427/57344 train_time:24388468ms step_avg:574.83ms +grad accum step:10607/14336 +step:42428/57344 train_time:24389768ms step_avg:574.85ms +step:42429/57344 train_time:24389790ms step_avg:574.84ms +step:42430/57344 train_time:24390037ms step_avg:574.83ms +step:42431/57344 train_time:24390595ms step_avg:574.83ms +grad accum step:10608/14336 +step:42432/57344 train_time:24391940ms step_avg:574.85ms +step:42432/57344 val_loss:5.894765 train_time:24391947ms step_avg:574.85ms +step:42433/57344 train_time:24391959ms step_avg:574.83ms +step:42434/57344 train_time:24392177ms step_avg:574.83ms +step:42435/57344 train_time:24392725ms step_avg:574.83ms +grad accum step:10609/14336 +step:42436/57344 train_time:24394045ms step_avg:574.84ms +step:42437/57344 train_time:24394075ms step_avg:574.83ms +step:42438/57344 train_time:24394302ms step_avg:574.82ms +step:42439/57344 train_time:24394849ms step_avg:574.82ms +grad accum step:10610/14336 +step:42440/57344 train_time:24396151ms step_avg:574.84ms +step:42441/57344 train_time:24396168ms step_avg:574.83ms +step:42442/57344 train_time:24396409ms step_avg:574.82ms +step:42443/57344 train_time:24396959ms step_avg:574.82ms +grad accum step:10611/14336 +step:42444/57344 train_time:24398254ms step_avg:574.83ms +step:42445/57344 train_time:24398270ms step_avg:574.82ms +step:42446/57344 train_time:24398507ms step_avg:574.81ms +step:42447/57344 train_time:24399051ms step_avg:574.81ms +grad accum step:10612/14336 +step:42448/57344 train_time:24400346ms step_avg:574.83ms +step:42449/57344 train_time:24400363ms step_avg:574.82ms +step:42450/57344 train_time:24400616ms step_avg:574.81ms +step:42451/57344 train_time:24401184ms step_avg:574.81ms +grad accum step:10613/14336 +step:42452/57344 train_time:24402527ms step_avg:574.83ms +step:42453/57344 train_time:24402542ms step_avg:574.81ms +step:42454/57344 train_time:24402795ms step_avg:574.81ms +step:42455/57344 train_time:24403369ms step_avg:574.81ms +grad accum step:10614/14336 +step:42456/57344 train_time:24404648ms step_avg:574.82ms +step:42457/57344 train_time:24404663ms step_avg:574.81ms +step:42458/57344 train_time:24404912ms step_avg:574.80ms +step:42459/57344 train_time:24405458ms step_avg:574.80ms +grad accum step:10615/14336 +step:42460/57344 train_time:24406830ms step_avg:574.82ms +step:42461/57344 train_time:24406949ms step_avg:574.81ms +step:42462/57344 train_time:24407167ms step_avg:574.80ms +step:42463/57344 train_time:24407711ms step_avg:574.80ms +grad accum step:10616/14336 +step:42464/57344 train_time:24409034ms step_avg:574.82ms +step:42465/57344 train_time:24409052ms step_avg:574.80ms +step:42466/57344 train_time:24409297ms step_avg:574.80ms +step:42467/57344 train_time:24409864ms step_avg:574.80ms +grad accum step:10617/14336 +step:42468/57344 train_time:24411238ms step_avg:574.81ms +step:42469/57344 train_time:24411253ms step_avg:574.80ms +step:42470/57344 train_time:24411481ms step_avg:574.79ms +step:42471/57344 train_time:24412045ms step_avg:574.79ms +grad accum step:10618/14336 +step:42472/57344 train_time:24413426ms step_avg:574.81ms +step:42473/57344 train_time:24413443ms step_avg:574.80ms +step:42474/57344 train_time:24413691ms step_avg:574.79ms +step:42475/57344 train_time:24414282ms step_avg:574.79ms +grad accum step:10619/14336 +step:42476/57344 train_time:24415687ms step_avg:574.81ms +step:42477/57344 train_time:24415700ms step_avg:574.80ms +step:42478/57344 train_time:24415927ms step_avg:574.79ms +step:42479/57344 train_time:24416495ms step_avg:574.79ms +grad accum step:10620/14336 +step:42480/57344 train_time:24417874ms step_avg:574.81ms +step:42481/57344 train_time:24417887ms step_avg:574.80ms +step:42482/57344 train_time:24418112ms step_avg:574.79ms +step:42483/57344 train_time:24418656ms step_avg:574.79ms +grad accum step:10621/14336 +step:42484/57344 train_time:24420009ms step_avg:574.80ms +step:42485/57344 train_time:24420027ms step_avg:574.79ms +step:42486/57344 train_time:24420268ms step_avg:574.78ms +step:42487/57344 train_time:24420825ms step_avg:574.78ms +grad accum step:10622/14336 +step:42488/57344 train_time:24422161ms step_avg:574.80ms +step:42489/57344 train_time:24422179ms step_avg:574.79ms +step:42490/57344 train_time:24422408ms step_avg:574.78ms +step:42491/57344 train_time:24422967ms step_avg:574.78ms +grad accum step:10623/14336 +step:42492/57344 train_time:24424279ms step_avg:574.80ms +step:42493/57344 train_time:24424295ms step_avg:574.78ms +step:42494/57344 train_time:24424547ms step_avg:574.78ms +step:42495/57344 train_time:24425114ms step_avg:574.78ms +grad accum step:10624/14336 +step:42496/57344 train_time:24426449ms step_avg:574.79ms +step:42496/57344 val_loss:5.906126 train_time:24426454ms step_avg:574.79ms +step:42497/57344 train_time:24426466ms step_avg:574.78ms +step:42498/57344 train_time:24426690ms step_avg:574.77ms +step:42499/57344 train_time:24427245ms step_avg:574.77ms +grad accum step:10625/14336 +step:42500/57344 train_time:24428628ms step_avg:574.79ms +step:42501/57344 train_time:24428645ms step_avg:574.78ms +step:42502/57344 train_time:24428891ms step_avg:574.77ms +step:42503/57344 train_time:24429434ms step_avg:574.77ms +grad accum step:10626/14336 +step:42504/57344 train_time:24430786ms step_avg:574.79ms +step:42505/57344 train_time:24430798ms step_avg:574.77ms +step:42506/57344 train_time:24431047ms step_avg:574.77ms +step:42507/57344 train_time:24431611ms step_avg:574.77ms +grad accum step:10627/14336 +step:42508/57344 train_time:24432975ms step_avg:574.79ms +step:42509/57344 train_time:24432992ms step_avg:574.77ms +step:42510/57344 train_time:24433246ms step_avg:574.76ms +step:42511/57344 train_time:24433815ms step_avg:574.76ms +grad accum step:10628/14336 +step:42512/57344 train_time:24435144ms step_avg:574.78ms +step:42513/57344 train_time:24435159ms step_avg:574.77ms +step:42514/57344 train_time:24435409ms step_avg:574.76ms +step:42515/57344 train_time:24435967ms step_avg:574.76ms +grad accum step:10629/14336 +step:42516/57344 train_time:24437377ms step_avg:574.78ms +step:42517/57344 train_time:24437393ms step_avg:574.77ms +step:42518/57344 train_time:24437650ms step_avg:574.76ms +step:42519/57344 train_time:24438218ms step_avg:574.76ms +grad accum step:10630/14336 +step:42520/57344 train_time:24439548ms step_avg:574.78ms +step:42521/57344 train_time:24439559ms step_avg:574.76ms +step:42522/57344 train_time:24439795ms step_avg:574.76ms +step:42523/57344 train_time:24440338ms step_avg:574.76ms +grad accum step:10631/14336 +step:42524/57344 train_time:24441635ms step_avg:574.77ms +step:42525/57344 train_time:24441646ms step_avg:574.76ms +step:42526/57344 train_time:24441874ms step_avg:574.75ms +step:42527/57344 train_time:24442424ms step_avg:574.75ms +grad accum step:10632/14336 +step:42528/57344 train_time:24443794ms step_avg:574.77ms +step:42529/57344 train_time:24443820ms step_avg:574.76ms +step:42530/57344 train_time:24444039ms step_avg:574.75ms +step:42531/57344 train_time:24444596ms step_avg:574.75ms +grad accum step:10633/14336 +step:42532/57344 train_time:24445947ms step_avg:574.77ms +step:42533/57344 train_time:24445963ms step_avg:574.75ms +step:42534/57344 train_time:24446223ms step_avg:574.75ms +step:42535/57344 train_time:24446806ms step_avg:574.75ms +grad accum step:10634/14336 +step:42536/57344 train_time:24448165ms step_avg:574.76ms +step:42537/57344 train_time:24448182ms step_avg:574.75ms +step:42538/57344 train_time:24448429ms step_avg:574.74ms +step:42539/57344 train_time:24448977ms step_avg:574.74ms +grad accum step:10635/14336 +step:42540/57344 train_time:24450326ms step_avg:574.76ms +step:42541/57344 train_time:24450338ms step_avg:574.75ms +step:42542/57344 train_time:24450569ms step_avg:574.74ms +step:42543/57344 train_time:24451103ms step_avg:574.74ms +grad accum step:10636/14336 +step:42544/57344 train_time:24452451ms step_avg:574.76ms +step:42545/57344 train_time:24452462ms step_avg:574.74ms +step:42546/57344 train_time:24452706ms step_avg:574.74ms +step:42547/57344 train_time:24453263ms step_avg:574.74ms +grad accum step:10637/14336 +step:42548/57344 train_time:24454581ms step_avg:574.75ms +step:42549/57344 train_time:24454593ms step_avg:574.74ms +step:42550/57344 train_time:24454830ms step_avg:574.73ms +step:42551/57344 train_time:24455387ms step_avg:574.73ms +grad accum step:10638/14336 +step:42552/57344 train_time:24456703ms step_avg:574.75ms +step:42553/57344 train_time:24456720ms step_avg:574.74ms +step:42554/57344 train_time:24456968ms step_avg:574.73ms +step:42555/57344 train_time:24457528ms step_avg:574.73ms +grad accum step:10639/14336 +step:42556/57344 train_time:24458906ms step_avg:574.75ms +step:42557/57344 train_time:24458924ms step_avg:574.73ms +step:42558/57344 train_time:24459157ms step_avg:574.73ms +step:42559/57344 train_time:24459720ms step_avg:574.72ms +grad accum step:10640/14336 +step:42560/57344 train_time:24461049ms step_avg:574.74ms +step:42560/57344 val_loss:5.916362 train_time:24461050ms step_avg:574.74ms +step:42561/57344 train_time:24461061ms step_avg:574.73ms +step:42562/57344 train_time:24461284ms step_avg:574.72ms +step:42563/57344 train_time:24461848ms step_avg:574.72ms +grad accum step:10641/14336 +step:42564/57344 train_time:24463188ms step_avg:574.74ms +step:42565/57344 train_time:24463205ms step_avg:574.73ms +step:42566/57344 train_time:24463455ms step_avg:574.72ms +step:42567/57344 train_time:24464013ms step_avg:574.72ms +grad accum step:10642/14336 +step:42568/57344 train_time:24465331ms step_avg:574.74ms +step:42569/57344 train_time:24465364ms step_avg:574.72ms +step:42570/57344 train_time:24465604ms step_avg:574.71ms +step:42571/57344 train_time:24466192ms step_avg:574.71ms +grad accum step:10643/14336 +step:42572/57344 train_time:24467534ms step_avg:574.73ms +step:42573/57344 train_time:24467551ms step_avg:574.72ms +step:42574/57344 train_time:24467803ms step_avg:574.71ms +step:42575/57344 train_time:24468358ms step_avg:574.71ms +grad accum step:10644/14336 +step:42576/57344 train_time:24469680ms step_avg:574.73ms +step:42577/57344 train_time:24469692ms step_avg:574.72ms +step:42578/57344 train_time:24469935ms step_avg:574.71ms +step:42579/57344 train_time:24470488ms step_avg:574.71ms +grad accum step:10645/14336 +step:42580/57344 train_time:24471822ms step_avg:574.73ms +step:42581/57344 train_time:24471839ms step_avg:574.71ms +step:42582/57344 train_time:24472098ms step_avg:574.71ms +step:42583/57344 train_time:24472673ms step_avg:574.71ms +grad accum step:10646/14336 +step:42584/57344 train_time:24473992ms step_avg:574.72ms +step:42585/57344 train_time:24474009ms step_avg:574.71ms +step:42586/57344 train_time:24474266ms step_avg:574.70ms +step:42587/57344 train_time:24474841ms step_avg:574.70ms +grad accum step:10647/14336 +step:42588/57344 train_time:24476169ms step_avg:574.72ms +step:42589/57344 train_time:24476186ms step_avg:574.71ms +step:42590/57344 train_time:24476459ms step_avg:574.70ms +step:42591/57344 train_time:24477084ms step_avg:574.70ms +grad accum step:10648/14336 +step:42592/57344 train_time:24478434ms step_avg:574.72ms +step:42593/57344 train_time:24478448ms step_avg:574.71ms +step:42594/57344 train_time:24478685ms step_avg:574.70ms +step:42595/57344 train_time:24479233ms step_avg:574.70ms +grad accum step:10649/14336 +step:42596/57344 train_time:24480574ms step_avg:574.72ms +step:42597/57344 train_time:24480618ms step_avg:574.70ms +step:42598/57344 train_time:24480847ms step_avg:574.69ms +step:42599/57344 train_time:24481422ms step_avg:574.69ms +grad accum step:10650/14336 +step:42600/57344 train_time:24482754ms step_avg:574.71ms +step:42601/57344 train_time:24482771ms step_avg:574.70ms +step:42602/57344 train_time:24483018ms step_avg:574.69ms +step:42603/57344 train_time:24483566ms step_avg:574.69ms +grad accum step:10651/14336 +step:42604/57344 train_time:24484870ms step_avg:574.71ms +step:42605/57344 train_time:24484906ms step_avg:574.70ms +step:42606/57344 train_time:24485126ms step_avg:574.69ms +step:42607/57344 train_time:24485669ms step_avg:574.69ms +grad accum step:10652/14336 +step:42608/57344 train_time:24487021ms step_avg:574.70ms +step:42609/57344 train_time:24487032ms step_avg:574.69ms +step:42610/57344 train_time:24487276ms step_avg:574.68ms +step:42611/57344 train_time:24487835ms step_avg:574.68ms +grad accum step:10653/14336 +step:42612/57344 train_time:24489209ms step_avg:574.70ms +step:42613/57344 train_time:24489247ms step_avg:574.69ms +step:42614/57344 train_time:24489472ms step_avg:574.68ms +step:42615/57344 train_time:24490038ms step_avg:574.68ms +grad accum step:10654/14336 +step:42616/57344 train_time:24491359ms step_avg:574.70ms +step:42617/57344 train_time:24491376ms step_avg:574.69ms +step:42618/57344 train_time:24491630ms step_avg:574.68ms +step:42619/57344 train_time:24492194ms step_avg:574.68ms +grad accum step:10655/14336 +step:42620/57344 train_time:24493547ms step_avg:574.70ms +step:42621/57344 train_time:24493561ms step_avg:574.68ms +step:42622/57344 train_time:24493783ms step_avg:574.67ms +step:42623/57344 train_time:24494330ms step_avg:574.67ms +grad accum step:10656/14336 +step:42624/57344 train_time:24495712ms step_avg:574.69ms +step:42624/57344 val_loss:5.923081 train_time:24495730ms step_avg:574.69ms +step:42625/57344 train_time:24495742ms step_avg:574.68ms +step:42626/57344 train_time:24495964ms step_avg:574.67ms +step:42627/57344 train_time:24496498ms step_avg:574.67ms +grad accum step:10657/14336 +step:42628/57344 train_time:24497903ms step_avg:574.69ms +step:42629/57344 train_time:24497918ms step_avg:574.68ms +step:42630/57344 train_time:24498134ms step_avg:574.67ms +step:42631/57344 train_time:24498678ms step_avg:574.67ms +grad accum step:10658/14336 +step:42632/57344 train_time:24500067ms step_avg:574.69ms +step:42633/57344 train_time:24500086ms step_avg:574.67ms +step:42634/57344 train_time:24500328ms step_avg:574.67ms +step:42635/57344 train_time:24500890ms step_avg:574.67ms +grad accum step:10659/14336 +step:42636/57344 train_time:24502515ms step_avg:574.69ms +step:42637/57344 train_time:24502527ms step_avg:574.68ms +step:42638/57344 train_time:24502760ms step_avg:574.67ms +step:42639/57344 train_time:24503353ms step_avg:574.67ms +grad accum step:10660/14336 +step:42640/57344 train_time:24504702ms step_avg:574.69ms +step:42641/57344 train_time:24504718ms step_avg:574.68ms +step:42642/57344 train_time:24504965ms step_avg:574.67ms +step:42643/57344 train_time:24505510ms step_avg:574.67ms +grad accum step:10661/14336 +step:42644/57344 train_time:24506914ms step_avg:574.69ms +step:42645/57344 train_time:24506930ms step_avg:574.67ms +step:42646/57344 train_time:24507164ms step_avg:574.67ms +step:42647/57344 train_time:24507729ms step_avg:574.66ms +grad accum step:10662/14336 +step:42648/57344 train_time:24509158ms step_avg:574.68ms +step:42649/57344 train_time:24509171ms step_avg:574.67ms +step:42650/57344 train_time:24509393ms step_avg:574.66ms +step:42651/57344 train_time:24509962ms step_avg:574.66ms +grad accum step:10663/14336 +step:42652/57344 train_time:24511358ms step_avg:574.68ms +step:42653/57344 train_time:24511380ms step_avg:574.67ms +step:42654/57344 train_time:24511616ms step_avg:574.66ms +step:42655/57344 train_time:24512164ms step_avg:574.66ms +grad accum step:10664/14336 +step:42656/57344 train_time:24513448ms step_avg:574.68ms +step:42657/57344 train_time:24513465ms step_avg:574.66ms +step:42658/57344 train_time:24513719ms step_avg:574.66ms +step:42659/57344 train_time:24514287ms step_avg:574.66ms +grad accum step:10665/14336 +step:42660/57344 train_time:24515688ms step_avg:574.68ms +step:42661/57344 train_time:24515706ms step_avg:574.66ms +step:42662/57344 train_time:24515928ms step_avg:574.65ms +step:42663/57344 train_time:24516485ms step_avg:574.65ms +grad accum step:10666/14336 +step:42664/57344 train_time:24517819ms step_avg:574.67ms +step:42665/57344 train_time:24517840ms step_avg:574.66ms +step:42666/57344 train_time:24518069ms step_avg:574.65ms +step:42667/57344 train_time:24518635ms step_avg:574.65ms +grad accum step:10667/14336 +step:42668/57344 train_time:24519976ms step_avg:574.67ms +step:42669/57344 train_time:24519991ms step_avg:574.66ms +step:42670/57344 train_time:24520242ms step_avg:574.65ms +step:42671/57344 train_time:24520812ms step_avg:574.65ms +grad accum step:10668/14336 +step:42672/57344 train_time:24522304ms step_avg:574.67ms +step:42673/57344 train_time:24522323ms step_avg:574.66ms +step:42674/57344 train_time:24522542ms step_avg:574.65ms +step:42675/57344 train_time:24523105ms step_avg:574.65ms +grad accum step:10669/14336 +step:42676/57344 train_time:24524492ms step_avg:574.67ms +step:42677/57344 train_time:24524510ms step_avg:574.65ms +step:42678/57344 train_time:24524729ms step_avg:574.65ms +step:42679/57344 train_time:24525275ms step_avg:574.65ms +grad accum step:10670/14336 +step:42680/57344 train_time:24526614ms step_avg:574.66ms +step:42681/57344 train_time:24526627ms step_avg:574.65ms +step:42682/57344 train_time:24526873ms step_avg:574.64ms +step:42683/57344 train_time:24527426ms step_avg:574.64ms +grad accum step:10671/14336 +step:42684/57344 train_time:24528766ms step_avg:574.66ms +step:42685/57344 train_time:24528785ms step_avg:574.65ms +step:42686/57344 train_time:24529029ms step_avg:574.64ms +step:42687/57344 train_time:24529589ms step_avg:574.64ms +grad accum step:10672/14336 +step:42688/57344 train_time:24530900ms step_avg:574.66ms +step:42688/57344 val_loss:5.931384 train_time:24530905ms step_avg:574.66ms +step:42689/57344 train_time:24531501ms step_avg:574.66ms +step:42690/57344 train_time:24531582ms step_avg:574.64ms +step:42691/57344 train_time:24532133ms step_avg:574.64ms +grad accum step:10673/14336 +step:42692/57344 train_time:24533513ms step_avg:574.66ms +step:42693/57344 train_time:24533529ms step_avg:574.65ms +step:42694/57344 train_time:24533772ms step_avg:574.64ms +step:42695/57344 train_time:24534324ms step_avg:574.64ms +grad accum step:10674/14336 +step:42696/57344 train_time:24538987ms step_avg:574.74ms +step:42697/57344 train_time:24540004ms step_avg:574.75ms +step:42698/57344 train_time:24540113ms step_avg:574.74ms +step:42699/57344 train_time:24540602ms step_avg:574.73ms +grad accum step:10675/14336 +step:42700/57344 train_time:24541907ms step_avg:574.75ms +step:42701/57344 train_time:24541921ms step_avg:574.74ms +step:42702/57344 train_time:24542163ms step_avg:574.73ms +step:42703/57344 train_time:24542704ms step_avg:574.73ms +grad accum step:10676/14336 +step:42704/57344 train_time:24544005ms step_avg:574.75ms +step:42705/57344 train_time:24544021ms step_avg:574.73ms +step:42706/57344 train_time:24544269ms step_avg:574.73ms +step:42707/57344 train_time:24544830ms step_avg:574.73ms +grad accum step:10677/14336 +step:42708/57344 train_time:24546141ms step_avg:574.74ms +step:42709/57344 train_time:24546156ms step_avg:574.73ms +step:42710/57344 train_time:24546408ms step_avg:574.72ms +step:42711/57344 train_time:24547009ms step_avg:574.72ms +grad accum step:10678/14336 +step:42712/57344 train_time:24548391ms step_avg:574.74ms +step:42713/57344 train_time:24548406ms step_avg:574.73ms +step:42714/57344 train_time:24548647ms step_avg:574.72ms +step:42715/57344 train_time:24549205ms step_avg:574.72ms +grad accum step:10679/14336 +step:42716/57344 train_time:24550541ms step_avg:574.74ms +step:42717/57344 train_time:24550558ms step_avg:574.73ms +step:42718/57344 train_time:24550813ms step_avg:574.72ms +step:42719/57344 train_time:24551401ms step_avg:574.72ms +grad accum step:10680/14336 +step:42720/57344 train_time:24552754ms step_avg:574.74ms +step:42721/57344 train_time:24552770ms step_avg:574.72ms +step:42722/57344 train_time:24553019ms step_avg:574.72ms +step:42723/57344 train_time:24553589ms step_avg:574.72ms +grad accum step:10681/14336 +step:42724/57344 train_time:24554898ms step_avg:574.73ms +step:42725/57344 train_time:24554912ms step_avg:574.72ms +step:42726/57344 train_time:24555159ms step_avg:574.71ms +step:42727/57344 train_time:24555713ms step_avg:574.71ms +grad accum step:10682/14336 +step:42728/57344 train_time:24557051ms step_avg:574.73ms +step:42729/57344 train_time:24557074ms step_avg:574.72ms +step:42730/57344 train_time:24557311ms step_avg:574.71ms +step:42731/57344 train_time:24557866ms step_avg:574.71ms +grad accum step:10683/14336 +step:42732/57344 train_time:24559217ms step_avg:574.73ms +step:42733/57344 train_time:24559230ms step_avg:574.71ms +step:42734/57344 train_time:24559479ms step_avg:574.71ms +step:42735/57344 train_time:24560029ms step_avg:574.71ms +grad accum step:10684/14336 +step:42736/57344 train_time:24561367ms step_avg:574.72ms +step:42737/57344 train_time:24561387ms step_avg:574.71ms +step:42738/57344 train_time:24561627ms step_avg:574.70ms +step:42739/57344 train_time:24562191ms step_avg:574.70ms +grad accum step:10685/14336 +step:42740/57344 train_time:24563489ms step_avg:574.72ms +step:42741/57344 train_time:24563504ms step_avg:574.71ms +step:42742/57344 train_time:24563752ms step_avg:574.70ms +step:42743/57344 train_time:24564303ms step_avg:574.70ms +grad accum step:10686/14336 +step:42744/57344 train_time:24565652ms step_avg:574.72ms +step:42745/57344 train_time:24565665ms step_avg:574.70ms +step:42746/57344 train_time:24565897ms step_avg:574.69ms +step:42747/57344 train_time:24566459ms step_avg:574.69ms +grad accum step:10687/14336 +step:42748/57344 train_time:24567800ms step_avg:574.71ms +step:42749/57344 train_time:24567842ms step_avg:574.70ms +step:42750/57344 train_time:24568069ms step_avg:574.69ms +step:42751/57344 train_time:24568651ms step_avg:574.69ms +grad accum step:10688/14336 +step:42752/57344 train_time:24569977ms step_avg:574.71ms +step:42752/57344 val_loss:5.939623 train_time:24569989ms step_avg:574.71ms +step:42753/57344 train_time:24570001ms step_avg:574.70ms +step:42754/57344 train_time:24570228ms step_avg:574.69ms +step:42755/57344 train_time:24570791ms step_avg:574.69ms +grad accum step:10689/14336 +step:42756/57344 train_time:24572161ms step_avg:574.71ms +step:42757/57344 train_time:24572184ms step_avg:574.69ms +step:42758/57344 train_time:24572413ms step_avg:574.69ms +step:42759/57344 train_time:24572966ms step_avg:574.69ms +grad accum step:10690/14336 +step:42760/57344 train_time:24574334ms step_avg:574.70ms +step:42761/57344 train_time:24574364ms step_avg:574.69ms +step:42762/57344 train_time:24574582ms step_avg:574.68ms +step:42763/57344 train_time:24575122ms step_avg:574.68ms +grad accum step:10691/14336 +step:42764/57344 train_time:24576455ms step_avg:574.70ms +step:42765/57344 train_time:24576482ms step_avg:574.69ms +step:42766/57344 train_time:24576717ms step_avg:574.68ms +step:42767/57344 train_time:24577278ms step_avg:574.68ms +grad accum step:10692/14336 +step:42768/57344 train_time:24578620ms step_avg:574.70ms +step:42769/57344 train_time:24578638ms step_avg:574.68ms +step:42770/57344 train_time:24578875ms step_avg:574.68ms +step:42771/57344 train_time:24579452ms step_avg:574.68ms +grad accum step:10693/14336 +step:42772/57344 train_time:24580779ms step_avg:574.69ms +step:42773/57344 train_time:24580793ms step_avg:574.68ms +step:42774/57344 train_time:24581044ms step_avg:574.67ms +step:42775/57344 train_time:24581606ms step_avg:574.67ms +grad accum step:10694/14336 +step:42776/57344 train_time:24582914ms step_avg:574.69ms +step:42777/57344 train_time:24582933ms step_avg:574.68ms +step:42778/57344 train_time:24583172ms step_avg:574.67ms +step:42779/57344 train_time:24583728ms step_avg:574.67ms +grad accum step:10695/14336 +step:42780/57344 train_time:24585061ms step_avg:574.69ms +step:42781/57344 train_time:24585092ms step_avg:574.67ms +step:42782/57344 train_time:24585327ms step_avg:574.67ms +step:42783/57344 train_time:24585912ms step_avg:574.67ms +grad accum step:10696/14336 +step:42784/57344 train_time:24587236ms step_avg:574.68ms +step:42785/57344 train_time:24587252ms step_avg:574.67ms +step:42786/57344 train_time:24587497ms step_avg:574.66ms +step:42787/57344 train_time:24588049ms step_avg:574.66ms +grad accum step:10697/14336 +step:42788/57344 train_time:24589401ms step_avg:574.68ms +step:42789/57344 train_time:24589429ms step_avg:574.67ms +step:42790/57344 train_time:24589664ms step_avg:574.66ms +step:42791/57344 train_time:24590247ms step_avg:574.66ms +grad accum step:10698/14336 +step:42792/57344 train_time:24591598ms step_avg:574.68ms +step:42793/57344 train_time:24591618ms step_avg:574.66ms +step:42794/57344 train_time:24591867ms step_avg:574.66ms +step:42795/57344 train_time:24592471ms step_avg:574.66ms +grad accum step:10699/14336 +step:42796/57344 train_time:24593791ms step_avg:574.67ms +step:42797/57344 train_time:24593814ms step_avg:574.66ms +step:42798/57344 train_time:24594049ms step_avg:574.65ms +step:42799/57344 train_time:24594599ms step_avg:574.65ms +grad accum step:10700/14336 +step:42800/57344 train_time:24595938ms step_avg:574.67ms +step:42801/57344 train_time:24595987ms step_avg:574.66ms +step:42802/57344 train_time:24596208ms step_avg:574.65ms +step:42803/57344 train_time:24596765ms step_avg:574.65ms +grad accum step:10701/14336 +step:42804/57344 train_time:24598239ms step_avg:574.67ms +step:42805/57344 train_time:24598257ms step_avg:574.66ms +step:42806/57344 train_time:24598479ms step_avg:574.65ms +step:42807/57344 train_time:24599030ms step_avg:574.65ms +grad accum step:10702/14336 +step:42808/57344 train_time:24600342ms step_avg:574.67ms +step:42809/57344 train_time:24600357ms step_avg:574.65ms +step:42810/57344 train_time:24600609ms step_avg:574.65ms +step:42811/57344 train_time:24601169ms step_avg:574.65ms +grad accum step:10703/14336 +step:42812/57344 train_time:24602480ms step_avg:574.66ms +step:42813/57344 train_time:24602501ms step_avg:574.65ms +step:42814/57344 train_time:24602744ms step_avg:574.64ms +step:42815/57344 train_time:24603315ms step_avg:574.64ms +grad accum step:10704/14336 +step:42816/57344 train_time:24604684ms step_avg:574.66ms +step:42816/57344 val_loss:5.941106 train_time:24604687ms step_avg:574.66ms +step:42817/57344 train_time:24604699ms step_avg:574.65ms +step:42818/57344 train_time:24604917ms step_avg:574.64ms +step:42819/57344 train_time:24605462ms step_avg:574.64ms +grad accum step:10705/14336 +step:42820/57344 train_time:24606805ms step_avg:574.66ms +step:42821/57344 train_time:24606843ms step_avg:574.64ms +step:42822/57344 train_time:24607071ms step_avg:574.64ms +step:42823/57344 train_time:24607647ms step_avg:574.64ms +grad accum step:10706/14336 +step:42824/57344 train_time:24609336ms step_avg:574.66ms +step:42825/57344 train_time:24609626ms step_avg:574.66ms +step:42826/57344 train_time:24609862ms step_avg:574.65ms +step:42827/57344 train_time:24610461ms step_avg:574.65ms +grad accum step:10707/14336 +step:42828/57344 train_time:24611878ms step_avg:574.67ms +step:42829/57344 train_time:24611938ms step_avg:574.66ms +step:42830/57344 train_time:24612158ms step_avg:574.65ms +step:42831/57344 train_time:24612710ms step_avg:574.65ms +grad accum step:10708/14336 +step:42832/57344 train_time:24614044ms step_avg:574.66ms +step:42833/57344 train_time:24614060ms step_avg:574.65ms +step:42834/57344 train_time:24614285ms step_avg:574.64ms +step:42835/57344 train_time:24614833ms step_avg:574.64ms +grad accum step:10709/14336 +step:42836/57344 train_time:24616157ms step_avg:574.66ms +step:42837/57344 train_time:24616172ms step_avg:574.65ms +step:42838/57344 train_time:24616417ms step_avg:574.64ms +step:42839/57344 train_time:24616960ms step_avg:574.64ms +grad accum step:10710/14336 +step:42840/57344 train_time:24618309ms step_avg:574.66ms +step:42841/57344 train_time:24618338ms step_avg:574.64ms +step:42842/57344 train_time:24618559ms step_avg:574.64ms +step:42843/57344 train_time:24619101ms step_avg:574.64ms +grad accum step:10711/14336 +step:42844/57344 train_time:24620431ms step_avg:574.65ms +step:42845/57344 train_time:24620450ms step_avg:574.64ms +step:42846/57344 train_time:24620688ms step_avg:574.63ms +step:42847/57344 train_time:24621227ms step_avg:574.63ms +grad accum step:10712/14336 +step:42848/57344 train_time:24622598ms step_avg:574.65ms +step:42849/57344 train_time:24622613ms step_avg:574.64ms +step:42850/57344 train_time:24622844ms step_avg:574.63ms +step:42851/57344 train_time:24623418ms step_avg:574.63ms +grad accum step:10713/14336 +step:42852/57344 train_time:24624734ms step_avg:574.65ms +step:42853/57344 train_time:24624754ms step_avg:574.63ms +step:42854/57344 train_time:24624989ms step_avg:574.63ms +step:42855/57344 train_time:24625554ms step_avg:574.62ms +grad accum step:10714/14336 +step:42856/57344 train_time:24626881ms step_avg:574.64ms +step:42857/57344 train_time:24626898ms step_avg:574.63ms +step:42858/57344 train_time:24627149ms step_avg:574.62ms +step:42859/57344 train_time:24627708ms step_avg:574.62ms +grad accum step:10715/14336 +step:42860/57344 train_time:24629064ms step_avg:574.64ms +step:42861/57344 train_time:24629080ms step_avg:574.63ms +step:42862/57344 train_time:24629339ms step_avg:574.62ms +step:42863/57344 train_time:24629927ms step_avg:574.62ms +grad accum step:10716/14336 +step:42864/57344 train_time:24631285ms step_avg:574.64ms +step:42865/57344 train_time:24631307ms step_avg:574.63ms +step:42866/57344 train_time:24631546ms step_avg:574.62ms +step:42867/57344 train_time:24632116ms step_avg:574.62ms +grad accum step:10717/14336 +step:42868/57344 train_time:24633520ms step_avg:574.64ms +step:42869/57344 train_time:24633545ms step_avg:574.62ms +step:42870/57344 train_time:24633767ms step_avg:574.62ms +step:42871/57344 train_time:24634313ms step_avg:574.61ms +grad accum step:10718/14336 +step:42872/57344 train_time:24635741ms step_avg:574.63ms +step:42873/57344 train_time:24635754ms step_avg:574.62ms +step:42874/57344 train_time:24635994ms step_avg:574.61ms +step:42875/57344 train_time:24636568ms step_avg:574.61ms +grad accum step:10719/14336 +step:42876/57344 train_time:24637889ms step_avg:574.63ms +step:42877/57344 train_time:24637928ms step_avg:574.62ms +step:42878/57344 train_time:24638154ms step_avg:574.61ms +step:42879/57344 train_time:24638711ms step_avg:574.61ms +grad accum step:10720/14336 +step:42880/57344 train_time:24640029ms step_avg:574.63ms +step:42880/57344 val_loss:5.949475 train_time:24640040ms step_avg:574.63ms +step:42881/57344 train_time:24640052ms step_avg:574.61ms +step:42882/57344 train_time:24640319ms step_avg:574.61ms +step:42883/57344 train_time:24640869ms step_avg:574.61ms +grad accum step:10721/14336 +step:42884/57344 train_time:24642189ms step_avg:574.62ms +step:42885/57344 train_time:24642201ms step_avg:574.61ms +step:42886/57344 train_time:24642443ms step_avg:574.60ms +step:42887/57344 train_time:24642987ms step_avg:574.60ms +grad accum step:10722/14336 +step:42888/57344 train_time:24644315ms step_avg:574.62ms +step:42889/57344 train_time:24644348ms step_avg:574.61ms +step:42890/57344 train_time:24644566ms step_avg:574.60ms +step:42891/57344 train_time:24645113ms step_avg:574.60ms +grad accum step:10723/14336 +step:42892/57344 train_time:24646392ms step_avg:574.62ms +step:42893/57344 train_time:24646409ms step_avg:574.60ms +step:42894/57344 train_time:24646656ms step_avg:574.59ms +step:42895/57344 train_time:24647202ms step_avg:574.59ms +grad accum step:10724/14336 +step:42896/57344 train_time:24648479ms step_avg:574.61ms +step:42897/57344 train_time:24648495ms step_avg:574.60ms +step:42898/57344 train_time:24648736ms step_avg:574.59ms +step:42899/57344 train_time:24649282ms step_avg:574.59ms +grad accum step:10725/14336 +step:42900/57344 train_time:24650617ms step_avg:574.61ms +step:42901/57344 train_time:24650644ms step_avg:574.59ms +step:42902/57344 train_time:24650860ms step_avg:574.59ms +step:42903/57344 train_time:24651405ms step_avg:574.58ms +grad accum step:10726/14336 +step:42904/57344 train_time:24652721ms step_avg:574.60ms +step:42905/57344 train_time:24652736ms step_avg:574.59ms +step:42906/57344 train_time:24652984ms step_avg:574.58ms +step:42907/57344 train_time:24653537ms step_avg:574.58ms +grad accum step:10727/14336 +step:42908/57344 train_time:24654875ms step_avg:574.60ms +step:42909/57344 train_time:24654901ms step_avg:574.59ms +step:42910/57344 train_time:24655133ms step_avg:574.58ms +step:42911/57344 train_time:24655694ms step_avg:574.58ms +grad accum step:10728/14336 +step:42912/57344 train_time:24657046ms step_avg:574.60ms +step:42913/57344 train_time:24657063ms step_avg:574.58ms +step:42914/57344 train_time:24657317ms step_avg:574.58ms +step:42915/57344 train_time:24657883ms step_avg:574.57ms +grad accum step:10729/14336 +step:42916/57344 train_time:24659160ms step_avg:574.59ms +step:42917/57344 train_time:24659176ms step_avg:574.58ms +step:42918/57344 train_time:24659414ms step_avg:574.57ms +step:42919/57344 train_time:24659963ms step_avg:574.57ms +grad accum step:10730/14336 +step:42920/57344 train_time:24661277ms step_avg:574.59ms +step:42921/57344 train_time:24661294ms step_avg:574.57ms +step:42922/57344 train_time:24661544ms step_avg:574.57ms +step:42923/57344 train_time:24662101ms step_avg:574.57ms +grad accum step:10731/14336 +step:42924/57344 train_time:24663405ms step_avg:574.58ms +step:42925/57344 train_time:24663421ms step_avg:574.57ms +step:42926/57344 train_time:24663671ms step_avg:574.56ms +step:42927/57344 train_time:24664230ms step_avg:574.56ms +grad accum step:10732/14336 +step:42928/57344 train_time:24665585ms step_avg:574.58ms +step:42929/57344 train_time:24665596ms step_avg:574.57ms +step:42930/57344 train_time:24665822ms step_avg:574.56ms +step:42931/57344 train_time:24666374ms step_avg:574.56ms +grad accum step:10733/14336 +step:42932/57344 train_time:24667706ms step_avg:574.58ms +step:42933/57344 train_time:24667717ms step_avg:574.56ms +step:42934/57344 train_time:24667958ms step_avg:574.56ms +step:42935/57344 train_time:24668519ms step_avg:574.56ms +grad accum step:10734/14336 +step:42936/57344 train_time:24669859ms step_avg:574.57ms +step:42937/57344 train_time:24669891ms step_avg:574.56ms +step:42938/57344 train_time:24670112ms step_avg:574.55ms +step:42939/57344 train_time:24670668ms step_avg:574.55ms +grad accum step:10735/14336 +step:42940/57344 train_time:24672043ms step_avg:574.57ms +step:42941/57344 train_time:24672073ms step_avg:574.56ms +step:42942/57344 train_time:24672295ms step_avg:574.55ms +step:42943/57344 train_time:24672852ms step_avg:574.55ms +grad accum step:10736/14336 +step:42944/57344 train_time:24743262ms step_avg:576.18ms +step:42944/57344 val_loss:5.944251 train_time:24743273ms step_avg:576.18ms +step:42945/57344 train_time:24743285ms step_avg:576.16ms +step:42946/57344 train_time:24743546ms step_avg:576.15ms +step:42947/57344 train_time:24744097ms step_avg:576.15ms +grad accum step:10737/14336 +step:42948/57344 train_time:24745412ms step_avg:576.17ms +step:42949/57344 train_time:24745428ms step_avg:576.16ms +step:42950/57344 train_time:24745676ms step_avg:576.15ms +step:42951/57344 train_time:24746228ms step_avg:576.15ms +grad accum step:10738/14336 +step:42952/57344 train_time:24747552ms step_avg:576.17ms +step:42953/57344 train_time:24747563ms step_avg:576.15ms +step:42954/57344 train_time:24747806ms step_avg:576.15ms +step:42955/57344 train_time:24748367ms step_avg:576.15ms +grad accum step:10739/14336 +step:42956/57344 train_time:24749643ms step_avg:576.16ms +step:42957/57344 train_time:24749660ms step_avg:576.15ms +step:42958/57344 train_time:24749903ms step_avg:576.14ms +step:42959/57344 train_time:24750448ms step_avg:576.14ms +grad accum step:10740/14336 +step:42960/57344 train_time:24751773ms step_avg:576.16ms +step:42961/57344 train_time:24751805ms step_avg:576.15ms +step:42962/57344 train_time:24752024ms step_avg:576.14ms +step:42963/57344 train_time:24752571ms step_avg:576.14ms +grad accum step:10741/14336 +step:42964/57344 train_time:24753896ms step_avg:576.15ms +step:42965/57344 train_time:24753924ms step_avg:576.14ms +step:42966/57344 train_time:24754142ms step_avg:576.13ms +step:42967/57344 train_time:24754694ms step_avg:576.13ms +grad accum step:10742/14336 +step:42968/57344 train_time:24755999ms step_avg:576.15ms +step:42969/57344 train_time:24756011ms step_avg:576.14ms +step:42970/57344 train_time:24756257ms step_avg:576.13ms +step:42971/57344 train_time:24756800ms step_avg:576.13ms +grad accum step:10743/14336 +step:42972/57344 train_time:24758111ms step_avg:576.15ms +step:42973/57344 train_time:24758134ms step_avg:576.13ms +step:42974/57344 train_time:24758363ms step_avg:576.12ms +step:42975/57344 train_time:24758922ms step_avg:576.12ms +grad accum step:10744/14336 +step:42976/57344 train_time:24760223ms step_avg:576.14ms +step:42977/57344 train_time:24760238ms step_avg:576.13ms +step:42978/57344 train_time:24760484ms step_avg:576.12ms +step:42979/57344 train_time:24761035ms step_avg:576.12ms +grad accum step:10745/14336 +step:42980/57344 train_time:24762397ms step_avg:576.14ms +step:42981/57344 train_time:24762416ms step_avg:576.12ms +step:42982/57344 train_time:24762636ms step_avg:576.12ms +step:42983/57344 train_time:24763183ms step_avg:576.12ms +grad accum step:10746/14336 +step:42984/57344 train_time:24764529ms step_avg:576.13ms +step:42985/57344 train_time:24764545ms step_avg:576.12ms +step:42986/57344 train_time:24764796ms step_avg:576.11ms +step:42987/57344 train_time:24765362ms step_avg:576.11ms +grad accum step:10747/14336 +step:42988/57344 train_time:24766690ms step_avg:576.13ms +step:42989/57344 train_time:24766706ms step_avg:576.12ms +step:42990/57344 train_time:24766944ms step_avg:576.11ms +step:42991/57344 train_time:24767490ms step_avg:576.11ms +grad accum step:10748/14336 +step:42992/57344 train_time:24768788ms step_avg:576.13ms +step:42993/57344 train_time:24768808ms step_avg:576.11ms +step:42994/57344 train_time:24769050ms step_avg:576.10ms +step:42995/57344 train_time:24769613ms step_avg:576.10ms +grad accum step:10749/14336 +step:42996/57344 train_time:24770937ms step_avg:576.12ms +step:42997/57344 train_time:24770957ms step_avg:576.11ms +step:42998/57344 train_time:24771198ms step_avg:576.10ms +step:42999/57344 train_time:24771764ms step_avg:576.10ms +grad accum step:10750/14336 +step:43000/57344 train_time:24773112ms step_avg:576.12ms +step:43001/57344 train_time:24773131ms step_avg:576.11ms +step:43002/57344 train_time:24773369ms step_avg:576.10ms +step:43003/57344 train_time:24773938ms step_avg:576.10ms +grad accum step:10751/14336 +step:43004/57344 train_time:24775306ms step_avg:576.12ms +step:43005/57344 train_time:24775322ms step_avg:576.10ms +step:43006/57344 train_time:24775538ms step_avg:576.09ms +step:43007/57344 train_time:24776077ms step_avg:576.09ms +grad accum step:10752/14336 +step:43008/57344 train_time:24777404ms step_avg:576.11ms +step:43008/57344 val_loss:5.937825 train_time:24777407ms step_avg:576.11ms +step:43009/57344 train_time:24777419ms step_avg:576.10ms +step:43010/57344 train_time:24777641ms step_avg:576.09ms +step:43011/57344 train_time:24778203ms step_avg:576.09ms +grad accum step:10753/14336 +step:43012/57344 train_time:24779543ms step_avg:576.11ms +step:43013/57344 train_time:24779557ms step_avg:576.09ms +step:43014/57344 train_time:24779809ms step_avg:576.09ms +step:43015/57344 train_time:24780380ms step_avg:576.09ms +grad accum step:10754/14336 +step:43016/57344 train_time:24781722ms step_avg:576.10ms +step:43017/57344 train_time:24781740ms step_avg:576.09ms +step:43018/57344 train_time:24781988ms step_avg:576.08ms +step:43019/57344 train_time:24782561ms step_avg:576.08ms +grad accum step:10755/14336 +step:43020/57344 train_time:24783882ms step_avg:576.10ms +step:43021/57344 train_time:24783906ms step_avg:576.09ms +step:43022/57344 train_time:24784131ms step_avg:576.08ms +step:43023/57344 train_time:24784678ms step_avg:576.08ms +grad accum step:10756/14336 +step:43024/57344 train_time:24786023ms step_avg:576.10ms +step:43025/57344 train_time:24786049ms step_avg:576.08ms +step:43026/57344 train_time:24786277ms step_avg:576.08ms +step:43027/57344 train_time:24786842ms step_avg:576.08ms +grad accum step:10757/14336 +step:43028/57344 train_time:24788172ms step_avg:576.09ms +step:43029/57344 train_time:24788188ms step_avg:576.08ms +step:43030/57344 train_time:24788432ms step_avg:576.07ms +step:43031/57344 train_time:24788989ms step_avg:576.07ms +grad accum step:10758/14336 +step:43032/57344 train_time:24790332ms step_avg:576.09ms +step:43033/57344 train_time:24790348ms step_avg:576.08ms +step:43034/57344 train_time:24790586ms step_avg:576.07ms +step:43035/57344 train_time:24791140ms step_avg:576.07ms +grad accum step:10759/14336 +step:43036/57344 train_time:24792500ms step_avg:576.09ms +step:43037/57344 train_time:24792519ms step_avg:576.07ms +step:43038/57344 train_time:24792765ms step_avg:576.07ms +step:43039/57344 train_time:24793337ms step_avg:576.07ms +grad accum step:10760/14336 +step:43040/57344 train_time:24794855ms step_avg:576.09ms +step:43041/57344 train_time:24794873ms step_avg:576.08ms +step:43042/57344 train_time:24795116ms step_avg:576.07ms +step:43043/57344 train_time:24795729ms step_avg:576.07ms +grad accum step:10761/14336 +step:43044/57344 train_time:24797049ms step_avg:576.09ms +step:43045/57344 train_time:24797070ms step_avg:576.07ms +step:43046/57344 train_time:24797309ms step_avg:576.07ms +step:43047/57344 train_time:24797863ms step_avg:576.06ms +grad accum step:10762/14336 +step:43048/57344 train_time:24799194ms step_avg:576.08ms +step:43049/57344 train_time:24799209ms step_avg:576.07ms +step:43050/57344 train_time:24799458ms step_avg:576.06ms +step:43051/57344 train_time:24800020ms step_avg:576.06ms +grad accum step:10763/14336 +step:43052/57344 train_time:24801342ms step_avg:576.08ms +step:43053/57344 train_time:24801361ms step_avg:576.07ms +step:43054/57344 train_time:24801603ms step_avg:576.06ms +step:43055/57344 train_time:24802168ms step_avg:576.06ms +grad accum step:10764/14336 +step:43056/57344 train_time:24803517ms step_avg:576.08ms +step:43057/57344 train_time:24803529ms step_avg:576.06ms +step:43058/57344 train_time:24803760ms step_avg:576.05ms +step:43059/57344 train_time:24804314ms step_avg:576.05ms +grad accum step:10765/14336 +step:43060/57344 train_time:24805636ms step_avg:576.07ms +step:43061/57344 train_time:24805651ms step_avg:576.06ms +step:43062/57344 train_time:24805905ms step_avg:576.05ms +step:43063/57344 train_time:24806471ms step_avg:576.05ms +grad accum step:10766/14336 +step:43064/57344 train_time:24807831ms step_avg:576.07ms +step:43065/57344 train_time:24807847ms step_avg:576.06ms +step:43066/57344 train_time:24808098ms step_avg:576.05ms +step:43067/57344 train_time:24808654ms step_avg:576.05ms +grad accum step:10767/14336 +step:43068/57344 train_time:24810038ms step_avg:576.07ms +step:43069/57344 train_time:24810054ms step_avg:576.05ms +step:43070/57344 train_time:24810305ms step_avg:576.05ms +step:43071/57344 train_time:24810866ms step_avg:576.05ms +grad accum step:10768/14336 +step:43072/57344 train_time:24812179ms step_avg:576.06ms +step:43072/57344 val_loss:5.929796 train_time:24812179ms step_avg:576.06ms +step:43073/57344 train_time:24812191ms step_avg:576.05ms +step:43074/57344 train_time:24812409ms step_avg:576.04ms +step:43075/57344 train_time:24812949ms step_avg:576.04ms +grad accum step:10769/14336 +step:43076/57344 train_time:24814305ms step_avg:576.06ms +step:43077/57344 train_time:24814323ms step_avg:576.05ms +step:43078/57344 train_time:24814566ms step_avg:576.04ms +step:43079/57344 train_time:24815113ms step_avg:576.04ms +grad accum step:10770/14336 +step:43080/57344 train_time:24816517ms step_avg:576.06ms +step:43081/57344 train_time:24816539ms step_avg:576.04ms +step:43082/57344 train_time:24816767ms step_avg:576.04ms +step:43083/57344 train_time:24817348ms step_avg:576.04ms +grad accum step:10771/14336 +step:43084/57344 train_time:24818677ms step_avg:576.05ms +step:43085/57344 train_time:24818690ms step_avg:576.04ms +step:43086/57344 train_time:24818936ms step_avg:576.03ms +step:43087/57344 train_time:24819479ms step_avg:576.03ms +grad accum step:10772/14336 +step:43088/57344 train_time:24820789ms step_avg:576.05ms +step:43089/57344 train_time:24820806ms step_avg:576.04ms +step:43090/57344 train_time:24821051ms step_avg:576.03ms +step:43091/57344 train_time:24821606ms step_avg:576.03ms +grad accum step:10773/14336 +step:43092/57344 train_time:24822929ms step_avg:576.04ms +step:43093/57344 train_time:24822947ms step_avg:576.03ms +step:43094/57344 train_time:24823190ms step_avg:576.02ms +step:43095/57344 train_time:24823748ms step_avg:576.02ms +grad accum step:10774/14336 +step:43096/57344 train_time:24825095ms step_avg:576.04ms +step:43097/57344 train_time:24825261ms step_avg:576.03ms +step:43098/57344 train_time:24825478ms step_avg:576.02ms +step:43099/57344 train_time:24826039ms step_avg:576.02ms +grad accum step:10775/14336 +step:43100/57344 train_time:24827381ms step_avg:576.04ms +step:43101/57344 train_time:24827396ms step_avg:576.03ms +step:43102/57344 train_time:24827645ms step_avg:576.02ms +step:43103/57344 train_time:24828205ms step_avg:576.02ms +grad accum step:10776/14336 +step:43104/57344 train_time:24829574ms step_avg:576.04ms +step:43105/57344 train_time:24829593ms step_avg:576.03ms +step:43106/57344 train_time:24829823ms step_avg:576.02ms +step:43107/57344 train_time:24830358ms step_avg:576.02ms +grad accum step:10777/14336 +step:43108/57344 train_time:24831650ms step_avg:576.03ms +step:43109/57344 train_time:24831666ms step_avg:576.02ms +step:43110/57344 train_time:24831912ms step_avg:576.01ms +step:43111/57344 train_time:24832468ms step_avg:576.01ms +grad accum step:10778/14336 +step:43112/57344 train_time:24833825ms step_avg:576.03ms +step:43113/57344 train_time:24833846ms step_avg:576.02ms +step:43114/57344 train_time:24834068ms step_avg:576.01ms +step:43115/57344 train_time:24834618ms step_avg:576.01ms +grad accum step:10779/14336 +step:43116/57344 train_time:24836364ms step_avg:576.04ms +step:43117/57344 train_time:24836386ms step_avg:576.02ms +step:43118/57344 train_time:24836609ms step_avg:576.01ms +step:43119/57344 train_time:24837180ms step_avg:576.01ms +grad accum step:10780/14336 +step:43120/57344 train_time:24838543ms step_avg:576.03ms +step:43121/57344 train_time:24838556ms step_avg:576.02ms +step:43122/57344 train_time:24838800ms step_avg:576.01ms +step:43123/57344 train_time:24839350ms step_avg:576.01ms +grad accum step:10781/14336 +step:43124/57344 train_time:24840745ms step_avg:576.03ms +step:43125/57344 train_time:24840764ms step_avg:576.02ms +step:43126/57344 train_time:24840987ms step_avg:576.01ms +step:43127/57344 train_time:24841536ms step_avg:576.01ms +grad accum step:10782/14336 +step:43128/57344 train_time:24842845ms step_avg:576.03ms +step:43129/57344 train_time:24842865ms step_avg:576.01ms +step:43130/57344 train_time:24843090ms step_avg:576.00ms +step:43131/57344 train_time:24843643ms step_avg:576.00ms +grad accum step:10783/14336 +step:43132/57344 train_time:24844974ms step_avg:576.02ms +step:43133/57344 train_time:24844987ms step_avg:576.01ms +step:43134/57344 train_time:24845246ms step_avg:576.00ms +step:43135/57344 train_time:24845830ms step_avg:576.00ms +grad accum step:10784/14336 +step:43136/57344 train_time:24847153ms step_avg:576.02ms +step:43136/57344 val_loss:5.913666 train_time:24847159ms step_avg:576.02ms +step:43137/57344 train_time:24847170ms step_avg:576.01ms +step:43138/57344 train_time:24847397ms step_avg:576.00ms +step:43139/57344 train_time:24847960ms step_avg:576.00ms +grad accum step:10785/14336 +step:43140/57344 train_time:24849292ms step_avg:576.02ms +step:43141/57344 train_time:24849311ms step_avg:576.00ms +step:43142/57344 train_time:24849553ms step_avg:575.99ms +step:43143/57344 train_time:24850108ms step_avg:575.99ms +grad accum step:10786/14336 +step:43144/57344 train_time:24851411ms step_avg:576.01ms +step:43145/57344 train_time:24851426ms step_avg:576.00ms +step:43146/57344 train_time:24851671ms step_avg:575.99ms +step:43147/57344 train_time:24852231ms step_avg:575.99ms +grad accum step:10787/14336 +step:43148/57344 train_time:24853559ms step_avg:576.01ms +step:43149/57344 train_time:24853573ms step_avg:575.99ms +step:43150/57344 train_time:24853825ms step_avg:575.99ms +step:43151/57344 train_time:24854386ms step_avg:575.99ms +grad accum step:10788/14336 +step:43152/57344 train_time:24855731ms step_avg:576.00ms +step:43153/57344 train_time:24855747ms step_avg:575.99ms +step:43154/57344 train_time:24855998ms step_avg:575.98ms +step:43155/57344 train_time:24856556ms step_avg:575.98ms +grad accum step:10789/14336 +step:43156/57344 train_time:24857977ms step_avg:576.00ms +step:43157/57344 train_time:24857992ms step_avg:575.99ms +step:43158/57344 train_time:24858212ms step_avg:575.98ms +step:43159/57344 train_time:24858760ms step_avg:575.98ms +grad accum step:10790/14336 +step:43160/57344 train_time:24860087ms step_avg:576.00ms +step:43161/57344 train_time:24860103ms step_avg:575.99ms +step:43162/57344 train_time:24860341ms step_avg:575.98ms +step:43163/57344 train_time:24860897ms step_avg:575.98ms +grad accum step:10791/14336 +step:43164/57344 train_time:24862381ms step_avg:576.00ms +step:43165/57344 train_time:24862394ms step_avg:575.99ms +step:43166/57344 train_time:24862625ms step_avg:575.98ms +step:43167/57344 train_time:24863207ms step_avg:575.98ms +grad accum step:10792/14336 +step:43168/57344 train_time:24864772ms step_avg:576.00ms +step:43169/57344 train_time:24864794ms step_avg:575.99ms +step:43170/57344 train_time:24865010ms step_avg:575.98ms +step:43171/57344 train_time:24865549ms step_avg:575.98ms +grad accum step:10793/14336 +step:43172/57344 train_time:24866916ms step_avg:576.00ms +step:43173/57344 train_time:24866940ms step_avg:575.98ms +step:43174/57344 train_time:24867169ms step_avg:575.98ms +step:43175/57344 train_time:24867741ms step_avg:575.98ms +grad accum step:10794/14336 +step:43176/57344 train_time:24869042ms step_avg:575.99ms +step:43177/57344 train_time:24869062ms step_avg:575.98ms +step:43178/57344 train_time:24869307ms step_avg:575.97ms +step:43179/57344 train_time:24869873ms step_avg:575.97ms +grad accum step:10795/14336 +step:43180/57344 train_time:24871233ms step_avg:575.99ms +step:43181/57344 train_time:24871248ms step_avg:575.98ms +step:43182/57344 train_time:24871503ms step_avg:575.97ms +step:43183/57344 train_time:24872082ms step_avg:575.97ms +grad accum step:10796/14336 +step:43184/57344 train_time:24873421ms step_avg:575.99ms +step:43185/57344 train_time:24873440ms step_avg:575.97ms +step:43186/57344 train_time:24873685ms step_avg:575.97ms +step:43187/57344 train_time:24874249ms step_avg:575.97ms +grad accum step:10797/14336 +step:43188/57344 train_time:24875596ms step_avg:575.98ms +step:43189/57344 train_time:24875977ms step_avg:575.98ms +step:43190/57344 train_time:24876199ms step_avg:575.97ms +step:43191/57344 train_time:24876763ms step_avg:575.97ms +grad accum step:10798/14336 +step:43192/57344 train_time:24878236ms step_avg:575.99ms +step:43193/57344 train_time:24878258ms step_avg:575.98ms +step:43194/57344 train_time:24878479ms step_avg:575.97ms +step:43195/57344 train_time:24879038ms step_avg:575.97ms +grad accum step:10799/14336 +step:43196/57344 train_time:24880358ms step_avg:575.99ms +step:43197/57344 train_time:24880381ms step_avg:575.97ms +step:43198/57344 train_time:24880620ms step_avg:575.97ms +step:43199/57344 train_time:24881203ms step_avg:575.97ms +grad accum step:10800/14336 +step:43200/57344 train_time:24882653ms step_avg:575.99ms +step:43200/57344 val_loss:5.891812 train_time:24882667ms step_avg:575.99ms +step:43201/57344 train_time:24882679ms step_avg:575.97ms +step:43202/57344 train_time:24882901ms step_avg:575.97ms +step:43203/57344 train_time:24883456ms step_avg:575.97ms +grad accum step:10801/14336 +step:43204/57344 train_time:24884780ms step_avg:575.98ms +step:43205/57344 train_time:24884796ms step_avg:575.97ms +step:43206/57344 train_time:24885042ms step_avg:575.96ms +step:43207/57344 train_time:24885600ms step_avg:575.96ms +grad accum step:10802/14336 +step:43208/57344 train_time:24886939ms step_avg:575.98ms +step:43209/57344 train_time:24886955ms step_avg:575.97ms +step:43210/57344 train_time:24887209ms step_avg:575.96ms +step:43211/57344 train_time:24887768ms step_avg:575.96ms +grad accum step:10803/14336 +step:43212/57344 train_time:24889161ms step_avg:575.98ms +step:43213/57344 train_time:24889174ms step_avg:575.96ms +step:43214/57344 train_time:24889435ms step_avg:575.96ms +step:43215/57344 train_time:24890034ms step_avg:575.96ms +grad accum step:10804/14336 +step:43216/57344 train_time:24891334ms step_avg:575.97ms +step:43217/57344 train_time:24891351ms step_avg:575.96ms +step:43218/57344 train_time:24891601ms step_avg:575.95ms +step:43219/57344 train_time:24892175ms step_avg:575.95ms +grad accum step:10805/14336 +step:43220/57344 train_time:24893562ms step_avg:575.97ms +step:43221/57344 train_time:24893579ms step_avg:575.96ms +step:43222/57344 train_time:24893826ms step_avg:575.95ms +step:43223/57344 train_time:24894381ms step_avg:575.95ms +grad accum step:10806/14336 +step:43224/57344 train_time:24895740ms step_avg:575.97ms +step:43225/57344 train_time:24895757ms step_avg:575.96ms +step:43226/57344 train_time:24896003ms step_avg:575.95ms +step:43227/57344 train_time:24896555ms step_avg:575.95ms +grad accum step:10807/14336 +step:43228/57344 train_time:24897922ms step_avg:575.97ms +step:43229/57344 train_time:24897938ms step_avg:575.95ms +step:43230/57344 train_time:24898187ms step_avg:575.95ms +step:43231/57344 train_time:24898741ms step_avg:575.95ms +grad accum step:10808/14336 +step:43232/57344 train_time:24900050ms step_avg:575.96ms +step:43233/57344 train_time:24900062ms step_avg:575.95ms +step:43234/57344 train_time:24900310ms step_avg:575.94ms +step:43235/57344 train_time:24900884ms step_avg:575.94ms +grad accum step:10809/14336 +step:43236/57344 train_time:24902257ms step_avg:575.96ms +step:43237/57344 train_time:24902272ms step_avg:575.95ms +step:43238/57344 train_time:24902521ms step_avg:575.94ms +step:43239/57344 train_time:24903085ms step_avg:575.94ms +grad accum step:10810/14336 +step:43240/57344 train_time:24904461ms step_avg:575.96ms +step:43241/57344 train_time:24904497ms step_avg:575.95ms +step:43242/57344 train_time:24904719ms step_avg:575.94ms +step:43243/57344 train_time:24905277ms step_avg:575.94ms +grad accum step:10811/14336 +step:43244/57344 train_time:24906617ms step_avg:575.96ms +step:43245/57344 train_time:24906632ms step_avg:575.94ms +step:43246/57344 train_time:24906881ms step_avg:575.93ms +step:43247/57344 train_time:24907476ms step_avg:575.94ms +grad accum step:10812/14336 +step:43248/57344 train_time:24908850ms step_avg:575.95ms +step:43249/57344 train_time:24908865ms step_avg:575.94ms +step:43250/57344 train_time:24909116ms step_avg:575.93ms +step:43251/57344 train_time:24909678ms step_avg:575.93ms +grad accum step:10813/14336 +step:43252/57344 train_time:24911001ms step_avg:575.95ms +step:43253/57344 train_time:24911018ms step_avg:575.94ms +step:43254/57344 train_time:24911268ms step_avg:575.93ms +step:43255/57344 train_time:24911830ms step_avg:575.93ms +grad accum step:10814/14336 +step:43256/57344 train_time:24913181ms step_avg:575.95ms +step:43257/57344 train_time:24913196ms step_avg:575.93ms +step:43258/57344 train_time:24913448ms step_avg:575.93ms +step:43259/57344 train_time:24914027ms step_avg:575.93ms +grad accum step:10815/14336 +step:43260/57344 train_time:24915417ms step_avg:575.95ms +step:43261/57344 train_time:24915434ms step_avg:575.93ms +step:43262/57344 train_time:24915675ms step_avg:575.93ms +step:43263/57344 train_time:24916213ms step_avg:575.92ms +grad accum step:10816/14336 +step:43264/57344 train_time:24917495ms step_avg:575.94ms +step:43264/57344 val_loss:5.870100 train_time:24917501ms step_avg:575.94ms +step:43265/57344 train_time:24917513ms step_avg:575.93ms +step:43266/57344 train_time:24917738ms step_avg:575.92ms +step:43267/57344 train_time:24918290ms step_avg:575.92ms +grad accum step:10817/14336 +step:43268/57344 train_time:24919628ms step_avg:575.94ms +step:43269/57344 train_time:24919659ms step_avg:575.92ms +step:43270/57344 train_time:24919879ms step_avg:575.92ms +step:43271/57344 train_time:24920430ms step_avg:575.92ms +grad accum step:10818/14336 +step:43272/57344 train_time:24921747ms step_avg:575.93ms +step:43273/57344 train_time:24921764ms step_avg:575.92ms +step:43274/57344 train_time:24922011ms step_avg:575.91ms +step:43275/57344 train_time:24922564ms step_avg:575.91ms +grad accum step:10819/14336 +step:43276/57344 train_time:24923877ms step_avg:575.93ms +step:43277/57344 train_time:24923894ms step_avg:575.92ms +step:43278/57344 train_time:24924140ms step_avg:575.91ms +step:43279/57344 train_time:24924694ms step_avg:575.91ms +grad accum step:10820/14336 +step:43280/57344 train_time:24926090ms step_avg:575.93ms +step:43281/57344 train_time:24926113ms step_avg:575.91ms +step:43282/57344 train_time:24926338ms step_avg:575.91ms +step:43283/57344 train_time:24926905ms step_avg:575.91ms +grad accum step:10821/14336 +step:43284/57344 train_time:24928265ms step_avg:575.92ms +step:43285/57344 train_time:24928291ms step_avg:575.91ms +step:43286/57344 train_time:24928511ms step_avg:575.90ms +step:43287/57344 train_time:24929050ms step_avg:575.90ms +grad accum step:10822/14336 +step:43288/57344 train_time:24930368ms step_avg:575.92ms +step:43289/57344 train_time:24930385ms step_avg:575.91ms +step:43290/57344 train_time:24930639ms step_avg:575.90ms +step:43291/57344 train_time:24931203ms step_avg:575.90ms +grad accum step:10823/14336 +step:43292/57344 train_time:24932591ms step_avg:575.92ms +step:43293/57344 train_time:24932621ms step_avg:575.90ms +step:43294/57344 train_time:24932848ms step_avg:575.90ms +step:43295/57344 train_time:24933418ms step_avg:575.90ms +grad accum step:10824/14336 +step:43296/57344 train_time:24934727ms step_avg:575.91ms +step:43297/57344 train_time:24934744ms step_avg:575.90ms +step:43298/57344 train_time:24934993ms step_avg:575.89ms +step:43299/57344 train_time:24935550ms step_avg:575.89ms +grad accum step:10825/14336 +step:43300/57344 train_time:24936902ms step_avg:575.91ms +step:43301/57344 train_time:24936917ms step_avg:575.90ms +step:43302/57344 train_time:24937170ms step_avg:575.89ms +step:43303/57344 train_time:24937734ms step_avg:575.89ms +grad accum step:10826/14336 +step:43304/57344 train_time:24939030ms step_avg:575.91ms +step:43305/57344 train_time:24939047ms step_avg:575.89ms +step:43306/57344 train_time:24939294ms step_avg:575.89ms +step:43307/57344 train_time:24939835ms step_avg:575.88ms +grad accum step:10827/14336 +step:43308/57344 train_time:24941170ms step_avg:575.90ms +step:43309/57344 train_time:24941186ms step_avg:575.89ms +step:43310/57344 train_time:24941439ms step_avg:575.88ms +step:43311/57344 train_time:24942010ms step_avg:575.88ms +grad accum step:10828/14336 +step:43312/57344 train_time:24943359ms step_avg:575.90ms +step:43313/57344 train_time:24943390ms step_avg:575.89ms +step:43314/57344 train_time:24943611ms step_avg:575.88ms +step:43315/57344 train_time:24944174ms step_avg:575.88ms +grad accum step:10829/14336 +step:43316/57344 train_time:24945538ms step_avg:575.90ms +step:43317/57344 train_time:24945555ms step_avg:575.88ms +step:43318/57344 train_time:24945804ms step_avg:575.88ms +step:43319/57344 train_time:24946374ms step_avg:575.88ms +grad accum step:10830/14336 +step:43320/57344 train_time:24947788ms step_avg:575.90ms +step:43321/57344 train_time:24947801ms step_avg:575.88ms +step:43322/57344 train_time:24948053ms step_avg:575.87ms +step:43323/57344 train_time:24948617ms step_avg:575.87ms +grad accum step:10831/14336 +step:43324/57344 train_time:24949990ms step_avg:575.89ms +step:43325/57344 train_time:24950007ms step_avg:575.88ms +step:43326/57344 train_time:24950258ms step_avg:575.87ms +step:43327/57344 train_time:24950816ms step_avg:575.87ms +grad accum step:10832/14336 +step:43328/57344 train_time:24952102ms step_avg:575.89ms +step:43328/57344 val_loss:5.847303 train_time:24952127ms step_avg:575.89ms +step:43329/57344 train_time:24952139ms step_avg:575.88ms +step:43330/57344 train_time:24952364ms step_avg:575.87ms +step:43331/57344 train_time:24952915ms step_avg:575.87ms +grad accum step:10833/14336 +step:43332/57344 train_time:24954240ms step_avg:575.88ms +step:43333/57344 train_time:24954251ms step_avg:575.87ms +step:43334/57344 train_time:24954490ms step_avg:575.86ms +step:43335/57344 train_time:24955046ms step_avg:575.86ms +grad accum step:10834/14336 +step:43336/57344 train_time:24956339ms step_avg:575.88ms +step:43337/57344 train_time:24956356ms step_avg:575.87ms +step:43338/57344 train_time:24956604ms step_avg:575.86ms +step:43339/57344 train_time:24957162ms step_avg:575.86ms +grad accum step:10835/14336 +step:43340/57344 train_time:24958580ms step_avg:575.88ms +step:43341/57344 train_time:24958596ms step_avg:575.87ms +step:43342/57344 train_time:24958837ms step_avg:575.86ms +step:43343/57344 train_time:24959390ms step_avg:575.86ms +grad accum step:10836/14336 +step:43344/57344 train_time:24960748ms step_avg:575.88ms +step:43345/57344 train_time:24960765ms step_avg:575.86ms +step:43346/57344 train_time:24961019ms step_avg:575.86ms +step:43347/57344 train_time:24961591ms step_avg:575.86ms +grad accum step:10837/14336 +step:43348/57344 train_time:24962981ms step_avg:575.87ms +step:43349/57344 train_time:24962997ms step_avg:575.86ms +step:43350/57344 train_time:24963241ms step_avg:575.85ms +step:43351/57344 train_time:24963794ms step_avg:575.85ms +grad accum step:10838/14336 +step:43352/57344 train_time:24965093ms step_avg:575.87ms +step:43353/57344 train_time:24965110ms step_avg:575.86ms +step:43354/57344 train_time:24965359ms step_avg:575.85ms +step:43355/57344 train_time:24965915ms step_avg:575.85ms +grad accum step:10839/14336 +step:43356/57344 train_time:24967247ms step_avg:575.87ms +step:43357/57344 train_time:24967264ms step_avg:575.85ms +step:43358/57344 train_time:24967517ms step_avg:575.85ms +step:43359/57344 train_time:24968075ms step_avg:575.85ms +grad accum step:10840/14336 +step:43360/57344 train_time:24969387ms step_avg:575.86ms +step:43361/57344 train_time:24969409ms step_avg:575.85ms +step:43362/57344 train_time:24969652ms step_avg:575.84ms +step:43363/57344 train_time:24970198ms step_avg:575.84ms +grad accum step:10841/14336 +step:43364/57344 train_time:24971562ms step_avg:575.86ms +step:43365/57344 train_time:24971579ms step_avg:575.85ms +step:43366/57344 train_time:24971827ms step_avg:575.84ms +step:43367/57344 train_time:24972379ms step_avg:575.84ms +grad accum step:10842/14336 +step:43368/57344 train_time:24976419ms step_avg:575.92ms +step:43369/57344 train_time:24976431ms step_avg:575.91ms +step:43370/57344 train_time:24976692ms step_avg:575.90ms +step:43371/57344 train_time:24977239ms step_avg:575.90ms +grad accum step:10843/14336 +step:43372/57344 train_time:24978573ms step_avg:575.91ms +step:43373/57344 train_time:24978586ms step_avg:575.90ms +step:43374/57344 train_time:24978836ms step_avg:575.89ms +step:43375/57344 train_time:24979386ms step_avg:575.89ms +grad accum step:10844/14336 +step:43376/57344 train_time:24980718ms step_avg:575.91ms +step:43377/57344 train_time:24980741ms step_avg:575.90ms +step:43378/57344 train_time:24980965ms step_avg:575.89ms +step:43379/57344 train_time:24981541ms step_avg:575.89ms +grad accum step:10845/14336 +step:43380/57344 train_time:24982898ms step_avg:575.91ms +step:43381/57344 train_time:24982935ms step_avg:575.90ms +step:43382/57344 train_time:24983161ms step_avg:575.89ms +step:43383/57344 train_time:24983721ms step_avg:575.89ms +grad accum step:10846/14336 +step:43384/57344 train_time:24985044ms step_avg:575.90ms +step:43385/57344 train_time:24985061ms step_avg:575.89ms +step:43386/57344 train_time:24985305ms step_avg:575.88ms +step:43387/57344 train_time:24985868ms step_avg:575.88ms +grad accum step:10847/14336 +step:43388/57344 train_time:24987218ms step_avg:575.90ms +step:43389/57344 train_time:24987235ms step_avg:575.89ms +step:43390/57344 train_time:24987481ms step_avg:575.88ms +step:43391/57344 train_time:24988030ms step_avg:575.88ms +grad accum step:10848/14336 +step:43392/57344 train_time:24989399ms step_avg:575.90ms +step:43392/57344 val_loss:5.814780 train_time:24989419ms step_avg:575.90ms +step:43393/57344 train_time:24989431ms step_avg:575.89ms +step:43394/57344 train_time:24989654ms step_avg:575.88ms +step:43395/57344 train_time:24990198ms step_avg:575.88ms +grad accum step:10849/14336 +step:43396/57344 train_time:24991530ms step_avg:575.89ms +step:43397/57344 train_time:24991551ms step_avg:575.88ms +step:43398/57344 train_time:24991780ms step_avg:575.87ms +step:43399/57344 train_time:24992334ms step_avg:575.87ms +grad accum step:10850/14336 +step:43400/57344 train_time:24993687ms step_avg:575.89ms +step:43401/57344 train_time:24993704ms step_avg:575.88ms +step:43402/57344 train_time:24993959ms step_avg:575.87ms +step:43403/57344 train_time:24994529ms step_avg:575.87ms +grad accum step:10851/14336 +step:43404/57344 train_time:24995879ms step_avg:575.89ms +step:43405/57344 train_time:24995906ms step_avg:575.88ms +step:43406/57344 train_time:24996132ms step_avg:575.87ms +step:43407/57344 train_time:24996685ms step_avg:575.87ms +grad accum step:10852/14336 +step:43408/57344 train_time:24997985ms step_avg:575.88ms +step:43409/57344 train_time:24998002ms step_avg:575.87ms +step:43410/57344 train_time:24998255ms step_avg:575.86ms +step:43411/57344 train_time:24998824ms step_avg:575.86ms +grad accum step:10853/14336 +step:43412/57344 train_time:25000151ms step_avg:575.88ms +step:43413/57344 train_time:25000163ms step_avg:575.87ms +step:43414/57344 train_time:25000382ms step_avg:575.86ms +step:43415/57344 train_time:25000921ms step_avg:575.86ms +grad accum step:10854/14336 +step:43416/57344 train_time:25002280ms step_avg:575.88ms +step:43417/57344 train_time:25002301ms step_avg:575.86ms +step:43418/57344 train_time:25002535ms step_avg:575.86ms +step:43419/57344 train_time:25003110ms step_avg:575.86ms +grad accum step:10855/14336 +step:43420/57344 train_time:25004558ms step_avg:575.88ms +step:43421/57344 train_time:25004575ms step_avg:575.86ms +step:43422/57344 train_time:25004825ms step_avg:575.86ms +step:43423/57344 train_time:25005392ms step_avg:575.86ms +grad accum step:10856/14336 +step:43424/57344 train_time:25006750ms step_avg:575.87ms +step:43425/57344 train_time:25006764ms step_avg:575.86ms +step:43426/57344 train_time:25007014ms step_avg:575.85ms +step:43427/57344 train_time:25007572ms step_avg:575.85ms +grad accum step:10857/14336 +step:43428/57344 train_time:25008923ms step_avg:575.87ms +step:43429/57344 train_time:25008936ms step_avg:575.86ms +step:43430/57344 train_time:25009174ms step_avg:575.85ms +step:43431/57344 train_time:25009746ms step_avg:575.85ms +grad accum step:10858/14336 +step:43432/57344 train_time:25011083ms step_avg:575.87ms +step:43433/57344 train_time:25011106ms step_avg:575.85ms +step:43434/57344 train_time:25011345ms step_avg:575.85ms +step:43435/57344 train_time:25011888ms step_avg:575.85ms +grad accum step:10859/14336 +step:43436/57344 train_time:25013206ms step_avg:575.86ms +step:43437/57344 train_time:25013223ms step_avg:575.85ms +step:43438/57344 train_time:25013472ms step_avg:575.84ms +step:43439/57344 train_time:25014037ms step_avg:575.84ms +grad accum step:10860/14336 +step:43440/57344 train_time:25015449ms step_avg:575.86ms +step:43441/57344 train_time:25015460ms step_avg:575.85ms +step:43442/57344 train_time:25015716ms step_avg:575.84ms +step:43443/57344 train_time:25016315ms step_avg:575.84ms +grad accum step:10861/14336 +step:43444/57344 train_time:25017686ms step_avg:575.86ms +step:43445/57344 train_time:25017703ms step_avg:575.85ms +step:43446/57344 train_time:25017951ms step_avg:575.84ms +step:43447/57344 train_time:25018506ms step_avg:575.84ms +grad accum step:10862/14336 +step:43448/57344 train_time:25019837ms step_avg:575.86ms +step:43449/57344 train_time:25019855ms step_avg:575.84ms +step:43450/57344 train_time:25020108ms step_avg:575.84ms +step:43451/57344 train_time:25020675ms step_avg:575.84ms +grad accum step:10863/14336 +step:43452/57344 train_time:25054416ms step_avg:576.60ms +step:43453/57344 train_time:25054435ms step_avg:576.59ms +step:43454/57344 train_time:25054740ms step_avg:576.58ms +step:43455/57344 train_time:25055360ms step_avg:576.58ms +grad accum step:10864/14336 +step:43456/57344 train_time:25056664ms step_avg:576.60ms +step:43456/57344 val_loss:5.802860 train_time:25056664ms step_avg:576.60ms +step:43457/57344 train_time:25056676ms step_avg:576.59ms +step:43458/57344 train_time:25056932ms step_avg:576.58ms +step:43459/57344 train_time:25057494ms step_avg:576.58ms +grad accum step:10865/14336 +step:43460/57344 train_time:25058801ms step_avg:576.59ms +step:43461/57344 train_time:25058814ms step_avg:576.58ms +step:43462/57344 train_time:25059060ms step_avg:576.57ms +step:43463/57344 train_time:25059606ms step_avg:576.57ms +grad accum step:10866/14336 +step:43464/57344 train_time:25060917ms step_avg:576.59ms +step:43465/57344 train_time:25060932ms step_avg:576.58ms +step:43466/57344 train_time:25061167ms step_avg:576.57ms +step:43467/57344 train_time:25061720ms step_avg:576.57ms +grad accum step:10867/14336 +step:43468/57344 train_time:25063045ms step_avg:576.59ms +step:43469/57344 train_time:25063060ms step_avg:576.57ms +step:43470/57344 train_time:25063316ms step_avg:576.57ms +step:43471/57344 train_time:25063889ms step_avg:576.57ms +grad accum step:10868/14336 +step:43472/57344 train_time:25065235ms step_avg:576.58ms +step:43473/57344 train_time:25065272ms step_avg:576.57ms +step:43474/57344 train_time:25065491ms step_avg:576.56ms +step:43475/57344 train_time:25066038ms step_avg:576.56ms +grad accum step:10869/14336 +step:43476/57344 train_time:25067379ms step_avg:576.58ms +step:43477/57344 train_time:25067391ms step_avg:576.57ms +step:43478/57344 train_time:25067638ms step_avg:576.56ms +step:43479/57344 train_time:25068186ms step_avg:576.56ms +grad accum step:10870/14336 +step:43480/57344 train_time:25069495ms step_avg:576.58ms +step:43481/57344 train_time:25069512ms step_avg:576.56ms +step:43482/57344 train_time:25069763ms step_avg:576.55ms +step:43483/57344 train_time:25070324ms step_avg:576.55ms +grad accum step:10871/14336 +step:43484/57344 train_time:25071672ms step_avg:576.57ms +step:43485/57344 train_time:25071708ms step_avg:576.56ms +step:43486/57344 train_time:25071935ms step_avg:576.55ms +step:43487/57344 train_time:25072498ms step_avg:576.55ms +grad accum step:10872/14336 +step:43488/57344 train_time:25073830ms step_avg:576.57ms +step:43489/57344 train_time:25073842ms step_avg:576.56ms +step:43490/57344 train_time:25074073ms step_avg:576.55ms +step:43491/57344 train_time:25074621ms step_avg:576.55ms +grad accum step:10873/14336 +step:43492/57344 train_time:25075969ms step_avg:576.57ms +step:43493/57344 train_time:25075984ms step_avg:576.55ms +step:43494/57344 train_time:25076217ms step_avg:576.54ms +step:43495/57344 train_time:25076789ms step_avg:576.54ms +grad accum step:10874/14336 +step:43496/57344 train_time:25078126ms step_avg:576.56ms +step:43497/57344 train_time:25078143ms step_avg:576.55ms +step:43498/57344 train_time:25078390ms step_avg:576.54ms +step:43499/57344 train_time:25078937ms step_avg:576.54ms +grad accum step:10875/14336 +step:43500/57344 train_time:25080312ms step_avg:576.56ms +step:43501/57344 train_time:25080326ms step_avg:576.55ms +step:43502/57344 train_time:25080583ms step_avg:576.54ms +step:43503/57344 train_time:25081175ms step_avg:576.54ms +grad accum step:10876/14336 +step:43504/57344 train_time:25082533ms step_avg:576.56ms +step:43505/57344 train_time:25082549ms step_avg:576.54ms +step:43506/57344 train_time:25082798ms step_avg:576.54ms +step:43507/57344 train_time:25083362ms step_avg:576.54ms +grad accum step:10877/14336 +step:43508/57344 train_time:25084680ms step_avg:576.55ms +step:43509/57344 train_time:25084693ms step_avg:576.54ms +step:43510/57344 train_time:25084948ms step_avg:576.53ms +step:43511/57344 train_time:25085517ms step_avg:576.53ms +grad accum step:10878/14336 +step:43512/57344 train_time:25086907ms step_avg:576.55ms +step:43513/57344 train_time:25086921ms step_avg:576.54ms +step:43514/57344 train_time:25087174ms step_avg:576.53ms +step:43515/57344 train_time:25087743ms step_avg:576.53ms +grad accum step:10879/14336 +step:43516/57344 train_time:25104518ms step_avg:576.90ms +step:43517/57344 train_time:25104532ms step_avg:576.89ms +step:43518/57344 train_time:25104794ms step_avg:576.88ms +step:43519/57344 train_time:25105340ms step_avg:576.88ms +grad accum step:10880/14336 +step:43520/57344 train_time:25106684ms step_avg:576.90ms +step:43520/57344 val_loss:5.776017 train_time:25106688ms step_avg:576.90ms +step:43521/57344 train_time:25106699ms step_avg:576.89ms +step:43522/57344 train_time:25106923ms step_avg:576.88ms +step:43523/57344 train_time:25107461ms step_avg:576.88ms +grad accum step:10881/14336 +step:43524/57344 train_time:25108759ms step_avg:576.89ms +step:43525/57344 train_time:25108776ms step_avg:576.88ms +step:43526/57344 train_time:25109024ms step_avg:576.87ms +step:43527/57344 train_time:25109571ms step_avg:576.87ms +grad accum step:10882/14336 +step:43528/57344 train_time:25110883ms step_avg:576.89ms +step:43529/57344 train_time:25110925ms step_avg:576.88ms +step:43530/57344 train_time:25111146ms step_avg:576.87ms +step:43531/57344 train_time:25111713ms step_avg:576.87ms +grad accum step:10883/14336 +step:43532/57344 train_time:25113057ms step_avg:576.89ms +step:43533/57344 train_time:25113069ms step_avg:576.87ms +step:43534/57344 train_time:25113320ms step_avg:576.87ms +step:43535/57344 train_time:25113878ms step_avg:576.87ms +grad accum step:10884/14336 +step:43536/57344 train_time:25115221ms step_avg:576.88ms +step:43537/57344 train_time:25115238ms step_avg:576.87ms +step:43538/57344 train_time:25115490ms step_avg:576.86ms +step:43539/57344 train_time:25116064ms step_avg:576.86ms +grad accum step:10885/14336 +step:43540/57344 train_time:25117413ms step_avg:576.88ms +step:43541/57344 train_time:25117446ms step_avg:576.87ms +step:43542/57344 train_time:25117686ms step_avg:576.86ms +step:43543/57344 train_time:25118260ms step_avg:576.86ms +grad accum step:10886/14336 +step:43544/57344 train_time:25119551ms step_avg:576.88ms +step:43545/57344 train_time:25119568ms step_avg:576.86ms +step:43546/57344 train_time:25119816ms step_avg:576.86ms +step:43547/57344 train_time:25120366ms step_avg:576.86ms +grad accum step:10887/14336 +step:43548/57344 train_time:25121744ms step_avg:576.87ms +step:43549/57344 train_time:25121761ms step_avg:576.86ms +step:43550/57344 train_time:25122012ms step_avg:576.85ms +step:43551/57344 train_time:25122575ms step_avg:576.85ms +grad accum step:10888/14336 +step:43552/57344 train_time:25123899ms step_avg:576.87ms +step:43553/57344 train_time:25123915ms step_avg:576.86ms +step:43554/57344 train_time:25124160ms step_avg:576.85ms +step:43555/57344 train_time:25124715ms step_avg:576.85ms +grad accum step:10889/14336 +step:43556/57344 train_time:25126041ms step_avg:576.87ms +step:43557/57344 train_time:25126097ms step_avg:576.86ms +step:43558/57344 train_time:25126327ms step_avg:576.85ms +step:43559/57344 train_time:25126891ms step_avg:576.85ms +grad accum step:10890/14336 +step:43560/57344 train_time:25128208ms step_avg:576.86ms +step:43561/57344 train_time:25128227ms step_avg:576.85ms +step:43562/57344 train_time:25128470ms step_avg:576.84ms +step:43563/57344 train_time:25129034ms step_avg:576.84ms +grad accum step:10891/14336 +step:43564/57344 train_time:25130389ms step_avg:576.86ms +step:43565/57344 train_time:25130408ms step_avg:576.85ms +step:43566/57344 train_time:25130646ms step_avg:576.84ms +step:43567/57344 train_time:25131195ms step_avg:576.84ms +grad accum step:10892/14336 +step:43568/57344 train_time:25132578ms step_avg:576.86ms +step:43569/57344 train_time:25132598ms step_avg:576.85ms +step:43570/57344 train_time:25132855ms step_avg:576.84ms +step:43571/57344 train_time:25133445ms step_avg:576.84ms +grad accum step:10893/14336 +step:43572/57344 train_time:25134750ms step_avg:576.86ms +step:43573/57344 train_time:25134773ms step_avg:576.84ms +step:43574/57344 train_time:25135004ms step_avg:576.83ms +step:43575/57344 train_time:25135558ms step_avg:576.83ms +grad accum step:10894/14336 +step:43576/57344 train_time:25136887ms step_avg:576.85ms +step:43577/57344 train_time:25136904ms step_avg:576.84ms +step:43578/57344 train_time:25137152ms step_avg:576.83ms +step:43579/57344 train_time:25137705ms step_avg:576.83ms +grad accum step:10895/14336 +step:43580/57344 train_time:25139042ms step_avg:576.85ms +step:43581/57344 train_time:25139058ms step_avg:576.84ms +step:43582/57344 train_time:25139313ms step_avg:576.83ms +step:43583/57344 train_time:25139902ms step_avg:576.83ms +grad accum step:10896/14336 +step:43584/57344 train_time:25141354ms step_avg:576.85ms +step:43584/57344 val_loss:5.755119 train_time:25141362ms step_avg:576.85ms +step:43585/57344 train_time:25141374ms step_avg:576.84ms +step:43586/57344 train_time:25141607ms step_avg:576.83ms +step:43587/57344 train_time:25142179ms step_avg:576.83ms +grad accum step:10897/14336 +step:43588/57344 train_time:25143508ms step_avg:576.84ms +step:43589/57344 train_time:25143523ms step_avg:576.83ms +step:43590/57344 train_time:25143776ms step_avg:576.82ms +step:43591/57344 train_time:25144349ms step_avg:576.82ms +grad accum step:10898/14336 +step:43592/57344 train_time:25145700ms step_avg:576.84ms +step:43593/57344 train_time:25145715ms step_avg:576.83ms +step:43594/57344 train_time:25145966ms step_avg:576.82ms +step:43595/57344 train_time:25146529ms step_avg:576.82ms +grad accum step:10899/14336 +step:43596/57344 train_time:25147827ms step_avg:576.84ms +step:43597/57344 train_time:25147842ms step_avg:576.83ms +step:43598/57344 train_time:25148092ms step_avg:576.82ms +step:43599/57344 train_time:25148652ms step_avg:576.82ms +grad accum step:10900/14336 +step:43600/57344 train_time:25150029ms step_avg:576.84ms +step:43601/57344 train_time:25150054ms step_avg:576.82ms +step:43602/57344 train_time:25150281ms step_avg:576.81ms +step:43603/57344 train_time:25150849ms step_avg:576.81ms +grad accum step:10901/14336 +step:43604/57344 train_time:25152194ms step_avg:576.83ms +step:43605/57344 train_time:25152206ms step_avg:576.82ms +step:43606/57344 train_time:25152456ms step_avg:576.81ms +step:43607/57344 train_time:25153018ms step_avg:576.81ms +grad accum step:10902/14336 +step:43608/57344 train_time:25154361ms step_avg:576.83ms +step:43609/57344 train_time:25154409ms step_avg:576.82ms +step:43610/57344 train_time:25154636ms step_avg:576.81ms +step:43611/57344 train_time:25155201ms step_avg:576.81ms +grad accum step:10903/14336 +step:43612/57344 train_time:25156548ms step_avg:576.83ms +step:43613/57344 train_time:25156568ms step_avg:576.81ms +step:43614/57344 train_time:25156803ms step_avg:576.81ms +step:43615/57344 train_time:25157352ms step_avg:576.81ms +grad accum step:10904/14336 +step:43616/57344 train_time:25158697ms step_avg:576.82ms +step:43617/57344 train_time:25158718ms step_avg:576.81ms +step:43618/57344 train_time:25158968ms step_avg:576.80ms +step:43619/57344 train_time:25159549ms step_avg:576.80ms +grad accum step:10905/14336 +step:43620/57344 train_time:25160888ms step_avg:576.82ms +step:43621/57344 train_time:25160905ms step_avg:576.81ms +step:43622/57344 train_time:25161157ms step_avg:576.80ms +step:43623/57344 train_time:25161729ms step_avg:576.80ms +grad accum step:10906/14336 +step:43624/57344 train_time:25163072ms step_avg:576.82ms +step:43625/57344 train_time:25163089ms step_avg:576.80ms +step:43626/57344 train_time:25163333ms step_avg:576.80ms +step:43627/57344 train_time:25163893ms step_avg:576.80ms +grad accum step:10907/14336 +step:43628/57344 train_time:25165324ms step_avg:576.82ms +step:43629/57344 train_time:25165338ms step_avg:576.80ms +step:43630/57344 train_time:25165550ms step_avg:576.79ms +step:43631/57344 train_time:25166097ms step_avg:576.79ms +grad accum step:10908/14336 +step:43632/57344 train_time:25167442ms step_avg:576.81ms +step:43633/57344 train_time:25167460ms step_avg:576.80ms +step:43634/57344 train_time:25167698ms step_avg:576.79ms +step:43635/57344 train_time:25168241ms step_avg:576.79ms +grad accum step:10909/14336 +step:43636/57344 train_time:25169535ms step_avg:576.81ms +step:43637/57344 train_time:25169550ms step_avg:576.79ms +step:43638/57344 train_time:25169817ms step_avg:576.79ms +step:43639/57344 train_time:25170417ms step_avg:576.79ms +grad accum step:10910/14336 +step:43640/57344 train_time:25171721ms step_avg:576.80ms +step:43641/57344 train_time:25171796ms step_avg:576.79ms +step:43642/57344 train_time:25172021ms step_avg:576.78ms +step:43643/57344 train_time:25172593ms step_avg:576.78ms +grad accum step:10911/14336 +step:43644/57344 train_time:25173928ms step_avg:576.80ms +step:43645/57344 train_time:25173944ms step_avg:576.79ms +step:43646/57344 train_time:25174189ms step_avg:576.78ms +step:43647/57344 train_time:25174743ms step_avg:576.78ms +grad accum step:10912/14336 +step:43648/57344 train_time:25176265ms step_avg:576.80ms +step:43648/57344 val_loss:5.734976 train_time:25176277ms step_avg:576.80ms +step:43649/57344 train_time:25176289ms step_avg:576.79ms +step:43650/57344 train_time:25176505ms step_avg:576.78ms +step:43651/57344 train_time:25177048ms step_avg:576.78ms +grad accum step:10913/14336 +step:43652/57344 train_time:25178402ms step_avg:576.80ms +step:43653/57344 train_time:25178417ms step_avg:576.79ms +step:43654/57344 train_time:25178669ms step_avg:576.78ms +step:43655/57344 train_time:25179244ms step_avg:576.78ms +grad accum step:10914/14336 +step:43656/57344 train_time:25180593ms step_avg:576.80ms +step:43657/57344 train_time:25180610ms step_avg:576.78ms +step:43658/57344 train_time:25180853ms step_avg:576.78ms +step:43659/57344 train_time:25181392ms step_avg:576.77ms +grad accum step:10915/14336 +step:43660/57344 train_time:25182703ms step_avg:576.79ms +step:43661/57344 train_time:25182725ms step_avg:576.78ms +step:43662/57344 train_time:25182962ms step_avg:576.77ms +step:43663/57344 train_time:25183517ms step_avg:576.77ms +grad accum step:10916/14336 +step:43664/57344 train_time:25184879ms step_avg:576.79ms +step:43665/57344 train_time:25184896ms step_avg:576.78ms +step:43666/57344 train_time:25185144ms step_avg:576.77ms +step:43667/57344 train_time:25185711ms step_avg:576.77ms +grad accum step:10917/14336 +step:43668/57344 train_time:25187059ms step_avg:576.79ms +step:43669/57344 train_time:25187075ms step_avg:576.77ms +step:43670/57344 train_time:25187322ms step_avg:576.76ms +step:43671/57344 train_time:25187870ms step_avg:576.76ms +grad accum step:10918/14336 +step:43672/57344 train_time:25189218ms step_avg:576.78ms +step:43673/57344 train_time:25189241ms step_avg:576.77ms +step:43674/57344 train_time:25189478ms step_avg:576.76ms +step:43675/57344 train_time:25190033ms step_avg:576.76ms +grad accum step:10919/14336 +step:43676/57344 train_time:25191346ms step_avg:576.78ms +step:43677/57344 train_time:25191368ms step_avg:576.77ms +step:43678/57344 train_time:25191593ms step_avg:576.76ms +step:43679/57344 train_time:25192164ms step_avg:576.76ms +grad accum step:10920/14336 +step:43680/57344 train_time:25193508ms step_avg:576.77ms +step:43681/57344 train_time:25193526ms step_avg:576.76ms +step:43682/57344 train_time:25193769ms step_avg:576.75ms +step:43683/57344 train_time:25194329ms step_avg:576.75ms +grad accum step:10921/14336 +step:43684/57344 train_time:25195636ms step_avg:576.77ms +step:43685/57344 train_time:25195650ms step_avg:576.76ms +step:43686/57344 train_time:25195892ms step_avg:576.75ms +step:43687/57344 train_time:25196450ms step_avg:576.75ms +grad accum step:10922/14336 +step:43688/57344 train_time:25197775ms step_avg:576.77ms +step:43689/57344 train_time:25197806ms step_avg:576.75ms +step:43690/57344 train_time:25198046ms step_avg:576.75ms +step:43691/57344 train_time:25198624ms step_avg:576.75ms +grad accum step:10923/14336 +step:43692/57344 train_time:25199923ms step_avg:576.76ms +step:43693/57344 train_time:25199937ms step_avg:576.75ms +step:43694/57344 train_time:25200192ms step_avg:576.74ms +step:43695/57344 train_time:25200772ms step_avg:576.74ms +grad accum step:10924/14336 +step:43696/57344 train_time:25202111ms step_avg:576.76ms +step:43697/57344 train_time:25202130ms step_avg:576.75ms +step:43698/57344 train_time:25202368ms step_avg:576.74ms +step:43699/57344 train_time:25202917ms step_avg:576.74ms +grad accum step:10925/14336 +step:43700/57344 train_time:25204232ms step_avg:576.76ms +step:43701/57344 train_time:25204247ms step_avg:576.74ms +step:43702/57344 train_time:25204499ms step_avg:576.74ms +step:43703/57344 train_time:25205062ms step_avg:576.74ms +grad accum step:10926/14336 +step:43704/57344 train_time:25206621ms step_avg:576.76ms +step:43705/57344 train_time:25206704ms step_avg:576.75ms +step:43706/57344 train_time:25206925ms step_avg:576.74ms +step:43707/57344 train_time:25207476ms step_avg:576.74ms +grad accum step:10927/14336 +step:43708/57344 train_time:25208829ms step_avg:576.76ms +step:43709/57344 train_time:25208850ms step_avg:576.74ms +step:43710/57344 train_time:25209089ms step_avg:576.74ms +step:43711/57344 train_time:25209651ms step_avg:576.73ms +grad accum step:10928/14336 +step:43712/57344 train_time:25211023ms step_avg:576.75ms +step:43712/57344 val_loss:5.717041 train_time:25211027ms step_avg:576.75ms +step:43713/57344 train_time:25211039ms step_avg:576.74ms +step:43714/57344 train_time:25211271ms step_avg:576.73ms +step:43715/57344 train_time:25211846ms step_avg:576.73ms +grad accum step:10929/14336 +step:43716/57344 train_time:25213201ms step_avg:576.75ms +step:43717/57344 train_time:25213230ms step_avg:576.74ms +step:43718/57344 train_time:25213459ms step_avg:576.73ms +step:43719/57344 train_time:25214028ms step_avg:576.73ms +grad accum step:10930/14336 +step:43720/57344 train_time:25215363ms step_avg:576.75ms +step:43721/57344 train_time:25215382ms step_avg:576.73ms +step:43722/57344 train_time:25215622ms step_avg:576.73ms +step:43723/57344 train_time:25216184ms step_avg:576.73ms +grad accum step:10931/14336 +step:43724/57344 train_time:25217504ms step_avg:576.74ms +step:43725/57344 train_time:25217520ms step_avg:576.73ms +step:43726/57344 train_time:25217773ms step_avg:576.72ms +step:43727/57344 train_time:25218345ms step_avg:576.72ms +grad accum step:10932/14336 +step:43728/57344 train_time:25219686ms step_avg:576.74ms +step:43729/57344 train_time:25219723ms step_avg:576.73ms +step:43730/57344 train_time:25219948ms step_avg:576.72ms +step:43731/57344 train_time:25220507ms step_avg:576.72ms +grad accum step:10933/14336 +step:43732/57344 train_time:25221833ms step_avg:576.74ms +step:43733/57344 train_time:25221849ms step_avg:576.72ms +step:43734/57344 train_time:25222102ms step_avg:576.72ms +step:43735/57344 train_time:25222680ms step_avg:576.72ms +grad accum step:10934/14336 +step:43736/57344 train_time:25224038ms step_avg:576.73ms +step:43737/57344 train_time:25224056ms step_avg:576.72ms +step:43738/57344 train_time:25224282ms step_avg:576.71ms +step:43739/57344 train_time:25224848ms step_avg:576.71ms +grad accum step:10935/14336 +step:43740/57344 train_time:25226159ms step_avg:576.73ms +step:43741/57344 train_time:25226178ms step_avg:576.72ms +step:43742/57344 train_time:25226420ms step_avg:576.71ms +step:43743/57344 train_time:25226982ms step_avg:576.71ms +grad accum step:10936/14336 +step:43744/57344 train_time:25228372ms step_avg:576.73ms +step:43745/57344 train_time:25228495ms step_avg:576.72ms +step:43746/57344 train_time:25228712ms step_avg:576.71ms +step:43747/57344 train_time:25229269ms step_avg:576.71ms +grad accum step:10937/14336 +step:43748/57344 train_time:25230564ms step_avg:576.72ms +step:43749/57344 train_time:25230581ms step_avg:576.71ms +step:43750/57344 train_time:25230828ms step_avg:576.70ms +step:43751/57344 train_time:25231392ms step_avg:576.70ms +grad accum step:10938/14336 +step:43752/57344 train_time:25232840ms step_avg:576.72ms +step:43753/57344 train_time:25232856ms step_avg:576.71ms +step:43754/57344 train_time:25233108ms step_avg:576.70ms +step:43755/57344 train_time:25233678ms step_avg:576.70ms +grad accum step:10939/14336 +step:43756/57344 train_time:25235016ms step_avg:576.72ms +step:43757/57344 train_time:25235030ms step_avg:576.71ms +step:43758/57344 train_time:25235277ms step_avg:576.70ms +step:43759/57344 train_time:25235826ms step_avg:576.70ms +grad accum step:10940/14336 +step:43760/57344 train_time:25237108ms step_avg:576.72ms +step:43761/57344 train_time:25237124ms step_avg:576.70ms +step:43762/57344 train_time:25237367ms step_avg:576.70ms +step:43763/57344 train_time:25237906ms step_avg:576.70ms +grad accum step:10941/14336 +step:43764/57344 train_time:25239235ms step_avg:576.71ms +step:43765/57344 train_time:25239252ms step_avg:576.70ms +step:43766/57344 train_time:25239491ms step_avg:576.69ms +step:43767/57344 train_time:25240046ms step_avg:576.69ms +grad accum step:10942/14336 +step:43768/57344 train_time:25241408ms step_avg:576.71ms +step:43769/57344 train_time:25241424ms step_avg:576.70ms +step:43770/57344 train_time:25241673ms step_avg:576.69ms +step:43771/57344 train_time:25242238ms step_avg:576.69ms +grad accum step:10943/14336 +step:43772/57344 train_time:25243575ms step_avg:576.71ms +step:43773/57344 train_time:25243612ms step_avg:576.69ms +step:43774/57344 train_time:25243836ms step_avg:576.69ms +step:43775/57344 train_time:25244393ms step_avg:576.69ms +grad accum step:10944/14336 +step:43776/57344 train_time:25245761ms step_avg:576.70ms +step:43776/57344 val_loss:5.703759 train_time:25245766ms step_avg:576.70ms +step:43777/57344 train_time:25245778ms step_avg:576.69ms +step:43778/57344 train_time:25246007ms step_avg:576.68ms +step:43779/57344 train_time:25246561ms step_avg:576.68ms +grad accum step:10945/14336 +step:43780/57344 train_time:25247939ms step_avg:576.70ms +step:43781/57344 train_time:25247951ms step_avg:576.69ms +step:43782/57344 train_time:25248174ms step_avg:576.68ms +step:43783/57344 train_time:25248720ms step_avg:576.68ms +grad accum step:10946/14336 +step:43784/57344 train_time:25250053ms step_avg:576.70ms +step:43785/57344 train_time:25250072ms step_avg:576.68ms +step:43786/57344 train_time:25250306ms step_avg:576.68ms +step:43787/57344 train_time:25250844ms step_avg:576.67ms +grad accum step:10947/14336 +step:43788/57344 train_time:25252211ms step_avg:576.69ms +step:43789/57344 train_time:25252227ms step_avg:576.68ms +step:43790/57344 train_time:25252469ms step_avg:576.67ms +step:43791/57344 train_time:25253015ms step_avg:576.67ms +grad accum step:10948/14336 +step:43792/57344 train_time:25254343ms step_avg:576.69ms +step:43793/57344 train_time:25254359ms step_avg:576.68ms +step:43794/57344 train_time:25254614ms step_avg:576.67ms +step:43795/57344 train_time:25255182ms step_avg:576.67ms +grad accum step:10949/14336 +step:43796/57344 train_time:25256513ms step_avg:576.69ms +step:43797/57344 train_time:25256528ms step_avg:576.67ms +step:43798/57344 train_time:25256772ms step_avg:576.66ms +step:43799/57344 train_time:25257320ms step_avg:576.66ms +grad accum step:10950/14336 +step:43800/57344 train_time:25258651ms step_avg:576.68ms +step:43801/57344 train_time:25258668ms step_avg:576.67ms +step:43802/57344 train_time:25258916ms step_avg:576.66ms +step:43803/57344 train_time:25259485ms step_avg:576.66ms +grad accum step:10951/14336 +step:43804/57344 train_time:25260789ms step_avg:576.68ms +step:43805/57344 train_time:25260804ms step_avg:576.66ms +step:43806/57344 train_time:25261071ms step_avg:576.66ms +step:43807/57344 train_time:25261682ms step_avg:576.66ms +grad accum step:10952/14336 +step:43808/57344 train_time:25263043ms step_avg:576.68ms +step:43809/57344 train_time:25263057ms step_avg:576.66ms +step:43810/57344 train_time:25263300ms step_avg:576.66ms +step:43811/57344 train_time:25263852ms step_avg:576.66ms +grad accum step:10953/14336 +step:43812/57344 train_time:25265189ms step_avg:576.67ms +step:43813/57344 train_time:25265202ms step_avg:576.66ms +step:43814/57344 train_time:25265431ms step_avg:576.65ms +step:43815/57344 train_time:25265994ms step_avg:576.65ms +grad accum step:10954/14336 +step:43816/57344 train_time:25267313ms step_avg:576.67ms +step:43817/57344 train_time:25267329ms step_avg:576.66ms +step:43818/57344 train_time:25267577ms step_avg:576.65ms +step:43819/57344 train_time:25268134ms step_avg:576.65ms +grad accum step:10955/14336 +step:43820/57344 train_time:25269472ms step_avg:576.67ms +step:43821/57344 train_time:25269487ms step_avg:576.65ms +step:43822/57344 train_time:25269734ms step_avg:576.64ms +step:43823/57344 train_time:25270282ms step_avg:576.64ms +grad accum step:10956/14336 +step:43824/57344 train_time:25271608ms step_avg:576.66ms +step:43825/57344 train_time:25271653ms step_avg:576.65ms +step:43826/57344 train_time:25271877ms step_avg:576.64ms +step:43827/57344 train_time:25272437ms step_avg:576.64ms +grad accum step:10957/14336 +step:43828/57344 train_time:25273716ms step_avg:576.66ms +step:43829/57344 train_time:25273731ms step_avg:576.64ms +step:43830/57344 train_time:25273980ms step_avg:576.64ms +step:43831/57344 train_time:25274537ms step_avg:576.64ms +grad accum step:10958/14336 +step:43832/57344 train_time:25275850ms step_avg:576.65ms +step:43833/57344 train_time:25275864ms step_avg:576.64ms +step:43834/57344 train_time:25276115ms step_avg:576.63ms +step:43835/57344 train_time:25276680ms step_avg:576.63ms +grad accum step:10959/14336 +step:43836/57344 train_time:25278023ms step_avg:576.65ms +step:43837/57344 train_time:25278041ms step_avg:576.64ms +step:43838/57344 train_time:25278286ms step_avg:576.63ms +step:43839/57344 train_time:25278856ms step_avg:576.63ms +grad accum step:10960/14336 +step:43840/57344 train_time:25280329ms step_avg:576.65ms +step:43840/57344 val_loss:5.692029 train_time:25280388ms step_avg:576.65ms +step:43841/57344 train_time:25280400ms step_avg:576.64ms +step:43842/57344 train_time:25280627ms step_avg:576.63ms +step:43843/57344 train_time:25281194ms step_avg:576.63ms +grad accum step:10961/14336 +step:43844/57344 train_time:25282561ms step_avg:576.65ms +step:43845/57344 train_time:25282583ms step_avg:576.64ms +step:43846/57344 train_time:25282824ms step_avg:576.63ms +step:43847/57344 train_time:25283385ms step_avg:576.63ms +grad accum step:10962/14336 +step:43848/57344 train_time:25284713ms step_avg:576.64ms +step:43849/57344 train_time:25284738ms step_avg:576.63ms +step:43850/57344 train_time:25284966ms step_avg:576.62ms +step:43851/57344 train_time:25285510ms step_avg:576.62ms +grad accum step:10963/14336 +step:43852/57344 train_time:25286850ms step_avg:576.64ms +step:43853/57344 train_time:25286870ms step_avg:576.63ms +step:43854/57344 train_time:25287099ms step_avg:576.62ms +step:43855/57344 train_time:25287675ms step_avg:576.62ms +grad accum step:10964/14336 +step:43856/57344 train_time:25289045ms step_avg:576.64ms +step:43857/57344 train_time:25289065ms step_avg:576.63ms +step:43858/57344 train_time:25289307ms step_avg:576.62ms +step:43859/57344 train_time:25289872ms step_avg:576.62ms +grad accum step:10965/14336 +step:43860/57344 train_time:25291229ms step_avg:576.64ms +step:43861/57344 train_time:25291242ms step_avg:576.62ms +step:43862/57344 train_time:25291499ms step_avg:576.62ms +step:43863/57344 train_time:25292073ms step_avg:576.62ms +grad accum step:10966/14336 +step:43864/57344 train_time:25293398ms step_avg:576.63ms +step:43865/57344 train_time:25293412ms step_avg:576.62ms +step:43866/57344 train_time:25293657ms step_avg:576.61ms +step:43867/57344 train_time:25294201ms step_avg:576.61ms +grad accum step:10967/14336 +step:43868/57344 train_time:25295551ms step_avg:576.63ms +step:43869/57344 train_time:25295567ms step_avg:576.62ms +step:43870/57344 train_time:25295791ms step_avg:576.61ms +step:43871/57344 train_time:25296347ms step_avg:576.61ms +grad accum step:10968/14336 +step:43872/57344 train_time:25297647ms step_avg:576.62ms +step:43873/57344 train_time:25297668ms step_avg:576.61ms +step:43874/57344 train_time:25297909ms step_avg:576.60ms +step:43875/57344 train_time:25298459ms step_avg:576.60ms +grad accum step:10969/14336 +step:43876/57344 train_time:25299799ms step_avg:576.62ms +step:43877/57344 train_time:25299815ms step_avg:576.61ms +step:43878/57344 train_time:25300063ms step_avg:576.60ms +step:43879/57344 train_time:25300609ms step_avg:576.60ms +grad accum step:10970/14336 +step:43880/57344 train_time:25301938ms step_avg:576.62ms +step:43881/57344 train_time:25301952ms step_avg:576.60ms +step:43882/57344 train_time:25302193ms step_avg:576.60ms +step:43883/57344 train_time:25302761ms step_avg:576.60ms +grad accum step:10971/14336 +step:43884/57344 train_time:25304081ms step_avg:576.61ms +step:43885/57344 train_time:25304094ms step_avg:576.60ms +step:43886/57344 train_time:25304348ms step_avg:576.59ms +step:43887/57344 train_time:25304921ms step_avg:576.59ms +grad accum step:10972/14336 +step:43888/57344 train_time:25306248ms step_avg:576.61ms +step:43889/57344 train_time:25306317ms step_avg:576.60ms +step:43890/57344 train_time:25306543ms step_avg:576.59ms +step:43891/57344 train_time:25307112ms step_avg:576.59ms +grad accum step:10973/14336 +step:43892/57344 train_time:25308418ms step_avg:576.61ms +step:43893/57344 train_time:25308431ms step_avg:576.59ms +step:43894/57344 train_time:25308679ms step_avg:576.59ms +step:43895/57344 train_time:25309227ms step_avg:576.59ms +grad accum step:10974/14336 +step:43896/57344 train_time:25310511ms step_avg:576.60ms +step:43897/57344 train_time:25310528ms step_avg:576.59ms +step:43898/57344 train_time:25310780ms step_avg:576.58ms +step:43899/57344 train_time:25311345ms step_avg:576.58ms +grad accum step:10975/14336 +step:43900/57344 train_time:25312650ms step_avg:576.60ms +step:43901/57344 train_time:25312666ms step_avg:576.59ms +step:43902/57344 train_time:25312911ms step_avg:576.58ms +step:43903/57344 train_time:25313464ms step_avg:576.58ms +grad accum step:10976/14336 +step:43904/57344 train_time:25314817ms step_avg:576.59ms +step:43904/57344 val_loss:5.675054 train_time:25314825ms step_avg:576.59ms +step:43905/57344 train_time:25314837ms step_avg:576.58ms +step:43906/57344 train_time:25315059ms step_avg:576.57ms +step:43907/57344 train_time:25315604ms step_avg:576.57ms +grad accum step:10977/14336 +step:43908/57344 train_time:25316926ms step_avg:576.59ms +step:43909/57344 train_time:25316948ms step_avg:576.58ms +step:43910/57344 train_time:25317175ms step_avg:576.57ms +step:43911/57344 train_time:25317735ms step_avg:576.57ms +grad accum step:10978/14336 +step:43912/57344 train_time:25319071ms step_avg:576.59ms +step:43913/57344 train_time:25319111ms step_avg:576.57ms +step:43914/57344 train_time:25319336ms step_avg:576.57ms +step:43915/57344 train_time:25319896ms step_avg:576.57ms +grad accum step:10979/14336 +step:43916/57344 train_time:25321201ms step_avg:576.58ms +step:43917/57344 train_time:25321212ms step_avg:576.57ms +step:43918/57344 train_time:25321457ms step_avg:576.56ms +step:43919/57344 train_time:25322005ms step_avg:576.56ms +grad accum step:10980/14336 +step:43920/57344 train_time:25323331ms step_avg:576.58ms +step:43921/57344 train_time:25323352ms step_avg:576.57ms +step:43922/57344 train_time:25323591ms step_avg:576.56ms +step:43923/57344 train_time:25324181ms step_avg:576.56ms +grad accum step:10981/14336 +step:43924/57344 train_time:25325579ms step_avg:576.58ms +step:43925/57344 train_time:25325599ms step_avg:576.56ms +step:43926/57344 train_time:25325837ms step_avg:576.56ms +step:43927/57344 train_time:25326400ms step_avg:576.56ms +grad accum step:10982/14336 +step:43928/57344 train_time:25327700ms step_avg:576.57ms +step:43929/57344 train_time:25327712ms step_avg:576.56ms +step:43930/57344 train_time:25327951ms step_avg:576.55ms +step:43931/57344 train_time:25328517ms step_avg:576.55ms +grad accum step:10983/14336 +step:43932/57344 train_time:25329859ms step_avg:576.57ms +step:43933/57344 train_time:25329871ms step_avg:576.56ms +step:43934/57344 train_time:25330121ms step_avg:576.55ms +step:43935/57344 train_time:25330676ms step_avg:576.55ms +grad accum step:10984/14336 +step:43936/57344 train_time:25331988ms step_avg:576.57ms +step:43937/57344 train_time:25332007ms step_avg:576.55ms +step:43938/57344 train_time:25332255ms step_avg:576.55ms +step:43939/57344 train_time:25332820ms step_avg:576.55ms +grad accum step:10985/14336 +step:43940/57344 train_time:25334150ms step_avg:576.56ms +step:43941/57344 train_time:25334166ms step_avg:576.55ms +step:43942/57344 train_time:25334416ms step_avg:576.54ms +step:43943/57344 train_time:25334977ms step_avg:576.54ms +grad accum step:10986/14336 +step:43944/57344 train_time:25336357ms step_avg:576.56ms +step:43945/57344 train_time:25336376ms step_avg:576.55ms +step:43946/57344 train_time:25336598ms step_avg:576.54ms +step:43947/57344 train_time:25337148ms step_avg:576.54ms +grad accum step:10987/14336 +step:43948/57344 train_time:25338529ms step_avg:576.56ms +step:43949/57344 train_time:25338546ms step_avg:576.54ms +step:43950/57344 train_time:25338798ms step_avg:576.54ms +step:43951/57344 train_time:25339356ms step_avg:576.54ms +grad accum step:10988/14336 +step:43952/57344 train_time:25340674ms step_avg:576.55ms +step:43953/57344 train_time:25340685ms step_avg:576.54ms +step:43954/57344 train_time:25340931ms step_avg:576.53ms +step:43955/57344 train_time:25341481ms step_avg:576.53ms +grad accum step:10989/14336 +step:43956/57344 train_time:25342805ms step_avg:576.55ms +step:43957/57344 train_time:25342822ms step_avg:576.54ms +step:43958/57344 train_time:25343074ms step_avg:576.53ms +step:43959/57344 train_time:25343635ms step_avg:576.53ms +grad accum step:10990/14336 +step:43960/57344 train_time:25344977ms step_avg:576.55ms +step:43961/57344 train_time:25344993ms step_avg:576.53ms +step:43962/57344 train_time:25345244ms step_avg:576.53ms +step:43963/57344 train_time:25345808ms step_avg:576.53ms +grad accum step:10991/14336 +step:43964/57344 train_time:25353703ms step_avg:576.69ms +step:43965/57344 train_time:25353732ms step_avg:576.68ms +step:43966/57344 train_time:25353999ms step_avg:576.67ms +step:43967/57344 train_time:25354604ms step_avg:576.67ms +grad accum step:10992/14336 +step:43968/57344 train_time:25355982ms step_avg:576.69ms +step:43968/57344 val_loss:5.667020 train_time:25355986ms step_avg:576.69ms +step:43969/57344 train_time:25355998ms step_avg:576.68ms +step:43970/57344 train_time:25356614ms step_avg:576.68ms +step:43971/57344 train_time:25356888ms step_avg:576.67ms +grad accum step:10993/14336 +step:43972/57344 train_time:25358228ms step_avg:576.69ms +step:43973/57344 train_time:25358244ms step_avg:576.68ms +step:43974/57344 train_time:25358494ms step_avg:576.67ms +step:43975/57344 train_time:25359044ms step_avg:576.67ms +grad accum step:10994/14336 +step:43976/57344 train_time:25360401ms step_avg:576.69ms +step:43977/57344 train_time:25360416ms step_avg:576.67ms +step:43978/57344 train_time:25360675ms step_avg:576.67ms +step:43979/57344 train_time:25361256ms step_avg:576.67ms +grad accum step:10995/14336 +step:43980/57344 train_time:25362567ms step_avg:576.68ms +step:43981/57344 train_time:25362584ms step_avg:576.67ms +step:43982/57344 train_time:25362836ms step_avg:576.66ms +step:43983/57344 train_time:25363398ms step_avg:576.66ms +grad accum step:10996/14336 +step:43984/57344 train_time:25364746ms step_avg:576.68ms +step:43985/57344 train_time:25364758ms step_avg:576.67ms +step:43986/57344 train_time:25365002ms step_avg:576.66ms +step:43987/57344 train_time:25365572ms step_avg:576.66ms +grad accum step:10997/14336 +step:43988/57344 train_time:25366953ms step_avg:576.68ms +step:43989/57344 train_time:25366981ms step_avg:576.67ms +step:43990/57344 train_time:25367203ms step_avg:576.66ms +step:43991/57344 train_time:25367773ms step_avg:576.66ms +grad accum step:10998/14336 +step:43992/57344 train_time:25369154ms step_avg:576.68ms +step:43993/57344 train_time:25369165ms step_avg:576.66ms +step:43994/57344 train_time:25369394ms step_avg:576.66ms +step:43995/57344 train_time:25369944ms step_avg:576.66ms +grad accum step:10999/14336 +step:43996/57344 train_time:25371243ms step_avg:576.67ms +step:43997/57344 train_time:25371259ms step_avg:576.66ms +step:43998/57344 train_time:25371510ms step_avg:576.65ms +step:43999/57344 train_time:25372070ms step_avg:576.65ms +grad accum step:11000/14336 +step:44000/57344 train_time:25373432ms step_avg:576.67ms +step:44001/57344 train_time:25373444ms step_avg:576.66ms +step:44002/57344 train_time:25373699ms step_avg:576.65ms +step:44003/57344 train_time:25374303ms step_avg:576.65ms +grad accum step:11001/14336 +step:44004/57344 train_time:25375683ms step_avg:576.67ms +step:44005/57344 train_time:25375713ms step_avg:576.66ms +step:44006/57344 train_time:25375938ms step_avg:576.65ms +step:44007/57344 train_time:25376503ms step_avg:576.65ms +grad accum step:11002/14336 +step:44008/57344 train_time:25377943ms step_avg:576.67ms +step:44009/57344 train_time:25377967ms step_avg:576.65ms +step:44010/57344 train_time:25378192ms step_avg:576.65ms +step:44011/57344 train_time:25378750ms step_avg:576.65ms +grad accum step:11003/14336 +step:44012/57344 train_time:25380042ms step_avg:576.66ms +step:44013/57344 train_time:25380056ms step_avg:576.65ms +step:44014/57344 train_time:25380329ms step_avg:576.64ms +step:44015/57344 train_time:25380945ms step_avg:576.64ms +grad accum step:11004/14336 +step:44016/57344 train_time:25382296ms step_avg:576.66ms +step:44017/57344 train_time:25382318ms step_avg:576.65ms +step:44018/57344 train_time:25382541ms step_avg:576.64ms +step:44019/57344 train_time:25383095ms step_avg:576.64ms +grad accum step:11005/14336 +step:44020/57344 train_time:25384448ms step_avg:576.66ms +step:44021/57344 train_time:25384459ms step_avg:576.64ms +step:44022/57344 train_time:25384700ms step_avg:576.64ms +step:44023/57344 train_time:25385260ms step_avg:576.64ms +grad accum step:11006/14336 +step:44024/57344 train_time:25386566ms step_avg:576.65ms +step:44025/57344 train_time:25386600ms step_avg:576.64ms +step:44026/57344 train_time:25386827ms step_avg:576.63ms +step:44027/57344 train_time:25387399ms step_avg:576.63ms +grad accum step:11007/14336 +step:44028/57344 train_time:25388727ms step_avg:576.65ms +step:44029/57344 train_time:25388744ms step_avg:576.64ms +step:44030/57344 train_time:25388990ms step_avg:576.63ms +step:44031/57344 train_time:25389538ms step_avg:576.63ms +grad accum step:11008/14336 +step:44032/57344 train_time:25396135ms step_avg:576.77ms +step:44032/57344 val_loss:5.655145 train_time:25396136ms step_avg:576.77ms +step:44033/57344 train_time:25396148ms step_avg:576.75ms +step:44034/57344 train_time:25396408ms step_avg:576.75ms +step:44035/57344 train_time:25396948ms step_avg:576.74ms +grad accum step:11009/14336 +step:44036/57344 train_time:25398250ms step_avg:576.76ms +step:44037/57344 train_time:25398272ms step_avg:576.75ms +step:44038/57344 train_time:25398488ms step_avg:576.74ms +step:44039/57344 train_time:25399033ms step_avg:576.74ms +grad accum step:11010/14336 +step:44040/57344 train_time:25400338ms step_avg:576.76ms +step:44041/57344 train_time:25400349ms step_avg:576.74ms +step:44042/57344 train_time:25400595ms step_avg:576.74ms +step:44043/57344 train_time:25401137ms step_avg:576.73ms +grad accum step:11011/14336 +step:44044/57344 train_time:25402433ms step_avg:576.75ms +step:44045/57344 train_time:25402449ms step_avg:576.74ms +step:44046/57344 train_time:25402691ms step_avg:576.73ms +step:44047/57344 train_time:25403238ms step_avg:576.73ms +grad accum step:11012/14336 +step:44048/57344 train_time:25404560ms step_avg:576.75ms +step:44049/57344 train_time:25404577ms step_avg:576.73ms +step:44050/57344 train_time:25404822ms step_avg:576.73ms +step:44051/57344 train_time:25405365ms step_avg:576.73ms +grad accum step:11013/14336 +step:44052/57344 train_time:25406678ms step_avg:576.74ms +step:44053/57344 train_time:25406690ms step_avg:576.73ms +step:44054/57344 train_time:25406931ms step_avg:576.72ms +step:44055/57344 train_time:25407495ms step_avg:576.72ms +grad accum step:11014/14336 +step:44056/57344 train_time:25408844ms step_avg:576.74ms +step:44057/57344 train_time:25408860ms step_avg:576.73ms +step:44058/57344 train_time:25409095ms step_avg:576.72ms +step:44059/57344 train_time:25409658ms step_avg:576.72ms +grad accum step:11015/14336 +step:44060/57344 train_time:25410984ms step_avg:576.74ms +step:44061/57344 train_time:25411000ms step_avg:576.72ms +step:44062/57344 train_time:25411219ms step_avg:576.72ms +step:44063/57344 train_time:25411759ms step_avg:576.71ms +grad accum step:11016/14336 +step:44064/57344 train_time:25413063ms step_avg:576.73ms +step:44065/57344 train_time:25413080ms step_avg:576.72ms +step:44066/57344 train_time:25413329ms step_avg:576.71ms +step:44067/57344 train_time:25413880ms step_avg:576.71ms +grad accum step:11017/14336 +step:44068/57344 train_time:25415175ms step_avg:576.73ms +step:44069/57344 train_time:25415200ms step_avg:576.71ms +step:44070/57344 train_time:25415431ms step_avg:576.71ms +step:44071/57344 train_time:25415988ms step_avg:576.71ms +grad accum step:11018/14336 +step:44072/57344 train_time:25417333ms step_avg:576.72ms +step:44073/57344 train_time:25417350ms step_avg:576.71ms +step:44074/57344 train_time:25417595ms step_avg:576.70ms +step:44075/57344 train_time:25418140ms step_avg:576.70ms +grad accum step:11019/14336 +step:44076/57344 train_time:25419456ms step_avg:576.72ms +step:44077/57344 train_time:25419472ms step_avg:576.71ms +step:44078/57344 train_time:25419715ms step_avg:576.70ms +step:44079/57344 train_time:25420268ms step_avg:576.70ms +grad accum step:11020/14336 +step:44080/57344 train_time:25421549ms step_avg:576.71ms +step:44081/57344 train_time:25421568ms step_avg:576.70ms +step:44082/57344 train_time:25421813ms step_avg:576.69ms +step:44083/57344 train_time:25422372ms step_avg:576.69ms +grad accum step:11021/14336 +step:44084/57344 train_time:25423728ms step_avg:576.71ms +step:44085/57344 train_time:25423749ms step_avg:576.70ms +step:44086/57344 train_time:25423967ms step_avg:576.69ms +step:44087/57344 train_time:25424514ms step_avg:576.69ms +grad accum step:11022/14336 +step:44088/57344 train_time:25425845ms step_avg:576.71ms +step:44089/57344 train_time:25425862ms step_avg:576.69ms +step:44090/57344 train_time:25426112ms step_avg:576.69ms +step:44091/57344 train_time:25426669ms step_avg:576.69ms +grad accum step:11023/14336 +step:44092/57344 train_time:25428005ms step_avg:576.70ms +step:44093/57344 train_time:25428023ms step_avg:576.69ms +step:44094/57344 train_time:25428269ms step_avg:576.68ms +step:44095/57344 train_time:25428829ms step_avg:576.68ms +grad accum step:11024/14336 +step:44096/57344 train_time:25430133ms step_avg:576.70ms +step:44096/57344 val_loss:5.643243 train_time:25430145ms step_avg:576.70ms +step:44097/57344 train_time:25430157ms step_avg:576.69ms +step:44098/57344 train_time:25430418ms step_avg:576.68ms +step:44099/57344 train_time:25430984ms step_avg:576.68ms +grad accum step:11025/14336 +step:44100/57344 train_time:25432432ms step_avg:576.70ms +step:44101/57344 train_time:25432449ms step_avg:576.69ms +step:44102/57344 train_time:25432696ms step_avg:576.68ms +step:44103/57344 train_time:25433259ms step_avg:576.68ms +grad accum step:11026/14336 +step:44104/57344 train_time:25434584ms step_avg:576.70ms +step:44105/57344 train_time:25434597ms step_avg:576.68ms +step:44106/57344 train_time:25434836ms step_avg:576.68ms +step:44107/57344 train_time:25435382ms step_avg:576.67ms +grad accum step:11027/14336 +step:44108/57344 train_time:25436686ms step_avg:576.69ms +step:44109/57344 train_time:25436697ms step_avg:576.68ms +step:44110/57344 train_time:25436925ms step_avg:576.67ms +step:44111/57344 train_time:25437471ms step_avg:576.67ms +grad accum step:11028/14336 +step:44112/57344 train_time:25438788ms step_avg:576.69ms +step:44113/57344 train_time:25438804ms step_avg:576.67ms +step:44114/57344 train_time:25439050ms step_avg:576.67ms +step:44115/57344 train_time:25439596ms step_avg:576.67ms +grad accum step:11029/14336 +step:44116/57344 train_time:25440908ms step_avg:576.68ms +step:44117/57344 train_time:25440925ms step_avg:576.67ms +step:44118/57344 train_time:25441171ms step_avg:576.66ms +step:44119/57344 train_time:25441716ms step_avg:576.66ms +grad accum step:11030/14336 +step:44120/57344 train_time:25443052ms step_avg:576.68ms +step:44121/57344 train_time:25443069ms step_avg:576.67ms +step:44122/57344 train_time:25443316ms step_avg:576.66ms +step:44123/57344 train_time:25443860ms step_avg:576.66ms +grad accum step:11031/14336 +step:44124/57344 train_time:25445259ms step_avg:576.68ms +step:44125/57344 train_time:25445270ms step_avg:576.66ms +step:44126/57344 train_time:25445508ms step_avg:576.66ms +step:44127/57344 train_time:25446059ms step_avg:576.66ms +grad accum step:11032/14336 +step:44128/57344 train_time:25447345ms step_avg:576.67ms +step:44129/57344 train_time:25447357ms step_avg:576.66ms +step:44130/57344 train_time:25447604ms step_avg:576.65ms +step:44131/57344 train_time:25448168ms step_avg:576.65ms +grad accum step:11033/14336 +step:44132/57344 train_time:25449521ms step_avg:576.67ms +step:44133/57344 train_time:25449534ms step_avg:576.66ms +step:44134/57344 train_time:25449770ms step_avg:576.65ms +step:44135/57344 train_time:25450329ms step_avg:576.65ms +grad accum step:11034/14336 +step:44136/57344 train_time:25451653ms step_avg:576.66ms +step:44137/57344 train_time:25451669ms step_avg:576.65ms +step:44138/57344 train_time:25451920ms step_avg:576.64ms +step:44139/57344 train_time:25452494ms step_avg:576.64ms +grad accum step:11035/14336 +step:44140/57344 train_time:25453849ms step_avg:576.66ms +step:44141/57344 train_time:25453862ms step_avg:576.65ms +step:44142/57344 train_time:25454107ms step_avg:576.64ms +step:44143/57344 train_time:25454669ms step_avg:576.64ms +grad accum step:11036/14336 +step:44144/57344 train_time:25456034ms step_avg:576.66ms +step:44145/57344 train_time:25456049ms step_avg:576.65ms +step:44146/57344 train_time:25456284ms step_avg:576.64ms +step:44147/57344 train_time:25456830ms step_avg:576.64ms +grad accum step:11037/14336 +step:44148/57344 train_time:25458250ms step_avg:576.66ms +step:44149/57344 train_time:25458296ms step_avg:576.64ms +step:44150/57344 train_time:25458515ms step_avg:576.64ms +step:44151/57344 train_time:25459066ms step_avg:576.64ms +grad accum step:11038/14336 +step:44152/57344 train_time:25460384ms step_avg:576.65ms +step:44153/57344 train_time:25460401ms step_avg:576.64ms +step:44154/57344 train_time:25460653ms step_avg:576.63ms +step:44155/57344 train_time:25461216ms step_avg:576.63ms +grad accum step:11039/14336 +step:44156/57344 train_time:25513656ms step_avg:577.81ms +step:44157/57344 train_time:25513683ms step_avg:577.79ms +step:44158/57344 train_time:25513984ms step_avg:577.79ms +step:44159/57344 train_time:25514558ms step_avg:577.79ms +grad accum step:11040/14336 +step:44160/57344 train_time:25515891ms step_avg:577.81ms +step:44160/57344 val_loss:5.637422 train_time:25515891ms step_avg:577.81ms +step:44161/57344 train_time:25515903ms step_avg:577.79ms +step:44162/57344 train_time:25516118ms step_avg:577.78ms +step:44163/57344 train_time:25516669ms step_avg:577.78ms +grad accum step:11041/14336 +step:44164/57344 train_time:25518027ms step_avg:577.80ms +step:44165/57344 train_time:25518038ms step_avg:577.79ms +step:44166/57344 train_time:25518274ms step_avg:577.78ms +step:44167/57344 train_time:25518827ms step_avg:577.78ms +grad accum step:11042/14336 +step:44168/57344 train_time:25520128ms step_avg:577.80ms +step:44169/57344 train_time:25520150ms step_avg:577.78ms +step:44170/57344 train_time:25520390ms step_avg:577.78ms +step:44171/57344 train_time:25520936ms step_avg:577.78ms +grad accum step:11043/14336 +step:44172/57344 train_time:25522239ms step_avg:577.79ms +step:44173/57344 train_time:25522254ms step_avg:577.78ms +step:44174/57344 train_time:25522501ms step_avg:577.77ms +step:44175/57344 train_time:25523046ms step_avg:577.77ms +grad accum step:11044/14336 +step:44176/57344 train_time:25524344ms step_avg:577.79ms +step:44177/57344 train_time:25524361ms step_avg:577.77ms +step:44178/57344 train_time:25524612ms step_avg:577.77ms +step:44179/57344 train_time:25525180ms step_avg:577.77ms +grad accum step:11045/14336 +step:44180/57344 train_time:25526504ms step_avg:577.78ms +step:44181/57344 train_time:25526521ms step_avg:577.77ms +step:44182/57344 train_time:25526779ms step_avg:577.76ms +step:44183/57344 train_time:25527363ms step_avg:577.76ms +grad accum step:11046/14336 +step:44184/57344 train_time:25528754ms step_avg:577.78ms +step:44185/57344 train_time:25528782ms step_avg:577.77ms +step:44186/57344 train_time:25529007ms step_avg:577.76ms +step:44187/57344 train_time:25529576ms step_avg:577.76ms +grad accum step:11047/14336 +step:44188/57344 train_time:25530888ms step_avg:577.78ms +step:44189/57344 train_time:25530904ms step_avg:577.77ms +step:44190/57344 train_time:25531151ms step_avg:577.76ms +step:44191/57344 train_time:25531699ms step_avg:577.76ms +grad accum step:11048/14336 +step:44192/57344 train_time:25533026ms step_avg:577.77ms +step:44193/57344 train_time:25533064ms step_avg:577.76ms +step:44194/57344 train_time:25533284ms step_avg:577.75ms +step:44195/57344 train_time:25533833ms step_avg:577.75ms +grad accum step:11049/14336 +step:44196/57344 train_time:25535148ms step_avg:577.77ms +step:44197/57344 train_time:25535159ms step_avg:577.76ms +step:44198/57344 train_time:25535404ms step_avg:577.75ms +step:44199/57344 train_time:25535964ms step_avg:577.75ms +grad accum step:11050/14336 +step:44200/57344 train_time:25537291ms step_avg:577.77ms +step:44201/57344 train_time:25537302ms step_avg:577.75ms +step:44202/57344 train_time:25537543ms step_avg:577.75ms +step:44203/57344 train_time:25538089ms step_avg:577.75ms +grad accum step:11051/14336 +step:44204/57344 train_time:25539431ms step_avg:577.76ms +step:44205/57344 train_time:25539447ms step_avg:577.75ms +step:44206/57344 train_time:25539706ms step_avg:577.74ms +step:44207/57344 train_time:25540283ms step_avg:577.74ms +grad accum step:11052/14336 +step:44208/57344 train_time:25541607ms step_avg:577.76ms +step:44209/57344 train_time:25541623ms step_avg:577.75ms +step:44210/57344 train_time:25541868ms step_avg:577.74ms +step:44211/57344 train_time:25542413ms step_avg:577.74ms +grad accum step:11053/14336 +step:44212/57344 train_time:25543753ms step_avg:577.76ms +step:44213/57344 train_time:25543767ms step_avg:577.74ms +step:44214/57344 train_time:25544014ms step_avg:577.74ms +step:44215/57344 train_time:25544565ms step_avg:577.74ms +grad accum step:11054/14336 +step:44216/57344 train_time:25545870ms step_avg:577.75ms +step:44217/57344 train_time:25545888ms step_avg:577.74ms +step:44218/57344 train_time:25546138ms step_avg:577.73ms +step:44219/57344 train_time:25548071ms step_avg:577.76ms +grad accum step:11055/14336 +step:44220/57344 train_time:25549118ms step_avg:577.77ms +step:44221/57344 train_time:25549133ms step_avg:577.76ms +step:44222/57344 train_time:25549380ms step_avg:577.75ms +step:44223/57344 train_time:25549927ms step_avg:577.75ms +grad accum step:11056/14336 +step:44224/57344 train_time:25551291ms step_avg:577.77ms +step:44224/57344 val_loss:5.627028 train_time:25551292ms step_avg:577.77ms +step:44225/57344 train_time:25551484ms step_avg:577.76ms +step:44226/57344 train_time:25559518ms step_avg:577.93ms +step:44227/57344 train_time:25559815ms step_avg:577.92ms +grad accum step:11057/14336 +step:44228/57344 train_time:25623877ms step_avg:579.36ms +step:44229/57344 train_time:25632834ms step_avg:579.55ms +step:44230/57344 train_time:25633144ms step_avg:579.54ms +step:44231/57344 train_time:25633677ms step_avg:579.54ms +grad accum step:11058/14336 +step:44232/57344 train_time:25635012ms step_avg:579.56ms +step:44233/57344 train_time:25635023ms step_avg:579.55ms +step:44234/57344 train_time:25635263ms step_avg:579.54ms +step:44235/57344 train_time:25635811ms step_avg:579.54ms +grad accum step:11059/14336 +step:44236/57344 train_time:25637082ms step_avg:579.55ms +step:44237/57344 train_time:25637098ms step_avg:579.54ms +step:44238/57344 train_time:25637342ms step_avg:579.53ms +step:44239/57344 train_time:25637889ms step_avg:579.53ms +grad accum step:11060/14336 +step:44240/57344 train_time:25639167ms step_avg:579.55ms +step:44241/57344 train_time:25639184ms step_avg:579.53ms +step:44242/57344 train_time:25639422ms step_avg:579.53ms +step:44243/57344 train_time:25639952ms step_avg:579.53ms +grad accum step:11061/14336 +step:44244/57344 train_time:25641277ms step_avg:579.54ms +step:44245/57344 train_time:25641298ms step_avg:579.53ms +step:44246/57344 train_time:25641534ms step_avg:579.52ms +step:44247/57344 train_time:25642087ms step_avg:579.52ms +grad accum step:11062/14336 +step:44248/57344 train_time:25643381ms step_avg:579.54ms +step:44249/57344 train_time:25643398ms step_avg:579.52ms +step:44250/57344 train_time:25643653ms step_avg:579.52ms +step:44251/57344 train_time:25644225ms step_avg:579.52ms +grad accum step:11063/14336 +step:44252/57344 train_time:25645510ms step_avg:579.53ms +step:44253/57344 train_time:25645531ms step_avg:579.52ms +step:44254/57344 train_time:25645763ms step_avg:579.51ms +step:44255/57344 train_time:25646345ms step_avg:579.51ms +grad accum step:11064/14336 +step:44256/57344 train_time:25647722ms step_avg:579.53ms +step:44257/57344 train_time:25647733ms step_avg:579.52ms +step:44258/57344 train_time:25647965ms step_avg:579.51ms +step:44259/57344 train_time:25648511ms step_avg:579.51ms +grad accum step:11065/14336 +step:44260/57344 train_time:25649869ms step_avg:579.53ms +step:44261/57344 train_time:25649881ms step_avg:579.51ms +step:44262/57344 train_time:25650110ms step_avg:579.51ms +step:44263/57344 train_time:25650677ms step_avg:579.51ms +grad accum step:11066/14336 +step:44264/57344 train_time:25652007ms step_avg:579.52ms +step:44265/57344 train_time:25652024ms step_avg:579.51ms +step:44266/57344 train_time:25652260ms step_avg:579.50ms +step:44267/57344 train_time:25652805ms step_avg:579.50ms +grad accum step:11067/14336 +step:44268/57344 train_time:25654113ms step_avg:579.52ms +step:44269/57344 train_time:25654129ms step_avg:579.51ms +step:44270/57344 train_time:25654379ms step_avg:579.50ms +step:44271/57344 train_time:25654929ms step_avg:579.50ms +grad accum step:11068/14336 +step:44272/57344 train_time:25691816ms step_avg:580.32ms +step:44273/57344 train_time:25691839ms step_avg:580.30ms +step:44274/57344 train_time:25692093ms step_avg:580.30ms +step:44275/57344 train_time:25692635ms step_avg:580.30ms +grad accum step:11069/14336 +step:44276/57344 train_time:25693926ms step_avg:580.31ms +step:44277/57344 train_time:25693942ms step_avg:580.30ms +step:44278/57344 train_time:25694185ms step_avg:580.29ms +step:44279/57344 train_time:25694730ms step_avg:580.29ms +grad accum step:11070/14336 +step:44280/57344 train_time:25696041ms step_avg:580.31ms +step:44281/57344 train_time:25696071ms step_avg:580.30ms +step:44282/57344 train_time:25696291ms step_avg:580.29ms +step:44283/57344 train_time:25696838ms step_avg:580.29ms +grad accum step:11071/14336 +step:44284/57344 train_time:25698205ms step_avg:580.30ms +step:44285/57344 train_time:25698221ms step_avg:580.29ms +step:44286/57344 train_time:25698463ms step_avg:580.28ms +step:44287/57344 train_time:25698998ms step_avg:580.28ms +grad accum step:11072/14336 +step:44288/57344 train_time:25700282ms step_avg:580.30ms +step:44288/57344 val_loss:5.620749 train_time:25700288ms step_avg:580.30ms +step:44289/57344 train_time:25700300ms step_avg:580.29ms +step:44290/57344 train_time:25700526ms step_avg:580.28ms +step:44291/57344 train_time:25701104ms step_avg:580.28ms +grad accum step:11073/14336 +step:44292/57344 train_time:25702480ms step_avg:580.30ms +step:44293/57344 train_time:25702524ms step_avg:580.28ms +step:44294/57344 train_time:25702744ms step_avg:580.28ms +step:44295/57344 train_time:25703295ms step_avg:580.28ms +grad accum step:11074/14336 +step:44296/57344 train_time:25704605ms step_avg:580.29ms +step:44297/57344 train_time:25704643ms step_avg:580.28ms +step:44298/57344 train_time:25704868ms step_avg:580.27ms +step:44299/57344 train_time:25705430ms step_avg:580.27ms +grad accum step:11075/14336 +step:44300/57344 train_time:25706814ms step_avg:580.29ms +step:44301/57344 train_time:25706826ms step_avg:580.28ms +step:44302/57344 train_time:25707081ms step_avg:580.27ms +step:44303/57344 train_time:25707653ms step_avg:580.27ms +grad accum step:11076/14336 +step:44304/57344 train_time:25708965ms step_avg:580.29ms +step:44305/57344 train_time:25708978ms step_avg:580.27ms +step:44306/57344 train_time:25709217ms step_avg:580.26ms +step:44307/57344 train_time:25709784ms step_avg:580.26ms +grad accum step:11077/14336 +step:44308/57344 train_time:25711127ms step_avg:580.28ms +step:44309/57344 train_time:25711143ms step_avg:580.27ms +step:44310/57344 train_time:25711400ms step_avg:580.26ms +step:44311/57344 train_time:25711972ms step_avg:580.26ms +grad accum step:11078/14336 +step:44312/57344 train_time:25713256ms step_avg:580.28ms +step:44313/57344 train_time:25713278ms step_avg:580.26ms +step:44314/57344 train_time:25713512ms step_avg:580.26ms +step:44315/57344 train_time:25714063ms step_avg:580.26ms +grad accum step:11079/14336 +step:44316/57344 train_time:25715381ms step_avg:580.27ms +step:44317/57344 train_time:25715392ms step_avg:580.26ms +step:44318/57344 train_time:25715632ms step_avg:580.25ms +step:44319/57344 train_time:25716185ms step_avg:580.25ms +grad accum step:11080/14336 +step:44320/57344 train_time:25717584ms step_avg:580.27ms +step:44321/57344 train_time:25717606ms step_avg:580.26ms +step:44322/57344 train_time:25717831ms step_avg:580.25ms +step:44323/57344 train_time:25718385ms step_avg:580.25ms +grad accum step:11081/14336 +step:44324/57344 train_time:25719692ms step_avg:580.27ms +step:44325/57344 train_time:25719704ms step_avg:580.25ms +step:44326/57344 train_time:25719951ms step_avg:580.25ms +step:44327/57344 train_time:25720513ms step_avg:580.24ms +grad accum step:11082/14336 +step:44328/57344 train_time:25721868ms step_avg:580.26ms +step:44329/57344 train_time:25721884ms step_avg:580.25ms +step:44330/57344 train_time:25722146ms step_avg:580.24ms +step:44331/57344 train_time:25722733ms step_avg:580.24ms +grad accum step:11083/14336 +step:44332/57344 train_time:25724073ms step_avg:580.26ms +step:44333/57344 train_time:25724090ms step_avg:580.25ms +step:44334/57344 train_time:25724337ms step_avg:580.24ms +step:44335/57344 train_time:25724885ms step_avg:580.24ms +grad accum step:11084/14336 +step:44336/57344 train_time:25726176ms step_avg:580.25ms +step:44337/57344 train_time:25726193ms step_avg:580.24ms +step:44338/57344 train_time:25726462ms step_avg:580.24ms +step:44339/57344 train_time:25727067ms step_avg:580.24ms +grad accum step:11085/14336 +step:44340/57344 train_time:25728434ms step_avg:580.25ms +step:44341/57344 train_time:25728450ms step_avg:580.24ms +step:44342/57344 train_time:25728702ms step_avg:580.23ms +step:44343/57344 train_time:25729257ms step_avg:580.23ms +grad accum step:11086/14336 +step:44344/57344 train_time:25730611ms step_avg:580.25ms +step:44345/57344 train_time:25730623ms step_avg:580.24ms +step:44346/57344 train_time:25730858ms step_avg:580.23ms +step:44347/57344 train_time:25731424ms step_avg:580.23ms +grad accum step:11087/14336 +step:44348/57344 train_time:25732774ms step_avg:580.25ms +step:44349/57344 train_time:25732787ms step_avg:580.23ms +step:44350/57344 train_time:25733038ms step_avg:580.23ms +step:44351/57344 train_time:25733605ms step_avg:580.23ms +grad accum step:11088/14336 +step:44352/57344 train_time:25734977ms step_avg:580.24ms +step:44352/57344 val_loss:5.608880 train_time:25734978ms step_avg:580.24ms +step:44353/57344 train_time:25734990ms step_avg:580.23ms +step:44354/57344 train_time:25735215ms step_avg:580.22ms +step:44355/57344 train_time:25735768ms step_avg:580.22ms +grad accum step:11089/14336 +step:44356/57344 train_time:25737167ms step_avg:580.24ms +step:44357/57344 train_time:25737183ms step_avg:580.23ms +step:44358/57344 train_time:25737432ms step_avg:580.22ms +step:44359/57344 train_time:25737988ms step_avg:580.22ms +grad accum step:11090/14336 +step:44360/57344 train_time:25739318ms step_avg:580.24ms +step:44361/57344 train_time:25739346ms step_avg:580.22ms +step:44362/57344 train_time:25739572ms step_avg:580.22ms +step:44363/57344 train_time:25740143ms step_avg:580.22ms +grad accum step:11091/14336 +step:44364/57344 train_time:25741504ms step_avg:580.23ms +step:44365/57344 train_time:25741534ms step_avg:580.22ms +step:44366/57344 train_time:25741759ms step_avg:580.21ms +step:44367/57344 train_time:25742328ms step_avg:580.21ms +grad accum step:11092/14336 +step:44368/57344 train_time:25743692ms step_avg:580.23ms +step:44369/57344 train_time:25743728ms step_avg:580.22ms +step:44370/57344 train_time:25743956ms step_avg:580.21ms +step:44371/57344 train_time:25744527ms step_avg:580.21ms +grad accum step:11093/14336 +step:44372/57344 train_time:25745867ms step_avg:580.23ms +step:44373/57344 train_time:25745881ms step_avg:580.22ms +step:44374/57344 train_time:25746135ms step_avg:580.21ms +step:44375/57344 train_time:25746706ms step_avg:580.21ms +grad accum step:11094/14336 +step:44376/57344 train_time:25748039ms step_avg:580.22ms +step:44377/57344 train_time:25748059ms step_avg:580.21ms +step:44378/57344 train_time:25748314ms step_avg:580.20ms +step:44379/57344 train_time:25748897ms step_avg:580.20ms +grad accum step:11095/14336 +step:44380/57344 train_time:25750205ms step_avg:580.22ms +step:44381/57344 train_time:25750217ms step_avg:580.21ms +step:44382/57344 train_time:25750464ms step_avg:580.20ms +step:44383/57344 train_time:25751024ms step_avg:580.20ms +grad accum step:11096/14336 +step:44384/57344 train_time:25752390ms step_avg:580.22ms +step:44385/57344 train_time:25752401ms step_avg:580.21ms +step:44386/57344 train_time:25752638ms step_avg:580.20ms +step:44387/57344 train_time:25753198ms step_avg:580.20ms +grad accum step:11097/14336 +step:44388/57344 train_time:25754535ms step_avg:580.21ms +step:44389/57344 train_time:25754552ms step_avg:580.20ms +step:44390/57344 train_time:25754805ms step_avg:580.19ms +step:44391/57344 train_time:25755372ms step_avg:580.19ms +grad accum step:11098/14336 +step:44392/57344 train_time:25756691ms step_avg:580.21ms +step:44393/57344 train_time:25756707ms step_avg:580.20ms +step:44394/57344 train_time:25756954ms step_avg:580.19ms +step:44395/57344 train_time:25757499ms step_avg:580.19ms +grad accum step:11099/14336 +step:44396/57344 train_time:25758828ms step_avg:580.21ms +step:44397/57344 train_time:25758839ms step_avg:580.19ms +step:44398/57344 train_time:25759089ms step_avg:580.19ms +step:44399/57344 train_time:25759660ms step_avg:580.19ms +grad accum step:11100/14336 +step:44400/57344 train_time:25760996ms step_avg:580.20ms +step:44401/57344 train_time:25761013ms step_avg:580.19ms +step:44402/57344 train_time:25761260ms step_avg:580.18ms +step:44403/57344 train_time:25761806ms step_avg:580.18ms +grad accum step:11101/14336 +step:44404/57344 train_time:25763131ms step_avg:580.20ms +step:44405/57344 train_time:25763160ms step_avg:580.19ms +step:44406/57344 train_time:25763389ms step_avg:580.18ms +step:44407/57344 train_time:25763949ms step_avg:580.18ms +grad accum step:11102/14336 +step:44408/57344 train_time:25765274ms step_avg:580.19ms +step:44409/57344 train_time:25765292ms step_avg:580.18ms +step:44410/57344 train_time:25765532ms step_avg:580.17ms +step:44411/57344 train_time:25766097ms step_avg:580.17ms +grad accum step:11103/14336 +step:44412/57344 train_time:25767429ms step_avg:580.19ms +step:44413/57344 train_time:25767446ms step_avg:580.18ms +step:44414/57344 train_time:25767695ms step_avg:580.17ms +step:44415/57344 train_time:25768243ms step_avg:580.17ms +grad accum step:11104/14336 +step:44416/57344 train_time:25769574ms step_avg:580.19ms +step:44416/57344 val_loss:5.600425 train_time:25769584ms step_avg:580.19ms +step:44417/57344 train_time:25769595ms step_avg:580.17ms +step:44418/57344 train_time:25769821ms step_avg:580.17ms +step:44419/57344 train_time:25770389ms step_avg:580.17ms +grad accum step:11105/14336 +step:44420/57344 train_time:25771745ms step_avg:580.18ms +step:44421/57344 train_time:25771778ms step_avg:580.17ms +step:44422/57344 train_time:25772001ms step_avg:580.16ms +step:44423/57344 train_time:25772549ms step_avg:580.16ms +grad accum step:11106/14336 +step:44424/57344 train_time:25773991ms step_avg:580.18ms +step:44425/57344 train_time:25774003ms step_avg:580.17ms +step:44426/57344 train_time:25774250ms step_avg:580.16ms +step:44427/57344 train_time:25774825ms step_avg:580.16ms +grad accum step:11107/14336 +step:44428/57344 train_time:25776165ms step_avg:580.18ms +step:44429/57344 train_time:25776180ms step_avg:580.17ms +step:44430/57344 train_time:25776426ms step_avg:580.16ms +step:44431/57344 train_time:25776979ms step_avg:580.16ms +grad accum step:11108/14336 +step:44432/57344 train_time:25778307ms step_avg:580.17ms +step:44433/57344 train_time:25778335ms step_avg:580.16ms +step:44434/57344 train_time:25778575ms step_avg:580.15ms +step:44435/57344 train_time:25779143ms step_avg:580.15ms +grad accum step:11109/14336 +step:44436/57344 train_time:25780438ms step_avg:580.17ms +step:44437/57344 train_time:25780449ms step_avg:580.16ms +step:44438/57344 train_time:25780694ms step_avg:580.15ms +step:44439/57344 train_time:25781236ms step_avg:580.15ms +grad accum step:11110/14336 +step:44440/57344 train_time:25782550ms step_avg:580.17ms +step:44441/57344 train_time:25782565ms step_avg:580.15ms +step:44442/57344 train_time:25782809ms step_avg:580.15ms +step:44443/57344 train_time:25783355ms step_avg:580.14ms +grad accum step:11111/14336 +step:44444/57344 train_time:25784664ms step_avg:580.16ms +step:44445/57344 train_time:25784681ms step_avg:580.15ms +step:44446/57344 train_time:25784939ms step_avg:580.14ms +step:44447/57344 train_time:25785520ms step_avg:580.14ms +grad accum step:11112/14336 +step:44448/57344 train_time:25786841ms step_avg:580.16ms +step:44449/57344 train_time:25786858ms step_avg:580.14ms +step:44450/57344 train_time:25787111ms step_avg:580.14ms +step:44451/57344 train_time:25787677ms step_avg:580.14ms +grad accum step:11113/14336 +step:44452/57344 train_time:25789094ms step_avg:580.16ms +step:44453/57344 train_time:25789108ms step_avg:580.14ms +step:44454/57344 train_time:25789364ms step_avg:580.14ms +step:44455/57344 train_time:25789939ms step_avg:580.14ms +grad accum step:11114/14336 +step:44456/57344 train_time:25791351ms step_avg:580.15ms +step:44457/57344 train_time:25791385ms step_avg:580.14ms +step:44458/57344 train_time:25791610ms step_avg:580.13ms +step:44459/57344 train_time:25792175ms step_avg:580.13ms +grad accum step:11115/14336 +step:44460/57344 train_time:25793513ms step_avg:580.15ms +step:44461/57344 train_time:25793527ms step_avg:580.14ms +step:44462/57344 train_time:25793767ms step_avg:580.13ms +step:44463/57344 train_time:25794337ms step_avg:580.13ms +grad accum step:11116/14336 +step:44464/57344 train_time:25795698ms step_avg:580.15ms +step:44465/57344 train_time:25795710ms step_avg:580.14ms +step:44466/57344 train_time:25795947ms step_avg:580.13ms +step:44467/57344 train_time:25796507ms step_avg:580.13ms +grad accum step:11117/14336 +step:44468/57344 train_time:25797827ms step_avg:580.14ms +step:44469/57344 train_time:25797844ms step_avg:580.13ms +step:44470/57344 train_time:25798101ms step_avg:580.12ms +step:44471/57344 train_time:25798674ms step_avg:580.12ms +grad accum step:11118/14336 +step:44472/57344 train_time:25800007ms step_avg:580.14ms +step:44473/57344 train_time:25800030ms step_avg:580.13ms +step:44474/57344 train_time:25800255ms step_avg:580.12ms +step:44475/57344 train_time:25800816ms step_avg:580.12ms +grad accum step:11119/14336 +step:44476/57344 train_time:25802119ms step_avg:580.14ms +step:44477/57344 train_time:25802130ms step_avg:580.12ms +step:44478/57344 train_time:25802381ms step_avg:580.12ms +step:44479/57344 train_time:25802945ms step_avg:580.12ms +grad accum step:11120/14336 +step:44480/57344 train_time:25804301ms step_avg:580.13ms +step:44480/57344 val_loss:5.594759 train_time:25804303ms step_avg:580.13ms +step:44481/57344 train_time:25804315ms step_avg:580.12ms +step:44482/57344 train_time:25804534ms step_avg:580.11ms +step:44483/57344 train_time:25805093ms step_avg:580.11ms +grad accum step:11121/14336 +step:44484/57344 train_time:25806480ms step_avg:580.13ms +step:44485/57344 train_time:25806494ms step_avg:580.12ms +step:44486/57344 train_time:25806726ms step_avg:580.11ms +step:44487/57344 train_time:25807276ms step_avg:580.11ms +grad accum step:11122/14336 +step:44488/57344 train_time:25808597ms step_avg:580.12ms +step:44489/57344 train_time:25808608ms step_avg:580.11ms +step:44490/57344 train_time:25808838ms step_avg:580.10ms +step:44491/57344 train_time:25809393ms step_avg:580.10ms +grad accum step:11123/14336 +step:44492/57344 train_time:25810726ms step_avg:580.12ms +step:44493/57344 train_time:25810743ms step_avg:580.11ms +step:44494/57344 train_time:25810989ms step_avg:580.10ms +step:44495/57344 train_time:25811547ms step_avg:580.10ms +grad accum step:11124/14336 +step:44496/57344 train_time:25812899ms step_avg:580.12ms +step:44497/57344 train_time:25812912ms step_avg:580.10ms +step:44498/57344 train_time:25813161ms step_avg:580.10ms +step:44499/57344 train_time:25813716ms step_avg:580.10ms +grad accum step:11125/14336 +step:44500/57344 train_time:25815044ms step_avg:580.11ms +step:44501/57344 train_time:25815058ms step_avg:580.10ms +step:44502/57344 train_time:25815310ms step_avg:580.09ms +step:44503/57344 train_time:25815890ms step_avg:580.09ms +grad accum step:11126/14336 +step:44504/57344 train_time:25817237ms step_avg:580.11ms +step:44505/57344 train_time:25817252ms step_avg:580.10ms +step:44506/57344 train_time:25817497ms step_avg:580.09ms +step:44507/57344 train_time:25818052ms step_avg:580.09ms +grad accum step:11127/14336 +step:44508/57344 train_time:25819377ms step_avg:580.11ms +step:44509/57344 train_time:25819389ms step_avg:580.09ms +step:44510/57344 train_time:25819639ms step_avg:580.09ms +step:44511/57344 train_time:25820195ms step_avg:580.09ms +grad accum step:11128/14336 +step:44512/57344 train_time:25821501ms step_avg:580.10ms +step:44513/57344 train_time:25821519ms step_avg:580.09ms +step:44514/57344 train_time:25821766ms step_avg:580.08ms +step:44515/57344 train_time:25822330ms step_avg:580.08ms +grad accum step:11129/14336 +step:44516/57344 train_time:25823788ms step_avg:580.10ms +step:44517/57344 train_time:25823801ms step_avg:580.09ms +step:44518/57344 train_time:25824016ms step_avg:580.08ms +step:44519/57344 train_time:25824577ms step_avg:580.08ms +grad accum step:11130/14336 +step:44520/57344 train_time:25825951ms step_avg:580.10ms +step:44521/57344 train_time:25825967ms step_avg:580.09ms +step:44522/57344 train_time:25826210ms step_avg:580.08ms +step:44523/57344 train_time:25826763ms step_avg:580.08ms +grad accum step:11131/14336 +step:44524/57344 train_time:25828071ms step_avg:580.09ms +step:44525/57344 train_time:25828086ms step_avg:580.08ms +step:44526/57344 train_time:25828332ms step_avg:580.07ms +step:44527/57344 train_time:25828890ms step_avg:580.07ms +grad accum step:11132/14336 +step:44528/57344 train_time:25830309ms step_avg:580.09ms +step:44529/57344 train_time:25830322ms step_avg:580.08ms +step:44530/57344 train_time:25830538ms step_avg:580.07ms +step:44531/57344 train_time:25831080ms step_avg:580.07ms +grad accum step:11133/14336 +step:44532/57344 train_time:25832432ms step_avg:580.09ms +step:44533/57344 train_time:25832449ms step_avg:580.07ms +step:44534/57344 train_time:25832677ms step_avg:580.07ms +step:44535/57344 train_time:25833238ms step_avg:580.07ms +grad accum step:11134/14336 +step:44536/57344 train_time:25834585ms step_avg:580.08ms +step:44537/57344 train_time:25834598ms step_avg:580.07ms +step:44538/57344 train_time:25834837ms step_avg:580.06ms +step:44539/57344 train_time:25835397ms step_avg:580.06ms +grad accum step:11135/14336 +step:44540/57344 train_time:25836841ms step_avg:580.08ms +step:44541/57344 train_time:25836858ms step_avg:580.07ms +step:44542/57344 train_time:25837105ms step_avg:580.06ms +step:44543/57344 train_time:25837671ms step_avg:580.06ms +grad accum step:11136/14336 +step:44544/57344 train_time:25838981ms step_avg:580.08ms +step:44544/57344 val_loss:5.592071 train_time:25838983ms step_avg:580.08ms +step:44545/57344 train_time:25838995ms step_avg:580.06ms +step:44546/57344 train_time:25839214ms step_avg:580.06ms +step:44547/57344 train_time:25839758ms step_avg:580.06ms +grad accum step:11137/14336 +step:44548/57344 train_time:25841057ms step_avg:580.07ms +step:44549/57344 train_time:25841077ms step_avg:580.06ms +step:44550/57344 train_time:25841323ms step_avg:580.05ms +step:44551/57344 train_time:25841925ms step_avg:580.05ms +grad accum step:11138/14336 +step:44552/57344 train_time:25843435ms step_avg:580.07ms +step:44553/57344 train_time:25843449ms step_avg:580.06ms +step:44554/57344 train_time:25843689ms step_avg:580.05ms +step:44555/57344 train_time:25844251ms step_avg:580.05ms +grad accum step:11139/14336 +step:44556/57344 train_time:25845663ms step_avg:580.07ms +step:44557/57344 train_time:25845685ms step_avg:580.06ms +step:44558/57344 train_time:25845919ms step_avg:580.05ms +step:44559/57344 train_time:25846502ms step_avg:580.05ms +grad accum step:11140/14336 +step:44560/57344 train_time:25848253ms step_avg:580.08ms +step:44561/57344 train_time:25848268ms step_avg:580.06ms +step:44562/57344 train_time:25848487ms step_avg:580.06ms +step:44563/57344 train_time:25849035ms step_avg:580.06ms +grad accum step:11141/14336 +step:44564/57344 train_time:25850331ms step_avg:580.07ms +step:44565/57344 train_time:25850343ms step_avg:580.06ms +step:44566/57344 train_time:25850583ms step_avg:580.05ms +step:44567/57344 train_time:25851132ms step_avg:580.05ms +grad accum step:11142/14336 +step:44568/57344 train_time:25852441ms step_avg:580.07ms +step:44569/57344 train_time:25852455ms step_avg:580.05ms +step:44570/57344 train_time:25852698ms step_avg:580.05ms +step:44571/57344 train_time:25853247ms step_avg:580.05ms +grad accum step:11143/14336 +step:44572/57344 train_time:25854600ms step_avg:580.06ms +step:44573/57344 train_time:25854632ms step_avg:580.05ms +step:44574/57344 train_time:25854851ms step_avg:580.04ms +step:44575/57344 train_time:25855391ms step_avg:580.04ms +grad accum step:11144/14336 +step:44576/57344 train_time:25856755ms step_avg:580.06ms +step:44577/57344 train_time:25856772ms step_avg:580.05ms +step:44578/57344 train_time:25857024ms step_avg:580.04ms +step:44579/57344 train_time:25857587ms step_avg:580.04ms +grad accum step:11145/14336 +step:44580/57344 train_time:25858919ms step_avg:580.06ms +step:44581/57344 train_time:25858934ms step_avg:580.04ms +step:44582/57344 train_time:25859190ms step_avg:580.04ms +step:44583/57344 train_time:25859763ms step_avg:580.04ms +grad accum step:11146/14336 +step:44584/57344 train_time:25861132ms step_avg:580.05ms +step:44585/57344 train_time:25861153ms step_avg:580.04ms +step:44586/57344 train_time:25861382ms step_avg:580.03ms +step:44587/57344 train_time:25861925ms step_avg:580.03ms +grad accum step:11147/14336 +step:44588/57344 train_time:25863230ms step_avg:580.05ms +step:44589/57344 train_time:25863248ms step_avg:580.04ms +step:44590/57344 train_time:25863495ms step_avg:580.03ms +step:44591/57344 train_time:25864072ms step_avg:580.03ms +grad accum step:11148/14336 +step:44592/57344 train_time:25865389ms step_avg:580.05ms +step:44593/57344 train_time:25865420ms step_avg:580.03ms +step:44594/57344 train_time:25865649ms step_avg:580.03ms +step:44595/57344 train_time:25866194ms step_avg:580.02ms +grad accum step:11149/14336 +step:44596/57344 train_time:25867629ms step_avg:580.04ms +step:44597/57344 train_time:25867647ms step_avg:580.03ms +step:44598/57344 train_time:25867868ms step_avg:580.02ms +step:44599/57344 train_time:25868422ms step_avg:580.02ms +grad accum step:11150/14336 +step:44600/57344 train_time:25869811ms step_avg:580.04ms +step:44601/57344 train_time:25869829ms step_avg:580.03ms +step:44602/57344 train_time:25870048ms step_avg:580.02ms +step:44603/57344 train_time:25870592ms step_avg:580.02ms +grad accum step:11151/14336 +step:44604/57344 train_time:25871920ms step_avg:580.04ms +step:44605/57344 train_time:25871936ms step_avg:580.02ms +step:44606/57344 train_time:25872186ms step_avg:580.02ms +step:44607/57344 train_time:25872750ms step_avg:580.02ms +grad accum step:11152/14336 +step:44608/57344 train_time:25874053ms step_avg:580.03ms +step:44608/57344 val_loss:5.579262 train_time:25874062ms step_avg:580.03ms +step:44609/57344 train_time:25874073ms step_avg:580.02ms +step:44610/57344 train_time:25874298ms step_avg:580.01ms +step:44611/57344 train_time:25874861ms step_avg:580.01ms +grad accum step:11153/14336 +step:44612/57344 train_time:25876195ms step_avg:580.03ms +step:44613/57344 train_time:25876210ms step_avg:580.02ms +step:44614/57344 train_time:25876466ms step_avg:580.01ms +step:44615/57344 train_time:25877036ms step_avg:580.01ms +grad accum step:11154/14336 +step:44616/57344 train_time:25878348ms step_avg:580.02ms +step:44617/57344 train_time:25878364ms step_avg:580.01ms +step:44618/57344 train_time:25878621ms step_avg:580.00ms +step:44619/57344 train_time:25879203ms step_avg:580.00ms +grad accum step:11155/14336 +step:44620/57344 train_time:25880547ms step_avg:580.02ms +step:44621/57344 train_time:25880562ms step_avg:580.01ms +step:44622/57344 train_time:25880806ms step_avg:580.00ms +step:44623/57344 train_time:25881354ms step_avg:580.00ms +grad accum step:11156/14336 +step:44624/57344 train_time:25882699ms step_avg:580.02ms +step:44625/57344 train_time:25882713ms step_avg:580.00ms +step:44626/57344 train_time:25882964ms step_avg:580.00ms +step:44627/57344 train_time:25883524ms step_avg:580.00ms +grad accum step:11157/14336 +step:44628/57344 train_time:25884850ms step_avg:580.01ms +step:44629/57344 train_time:25884867ms step_avg:580.00ms +step:44630/57344 train_time:25885108ms step_avg:579.99ms +step:44631/57344 train_time:25885654ms step_avg:579.99ms +grad accum step:11158/14336 +step:44632/57344 train_time:25887003ms step_avg:580.01ms +step:44633/57344 train_time:25887019ms step_avg:580.00ms +step:44634/57344 train_time:25887274ms step_avg:579.99ms +step:44635/57344 train_time:25887852ms step_avg:579.99ms +grad accum step:11159/14336 +step:44636/57344 train_time:25889254ms step_avg:580.01ms +step:44637/57344 train_time:25889279ms step_avg:580.00ms +step:44638/57344 train_time:25889499ms step_avg:579.99ms +step:44639/57344 train_time:25890045ms step_avg:579.99ms +grad accum step:11160/14336 +step:44640/57344 train_time:25891380ms step_avg:580.00ms +step:44641/57344 train_time:25891396ms step_avg:579.99ms +step:44642/57344 train_time:25891651ms step_avg:579.98ms +step:44643/57344 train_time:25892227ms step_avg:579.98ms +grad accum step:11161/14336 +step:44644/57344 train_time:25893594ms step_avg:580.00ms +step:44645/57344 train_time:25893614ms step_avg:579.99ms +step:44646/57344 train_time:25893857ms step_avg:579.98ms +step:44647/57344 train_time:25894415ms step_avg:579.98ms +grad accum step:11162/14336 +step:44648/57344 train_time:25895687ms step_avg:580.00ms +step:44649/57344 train_time:25895704ms step_avg:579.98ms +step:44650/57344 train_time:25895949ms step_avg:579.98ms +step:44651/57344 train_time:25896510ms step_avg:579.98ms +grad accum step:11163/14336 +step:44652/57344 train_time:25897876ms step_avg:579.99ms +step:44653/57344 train_time:25897891ms step_avg:579.98ms +step:44654/57344 train_time:25898122ms step_avg:579.97ms +step:44655/57344 train_time:25898698ms step_avg:579.97ms +grad accum step:11164/14336 +step:44656/57344 train_time:25900091ms step_avg:579.99ms +step:44657/57344 train_time:25900110ms step_avg:579.98ms +step:44658/57344 train_time:25900333ms step_avg:579.97ms +step:44659/57344 train_time:25900900ms step_avg:579.97ms +grad accum step:11165/14336 +step:44660/57344 train_time:25902265ms step_avg:579.99ms +step:44661/57344 train_time:25902279ms step_avg:579.98ms +step:44662/57344 train_time:25902534ms step_avg:579.97ms +step:44663/57344 train_time:25903103ms step_avg:579.97ms +grad accum step:11166/14336 +step:44664/57344 train_time:25904473ms step_avg:579.99ms +step:44665/57344 train_time:25904490ms step_avg:579.97ms +step:44666/57344 train_time:25904714ms step_avg:579.96ms +step:44667/57344 train_time:25905259ms step_avg:579.96ms +grad accum step:11167/14336 +step:44668/57344 train_time:25906539ms step_avg:579.98ms +step:44669/57344 train_time:25906555ms step_avg:579.97ms +step:44670/57344 train_time:25906799ms step_avg:579.96ms +step:44671/57344 train_time:25907346ms step_avg:579.96ms +grad accum step:11168/14336 +step:44672/57344 train_time:25908687ms step_avg:579.98ms +step:44672/57344 val_loss:5.572731 train_time:25908690ms step_avg:579.98ms +step:44673/57344 train_time:25908701ms step_avg:579.96ms +step:44674/57344 train_time:25908927ms step_avg:579.96ms +step:44675/57344 train_time:25909502ms step_avg:579.96ms +grad accum step:11169/14336 +step:44676/57344 train_time:25910995ms step_avg:579.98ms +step:44677/57344 train_time:25911016ms step_avg:579.96ms +step:44678/57344 train_time:25911242ms step_avg:579.96ms +step:44679/57344 train_time:25911805ms step_avg:579.95ms +grad accum step:11170/14336 +step:44680/57344 train_time:25913149ms step_avg:579.97ms +step:44681/57344 train_time:25913164ms step_avg:579.96ms +step:44682/57344 train_time:25913411ms step_avg:579.95ms +step:44683/57344 train_time:25913964ms step_avg:579.95ms +grad accum step:11171/14336 +step:44684/57344 train_time:25915271ms step_avg:579.97ms +step:44685/57344 train_time:25915289ms step_avg:579.96ms +step:44686/57344 train_time:25915534ms step_avg:579.95ms +step:44687/57344 train_time:25916094ms step_avg:579.95ms +grad accum step:11172/14336 +step:44688/57344 train_time:25917421ms step_avg:579.96ms +step:44689/57344 train_time:25917449ms step_avg:579.95ms +step:44690/57344 train_time:25917680ms step_avg:579.94ms +step:44691/57344 train_time:25918223ms step_avg:579.94ms +grad accum step:11173/14336 +step:44692/57344 train_time:25919554ms step_avg:579.96ms +step:44693/57344 train_time:25919572ms step_avg:579.95ms +step:44694/57344 train_time:25919807ms step_avg:579.94ms +step:44695/57344 train_time:25920360ms step_avg:579.94ms +grad accum step:11174/14336 +step:44696/57344 train_time:25921708ms step_avg:579.96ms +step:44697/57344 train_time:25921724ms step_avg:579.94ms +step:44698/57344 train_time:25921968ms step_avg:579.94ms +step:44699/57344 train_time:25922526ms step_avg:579.94ms +grad accum step:11175/14336 +step:44700/57344 train_time:25923871ms step_avg:579.95ms +step:44701/57344 train_time:25923884ms step_avg:579.94ms +step:44702/57344 train_time:25924133ms step_avg:579.93ms +step:44703/57344 train_time:25924692ms step_avg:579.93ms +grad accum step:11176/14336 +step:44704/57344 train_time:25926059ms step_avg:579.95ms +step:44705/57344 train_time:25926081ms step_avg:579.94ms +step:44706/57344 train_time:25926309ms step_avg:579.93ms +step:44707/57344 train_time:25926856ms step_avg:579.93ms +grad accum step:11177/14336 +step:44708/57344 train_time:25928178ms step_avg:579.94ms +step:44709/57344 train_time:25928193ms step_avg:579.93ms +step:44710/57344 train_time:25928445ms step_avg:579.92ms +step:44711/57344 train_time:25929003ms step_avg:579.92ms +grad accum step:11178/14336 +step:44712/57344 train_time:25930338ms step_avg:579.94ms +step:44713/57344 train_time:25930415ms step_avg:579.93ms +step:44714/57344 train_time:25930651ms step_avg:579.92ms +step:44715/57344 train_time:25931239ms step_avg:579.92ms +grad accum step:11179/14336 +step:44716/57344 train_time:25932557ms step_avg:579.94ms +step:44717/57344 train_time:25932571ms step_avg:579.93ms +step:44718/57344 train_time:25932815ms step_avg:579.92ms +step:44719/57344 train_time:25933360ms step_avg:579.92ms +grad accum step:11180/14336 +step:44720/57344 train_time:25934674ms step_avg:579.93ms +step:44721/57344 train_time:25934691ms step_avg:579.92ms +step:44722/57344 train_time:25934938ms step_avg:579.91ms +step:44723/57344 train_time:25935509ms step_avg:579.91ms +grad accum step:11181/14336 +step:44724/57344 train_time:25936871ms step_avg:579.93ms +step:44725/57344 train_time:25936887ms step_avg:579.92ms +step:44726/57344 train_time:25937133ms step_avg:579.91ms +step:44727/57344 train_time:25937692ms step_avg:579.91ms +grad accum step:11182/14336 +step:44728/57344 train_time:25939105ms step_avg:579.93ms +step:44729/57344 train_time:25939118ms step_avg:579.92ms +step:44730/57344 train_time:25939338ms step_avg:579.91ms +step:44731/57344 train_time:25939887ms step_avg:579.91ms +grad accum step:11183/14336 +step:44732/57344 train_time:25941254ms step_avg:579.93ms +step:44733/57344 train_time:25941275ms step_avg:579.91ms +step:44734/57344 train_time:25941503ms step_avg:579.91ms +step:44735/57344 train_time:25942045ms step_avg:579.90ms +grad accum step:11184/14336 +step:44736/57344 train_time:25943410ms step_avg:579.92ms +step:44736/57344 val_loss:5.566231 train_time:25943413ms step_avg:579.92ms +step:44737/57344 train_time:25943425ms step_avg:579.91ms +step:44738/57344 train_time:25943648ms step_avg:579.90ms +step:44739/57344 train_time:25944208ms step_avg:579.90ms +grad accum step:11185/14336 +step:44740/57344 train_time:25945591ms step_avg:579.92ms +step:44741/57344 train_time:25945615ms step_avg:579.91ms +step:44742/57344 train_time:25945839ms step_avg:579.90ms +step:44743/57344 train_time:25946400ms step_avg:579.90ms +grad accum step:11186/14336 +step:44744/57344 train_time:25947708ms step_avg:579.91ms +step:44745/57344 train_time:25947725ms step_avg:579.90ms +step:44746/57344 train_time:25947973ms step_avg:579.89ms +step:44747/57344 train_time:25948531ms step_avg:579.89ms +grad accum step:11187/14336 +step:44748/57344 train_time:25949876ms step_avg:579.91ms +step:44749/57344 train_time:25949892ms step_avg:579.90ms +step:44750/57344 train_time:25950141ms step_avg:579.89ms +step:44751/57344 train_time:25950699ms step_avg:579.89ms +grad accum step:11188/14336 +step:44752/57344 train_time:25952101ms step_avg:579.91ms +step:44753/57344 train_time:25952117ms step_avg:579.90ms +step:44754/57344 train_time:25952341ms step_avg:579.89ms +step:44755/57344 train_time:25952900ms step_avg:579.89ms +grad accum step:11189/14336 +step:44756/57344 train_time:25954236ms step_avg:579.91ms +step:44757/57344 train_time:25954251ms step_avg:579.89ms +step:44758/57344 train_time:25954502ms step_avg:579.89ms +step:44759/57344 train_time:25955060ms step_avg:579.88ms +grad accum step:11190/14336 +step:44760/57344 train_time:25956379ms step_avg:579.90ms +step:44761/57344 train_time:25956394ms step_avg:579.89ms +step:44762/57344 train_time:25956645ms step_avg:579.88ms +step:44763/57344 train_time:25957211ms step_avg:579.88ms +grad accum step:11191/14336 +step:44764/57344 train_time:25958553ms step_avg:579.90ms +step:44765/57344 train_time:25958574ms step_avg:579.89ms +step:44766/57344 train_time:25958811ms step_avg:579.88ms +step:44767/57344 train_time:25959363ms step_avg:579.88ms +grad accum step:11192/14336 +step:44768/57344 train_time:25960698ms step_avg:579.89ms +step:44769/57344 train_time:25960713ms step_avg:579.88ms +step:44770/57344 train_time:25960968ms step_avg:579.87ms +step:44771/57344 train_time:25961543ms step_avg:579.87ms +grad accum step:11193/14336 +step:44772/57344 train_time:25962874ms step_avg:579.89ms +step:44773/57344 train_time:25962889ms step_avg:579.88ms +step:44774/57344 train_time:25963139ms step_avg:579.87ms +step:44775/57344 train_time:25963707ms step_avg:579.87ms +grad accum step:11194/14336 +step:44776/57344 train_time:25965046ms step_avg:579.89ms +step:44777/57344 train_time:25965060ms step_avg:579.87ms +step:44778/57344 train_time:25965281ms step_avg:579.87ms +step:44779/57344 train_time:25965831ms step_avg:579.87ms +grad accum step:11195/14336 +step:44780/57344 train_time:25967150ms step_avg:579.88ms +step:44781/57344 train_time:25967163ms step_avg:579.87ms +step:44782/57344 train_time:25967406ms step_avg:579.86ms +step:44783/57344 train_time:25967992ms step_avg:579.86ms +grad accum step:11196/14336 +step:44784/57344 train_time:25969345ms step_avg:579.88ms +step:44785/57344 train_time:25969361ms step_avg:579.87ms +step:44786/57344 train_time:25969602ms step_avg:579.86ms +step:44787/57344 train_time:25970150ms step_avg:579.86ms +grad accum step:11197/14336 +step:44788/57344 train_time:25971552ms step_avg:579.88ms +step:44789/57344 train_time:25971565ms step_avg:579.86ms +step:44790/57344 train_time:25971786ms step_avg:579.86ms +step:44791/57344 train_time:25972335ms step_avg:579.86ms +grad accum step:11198/14336 +step:44792/57344 train_time:25973644ms step_avg:579.87ms +step:44793/57344 train_time:25973739ms step_avg:579.86ms +step:44794/57344 train_time:25973957ms step_avg:579.85ms +step:44795/57344 train_time:25974496ms step_avg:579.85ms +grad accum step:11199/14336 +step:44796/57344 train_time:25975816ms step_avg:579.87ms +step:44797/57344 train_time:25975841ms step_avg:579.86ms +step:44798/57344 train_time:25976075ms step_avg:579.85ms +step:44799/57344 train_time:25976635ms step_avg:579.85ms +grad accum step:11200/14336 +step:44800/57344 train_time:25978097ms step_avg:579.87ms +step:44800/57344 val_loss:5.560724 train_time:25978106ms step_avg:579.87ms +step:44801/57344 train_time:25978118ms step_avg:579.86ms +step:44802/57344 train_time:25978343ms step_avg:579.85ms +step:44803/57344 train_time:25978906ms step_avg:579.85ms +grad accum step:11201/14336 +step:44804/57344 train_time:25980253ms step_avg:579.86ms +step:44805/57344 train_time:25980269ms step_avg:579.85ms +step:44806/57344 train_time:25980523ms step_avg:579.84ms +step:44807/57344 train_time:25981092ms step_avg:579.84ms +grad accum step:11202/14336 +step:44808/57344 train_time:25982425ms step_avg:579.86ms +step:44809/57344 train_time:25982449ms step_avg:579.85ms +step:44810/57344 train_time:25982677ms step_avg:579.84ms +step:44811/57344 train_time:25983238ms step_avg:579.84ms +grad accum step:11203/14336 +step:44812/57344 train_time:25984569ms step_avg:579.86ms +step:44813/57344 train_time:25984581ms step_avg:579.84ms +step:44814/57344 train_time:25984813ms step_avg:579.84ms +step:44815/57344 train_time:25985362ms step_avg:579.84ms +grad accum step:11204/14336 +step:44816/57344 train_time:25986706ms step_avg:579.85ms +step:44817/57344 train_time:25986725ms step_avg:579.84ms +step:44818/57344 train_time:25986967ms step_avg:579.83ms +step:44819/57344 train_time:25987524ms step_avg:579.83ms +grad accum step:11205/14336 +step:44820/57344 train_time:25988875ms step_avg:579.85ms +step:44821/57344 train_time:25988893ms step_avg:579.84ms +step:44822/57344 train_time:25989138ms step_avg:579.83ms +step:44823/57344 train_time:25989720ms step_avg:579.83ms +grad accum step:11206/14336 +step:44824/57344 train_time:25991128ms step_avg:579.85ms +step:44825/57344 train_time:25991142ms step_avg:579.84ms +step:44826/57344 train_time:25991359ms step_avg:579.83ms +step:44827/57344 train_time:25991908ms step_avg:579.83ms +grad accum step:11207/14336 +step:44828/57344 train_time:25993230ms step_avg:579.84ms +step:44829/57344 train_time:25993243ms step_avg:579.83ms +step:44830/57344 train_time:25993482ms step_avg:579.82ms +step:44831/57344 train_time:25994026ms step_avg:579.82ms +grad accum step:11208/14336 +step:44832/57344 train_time:25995379ms step_avg:579.84ms +step:44833/57344 train_time:25995409ms step_avg:579.83ms +step:44834/57344 train_time:25995633ms step_avg:579.82ms +step:44835/57344 train_time:25996182ms step_avg:579.82ms +grad accum step:11209/14336 +step:44836/57344 train_time:25997564ms step_avg:579.84ms +step:44837/57344 train_time:25997594ms step_avg:579.82ms +step:44838/57344 train_time:25997812ms step_avg:579.82ms +step:44839/57344 train_time:25998361ms step_avg:579.82ms +grad accum step:11210/14336 +step:44840/57344 train_time:25999675ms step_avg:579.83ms +step:44841/57344 train_time:25999691ms step_avg:579.82ms +step:44842/57344 train_time:25999926ms step_avg:579.81ms +step:44843/57344 train_time:26000488ms step_avg:579.81ms +grad accum step:11211/14336 +step:44844/57344 train_time:26001827ms step_avg:579.83ms +step:44845/57344 train_time:26001847ms step_avg:579.82ms +step:44846/57344 train_time:26002088ms step_avg:579.81ms +step:44847/57344 train_time:26002658ms step_avg:579.81ms +grad accum step:11212/14336 +step:44848/57344 train_time:26004115ms step_avg:579.83ms +step:44849/57344 train_time:26004144ms step_avg:579.82ms +step:44850/57344 train_time:26004365ms step_avg:579.81ms +step:44851/57344 train_time:26004925ms step_avg:579.81ms +grad accum step:11213/14336 +step:44852/57344 train_time:26006259ms step_avg:579.82ms +step:44853/57344 train_time:26006274ms step_avg:579.81ms +step:44854/57344 train_time:26006515ms step_avg:579.80ms +step:44855/57344 train_time:26007057ms step_avg:579.80ms +grad accum step:11214/14336 +step:44856/57344 train_time:26008365ms step_avg:579.82ms +step:44857/57344 train_time:26008380ms step_avg:579.81ms +step:44858/57344 train_time:26008627ms step_avg:579.80ms +step:44859/57344 train_time:26009178ms step_avg:579.80ms +grad accum step:11215/14336 +step:44860/57344 train_time:26010509ms step_avg:579.82ms +step:44861/57344 train_time:26011021ms step_avg:579.81ms +step:44862/57344 train_time:26011235ms step_avg:579.81ms +step:44863/57344 train_time:26011782ms step_avg:579.80ms +grad accum step:11216/14336 +step:44864/57344 train_time:26013182ms step_avg:579.82ms +step:44864/57344 val_loss:5.553838 train_time:26013184ms step_avg:579.82ms +step:44865/57344 train_time:26013196ms step_avg:579.81ms +step:44866/57344 train_time:26013421ms step_avg:579.80ms +step:44867/57344 train_time:26013992ms step_avg:579.80ms +grad accum step:11217/14336 +step:44868/57344 train_time:26015362ms step_avg:579.82ms +step:44869/57344 train_time:26015378ms step_avg:579.81ms +step:44870/57344 train_time:26015611ms step_avg:579.80ms +step:44871/57344 train_time:26016158ms step_avg:579.80ms +grad accum step:11218/14336 +step:44872/57344 train_time:26017450ms step_avg:579.81ms +step:44873/57344 train_time:26017468ms step_avg:579.80ms +step:44874/57344 train_time:26017712ms step_avg:579.79ms +step:44875/57344 train_time:26018255ms step_avg:579.79ms +grad accum step:11219/14336 +step:44876/57344 train_time:26019564ms step_avg:579.81ms +step:44877/57344 train_time:26019581ms step_avg:579.80ms +step:44878/57344 train_time:26019827ms step_avg:579.79ms +step:44879/57344 train_time:26020380ms step_avg:579.79ms +grad accum step:11220/14336 +step:44880/57344 train_time:26021701ms step_avg:579.81ms +step:44881/57344 train_time:26021721ms step_avg:579.79ms +step:44882/57344 train_time:26021963ms step_avg:579.79ms +step:44883/57344 train_time:26022507ms step_avg:579.79ms +grad accum step:11221/14336 +step:44884/57344 train_time:26023810ms step_avg:579.80ms +step:44885/57344 train_time:26023825ms step_avg:579.79ms +step:44886/57344 train_time:26024076ms step_avg:579.78ms +step:44887/57344 train_time:26024635ms step_avg:579.78ms +grad accum step:11222/14336 +step:44888/57344 train_time:26026021ms step_avg:579.80ms +step:44889/57344 train_time:26026033ms step_avg:579.79ms +step:44890/57344 train_time:26026282ms step_avg:579.78ms +step:44891/57344 train_time:26026843ms step_avg:579.78ms +grad accum step:11223/14336 +step:44892/57344 train_time:26028150ms step_avg:579.79ms +step:44893/57344 train_time:26028169ms step_avg:579.78ms +step:44894/57344 train_time:26028406ms step_avg:579.77ms +step:44895/57344 train_time:26028945ms step_avg:579.77ms +grad accum step:11224/14336 +step:44896/57344 train_time:26030251ms step_avg:579.79ms +step:44897/57344 train_time:26030272ms step_avg:579.78ms +step:44898/57344 train_time:26030515ms step_avg:579.77ms +step:44899/57344 train_time:26031065ms step_avg:579.77ms +grad accum step:11225/14336 +step:44900/57344 train_time:26032438ms step_avg:579.79ms +step:44901/57344 train_time:26032455ms step_avg:579.77ms +step:44902/57344 train_time:26032699ms step_avg:579.77ms +step:44903/57344 train_time:26033253ms step_avg:579.77ms +grad accum step:11226/14336 +step:44904/57344 train_time:26034611ms step_avg:579.78ms +step:44905/57344 train_time:26034627ms step_avg:579.77ms +step:44906/57344 train_time:26034871ms step_avg:579.76ms +step:44907/57344 train_time:26035421ms step_avg:579.76ms +grad accum step:11227/14336 +step:44908/57344 train_time:26036745ms step_avg:579.78ms +step:44909/57344 train_time:26036760ms step_avg:579.77ms +step:44910/57344 train_time:26037004ms step_avg:579.76ms +step:44911/57344 train_time:26037546ms step_avg:579.76ms +grad accum step:11228/14336 +step:44912/57344 train_time:26038929ms step_avg:579.78ms +step:44913/57344 train_time:26038945ms step_avg:579.76ms +step:44914/57344 train_time:26039196ms step_avg:579.76ms +step:44915/57344 train_time:26039757ms step_avg:579.76ms +grad accum step:11229/14336 +step:44916/57344 train_time:26041077ms step_avg:579.77ms +step:44917/57344 train_time:26041094ms step_avg:579.76ms +step:44918/57344 train_time:26041347ms step_avg:579.75ms +step:44919/57344 train_time:26041911ms step_avg:579.75ms +grad accum step:11230/14336 +step:44920/57344 train_time:26043226ms step_avg:579.77ms +step:44921/57344 train_time:26043242ms step_avg:579.76ms +step:44922/57344 train_time:26043500ms step_avg:579.75ms +step:44923/57344 train_time:26044081ms step_avg:579.75ms +grad accum step:11231/14336 +step:44924/57344 train_time:26045438ms step_avg:579.77ms +step:44925/57344 train_time:26045454ms step_avg:579.75ms +step:44926/57344 train_time:26045701ms step_avg:579.75ms +step:44927/57344 train_time:26046266ms step_avg:579.75ms +grad accum step:11232/14336 +step:44928/57344 train_time:26047603ms step_avg:579.76ms +step:44928/57344 val_loss:5.549256 train_time:26047608ms step_avg:579.76ms +step:44929/57344 train_time:26047620ms step_avg:579.75ms +step:44930/57344 train_time:26047843ms step_avg:579.74ms +step:44931/57344 train_time:26048402ms step_avg:579.74ms +grad accum step:11233/14336 +step:44932/57344 train_time:26049781ms step_avg:579.76ms +step:44933/57344 train_time:26049817ms step_avg:579.75ms +step:44934/57344 train_time:26050055ms step_avg:579.74ms +step:44935/57344 train_time:26050645ms step_avg:579.74ms +grad accum step:11234/14336 +step:44936/57344 train_time:26052006ms step_avg:579.76ms +step:44937/57344 train_time:26052021ms step_avg:579.75ms +step:44938/57344 train_time:26052259ms step_avg:579.74ms +step:44939/57344 train_time:26052841ms step_avg:579.74ms +grad accum step:11235/14336 +step:44940/57344 train_time:26054154ms step_avg:579.75ms +step:44941/57344 train_time:26054170ms step_avg:579.74ms +step:44942/57344 train_time:26054415ms step_avg:579.73ms +step:44943/57344 train_time:26054989ms step_avg:579.73ms +grad accum step:11236/14336 +step:44944/57344 train_time:26056389ms step_avg:579.75ms +step:44945/57344 train_time:26056404ms step_avg:579.74ms +step:44946/57344 train_time:26056650ms step_avg:579.73ms +step:44947/57344 train_time:26057201ms step_avg:579.73ms +grad accum step:11237/14336 +step:44948/57344 train_time:26058533ms step_avg:579.75ms +step:44949/57344 train_time:26058554ms step_avg:579.74ms +step:44950/57344 train_time:26058795ms step_avg:579.73ms +step:44951/57344 train_time:26059360ms step_avg:579.73ms +grad accum step:11238/14336 +step:44952/57344 train_time:26060712ms step_avg:579.75ms +step:44953/57344 train_time:26060733ms step_avg:579.73ms +step:44954/57344 train_time:26060972ms step_avg:579.73ms +step:44955/57344 train_time:26061528ms step_avg:579.72ms +grad accum step:11239/14336 +step:44956/57344 train_time:26062864ms step_avg:579.74ms +step:44957/57344 train_time:26062930ms step_avg:579.73ms +step:44958/57344 train_time:26063148ms step_avg:579.72ms +step:44959/57344 train_time:26063699ms step_avg:579.72ms +grad accum step:11240/14336 +step:44960/57344 train_time:26065006ms step_avg:579.74ms +step:44961/57344 train_time:26065017ms step_avg:579.73ms +step:44962/57344 train_time:26065263ms step_avg:579.72ms +step:44963/57344 train_time:26065829ms step_avg:579.72ms +grad accum step:11241/14336 +step:44964/57344 train_time:26067174ms step_avg:579.73ms +step:44965/57344 train_time:26067191ms step_avg:579.72ms +step:44966/57344 train_time:26067439ms step_avg:579.71ms +step:44967/57344 train_time:26067984ms step_avg:579.71ms +grad accum step:11242/14336 +step:44968/57344 train_time:26069322ms step_avg:579.73ms +step:44969/57344 train_time:26069334ms step_avg:579.72ms +step:44970/57344 train_time:26069564ms step_avg:579.71ms +step:44971/57344 train_time:26070121ms step_avg:579.71ms +grad accum step:11243/14336 +step:44972/57344 train_time:26071440ms step_avg:579.73ms +step:44973/57344 train_time:26071454ms step_avg:579.71ms +step:44974/57344 train_time:26071705ms step_avg:579.71ms +step:44975/57344 train_time:26072264ms step_avg:579.71ms +grad accum step:11244/14336 +step:44976/57344 train_time:26073634ms step_avg:579.72ms +step:44977/57344 train_time:26073654ms step_avg:579.71ms +step:44978/57344 train_time:26073886ms step_avg:579.70ms +step:44979/57344 train_time:26074426ms step_avg:579.70ms +grad accum step:11245/14336 +step:44980/57344 train_time:26075717ms step_avg:579.72ms +step:44981/57344 train_time:26075738ms step_avg:579.71ms +step:44982/57344 train_time:26075981ms step_avg:579.70ms +step:44983/57344 train_time:26076544ms step_avg:579.70ms +grad accum step:11246/14336 +step:44984/57344 train_time:26077901ms step_avg:579.72ms +step:44985/57344 train_time:26077933ms step_avg:579.70ms +step:44986/57344 train_time:26078154ms step_avg:579.69ms +step:44987/57344 train_time:26078702ms step_avg:579.69ms +grad accum step:11247/14336 +step:44988/57344 train_time:26080002ms step_avg:579.71ms +step:44989/57344 train_time:26080017ms step_avg:579.70ms +step:44990/57344 train_time:26080268ms step_avg:579.69ms +step:44991/57344 train_time:26080829ms step_avg:579.69ms +grad accum step:11248/14336 +step:44992/57344 train_time:26082178ms step_avg:579.71ms +step:44992/57344 val_loss:5.542297 train_time:26082183ms step_avg:579.71ms +step:44993/57344 train_time:26082195ms step_avg:579.69ms +step:44994/57344 train_time:26082417ms step_avg:579.69ms +step:44995/57344 train_time:26082967ms step_avg:579.69ms +grad accum step:11249/14336 +step:44996/57344 train_time:26084297ms step_avg:579.70ms +step:44997/57344 train_time:26084312ms step_avg:579.69ms +step:44998/57344 train_time:26084541ms step_avg:579.68ms +step:44999/57344 train_time:26085091ms step_avg:579.68ms +grad accum step:11250/14336 +step:45000/57344 train_time:26086405ms step_avg:579.70ms +step:45001/57344 train_time:26086422ms step_avg:579.69ms +step:45002/57344 train_time:26086668ms step_avg:579.68ms +step:45003/57344 train_time:26087215ms step_avg:579.68ms +grad accum step:11251/14336 +step:45004/57344 train_time:26088533ms step_avg:579.69ms +step:45005/57344 train_time:26088550ms step_avg:579.68ms +step:45006/57344 train_time:26088801ms step_avg:579.67ms +step:45007/57344 train_time:26089359ms step_avg:579.67ms +grad accum step:11252/14336 +step:45008/57344 train_time:26090683ms step_avg:579.69ms +step:45009/57344 train_time:26090716ms step_avg:579.68ms +step:45010/57344 train_time:26090940ms step_avg:579.67ms +step:45011/57344 train_time:26091503ms step_avg:579.67ms +grad accum step:11253/14336 +step:45012/57344 train_time:26092827ms step_avg:579.69ms +step:45013/57344 train_time:26092842ms step_avg:579.67ms +step:45014/57344 train_time:26093106ms step_avg:579.67ms +step:45015/57344 train_time:26093696ms step_avg:579.67ms +grad accum step:11254/14336 +step:45016/57344 train_time:26095037ms step_avg:579.68ms +step:45017/57344 train_time:26104647ms step_avg:579.88ms +step:45018/57344 train_time:26104908ms step_avg:579.88ms +step:45019/57344 train_time:26105454ms step_avg:579.88ms +grad accum step:11255/14336 +step:45020/57344 train_time:26106757ms step_avg:579.89ms +step:45021/57344 train_time:26106771ms step_avg:579.88ms +step:45022/57344 train_time:26107029ms step_avg:579.87ms +step:45023/57344 train_time:26107615ms step_avg:579.87ms +grad accum step:11256/14336 +step:45024/57344 train_time:26108978ms step_avg:579.89ms +step:45025/57344 train_time:26108991ms step_avg:579.88ms +step:45026/57344 train_time:26109234ms step_avg:579.87ms +step:45027/57344 train_time:26109797ms step_avg:579.87ms +grad accum step:11257/14336 +step:45028/57344 train_time:26111127ms step_avg:579.89ms +step:45029/57344 train_time:26111160ms step_avg:579.87ms +step:45030/57344 train_time:26111388ms step_avg:579.87ms +step:45031/57344 train_time:26111952ms step_avg:579.87ms +grad accum step:11258/14336 +step:45032/57344 train_time:26115716ms step_avg:579.94ms +step:45033/57344 train_time:26115736ms step_avg:579.92ms +step:45034/57344 train_time:26116019ms step_avg:579.92ms +step:45035/57344 train_time:26116605ms step_avg:579.92ms +grad accum step:11259/14336 +step:45036/57344 train_time:26118038ms step_avg:579.94ms +step:45037/57344 train_time:26118078ms step_avg:579.92ms +step:45038/57344 train_time:26118297ms step_avg:579.92ms +step:45039/57344 train_time:26118851ms step_avg:579.92ms +grad accum step:11260/14336 +step:45040/57344 train_time:26120191ms step_avg:579.93ms +step:45041/57344 train_time:26120206ms step_avg:579.92ms +step:45042/57344 train_time:26120453ms step_avg:579.91ms +step:45043/57344 train_time:26121006ms step_avg:579.91ms +grad accum step:11261/14336 +step:45044/57344 train_time:26122352ms step_avg:579.93ms +step:45045/57344 train_time:26122368ms step_avg:579.92ms +step:45046/57344 train_time:26122599ms step_avg:579.91ms +step:45047/57344 train_time:26123145ms step_avg:579.91ms +grad accum step:11262/14336 +step:45048/57344 train_time:26124455ms step_avg:579.92ms +step:45049/57344 train_time:26124472ms step_avg:579.91ms +step:45050/57344 train_time:26124723ms step_avg:579.91ms +step:45051/57344 train_time:26125285ms step_avg:579.90ms +grad accum step:11263/14336 +step:45052/57344 train_time:26126609ms step_avg:579.92ms +step:45053/57344 train_time:26126649ms step_avg:579.91ms +step:45054/57344 train_time:26126878ms step_avg:579.90ms +step:45055/57344 train_time:26127449ms step_avg:579.90ms +grad accum step:11264/14336 +step:45056/57344 train_time:26128772ms step_avg:579.92ms +step:45056/57344 val_loss:5.544479 train_time:26128800ms step_avg:579.92ms +step:45057/57344 train_time:26128812ms step_avg:579.91ms +step:45058/57344 train_time:26129036ms step_avg:579.90ms +step:45059/57344 train_time:26129598ms step_avg:579.90ms +grad accum step:11265/14336 +step:45060/57344 train_time:26130951ms step_avg:579.91ms +step:45061/57344 train_time:26130984ms step_avg:579.90ms +step:45062/57344 train_time:26131203ms step_avg:579.89ms +step:45063/57344 train_time:26131746ms step_avg:579.89ms +grad accum step:11266/14336 +step:45064/57344 train_time:26133075ms step_avg:579.91ms +step:45065/57344 train_time:26133092ms step_avg:579.90ms +step:45066/57344 train_time:26133348ms step_avg:579.89ms +step:45067/57344 train_time:26133919ms step_avg:579.89ms +grad accum step:11267/14336 +step:45068/57344 train_time:26135236ms step_avg:579.91ms +step:45069/57344 train_time:26135253ms step_avg:579.89ms +step:45070/57344 train_time:26135496ms step_avg:579.89ms +step:45071/57344 train_time:26136046ms step_avg:579.89ms +grad accum step:11268/14336 +step:45072/57344 train_time:26137356ms step_avg:579.90ms +step:45073/57344 train_time:26137396ms step_avg:579.89ms +step:45074/57344 train_time:26137617ms step_avg:579.88ms +step:45075/57344 train_time:26138170ms step_avg:579.88ms +grad accum step:11269/14336 +step:45076/57344 train_time:26139486ms step_avg:579.90ms +step:45077/57344 train_time:26139502ms step_avg:579.89ms +step:45078/57344 train_time:26139754ms step_avg:579.88ms +step:45079/57344 train_time:26140318ms step_avg:579.88ms +grad accum step:11270/14336 +step:45080/57344 train_time:26141611ms step_avg:579.89ms +step:45081/57344 train_time:26141626ms step_avg:579.88ms +step:45082/57344 train_time:26141878ms step_avg:579.87ms +step:45083/57344 train_time:26142444ms step_avg:579.87ms +grad accum step:11271/14336 +step:45084/57344 train_time:26143773ms step_avg:579.89ms +step:45085/57344 train_time:26143788ms step_avg:579.88ms +step:45086/57344 train_time:26144042ms step_avg:579.87ms +step:45087/57344 train_time:26144615ms step_avg:579.87ms +grad accum step:11272/14336 +step:45088/57344 train_time:26145948ms step_avg:579.89ms +step:45089/57344 train_time:26145973ms step_avg:579.87ms +step:45090/57344 train_time:26146204ms step_avg:579.87ms +step:45091/57344 train_time:26146790ms step_avg:579.87ms +grad accum step:11273/14336 +step:45092/57344 train_time:26148174ms step_avg:579.88ms +step:45093/57344 train_time:26148188ms step_avg:579.87ms +step:45094/57344 train_time:26148441ms step_avg:579.87ms +step:45095/57344 train_time:26149002ms step_avg:579.86ms +grad accum step:11274/14336 +step:45096/57344 train_time:26150313ms step_avg:579.88ms +step:45097/57344 train_time:26150328ms step_avg:579.87ms +step:45098/57344 train_time:26150575ms step_avg:579.86ms +step:45099/57344 train_time:26151136ms step_avg:579.86ms +grad accum step:11275/14336 +step:45100/57344 train_time:26152492ms step_avg:579.88ms +step:45101/57344 train_time:26152503ms step_avg:579.87ms +step:45102/57344 train_time:26152750ms step_avg:579.86ms +step:45103/57344 train_time:26153314ms step_avg:579.86ms +grad accum step:11276/14336 +step:45104/57344 train_time:26154652ms step_avg:579.87ms +step:45105/57344 train_time:26154669ms step_avg:579.86ms +step:45106/57344 train_time:26154916ms step_avg:579.85ms +step:45107/57344 train_time:26155468ms step_avg:579.85ms +grad accum step:11277/14336 +step:45108/57344 train_time:26156807ms step_avg:579.87ms +step:45109/57344 train_time:26156824ms step_avg:579.86ms +step:45110/57344 train_time:26157070ms step_avg:579.85ms +step:45111/57344 train_time:26157613ms step_avg:579.85ms +grad accum step:11278/14336 +step:45112/57344 train_time:26158952ms step_avg:579.87ms +step:45113/57344 train_time:26158969ms step_avg:579.85ms +step:45114/57344 train_time:26159220ms step_avg:579.85ms +step:45115/57344 train_time:26159776ms step_avg:579.85ms +grad accum step:11279/14336 +step:45116/57344 train_time:26161152ms step_avg:579.86ms +step:45117/57344 train_time:26161162ms step_avg:579.85ms +step:45118/57344 train_time:26161411ms step_avg:579.84ms +step:45119/57344 train_time:26161979ms step_avg:579.84ms +grad accum step:11280/14336 +step:45120/57344 train_time:26163317ms step_avg:579.86ms +step:45120/57344 val_loss:5.551186 train_time:26163320ms step_avg:579.86ms +step:45121/57344 train_time:26163332ms step_avg:579.85ms +step:45122/57344 train_time:26163549ms step_avg:579.84ms +step:45123/57344 train_time:26164092ms step_avg:579.84ms +grad accum step:11281/14336 +step:45124/57344 train_time:26165391ms step_avg:579.86ms +step:45125/57344 train_time:26165408ms step_avg:579.84ms +step:45126/57344 train_time:26165661ms step_avg:579.84ms +step:45127/57344 train_time:26166227ms step_avg:579.84ms +grad accum step:11282/14336 +step:45128/57344 train_time:26167545ms step_avg:579.85ms +step:45129/57344 train_time:26167562ms step_avg:579.84ms +step:45130/57344 train_time:26167812ms step_avg:579.83ms +step:45131/57344 train_time:26168367ms step_avg:579.83ms +grad accum step:11283/14336 +step:45132/57344 train_time:26169689ms step_avg:579.85ms +step:45133/57344 train_time:26169706ms step_avg:579.84ms +step:45134/57344 train_time:26169955ms step_avg:579.83ms +step:45135/57344 train_time:26170520ms step_avg:579.83ms +grad accum step:11284/14336 +step:45136/57344 train_time:26171810ms step_avg:579.84ms +step:45137/57344 train_time:26171827ms step_avg:579.83ms +step:45138/57344 train_time:26172083ms step_avg:579.82ms +step:45139/57344 train_time:26172658ms step_avg:579.82ms +grad accum step:11285/14336 +step:45140/57344 train_time:26173977ms step_avg:579.84ms +step:45141/57344 train_time:26173992ms step_avg:579.83ms +step:45142/57344 train_time:26174238ms step_avg:579.82ms +step:45143/57344 train_time:26174780ms step_avg:579.82ms +grad accum step:11286/14336 +step:45144/57344 train_time:26176099ms step_avg:579.84ms +step:45145/57344 train_time:26176115ms step_avg:579.82ms +step:45146/57344 train_time:26176362ms step_avg:579.82ms +step:45147/57344 train_time:26176914ms step_avg:579.82ms +grad accum step:11287/14336 +step:45148/57344 train_time:26178240ms step_avg:579.83ms +step:45149/57344 train_time:26178262ms step_avg:579.82ms +step:45150/57344 train_time:26178484ms step_avg:579.81ms +step:45151/57344 train_time:26179034ms step_avg:579.81ms +grad accum step:11288/14336 +step:45152/57344 train_time:26180396ms step_avg:579.83ms +step:45153/57344 train_time:26180413ms step_avg:579.82ms +step:45154/57344 train_time:26180659ms step_avg:579.81ms +step:45155/57344 train_time:26181214ms step_avg:579.81ms +grad accum step:11289/14336 +step:45156/57344 train_time:26182591ms step_avg:579.83ms +step:45157/57344 train_time:26182626ms step_avg:579.81ms +step:45158/57344 train_time:26182846ms step_avg:579.81ms +step:45159/57344 train_time:26183392ms step_avg:579.80ms +grad accum step:11290/14336 +step:45160/57344 train_time:26184712ms step_avg:579.82ms +step:45161/57344 train_time:26184729ms step_avg:579.81ms +step:45162/57344 train_time:26184973ms step_avg:579.80ms +step:45163/57344 train_time:26185519ms step_avg:579.80ms +grad accum step:11291/14336 +step:45164/57344 train_time:26186821ms step_avg:579.82ms +step:45165/57344 train_time:26186837ms step_avg:579.80ms +step:45166/57344 train_time:26187087ms step_avg:579.80ms +step:45167/57344 train_time:26187643ms step_avg:579.80ms +grad accum step:11292/14336 +step:45168/57344 train_time:26189013ms step_avg:579.81ms +step:45169/57344 train_time:26189030ms step_avg:579.80ms +step:45170/57344 train_time:26189282ms step_avg:579.79ms +step:45171/57344 train_time:26189846ms step_avg:579.79ms +grad accum step:11293/14336 +step:45172/57344 train_time:26191187ms step_avg:579.81ms +step:45173/57344 train_time:26191202ms step_avg:579.80ms +step:45174/57344 train_time:26191465ms step_avg:579.79ms +step:45175/57344 train_time:26192059ms step_avg:579.79ms +grad accum step:11294/14336 +step:45176/57344 train_time:26193466ms step_avg:579.81ms +step:45177/57344 train_time:26193480ms step_avg:579.80ms +step:45178/57344 train_time:26193719ms step_avg:579.79ms +step:45179/57344 train_time:26194281ms step_avg:579.79ms +grad accum step:11295/14336 +step:45180/57344 train_time:26195657ms step_avg:579.81ms +step:45181/57344 train_time:26195673ms step_avg:579.79ms +step:45182/57344 train_time:26195921ms step_avg:579.79ms +step:45183/57344 train_time:26196473ms step_avg:579.79ms +grad accum step:11296/14336 +step:45184/57344 train_time:26197825ms step_avg:579.80ms +step:45184/57344 val_loss:5.559153 train_time:26197826ms step_avg:579.80ms +step:45185/57344 train_time:26198579ms step_avg:579.81ms +step:45186/57344 train_time:26198616ms step_avg:579.79ms +step:45187/57344 train_time:26199149ms step_avg:579.79ms +grad accum step:11297/14336 +step:45188/57344 train_time:26200768ms step_avg:579.82ms +step:45189/57344 train_time:26200797ms step_avg:579.80ms +step:45190/57344 train_time:26201015ms step_avg:579.80ms +step:45191/57344 train_time:26201558ms step_avg:579.80ms +grad accum step:11298/14336 +step:45192/57344 train_time:26202849ms step_avg:579.81ms +step:45193/57344 train_time:26202862ms step_avg:579.80ms +step:45194/57344 train_time:26203106ms step_avg:579.79ms +step:45195/57344 train_time:26203661ms step_avg:579.79ms +grad accum step:11299/14336 +step:45196/57344 train_time:26204963ms step_avg:579.81ms +step:45197/57344 train_time:26204974ms step_avg:579.79ms +step:45198/57344 train_time:26205210ms step_avg:579.79ms +step:45199/57344 train_time:26205746ms step_avg:579.79ms +grad accum step:11300/14336 +step:45200/57344 train_time:26207080ms step_avg:579.80ms +step:45201/57344 train_time:26207096ms step_avg:579.79ms +step:45202/57344 train_time:26207345ms step_avg:579.78ms +step:45203/57344 train_time:26207910ms step_avg:579.78ms +grad accum step:11301/14336 +step:45204/57344 train_time:26209254ms step_avg:579.80ms +step:45205/57344 train_time:26209281ms step_avg:579.79ms +step:45206/57344 train_time:26209506ms step_avg:579.78ms +step:45207/57344 train_time:26210053ms step_avg:579.78ms +grad accum step:11302/14336 +step:45208/57344 train_time:26211354ms step_avg:579.79ms +step:45209/57344 train_time:26211367ms step_avg:579.78ms +step:45210/57344 train_time:26211602ms step_avg:579.77ms +step:45211/57344 train_time:26212139ms step_avg:579.77ms +grad accum step:11303/14336 +step:45212/57344 train_time:26213441ms step_avg:579.79ms +step:45213/57344 train_time:26213457ms step_avg:579.78ms +step:45214/57344 train_time:26213708ms step_avg:579.77ms +step:45215/57344 train_time:26214283ms step_avg:579.77ms +grad accum step:11304/14336 +step:45216/57344 train_time:26215657ms step_avg:579.79ms +step:45217/57344 train_time:26215671ms step_avg:579.77ms +step:45218/57344 train_time:26215895ms step_avg:579.77ms +step:45219/57344 train_time:26216449ms step_avg:579.77ms +grad accum step:11305/14336 +step:45220/57344 train_time:26217805ms step_avg:579.78ms +step:45221/57344 train_time:26217820ms step_avg:579.77ms +step:45222/57344 train_time:26218046ms step_avg:579.76ms +step:45223/57344 train_time:26218588ms step_avg:579.76ms +grad accum step:11306/14336 +step:45224/57344 train_time:26219867ms step_avg:579.78ms +step:45225/57344 train_time:26219889ms step_avg:579.77ms +step:45226/57344 train_time:26220129ms step_avg:579.76ms +step:45227/57344 train_time:26220677ms step_avg:579.76ms +grad accum step:11307/14336 +step:45228/57344 train_time:26221996ms step_avg:579.77ms +step:45229/57344 train_time:26222017ms step_avg:579.76ms +step:45230/57344 train_time:26222241ms step_avg:579.75ms +step:45231/57344 train_time:26222797ms step_avg:579.75ms +grad accum step:11308/14336 +step:45232/57344 train_time:26224115ms step_avg:579.77ms +step:45233/57344 train_time:26224132ms step_avg:579.76ms +step:45234/57344 train_time:26224363ms step_avg:579.75ms +step:45235/57344 train_time:26224924ms step_avg:579.75ms +grad accum step:11309/14336 +step:45236/57344 train_time:26226266ms step_avg:579.77ms +step:45237/57344 train_time:26226304ms step_avg:579.75ms +step:45238/57344 train_time:26226524ms step_avg:579.75ms +step:45239/57344 train_time:26227070ms step_avg:579.74ms +grad accum step:11310/14336 +step:45240/57344 train_time:26228358ms step_avg:579.76ms +step:45241/57344 train_time:26228375ms step_avg:579.75ms +step:45242/57344 train_time:26228619ms step_avg:579.74ms +step:45243/57344 train_time:26229165ms step_avg:579.74ms +grad accum step:11311/14336 +step:45244/57344 train_time:26230469ms step_avg:579.76ms +step:45245/57344 train_time:26230480ms step_avg:579.74ms +step:45246/57344 train_time:26230727ms step_avg:579.74ms +step:45247/57344 train_time:26231280ms step_avg:579.74ms +grad accum step:11312/14336 +step:45248/57344 train_time:26232620ms step_avg:579.75ms +step:45248/57344 val_loss:5.567746 train_time:26232623ms step_avg:579.75ms +step:45249/57344 train_time:26232635ms step_avg:579.74ms +step:45250/57344 train_time:26232860ms step_avg:579.73ms +step:45251/57344 train_time:26233415ms step_avg:579.73ms +grad accum step:11313/14336 +step:45252/57344 train_time:26234743ms step_avg:579.75ms +step:45253/57344 train_time:26234755ms step_avg:579.74ms +step:45254/57344 train_time:26235000ms step_avg:579.73ms +step:45255/57344 train_time:26235549ms step_avg:579.73ms +grad accum step:11314/14336 +step:45256/57344 train_time:26236927ms step_avg:579.74ms +step:45257/57344 train_time:26236941ms step_avg:579.73ms +step:45258/57344 train_time:26237190ms step_avg:579.72ms +step:45259/57344 train_time:26237736ms step_avg:579.72ms +grad accum step:11315/14336 +step:45260/57344 train_time:26239066ms step_avg:579.74ms +step:45261/57344 train_time:26239082ms step_avg:579.73ms +step:45262/57344 train_time:26239335ms step_avg:579.72ms +step:45263/57344 train_time:26239903ms step_avg:579.72ms +grad accum step:11316/14336 +step:45264/57344 train_time:26241243ms step_avg:579.74ms +step:45265/57344 train_time:26241305ms step_avg:579.73ms +step:45266/57344 train_time:26241525ms step_avg:579.72ms +step:45267/57344 train_time:26242080ms step_avg:579.72ms +grad accum step:11317/14336 +step:45268/57344 train_time:26243377ms step_avg:579.73ms +step:45269/57344 train_time:26243390ms step_avg:579.72ms +step:45270/57344 train_time:26243643ms step_avg:579.71ms +step:45271/57344 train_time:26244211ms step_avg:579.71ms +grad accum step:11318/14336 +step:45272/57344 train_time:26245538ms step_avg:579.73ms +step:45273/57344 train_time:26245555ms step_avg:579.72ms +step:45274/57344 train_time:26245804ms step_avg:579.71ms +step:45275/57344 train_time:26246366ms step_avg:579.71ms +grad accum step:11319/14336 +step:45276/57344 train_time:26247686ms step_avg:579.73ms +step:45277/57344 train_time:26247700ms step_avg:579.71ms +step:45278/57344 train_time:26247951ms step_avg:579.71ms +step:45279/57344 train_time:26248518ms step_avg:579.71ms +grad accum step:11320/14336 +step:45280/57344 train_time:26249855ms step_avg:579.72ms +step:45281/57344 train_time:26249866ms step_avg:579.71ms +step:45282/57344 train_time:26250088ms step_avg:579.70ms +step:45283/57344 train_time:26250633ms step_avg:579.70ms +grad accum step:11321/14336 +step:45284/57344 train_time:26251976ms step_avg:579.72ms +step:45285/57344 train_time:26251991ms step_avg:579.71ms +step:45286/57344 train_time:26252235ms step_avg:579.70ms +step:45287/57344 train_time:26252780ms step_avg:579.70ms +grad accum step:11322/14336 +step:45288/57344 train_time:26254101ms step_avg:579.71ms +step:45289/57344 train_time:26254117ms step_avg:579.70ms +step:45290/57344 train_time:26254362ms step_avg:579.69ms +step:45291/57344 train_time:26254910ms step_avg:579.69ms +grad accum step:11323/14336 +step:45292/57344 train_time:26256217ms step_avg:579.71ms +step:45293/57344 train_time:26256233ms step_avg:579.70ms +step:45294/57344 train_time:26256479ms step_avg:579.69ms +step:45295/57344 train_time:26257030ms step_avg:579.69ms +grad accum step:11324/14336 +step:45296/57344 train_time:26258388ms step_avg:579.71ms +step:45297/57344 train_time:26258427ms step_avg:579.69ms +step:45298/57344 train_time:26258650ms step_avg:579.69ms +step:45299/57344 train_time:26259210ms step_avg:579.69ms +grad accum step:11325/14336 +step:45300/57344 train_time:26260626ms step_avg:579.70ms +step:45301/57344 train_time:26260652ms step_avg:579.69ms +step:45302/57344 train_time:26260884ms step_avg:579.68ms +step:45303/57344 train_time:26261465ms step_avg:579.68ms +grad accum step:11326/14336 +step:45304/57344 train_time:26262799ms step_avg:579.70ms +step:45305/57344 train_time:26262815ms step_avg:579.69ms +step:45306/57344 train_time:26263061ms step_avg:579.68ms +step:45307/57344 train_time:26263626ms step_avg:579.68ms +grad accum step:11327/14336 +step:45308/57344 train_time:26265012ms step_avg:579.70ms +step:45309/57344 train_time:26265024ms step_avg:579.69ms +step:45310/57344 train_time:26265280ms step_avg:579.68ms +step:45311/57344 train_time:26265851ms step_avg:579.68ms +grad accum step:11328/14336 +step:45312/57344 train_time:26267177ms step_avg:579.70ms +step:45312/57344 val_loss:5.576434 train_time:26267181ms step_avg:579.70ms +step:45313/57344 train_time:26267192ms step_avg:579.68ms +step:45314/57344 train_time:26267428ms step_avg:579.68ms +step:45315/57344 train_time:26268012ms step_avg:579.68ms +grad accum step:11329/14336 +step:45316/57344 train_time:26269372ms step_avg:579.69ms +step:45317/57344 train_time:26269387ms step_avg:579.68ms +step:45318/57344 train_time:26269609ms step_avg:579.67ms +step:45319/57344 train_time:26270182ms step_avg:579.67ms +grad accum step:11330/14336 +step:45320/57344 train_time:26271603ms step_avg:579.69ms +step:45321/57344 train_time:26271619ms step_avg:579.68ms +step:45322/57344 train_time:26271842ms step_avg:579.67ms +step:45323/57344 train_time:26272396ms step_avg:579.67ms +grad accum step:11331/14336 +step:45324/57344 train_time:26273746ms step_avg:579.69ms +step:45325/57344 train_time:26273762ms step_avg:579.67ms +step:45326/57344 train_time:26274008ms step_avg:579.67ms +step:45327/57344 train_time:26274546ms step_avg:579.67ms +grad accum step:11332/14336 +step:45328/57344 train_time:26275843ms step_avg:579.68ms +step:45329/57344 train_time:26275866ms step_avg:579.67ms +step:45330/57344 train_time:26276100ms step_avg:579.66ms +step:45331/57344 train_time:26276673ms step_avg:579.66ms +grad accum step:11333/14336 +step:45332/57344 train_time:26278031ms step_avg:579.68ms +step:45333/57344 train_time:26278048ms step_avg:579.67ms +step:45334/57344 train_time:26278291ms step_avg:579.66ms +step:45335/57344 train_time:26278845ms step_avg:579.66ms +grad accum step:11334/14336 +step:45336/57344 train_time:26280219ms step_avg:579.68ms +step:45337/57344 train_time:26280239ms step_avg:579.66ms +step:45338/57344 train_time:26280475ms step_avg:579.66ms +step:45339/57344 train_time:26281029ms step_avg:579.66ms +grad accum step:11335/14336 +step:45340/57344 train_time:26282493ms step_avg:579.68ms +step:45341/57344 train_time:26282519ms step_avg:579.66ms +step:45342/57344 train_time:26282744ms step_avg:579.66ms +step:45343/57344 train_time:26283318ms step_avg:579.66ms +grad accum step:11336/14336 +step:45344/57344 train_time:26284701ms step_avg:579.67ms +step:45345/57344 train_time:26284717ms step_avg:579.66ms +step:45346/57344 train_time:26284951ms step_avg:579.65ms +step:45347/57344 train_time:26285495ms step_avg:579.65ms +grad accum step:11337/14336 +step:45348/57344 train_time:26286838ms step_avg:579.67ms +step:45349/57344 train_time:26286857ms step_avg:579.66ms +step:45350/57344 train_time:26287091ms step_avg:579.65ms +step:45351/57344 train_time:26287660ms step_avg:579.65ms +grad accum step:11338/14336 +step:45352/57344 train_time:26289072ms step_avg:579.67ms +step:45353/57344 train_time:26289093ms step_avg:579.65ms +step:45354/57344 train_time:26289333ms step_avg:579.65ms +step:45355/57344 train_time:26289885ms step_avg:579.65ms +grad accum step:11339/14336 +step:45356/57344 train_time:26291224ms step_avg:579.66ms +step:45357/57344 train_time:26291245ms step_avg:579.65ms +step:45358/57344 train_time:26291487ms step_avg:579.64ms +step:45359/57344 train_time:26292063ms step_avg:579.64ms +grad accum step:11340/14336 +step:45360/57344 train_time:26293456ms step_avg:579.66ms +step:45361/57344 train_time:26293476ms step_avg:579.65ms +step:45362/57344 train_time:26293700ms step_avg:579.64ms +step:45363/57344 train_time:26294245ms step_avg:579.64ms +grad accum step:11341/14336 +step:45364/57344 train_time:26295548ms step_avg:579.66ms +step:45365/57344 train_time:26295564ms step_avg:579.64ms +step:45366/57344 train_time:26295812ms step_avg:579.64ms +step:45367/57344 train_time:26296361ms step_avg:579.64ms +grad accum step:11342/14336 +step:45368/57344 train_time:26297732ms step_avg:579.65ms +step:45369/57344 train_time:26297749ms step_avg:579.64ms +step:45370/57344 train_time:26297993ms step_avg:579.63ms +step:45371/57344 train_time:26298541ms step_avg:579.63ms +grad accum step:11343/14336 +step:45372/57344 train_time:26299855ms step_avg:579.65ms +step:45373/57344 train_time:26299870ms step_avg:579.64ms +step:45374/57344 train_time:26300123ms step_avg:579.63ms +step:45375/57344 train_time:26300689ms step_avg:579.63ms +grad accum step:11344/14336 +step:45376/57344 train_time:26302051ms step_avg:579.65ms +step:45376/57344 val_loss:5.587078 train_time:26302064ms step_avg:579.65ms +step:45377/57344 train_time:26302076ms step_avg:579.63ms +step:45378/57344 train_time:26302299ms step_avg:579.63ms +step:45379/57344 train_time:26302850ms step_avg:579.63ms +grad accum step:11345/14336 +step:45380/57344 train_time:26304203ms step_avg:579.64ms +step:45381/57344 train_time:26304240ms step_avg:579.63ms +step:45382/57344 train_time:26304461ms step_avg:579.62ms +step:45383/57344 train_time:26305008ms step_avg:579.62ms +grad accum step:11346/14336 +step:45384/57344 train_time:26306428ms step_avg:579.64ms +step:45385/57344 train_time:26306523ms step_avg:579.63ms +step:45386/57344 train_time:26306747ms step_avg:579.62ms +step:45387/57344 train_time:26307309ms step_avg:579.62ms +grad accum step:11347/14336 +step:45388/57344 train_time:26308659ms step_avg:579.64ms +step:45389/57344 train_time:26308675ms step_avg:579.63ms +step:45390/57344 train_time:26308928ms step_avg:579.62ms +step:45391/57344 train_time:26309505ms step_avg:579.62ms +grad accum step:11348/14336 +step:45392/57344 train_time:26310828ms step_avg:579.64ms +step:45393/57344 train_time:26310844ms step_avg:579.62ms +step:45394/57344 train_time:26311097ms step_avg:579.62ms +step:45395/57344 train_time:26311661ms step_avg:579.62ms +grad accum step:11349/14336 +step:45396/57344 train_time:26313087ms step_avg:579.63ms +step:45397/57344 train_time:26313107ms step_avg:579.62ms +step:45398/57344 train_time:26313327ms step_avg:579.61ms +step:45399/57344 train_time:26313883ms step_avg:579.61ms +grad accum step:11350/14336 +step:45400/57344 train_time:26315411ms step_avg:579.63ms +step:45401/57344 train_time:26315428ms step_avg:579.62ms +step:45402/57344 train_time:26315638ms step_avg:579.61ms +step:45403/57344 train_time:26316176ms step_avg:579.61ms +grad accum step:11351/14336 +step:45404/57344 train_time:26317523ms step_avg:579.63ms +step:45405/57344 train_time:26317594ms step_avg:579.62ms +step:45406/57344 train_time:26317832ms step_avg:579.61ms +step:45407/57344 train_time:26318428ms step_avg:579.61ms +grad accum step:11352/14336 +step:45408/57344 train_time:26319868ms step_avg:579.63ms +step:45409/57344 train_time:26319907ms step_avg:579.62ms +step:45410/57344 train_time:26320125ms step_avg:579.61ms +step:45411/57344 train_time:26320674ms step_avg:579.61ms +grad accum step:11353/14336 +step:45412/57344 train_time:26321992ms step_avg:579.63ms +step:45413/57344 train_time:26322012ms step_avg:579.61ms +step:45414/57344 train_time:26322253ms step_avg:579.61ms +step:45415/57344 train_time:26322811ms step_avg:579.61ms +grad accum step:11354/14336 +step:45416/57344 train_time:26324179ms step_avg:579.62ms +step:45417/57344 train_time:26324193ms step_avg:579.61ms +step:45418/57344 train_time:26324442ms step_avg:579.60ms +step:45419/57344 train_time:26325007ms step_avg:579.60ms +grad accum step:11355/14336 +step:45420/57344 train_time:26326435ms step_avg:579.62ms +step:45421/57344 train_time:26326450ms step_avg:579.61ms +step:45422/57344 train_time:26326698ms step_avg:579.60ms +step:45423/57344 train_time:26327261ms step_avg:579.60ms +grad accum step:11356/14336 +step:45424/57344 train_time:26328594ms step_avg:579.62ms +step:45425/57344 train_time:26328608ms step_avg:579.61ms +step:45426/57344 train_time:26328856ms step_avg:579.60ms +step:45427/57344 train_time:26329423ms step_avg:579.60ms +grad accum step:11357/14336 +step:45428/57344 train_time:26330756ms step_avg:579.62ms +step:45429/57344 train_time:26330772ms step_avg:579.60ms +step:45430/57344 train_time:26331014ms step_avg:579.60ms +step:45431/57344 train_time:26331564ms step_avg:579.59ms +grad accum step:11358/14336 +step:45432/57344 train_time:26332872ms step_avg:579.61ms +step:45433/57344 train_time:26332891ms step_avg:579.60ms +step:45434/57344 train_time:26333131ms step_avg:579.59ms +step:45435/57344 train_time:26333687ms step_avg:579.59ms +grad accum step:11359/14336 +step:45436/57344 train_time:26334996ms step_avg:579.61ms +step:45437/57344 train_time:26335009ms step_avg:579.59ms +step:45438/57344 train_time:26335257ms step_avg:579.59ms +step:45439/57344 train_time:26335825ms step_avg:579.59ms +grad accum step:11360/14336 +step:45440/57344 train_time:26337196ms step_avg:579.60ms +step:45440/57344 val_loss:5.598009 train_time:26337200ms step_avg:579.60ms +step:45441/57344 train_time:26337212ms step_avg:579.59ms +step:45442/57344 train_time:26337432ms step_avg:579.58ms +step:45443/57344 train_time:26337985ms step_avg:579.58ms +grad accum step:11361/14336 +step:45444/57344 train_time:26339404ms step_avg:579.60ms +step:45445/57344 train_time:26339441ms step_avg:579.59ms +step:45446/57344 train_time:26339665ms step_avg:579.58ms +step:45447/57344 train_time:26340218ms step_avg:579.58ms +grad accum step:11362/14336 +step:45448/57344 train_time:26341546ms step_avg:579.60ms +step:45449/57344 train_time:26341567ms step_avg:579.59ms +step:45450/57344 train_time:26341810ms step_avg:579.58ms +step:45451/57344 train_time:26342366ms step_avg:579.58ms +grad accum step:11363/14336 +step:45452/57344 train_time:26343722ms step_avg:579.59ms +step:45453/57344 train_time:26343754ms step_avg:579.58ms +step:45454/57344 train_time:26343988ms step_avg:579.57ms +step:45455/57344 train_time:26344565ms step_avg:579.57ms +grad accum step:11364/14336 +step:45456/57344 train_time:26345964ms step_avg:579.59ms +step:45457/57344 train_time:26345981ms step_avg:579.58ms +step:45458/57344 train_time:26346249ms step_avg:579.57ms +step:45459/57344 train_time:26346855ms step_avg:579.57ms +grad accum step:11365/14336 +step:45460/57344 train_time:26348196ms step_avg:579.59ms +step:45461/57344 train_time:26348213ms step_avg:579.58ms +step:45462/57344 train_time:26348462ms step_avg:579.57ms +step:45463/57344 train_time:26349010ms step_avg:579.57ms +grad accum step:11366/14336 +step:45464/57344 train_time:26350382ms step_avg:579.59ms +step:45465/57344 train_time:26350421ms step_avg:579.58ms +step:45466/57344 train_time:26350643ms step_avg:579.57ms +step:45467/57344 train_time:26351212ms step_avg:579.57ms +grad accum step:11367/14336 +step:45468/57344 train_time:26352630ms step_avg:579.59ms +step:45469/57344 train_time:26352648ms step_avg:579.57ms +step:45470/57344 train_time:26352896ms step_avg:579.57ms +step:45471/57344 train_time:26353443ms step_avg:579.57ms +grad accum step:11368/14336 +step:45472/57344 train_time:26354757ms step_avg:579.58ms +step:45473/57344 train_time:26354774ms step_avg:579.57ms +step:45474/57344 train_time:26355029ms step_avg:579.56ms +step:45475/57344 train_time:26355599ms step_avg:579.56ms +grad accum step:11369/14336 +step:45476/57344 train_time:26356947ms step_avg:579.58ms +step:45477/57344 train_time:26356964ms step_avg:579.57ms +step:45478/57344 train_time:26357214ms step_avg:579.56ms +step:45479/57344 train_time:26357785ms step_avg:579.56ms +grad accum step:11370/14336 +step:45480/57344 train_time:26359185ms step_avg:579.58ms +step:45481/57344 train_time:26359197ms step_avg:579.57ms +step:45482/57344 train_time:26359421ms step_avg:579.56ms +step:45483/57344 train_time:26359965ms step_avg:579.56ms +grad accum step:11371/14336 +step:45484/57344 train_time:26361291ms step_avg:579.57ms +step:45485/57344 train_time:26361308ms step_avg:579.56ms +step:45486/57344 train_time:26361560ms step_avg:579.55ms +step:45487/57344 train_time:26362119ms step_avg:579.55ms +grad accum step:11372/14336 +step:45488/57344 train_time:26363465ms step_avg:579.57ms +step:45489/57344 train_time:26363482ms step_avg:579.56ms +step:45490/57344 train_time:26363742ms step_avg:579.55ms +step:45491/57344 train_time:26364314ms step_avg:579.55ms +grad accum step:11373/14336 +step:45492/57344 train_time:26365625ms step_avg:579.57ms +step:45493/57344 train_time:26365647ms step_avg:579.55ms +step:45494/57344 train_time:26365871ms step_avg:579.55ms +step:45495/57344 train_time:26366421ms step_avg:579.55ms +grad accum step:11374/14336 +step:45496/57344 train_time:26367730ms step_avg:579.56ms +step:45497/57344 train_time:26367774ms step_avg:579.55ms +step:45498/57344 train_time:26367995ms step_avg:579.54ms +step:45499/57344 train_time:26368539ms step_avg:579.54ms +grad accum step:11375/14336 +step:45500/57344 train_time:26369871ms step_avg:579.56ms +step:45501/57344 train_time:26369882ms step_avg:579.55ms +step:45502/57344 train_time:26370134ms step_avg:579.54ms +step:45503/57344 train_time:26370707ms step_avg:579.54ms +grad accum step:11376/14336 +step:45504/57344 train_time:26372047ms step_avg:579.55ms +step:45504/57344 val_loss:5.610972 train_time:26372048ms step_avg:579.55ms +step:45505/57344 train_time:26372060ms step_avg:579.54ms +step:45506/57344 train_time:26372289ms step_avg:579.53ms +step:45507/57344 train_time:26372875ms step_avg:579.53ms +grad accum step:11377/14336 +step:45508/57344 train_time:26374256ms step_avg:579.55ms +step:45509/57344 train_time:26374277ms step_avg:579.54ms +step:45510/57344 train_time:26374513ms step_avg:579.53ms +step:45511/57344 train_time:26375100ms step_avg:579.53ms +grad accum step:11378/14336 +step:45512/57344 train_time:26376469ms step_avg:579.55ms +step:45513/57344 train_time:26376480ms step_avg:579.54ms +step:45514/57344 train_time:26376720ms step_avg:579.53ms +step:45515/57344 train_time:26377259ms step_avg:579.53ms +grad accum step:11379/14336 +step:45516/57344 train_time:26378562ms step_avg:579.54ms +step:45517/57344 train_time:26378579ms step_avg:579.53ms +step:45518/57344 train_time:26378834ms step_avg:579.53ms +step:45519/57344 train_time:26379397ms step_avg:579.52ms +grad accum step:11380/14336 +step:45520/57344 train_time:26380745ms step_avg:579.54ms +step:45521/57344 train_time:26380761ms step_avg:579.53ms +step:45522/57344 train_time:26381013ms step_avg:579.52ms +step:45523/57344 train_time:26381611ms step_avg:579.52ms +grad accum step:11381/14336 +step:45524/57344 train_time:26383057ms step_avg:579.54ms +step:45525/57344 train_time:26383071ms step_avg:579.53ms +step:45526/57344 train_time:26383308ms step_avg:579.52ms +step:45527/57344 train_time:26383864ms step_avg:579.52ms +grad accum step:11382/14336 +step:45528/57344 train_time:26385262ms step_avg:579.54ms +step:45529/57344 train_time:26385281ms step_avg:579.53ms +step:45530/57344 train_time:26385517ms step_avg:579.52ms +step:45531/57344 train_time:26386069ms step_avg:579.52ms +grad accum step:11383/14336 +step:45532/57344 train_time:26387456ms step_avg:579.54ms +step:45533/57344 train_time:26387471ms step_avg:579.52ms +step:45534/57344 train_time:26387697ms step_avg:579.52ms +step:45535/57344 train_time:26388244ms step_avg:579.52ms +grad accum step:11384/14336 +step:45536/57344 train_time:26389565ms step_avg:579.53ms +step:45537/57344 train_time:26389580ms step_avg:579.52ms +step:45538/57344 train_time:26389827ms step_avg:579.51ms +step:45539/57344 train_time:26390373ms step_avg:579.51ms +grad accum step:11385/14336 +step:45540/57344 train_time:26391745ms step_avg:579.53ms +step:45541/57344 train_time:26391759ms step_avg:579.52ms +step:45542/57344 train_time:26391983ms step_avg:579.51ms +step:45543/57344 train_time:26392554ms step_avg:579.51ms +grad accum step:11386/14336 +step:45544/57344 train_time:26393921ms step_avg:579.53ms +step:45545/57344 train_time:26393942ms step_avg:579.51ms +step:45546/57344 train_time:26394181ms step_avg:579.51ms +step:45547/57344 train_time:26394739ms step_avg:579.51ms +grad accum step:11387/14336 +step:45548/57344 train_time:26396067ms step_avg:579.52ms +step:45549/57344 train_time:26396081ms step_avg:579.51ms +step:45550/57344 train_time:26396309ms step_avg:579.50ms +step:45551/57344 train_time:26396855ms step_avg:579.50ms +grad accum step:11388/14336 +step:45552/57344 train_time:26398210ms step_avg:579.52ms +step:45553/57344 train_time:26398224ms step_avg:579.51ms +step:45554/57344 train_time:26398459ms step_avg:579.50ms +step:45555/57344 train_time:26399042ms step_avg:579.50ms +grad accum step:11389/14336 +step:45556/57344 train_time:26400406ms step_avg:579.52ms +step:45557/57344 train_time:26400422ms step_avg:579.50ms +step:45558/57344 train_time:26400672ms step_avg:579.50ms +step:45559/57344 train_time:26401233ms step_avg:579.50ms +grad accum step:11390/14336 +step:45560/57344 train_time:26402617ms step_avg:579.51ms +step:45561/57344 train_time:26402634ms step_avg:579.50ms +step:45562/57344 train_time:26402864ms step_avg:579.49ms +step:45563/57344 train_time:26403448ms step_avg:579.49ms +grad accum step:11391/14336 +step:45564/57344 train_time:26404797ms step_avg:579.51ms +step:45565/57344 train_time:26404821ms step_avg:579.50ms +step:45566/57344 train_time:26405053ms step_avg:579.49ms +step:45567/57344 train_time:26405612ms step_avg:579.49ms +grad accum step:11392/14336 +step:45568/57344 train_time:26406975ms step_avg:579.51ms +step:45568/57344 val_loss:5.621460 train_time:26406987ms step_avg:579.51ms +step:45569/57344 train_time:26406999ms step_avg:579.49ms +step:45570/57344 train_time:26407229ms step_avg:579.49ms +step:45571/57344 train_time:26407797ms step_avg:579.49ms +grad accum step:11393/14336 +step:45572/57344 train_time:26409121ms step_avg:579.50ms +step:45573/57344 train_time:26409137ms step_avg:579.49ms +step:45574/57344 train_time:26409399ms step_avg:579.48ms +step:45575/57344 train_time:26409982ms step_avg:579.48ms +grad accum step:11394/14336 +step:45576/57344 train_time:26411343ms step_avg:579.50ms +step:45577/57344 train_time:26411355ms step_avg:579.49ms +step:45578/57344 train_time:26411594ms step_avg:579.48ms +step:45579/57344 train_time:26412163ms step_avg:579.48ms +grad accum step:11395/14336 +step:45580/57344 train_time:26413494ms step_avg:579.50ms +step:45581/57344 train_time:26413507ms step_avg:579.49ms +step:45582/57344 train_time:26413756ms step_avg:579.48ms +step:45583/57344 train_time:26414318ms step_avg:579.48ms +grad accum step:11396/14336 +step:45584/57344 train_time:26415668ms step_avg:579.49ms +step:45585/57344 train_time:26415680ms step_avg:579.48ms +step:45586/57344 train_time:26415914ms step_avg:579.47ms +step:45587/57344 train_time:26416460ms step_avg:579.47ms +grad accum step:11397/14336 +step:45588/57344 train_time:26417800ms step_avg:579.49ms +step:45589/57344 train_time:26417814ms step_avg:579.48ms +step:45590/57344 train_time:26418059ms step_avg:579.47ms +step:45591/57344 train_time:26418605ms step_avg:579.47ms +grad accum step:11398/14336 +step:45592/57344 train_time:26419946ms step_avg:579.49ms +step:45593/57344 train_time:26419962ms step_avg:579.47ms +step:45594/57344 train_time:26420208ms step_avg:579.47ms +step:45595/57344 train_time:26420748ms step_avg:579.47ms +grad accum step:11399/14336 +step:45596/57344 train_time:26422076ms step_avg:579.48ms +step:45597/57344 train_time:26422094ms step_avg:579.47ms +step:45598/57344 train_time:26422326ms step_avg:579.46ms +step:45599/57344 train_time:26422876ms step_avg:579.46ms +grad accum step:11400/14336 +step:45600/57344 train_time:26424266ms step_avg:579.48ms +step:45601/57344 train_time:26424281ms step_avg:579.47ms +step:45602/57344 train_time:26424500ms step_avg:579.46ms +step:45603/57344 train_time:26425054ms step_avg:579.46ms +grad accum step:11401/14336 +step:45604/57344 train_time:26426428ms step_avg:579.48ms +step:45605/57344 train_time:26426440ms step_avg:579.46ms +step:45606/57344 train_time:26426691ms step_avg:579.46ms +step:45607/57344 train_time:26427258ms step_avg:579.46ms +grad accum step:11402/14336 +step:45608/57344 train_time:26428605ms step_avg:579.47ms +step:45609/57344 train_time:26428616ms step_avg:579.46ms +step:45610/57344 train_time:26428873ms step_avg:579.45ms +step:45611/57344 train_time:26429443ms step_avg:579.45ms +grad accum step:11403/14336 +step:45612/57344 train_time:26430771ms step_avg:579.47ms +step:45613/57344 train_time:26430787ms step_avg:579.46ms +step:45614/57344 train_time:26431046ms step_avg:579.45ms +step:45615/57344 train_time:26431627ms step_avg:579.45ms +grad accum step:11404/14336 +step:45616/57344 train_time:26433093ms step_avg:579.47ms +step:45617/57344 train_time:26433108ms step_avg:579.46ms +step:45618/57344 train_time:26433350ms step_avg:579.45ms +step:45619/57344 train_time:26433894ms step_avg:579.45ms +grad accum step:11405/14336 +step:45620/57344 train_time:26435227ms step_avg:579.47ms +step:45621/57344 train_time:26435248ms step_avg:579.45ms +step:45622/57344 train_time:26435518ms step_avg:579.45ms +step:45623/57344 train_time:26436167ms step_avg:579.45ms +grad accum step:11406/14336 +step:45624/57344 train_time:26441099ms step_avg:579.54ms +step:45625/57344 train_time:26442293ms step_avg:579.56ms +step:45626/57344 train_time:26442568ms step_avg:579.55ms +step:45627/57344 train_time:26443124ms step_avg:579.55ms +grad accum step:11407/14336 +step:45628/57344 train_time:26444450ms step_avg:579.57ms +step:45629/57344 train_time:26444461ms step_avg:579.55ms +step:45630/57344 train_time:26444704ms step_avg:579.55ms +step:45631/57344 train_time:26445244ms step_avg:579.55ms +grad accum step:11408/14336 +step:45632/57344 train_time:26446607ms step_avg:579.56ms +step:45632/57344 val_loss:5.638070 train_time:26446640ms step_avg:579.56ms +step:45633/57344 train_time:26447156ms step_avg:579.56ms +step:45634/57344 train_time:26447181ms step_avg:579.55ms +step:45635/57344 train_time:26447692ms step_avg:579.55ms +grad accum step:11409/14336 +step:45636/57344 train_time:26449046ms step_avg:579.57ms +step:45637/57344 train_time:26449060ms step_avg:579.55ms +step:45638/57344 train_time:26449285ms step_avg:579.55ms +step:45639/57344 train_time:26449835ms step_avg:579.54ms +grad accum step:11410/14336 +step:45640/57344 train_time:26451151ms step_avg:579.56ms +step:45641/57344 train_time:26451166ms step_avg:579.55ms +step:45642/57344 train_time:26451406ms step_avg:579.54ms +step:45643/57344 train_time:26451938ms step_avg:579.54ms +grad accum step:11411/14336 +step:45644/57344 train_time:26453221ms step_avg:579.56ms +step:45645/57344 train_time:26453234ms step_avg:579.54ms +step:45646/57344 train_time:26453481ms step_avg:579.54ms +step:45647/57344 train_time:26454040ms step_avg:579.54ms +grad accum step:11412/14336 +step:45648/57344 train_time:26455360ms step_avg:579.55ms +step:45649/57344 train_time:26455395ms step_avg:579.54ms +step:45650/57344 train_time:26455614ms step_avg:579.53ms +step:45651/57344 train_time:26456176ms step_avg:579.53ms +grad accum step:11413/14336 +step:45652/57344 train_time:26457640ms step_avg:579.55ms +step:45653/57344 train_time:26457655ms step_avg:579.54ms +step:45654/57344 train_time:26457913ms step_avg:579.53ms +step:45655/57344 train_time:26458494ms step_avg:579.53ms +grad accum step:11414/14336 +step:45656/57344 train_time:26459797ms step_avg:579.55ms +step:45657/57344 train_time:26459821ms step_avg:579.53ms +step:45658/57344 train_time:26460054ms step_avg:579.53ms +step:45659/57344 train_time:26460607ms step_avg:579.53ms +grad accum step:11415/14336 +step:45660/57344 train_time:26461932ms step_avg:579.54ms +step:45661/57344 train_time:26461956ms step_avg:579.53ms +step:45662/57344 train_time:26462199ms step_avg:579.52ms +step:45663/57344 train_time:26462760ms step_avg:579.52ms +grad accum step:11416/14336 +step:45664/57344 train_time:26464050ms step_avg:579.54ms +step:45665/57344 train_time:26464068ms step_avg:579.53ms +step:45666/57344 train_time:26464307ms step_avg:579.52ms +step:45667/57344 train_time:26464851ms step_avg:579.52ms +grad accum step:11417/14336 +step:45668/57344 train_time:26466224ms step_avg:579.54ms +step:45669/57344 train_time:26466241ms step_avg:579.52ms +step:45670/57344 train_time:26466483ms step_avg:579.52ms +step:45671/57344 train_time:26467048ms step_avg:579.52ms +grad accum step:11418/14336 +step:45672/57344 train_time:26468360ms step_avg:579.53ms +step:45673/57344 train_time:26468384ms step_avg:579.52ms +step:45674/57344 train_time:26468630ms step_avg:579.51ms +step:45675/57344 train_time:26469203ms step_avg:579.51ms +grad accum step:11419/14336 +step:45676/57344 train_time:26470664ms step_avg:579.53ms +step:45677/57344 train_time:26470689ms step_avg:579.52ms +step:45678/57344 train_time:26470911ms step_avg:579.51ms +step:45679/57344 train_time:26471470ms step_avg:579.51ms +grad accum step:11420/14336 +step:45680/57344 train_time:26472796ms step_avg:579.53ms +step:45681/57344 train_time:26472812ms step_avg:579.51ms +step:45682/57344 train_time:26473056ms step_avg:579.51ms +step:45683/57344 train_time:26473619ms step_avg:579.51ms +grad accum step:11421/14336 +step:45684/57344 train_time:26474887ms step_avg:579.52ms +step:45685/57344 train_time:26474907ms step_avg:579.51ms +step:45686/57344 train_time:26475136ms step_avg:579.50ms +step:45687/57344 train_time:26475687ms step_avg:579.50ms +grad accum step:11422/14336 +step:45688/57344 train_time:26476999ms step_avg:579.52ms +step:45689/57344 train_time:26477027ms step_avg:579.51ms +step:45690/57344 train_time:26477248ms step_avg:579.50ms +step:45691/57344 train_time:26477801ms step_avg:579.50ms +grad accum step:11423/14336 +step:45692/57344 train_time:26479103ms step_avg:579.51ms +step:45693/57344 train_time:26479121ms step_avg:579.50ms +step:45694/57344 train_time:26479361ms step_avg:579.49ms +step:45695/57344 train_time:26479908ms step_avg:579.49ms +grad accum step:11424/14336 +step:45696/57344 train_time:26481193ms step_avg:579.51ms +step:45696/57344 val_loss:5.647486 train_time:26481199ms step_avg:579.51ms +step:45697/57344 train_time:26481791ms step_avg:579.51ms +step:45698/57344 train_time:26482025ms step_avg:579.50ms +step:45699/57344 train_time:26482533ms step_avg:579.50ms +grad accum step:11425/14336 +step:45700/57344 train_time:26483832ms step_avg:579.51ms +step:45701/57344 train_time:26483845ms step_avg:579.50ms +step:45702/57344 train_time:26484086ms step_avg:579.50ms +step:45703/57344 train_time:26484636ms step_avg:579.49ms +grad accum step:11426/14336 +step:45704/57344 train_time:26485986ms step_avg:579.51ms +step:45705/57344 train_time:26486001ms step_avg:579.50ms +step:45706/57344 train_time:26486256ms step_avg:579.49ms +step:45707/57344 train_time:26486833ms step_avg:579.49ms +grad accum step:11427/14336 +step:45708/57344 train_time:26488148ms step_avg:579.51ms +step:45709/57344 train_time:26488187ms step_avg:579.50ms +step:45710/57344 train_time:26488411ms step_avg:579.49ms +step:45711/57344 train_time:26488958ms step_avg:579.49ms +grad accum step:11428/14336 +step:45712/57344 train_time:26490290ms step_avg:579.50ms +step:45713/57344 train_time:26490309ms step_avg:579.49ms +step:45714/57344 train_time:26490549ms step_avg:579.48ms +step:45715/57344 train_time:26491103ms step_avg:579.48ms +grad accum step:11429/14336 +step:45716/57344 train_time:26492450ms step_avg:579.50ms +step:45717/57344 train_time:26492463ms step_avg:579.49ms +step:45718/57344 train_time:26492700ms step_avg:579.48ms +step:45719/57344 train_time:26493253ms step_avg:579.48ms +grad accum step:11430/14336 +step:45720/57344 train_time:26494605ms step_avg:579.50ms +step:45721/57344 train_time:26494619ms step_avg:579.48ms +step:45722/57344 train_time:26494866ms step_avg:579.48ms +step:45723/57344 train_time:26495420ms step_avg:579.48ms +grad accum step:11431/14336 +step:45724/57344 train_time:26496858ms step_avg:579.50ms +step:45725/57344 train_time:26496874ms step_avg:579.48ms +step:45726/57344 train_time:26497094ms step_avg:579.48ms +step:45727/57344 train_time:26497650ms step_avg:579.47ms +grad accum step:11432/14336 +step:45728/57344 train_time:26499004ms step_avg:579.49ms +step:45729/57344 train_time:26499023ms step_avg:579.48ms +step:45730/57344 train_time:26499268ms step_avg:579.47ms +step:45731/57344 train_time:26499828ms step_avg:579.47ms +grad accum step:11433/14336 +step:45732/57344 train_time:26501169ms step_avg:579.49ms +step:45733/57344 train_time:26501186ms step_avg:579.48ms +step:45734/57344 train_time:26501409ms step_avg:579.47ms +step:45735/57344 train_time:26501958ms step_avg:579.47ms +grad accum step:11434/14336 +step:45736/57344 train_time:26503320ms step_avg:579.48ms +step:45737/57344 train_time:26503335ms step_avg:579.47ms +step:45738/57344 train_time:26503572ms step_avg:579.47ms +step:45739/57344 train_time:26504144ms step_avg:579.46ms +grad accum step:11435/14336 +step:45740/57344 train_time:26505536ms step_avg:579.48ms +step:45741/57344 train_time:26505551ms step_avg:579.47ms +step:45742/57344 train_time:26505799ms step_avg:579.46ms +step:45743/57344 train_time:26506357ms step_avg:579.46ms +grad accum step:11436/14336 +step:45744/57344 train_time:26507683ms step_avg:579.48ms +step:45745/57344 train_time:26507702ms step_avg:579.47ms +step:45746/57344 train_time:26507928ms step_avg:579.46ms +step:45747/57344 train_time:26508487ms step_avg:579.46ms +grad accum step:11437/14336 +step:45748/57344 train_time:26509813ms step_avg:579.47ms +step:45749/57344 train_time:26509832ms step_avg:579.46ms +step:45750/57344 train_time:26510072ms step_avg:579.46ms +step:45751/57344 train_time:26510635ms step_avg:579.45ms +grad accum step:11438/14336 +step:45752/57344 train_time:26512133ms step_avg:579.47ms +step:45753/57344 train_time:26512472ms step_avg:579.47ms +step:45754/57344 train_time:26512689ms step_avg:579.46ms +step:45755/57344 train_time:26513246ms step_avg:579.46ms +grad accum step:11439/14336 +step:45756/57344 train_time:26514703ms step_avg:579.48ms +step:45757/57344 train_time:26514718ms step_avg:579.47ms +step:45758/57344 train_time:26514943ms step_avg:579.46ms +step:45759/57344 train_time:26515503ms step_avg:579.46ms +grad accum step:11440/14336 +step:45760/57344 train_time:26516823ms step_avg:579.48ms +step:45760/57344 val_loss:5.659206 train_time:26516841ms step_avg:579.48ms +step:45761/57344 train_time:26516852ms step_avg:579.46ms +step:45762/57344 train_time:26517081ms step_avg:579.46ms +step:45763/57344 train_time:26517640ms step_avg:579.46ms +grad accum step:11441/14336 +step:45764/57344 train_time:26518968ms step_avg:579.47ms +step:45765/57344 train_time:26518984ms step_avg:579.46ms +step:45766/57344 train_time:26519240ms step_avg:579.45ms +step:45767/57344 train_time:26519818ms step_avg:579.45ms +grad accum step:11442/14336 +step:45768/57344 train_time:26521158ms step_avg:579.47ms +step:45769/57344 train_time:26521183ms step_avg:579.46ms +step:45770/57344 train_time:26521420ms step_avg:579.45ms +step:45771/57344 train_time:26521988ms step_avg:579.45ms +grad accum step:11443/14336 +step:45772/57344 train_time:26523315ms step_avg:579.47ms +step:45773/57344 train_time:26523335ms step_avg:579.45ms +step:45774/57344 train_time:26523575ms step_avg:579.45ms +step:45775/57344 train_time:26524119ms step_avg:579.45ms +grad accum step:11444/14336 +step:45776/57344 train_time:26525456ms step_avg:579.46ms +step:45777/57344 train_time:26525473ms step_avg:579.45ms +step:45778/57344 train_time:26525724ms step_avg:579.44ms +step:45779/57344 train_time:26526292ms step_avg:579.44ms +grad accum step:11445/14336 +step:45780/57344 train_time:26527626ms step_avg:579.46ms +step:45781/57344 train_time:26527641ms step_avg:579.45ms +step:45782/57344 train_time:26527885ms step_avg:579.44ms +step:45783/57344 train_time:26528428ms step_avg:579.44ms +grad accum step:11446/14336 +step:45784/57344 train_time:26529727ms step_avg:579.45ms +step:45785/57344 train_time:26529743ms step_avg:579.44ms +step:45786/57344 train_time:26529992ms step_avg:579.43ms +step:45787/57344 train_time:26530551ms step_avg:579.43ms +grad accum step:11447/14336 +step:45788/57344 train_time:26531873ms step_avg:579.45ms +step:45789/57344 train_time:26531891ms step_avg:579.44ms +step:45790/57344 train_time:26532139ms step_avg:579.43ms +step:45791/57344 train_time:26532717ms step_avg:579.43ms +grad accum step:11448/14336 +step:45792/57344 train_time:26534190ms step_avg:579.45ms +step:45793/57344 train_time:26534204ms step_avg:579.44ms +step:45794/57344 train_time:26534428ms step_avg:579.43ms +step:45795/57344 train_time:26535006ms step_avg:579.43ms +grad accum step:11449/14336 +step:45796/57344 train_time:26536405ms step_avg:579.45ms +step:45797/57344 train_time:26536422ms step_avg:579.44ms +step:45798/57344 train_time:26536672ms step_avg:579.43ms +step:45799/57344 train_time:26537242ms step_avg:579.43ms +grad accum step:11450/14336 +step:45800/57344 train_time:26538559ms step_avg:579.44ms +step:45801/57344 train_time:26538575ms step_avg:579.43ms +step:45802/57344 train_time:26538828ms step_avg:579.43ms +step:45803/57344 train_time:26539400ms step_avg:579.42ms +grad accum step:11451/14336 +step:45804/57344 train_time:26540783ms step_avg:579.44ms +step:45805/57344 train_time:26540804ms step_avg:579.43ms +step:45806/57344 train_time:26541023ms step_avg:579.42ms +step:45807/57344 train_time:26541592ms step_avg:579.42ms +grad accum step:11452/14336 +step:45808/57344 train_time:26543018ms step_avg:579.44ms +step:45809/57344 train_time:26543038ms step_avg:579.43ms +step:45810/57344 train_time:26543260ms step_avg:579.42ms +step:45811/57344 train_time:26543814ms step_avg:579.42ms +grad accum step:11453/14336 +step:45812/57344 train_time:26545242ms step_avg:579.44ms +step:45813/57344 train_time:26545264ms step_avg:579.43ms +step:45814/57344 train_time:26545486ms step_avg:579.42ms +step:45815/57344 train_time:26546045ms step_avg:579.42ms +grad accum step:11454/14336 +step:45816/57344 train_time:26547362ms step_avg:579.43ms +step:45817/57344 train_time:26547377ms step_avg:579.42ms +step:45818/57344 train_time:26547632ms step_avg:579.41ms +step:45819/57344 train_time:26548197ms step_avg:579.41ms +grad accum step:11455/14336 +step:45820/57344 train_time:26549519ms step_avg:579.43ms +step:45821/57344 train_time:26549540ms step_avg:579.42ms +step:45822/57344 train_time:26549779ms step_avg:579.41ms +step:45823/57344 train_time:26550333ms step_avg:579.41ms +grad accum step:11456/14336 +step:45824/57344 train_time:26551651ms step_avg:579.43ms +step:45824/57344 val_loss:5.673312 train_time:26551656ms step_avg:579.43ms +step:45825/57344 train_time:26551668ms step_avg:579.41ms +step:45826/57344 train_time:26551895ms step_avg:579.41ms +step:45827/57344 train_time:26552461ms step_avg:579.41ms +grad accum step:11457/14336 +step:45828/57344 train_time:26553796ms step_avg:579.42ms +step:45829/57344 train_time:26553812ms step_avg:579.41ms +step:45830/57344 train_time:26554061ms step_avg:579.40ms +step:45831/57344 train_time:26554608ms step_avg:579.40ms +grad accum step:11458/14336 +step:45832/57344 train_time:26555969ms step_avg:579.42ms +step:45833/57344 train_time:26556001ms step_avg:579.41ms +step:45834/57344 train_time:26556238ms step_avg:579.40ms +step:45835/57344 train_time:26556828ms step_avg:579.40ms +grad accum step:11459/14336 +step:45836/57344 train_time:26558199ms step_avg:579.42ms +step:45837/57344 train_time:26558214ms step_avg:579.41ms +step:45838/57344 train_time:26558464ms step_avg:579.40ms +step:45839/57344 train_time:26559029ms step_avg:579.40ms +grad accum step:11460/14336 +step:45840/57344 train_time:26560386ms step_avg:579.42ms +step:45841/57344 train_time:26560402ms step_avg:579.40ms +step:45842/57344 train_time:26560641ms step_avg:579.40ms +step:45843/57344 train_time:26561187ms step_avg:579.39ms +grad accum step:11461/14336 +step:45844/57344 train_time:26562521ms step_avg:579.41ms +step:45845/57344 train_time:26562536ms step_avg:579.40ms +step:45846/57344 train_time:26562784ms step_avg:579.39ms +step:45847/57344 train_time:26563339ms step_avg:579.39ms +grad accum step:11462/14336 +step:45848/57344 train_time:26564684ms step_avg:579.41ms +step:45849/57344 train_time:26564702ms step_avg:579.40ms +step:45850/57344 train_time:26564943ms step_avg:579.39ms +step:45851/57344 train_time:26565489ms step_avg:579.39ms +grad accum step:11463/14336 +step:45852/57344 train_time:26566831ms step_avg:579.40ms +step:45853/57344 train_time:26566847ms step_avg:579.39ms +step:45854/57344 train_time:26567094ms step_avg:579.38ms +step:45855/57344 train_time:26567649ms step_avg:579.38ms +grad accum step:11464/14336 +step:45856/57344 train_time:26569008ms step_avg:579.40ms +step:45857/57344 train_time:26569024ms step_avg:579.39ms +step:45858/57344 train_time:26569273ms step_avg:579.38ms +step:45859/57344 train_time:26569829ms step_avg:579.38ms +grad accum step:11465/14336 +step:45860/57344 train_time:26571157ms step_avg:579.40ms +step:45861/57344 train_time:26571171ms step_avg:579.38ms +step:45862/57344 train_time:26571417ms step_avg:579.38ms +step:45863/57344 train_time:26571963ms step_avg:579.38ms +grad accum step:11466/14336 +step:45864/57344 train_time:26573337ms step_avg:579.39ms +step:45865/57344 train_time:26573351ms step_avg:579.38ms +step:45866/57344 train_time:26573588ms step_avg:579.37ms +step:45867/57344 train_time:26574166ms step_avg:579.37ms +grad accum step:11467/14336 +step:45868/57344 train_time:26575551ms step_avg:579.39ms +step:45869/57344 train_time:26575568ms step_avg:579.38ms +step:45870/57344 train_time:26575810ms step_avg:579.37ms +step:45871/57344 train_time:26576352ms step_avg:579.37ms +grad accum step:11468/14336 +step:45872/57344 train_time:26577719ms step_avg:579.39ms +step:45873/57344 train_time:26577732ms step_avg:579.38ms +step:45874/57344 train_time:26577983ms step_avg:579.37ms +step:45875/57344 train_time:26578544ms step_avg:579.37ms +grad accum step:11469/14336 +step:45876/57344 train_time:26579896ms step_avg:579.39ms +step:45877/57344 train_time:26579922ms step_avg:579.37ms +step:45878/57344 train_time:26580154ms step_avg:579.37ms +step:45879/57344 train_time:26580739ms step_avg:579.37ms +grad accum step:11470/14336 +step:45880/57344 train_time:26582100ms step_avg:579.38ms +step:45881/57344 train_time:26582114ms step_avg:579.37ms +step:45882/57344 train_time:26582363ms step_avg:579.36ms +step:45883/57344 train_time:26582909ms step_avg:579.36ms +grad accum step:11471/14336 +step:45884/57344 train_time:26584272ms step_avg:579.38ms +step:45885/57344 train_time:26584306ms step_avg:579.37ms +step:45886/57344 train_time:26584526ms step_avg:579.36ms +step:45887/57344 train_time:26585069ms step_avg:579.36ms +grad accum step:11472/14336 +step:45888/57344 train_time:26586402ms step_avg:579.38ms +step:45888/57344 val_loss:5.688259 train_time:26586403ms step_avg:579.38ms +step:45889/57344 train_time:26586415ms step_avg:579.36ms +step:45890/57344 train_time:26586640ms step_avg:579.36ms +step:45891/57344 train_time:26587196ms step_avg:579.36ms +grad accum step:11473/14336 +step:45892/57344 train_time:26588560ms step_avg:579.37ms +step:45893/57344 train_time:26588573ms step_avg:579.36ms +step:45894/57344 train_time:26588818ms step_avg:579.35ms +step:45895/57344 train_time:26589399ms step_avg:579.35ms +grad accum step:11474/14336 +step:45896/57344 train_time:26590785ms step_avg:579.37ms +step:45897/57344 train_time:26590800ms step_avg:579.36ms +step:45898/57344 train_time:26591041ms step_avg:579.35ms +step:45899/57344 train_time:26591583ms step_avg:579.35ms +grad accum step:11475/14336 +step:45900/57344 train_time:26592954ms step_avg:579.37ms +step:45901/57344 train_time:26592973ms step_avg:579.35ms +step:45902/57344 train_time:26593214ms step_avg:579.35ms +step:45903/57344 train_time:26593767ms step_avg:579.35ms +grad accum step:11476/14336 +step:45904/57344 train_time:26595151ms step_avg:579.36ms +step:45905/57344 train_time:26595175ms step_avg:579.35ms +step:45906/57344 train_time:26595394ms step_avg:579.34ms +step:45907/57344 train_time:26595947ms step_avg:579.34ms +grad accum step:11477/14336 +step:45908/57344 train_time:26597279ms step_avg:579.36ms +step:45909/57344 train_time:26597293ms step_avg:579.35ms +step:45910/57344 train_time:26597546ms step_avg:579.34ms +step:45911/57344 train_time:26598115ms step_avg:579.34ms +grad accum step:11478/14336 +step:45912/57344 train_time:26599469ms step_avg:579.36ms +step:45913/57344 train_time:26599489ms step_avg:579.35ms +step:45914/57344 train_time:26599728ms step_avg:579.34ms +step:45915/57344 train_time:26600287ms step_avg:579.34ms +grad accum step:11479/14336 +step:45916/57344 train_time:26601611ms step_avg:579.35ms +step:45917/57344 train_time:26601631ms step_avg:579.34ms +step:45918/57344 train_time:26601871ms step_avg:579.33ms +step:45919/57344 train_time:26602417ms step_avg:579.33ms +grad accum step:11480/14336 +step:45920/57344 train_time:26603779ms step_avg:579.35ms +step:45921/57344 train_time:26603794ms step_avg:579.34ms +step:45922/57344 train_time:26604040ms step_avg:579.33ms +step:45923/57344 train_time:26604603ms step_avg:579.33ms +grad accum step:11481/14336 +step:45924/57344 train_time:26605920ms step_avg:579.35ms +step:45925/57344 train_time:26605935ms step_avg:579.33ms +step:45926/57344 train_time:26606185ms step_avg:579.33ms +step:45927/57344 train_time:26606738ms step_avg:579.33ms +grad accum step:11482/14336 +step:45928/57344 train_time:26608106ms step_avg:579.34ms +step:45929/57344 train_time:26608125ms step_avg:579.33ms +step:45930/57344 train_time:26608370ms step_avg:579.32ms +step:45931/57344 train_time:26608943ms step_avg:579.32ms +grad accum step:11483/14336 +step:45932/57344 train_time:26610306ms step_avg:579.34ms +step:45933/57344 train_time:26610322ms step_avg:579.33ms +step:45934/57344 train_time:26610556ms step_avg:579.32ms +step:45935/57344 train_time:26611119ms step_avg:579.32ms +grad accum step:11484/14336 +step:45936/57344 train_time:26612539ms step_avg:579.34ms +step:45937/57344 train_time:26612557ms step_avg:579.33ms +step:45938/57344 train_time:26612798ms step_avg:579.32ms +step:45939/57344 train_time:26613352ms step_avg:579.32ms +grad accum step:11485/14336 +step:45940/57344 train_time:26614743ms step_avg:579.34ms +step:45941/57344 train_time:26614766ms step_avg:579.32ms +step:45942/57344 train_time:26615000ms step_avg:579.32ms +step:45943/57344 train_time:26615569ms step_avg:579.32ms +grad accum step:11486/14336 +step:45944/57344 train_time:26616910ms step_avg:579.33ms +step:45945/57344 train_time:26616925ms step_avg:579.32ms +step:45946/57344 train_time:26617179ms step_avg:579.31ms +step:45947/57344 train_time:26617739ms step_avg:579.31ms +grad accum step:11487/14336 +step:45948/57344 train_time:26619058ms step_avg:579.33ms +step:45949/57344 train_time:26619072ms step_avg:579.32ms +step:45950/57344 train_time:26619325ms step_avg:579.31ms +step:45951/57344 train_time:26619897ms step_avg:579.31ms +grad accum step:11488/14336 +step:45952/57344 train_time:26621243ms step_avg:579.33ms +step:45952/57344 val_loss:5.700180 train_time:26621257ms step_avg:579.33ms +step:45953/57344 train_time:26621268ms step_avg:579.32ms +step:45954/57344 train_time:26621494ms step_avg:579.31ms +step:45955/57344 train_time:26622053ms step_avg:579.31ms +grad accum step:11489/14336 +step:45956/57344 train_time:26623383ms step_avg:579.32ms +step:45957/57344 train_time:26623400ms step_avg:579.31ms +step:45958/57344 train_time:26623644ms step_avg:579.30ms +step:45959/57344 train_time:26624207ms step_avg:579.30ms +grad accum step:11490/14336 +step:45960/57344 train_time:26625589ms step_avg:579.32ms +step:45961/57344 train_time:26625607ms step_avg:579.31ms +step:45962/57344 train_time:26625880ms step_avg:579.30ms +step:45963/57344 train_time:26626494ms step_avg:579.30ms +grad accum step:11491/14336 +step:45964/57344 train_time:26627889ms step_avg:579.32ms +step:45965/57344 train_time:26627914ms step_avg:579.31ms +step:45966/57344 train_time:26628142ms step_avg:579.30ms +step:45967/57344 train_time:26628704ms step_avg:579.30ms +grad accum step:11492/14336 +step:45968/57344 train_time:26630025ms step_avg:579.32ms +step:45969/57344 train_time:26630042ms step_avg:579.30ms +step:45970/57344 train_time:26630279ms step_avg:579.30ms +step:45971/57344 train_time:26630828ms step_avg:579.30ms +grad accum step:11493/14336 +step:45972/57344 train_time:26632172ms step_avg:579.31ms +step:45973/57344 train_time:26632188ms step_avg:579.30ms +step:45974/57344 train_time:26632440ms step_avg:579.29ms +step:45975/57344 train_time:26633012ms step_avg:579.29ms +grad accum step:11494/14336 +step:45976/57344 train_time:26634355ms step_avg:579.31ms +step:45977/57344 train_time:26634374ms step_avg:579.30ms +step:45978/57344 train_time:26634619ms step_avg:579.29ms +step:45979/57344 train_time:26635199ms step_avg:579.29ms +grad accum step:11495/14336 +step:45980/57344 train_time:26636553ms step_avg:579.31ms +step:45981/57344 train_time:26636574ms step_avg:579.30ms +step:45982/57344 train_time:26636814ms step_avg:579.29ms +step:45983/57344 train_time:26637390ms step_avg:579.29ms +grad accum step:11496/14336 +step:45984/57344 train_time:26638786ms step_avg:579.31ms +step:45985/57344 train_time:26638799ms step_avg:579.29ms +step:45986/57344 train_time:26639044ms step_avg:579.29ms +step:45987/57344 train_time:26639598ms step_avg:579.29ms +grad accum step:11497/14336 +step:45988/57344 train_time:26641127ms step_avg:579.31ms +step:45989/57344 train_time:26641146ms step_avg:579.29ms +step:45990/57344 train_time:26641365ms step_avg:579.29ms +step:45991/57344 train_time:26641912ms step_avg:579.29ms +grad accum step:11498/14336 +step:45992/57344 train_time:26643203ms step_avg:579.30ms +step:45993/57344 train_time:26643225ms step_avg:579.29ms +step:45994/57344 train_time:26643467ms step_avg:579.28ms +step:45995/57344 train_time:26644071ms step_avg:579.28ms +grad accum step:11499/14336 +step:45996/57344 train_time:26645413ms step_avg:579.30ms +step:45997/57344 train_time:26645428ms step_avg:579.29ms +step:45998/57344 train_time:26645677ms step_avg:579.28ms +step:45999/57344 train_time:26646237ms step_avg:579.28ms +grad accum step:11500/14336 +step:46000/57344 train_time:26647594ms step_avg:579.30ms +step:46001/57344 train_time:26647607ms step_avg:579.28ms +step:46002/57344 train_time:26647859ms step_avg:579.28ms +step:46003/57344 train_time:26648422ms step_avg:579.28ms +grad accum step:11501/14336 +step:46004/57344 train_time:26649745ms step_avg:579.29ms +step:46005/57344 train_time:26649767ms step_avg:579.28ms +step:46006/57344 train_time:26650008ms step_avg:579.27ms +step:46007/57344 train_time:26650565ms step_avg:579.27ms +grad accum step:11502/14336 +step:46008/57344 train_time:26651897ms step_avg:579.29ms +step:46009/57344 train_time:26651928ms step_avg:579.28ms +step:46010/57344 train_time:26652149ms step_avg:579.27ms +step:46011/57344 train_time:26652692ms step_avg:579.27ms +grad accum step:11503/14336 +step:46012/57344 train_time:26654012ms step_avg:579.28ms +step:46013/57344 train_time:26654033ms step_avg:579.27ms +step:46014/57344 train_time:26654281ms step_avg:579.26ms +step:46015/57344 train_time:26654855ms step_avg:579.26ms +grad accum step:11504/14336 +step:46016/57344 train_time:26656238ms step_avg:579.28ms +step:46016/57344 val_loss:5.713002 train_time:26656239ms step_avg:579.28ms +step:46017/57344 train_time:26656251ms step_avg:579.27ms +step:46018/57344 train_time:26656479ms step_avg:579.26ms +step:46019/57344 train_time:26657037ms step_avg:579.26ms +grad accum step:11505/14336 +step:46020/57344 train_time:26658388ms step_avg:579.28ms +step:46021/57344 train_time:26658433ms step_avg:579.27ms +step:46022/57344 train_time:26658656ms step_avg:579.26ms +step:46023/57344 train_time:26659209ms step_avg:579.26ms +grad accum step:11506/14336 +step:46024/57344 train_time:26660580ms step_avg:579.28ms +step:46025/57344 train_time:26660610ms step_avg:579.26ms +step:46026/57344 train_time:26660827ms step_avg:579.26ms +step:46027/57344 train_time:26661376ms step_avg:579.26ms +grad accum step:11507/14336 +step:46028/57344 train_time:26662705ms step_avg:579.27ms +step:46029/57344 train_time:26662737ms step_avg:579.26ms +step:46030/57344 train_time:26662956ms step_avg:579.25ms +step:46031/57344 train_time:26663505ms step_avg:579.25ms +grad accum step:11508/14336 +step:46032/57344 train_time:26664871ms step_avg:579.27ms +step:46033/57344 train_time:26664887ms step_avg:579.26ms +step:46034/57344 train_time:26665142ms step_avg:579.25ms +step:46035/57344 train_time:26665719ms step_avg:579.25ms +grad accum step:11509/14336 +step:46036/57344 train_time:26667069ms step_avg:579.27ms +step:46037/57344 train_time:26667086ms step_avg:579.25ms +step:46038/57344 train_time:26667338ms step_avg:579.25ms +step:46039/57344 train_time:26667914ms step_avg:579.25ms +grad accum step:11510/14336 +step:46040/57344 train_time:26669241ms step_avg:579.26ms +step:46041/57344 train_time:26669258ms step_avg:579.25ms +step:46042/57344 train_time:26669511ms step_avg:579.24ms +step:46043/57344 train_time:26670083ms step_avg:579.24ms +grad accum step:11511/14336 +step:46044/57344 train_time:26671447ms step_avg:579.26ms +step:46045/57344 train_time:26671464ms step_avg:579.25ms +step:46046/57344 train_time:26671712ms step_avg:579.24ms +step:46047/57344 train_time:26672261ms step_avg:579.24ms +grad accum step:11512/14336 +step:46048/57344 train_time:26673585ms step_avg:579.26ms +step:46049/57344 train_time:26673602ms step_avg:579.24ms +step:46050/57344 train_time:26673853ms step_avg:579.24ms +step:46051/57344 train_time:26674414ms step_avg:579.24ms +grad accum step:11513/14336 +step:46052/57344 train_time:26675731ms step_avg:579.25ms +step:46053/57344 train_time:26675742ms step_avg:579.24ms +step:46054/57344 train_time:26675989ms step_avg:579.23ms +step:46055/57344 train_time:26676543ms step_avg:579.23ms +grad accum step:11514/14336 +step:46056/57344 train_time:26677869ms step_avg:579.25ms +step:46057/57344 train_time:26677885ms step_avg:579.24ms +step:46058/57344 train_time:26678132ms step_avg:579.23ms +step:46059/57344 train_time:26678678ms step_avg:579.23ms +grad accum step:11515/14336 +step:46060/57344 train_time:26680030ms step_avg:579.25ms +step:46061/57344 train_time:26680052ms step_avg:579.23ms +step:46062/57344 train_time:26680280ms step_avg:579.23ms +step:46063/57344 train_time:26680845ms step_avg:579.23ms +grad accum step:11516/14336 +step:46064/57344 train_time:26682183ms step_avg:579.24ms +step:46065/57344 train_time:26682200ms step_avg:579.23ms +step:46066/57344 train_time:26682459ms step_avg:579.22ms +step:46067/57344 train_time:26683042ms step_avg:579.22ms +grad accum step:11517/14336 +step:46068/57344 train_time:26684351ms step_avg:579.24ms +step:46069/57344 train_time:26684367ms step_avg:579.23ms +step:46070/57344 train_time:26684609ms step_avg:579.22ms +step:46071/57344 train_time:26685154ms step_avg:579.22ms +grad accum step:11518/14336 +step:46072/57344 train_time:26686463ms step_avg:579.23ms +step:46073/57344 train_time:26686475ms step_avg:579.22ms +step:46074/57344 train_time:26686720ms step_avg:579.21ms +step:46075/57344 train_time:26687297ms step_avg:579.21ms +grad accum step:11519/14336 +step:46076/57344 train_time:26688697ms step_avg:579.23ms +step:46077/57344 train_time:26688714ms step_avg:579.22ms +step:46078/57344 train_time:26688961ms step_avg:579.21ms +step:46079/57344 train_time:26689522ms step_avg:579.21ms +grad accum step:11520/14336 +step:46080/57344 train_time:26690874ms step_avg:579.23ms +step:46080/57344 val_loss:5.725313 train_time:26690904ms step_avg:579.23ms +step:46081/57344 train_time:26690915ms step_avg:579.22ms +step:46082/57344 train_time:26691139ms step_avg:579.21ms +step:46083/57344 train_time:26691685ms step_avg:579.21ms +grad accum step:11521/14336 +step:46084/57344 train_time:26693014ms step_avg:579.23ms +step:46085/57344 train_time:26693029ms step_avg:579.21ms +step:46086/57344 train_time:26693277ms step_avg:579.21ms +step:46087/57344 train_time:26693828ms step_avg:579.21ms +grad accum step:11522/14336 +step:46088/57344 train_time:26695121ms step_avg:579.22ms +step:46089/57344 train_time:26695132ms step_avg:579.21ms +step:46090/57344 train_time:26695377ms step_avg:579.20ms +step:46091/57344 train_time:26695922ms step_avg:579.20ms +grad accum step:11523/14336 +step:46092/57344 train_time:26697238ms step_avg:579.22ms +step:46093/57344 train_time:26697256ms step_avg:579.20ms +step:46094/57344 train_time:26697495ms step_avg:579.20ms +step:46095/57344 train_time:26698058ms step_avg:579.20ms +grad accum step:11524/14336 +step:46096/57344 train_time:26699379ms step_avg:579.21ms +step:46097/57344 train_time:26699394ms step_avg:579.20ms +step:46098/57344 train_time:26699643ms step_avg:579.19ms +step:46099/57344 train_time:26700202ms step_avg:579.19ms +grad accum step:11525/14336 +step:46100/57344 train_time:26701813ms step_avg:579.22ms +step:46101/57344 train_time:26701924ms step_avg:579.20ms +step:46102/57344 train_time:26702266ms step_avg:579.20ms +step:46103/57344 train_time:26702726ms step_avg:579.20ms +grad accum step:11526/14336 +step:46104/57344 train_time:26704176ms step_avg:579.22ms +step:46105/57344 train_time:26704196ms step_avg:579.20ms +step:46106/57344 train_time:26704414ms step_avg:579.20ms +step:46107/57344 train_time:26704961ms step_avg:579.20ms +grad accum step:11527/14336 +step:46108/57344 train_time:26706301ms step_avg:579.21ms +step:46109/57344 train_time:26706323ms step_avg:579.20ms +step:46110/57344 train_time:26706552ms step_avg:579.19ms +step:46111/57344 train_time:26707095ms step_avg:579.19ms +grad accum step:11528/14336 +step:46112/57344 train_time:26708372ms step_avg:579.21ms +step:46113/57344 train_time:26708387ms step_avg:579.19ms +step:46114/57344 train_time:26708636ms step_avg:579.19ms +step:46115/57344 train_time:26709197ms step_avg:579.19ms +grad accum step:11529/14336 +step:46116/57344 train_time:26710538ms step_avg:579.20ms +step:46117/57344 train_time:26710553ms step_avg:579.19ms +step:46118/57344 train_time:26710799ms step_avg:579.18ms +step:46119/57344 train_time:26711354ms step_avg:579.18ms +grad accum step:11530/14336 +step:46120/57344 train_time:26712657ms step_avg:579.20ms +step:46121/57344 train_time:26712671ms step_avg:579.19ms +step:46122/57344 train_time:26712916ms step_avg:579.18ms +step:46123/57344 train_time:26713462ms step_avg:579.18ms +grad accum step:11531/14336 +step:46124/57344 train_time:26714769ms step_avg:579.19ms +step:46125/57344 train_time:26714788ms step_avg:579.18ms +step:46126/57344 train_time:26715023ms step_avg:579.17ms +step:46127/57344 train_time:26715579ms step_avg:579.17ms +grad accum step:11532/14336 +step:46128/57344 train_time:26716908ms step_avg:579.19ms +step:46129/57344 train_time:26716922ms step_avg:579.18ms +step:46130/57344 train_time:26717170ms step_avg:579.17ms +step:46131/57344 train_time:26717734ms step_avg:579.17ms +grad accum step:11533/14336 +step:46132/57344 train_time:26719139ms step_avg:579.19ms +step:46133/57344 train_time:26719155ms step_avg:579.18ms +step:46134/57344 train_time:26719409ms step_avg:579.17ms +step:46135/57344 train_time:26719986ms step_avg:579.17ms +grad accum step:11534/14336 +step:46136/57344 train_time:26721312ms step_avg:579.19ms +step:46137/57344 train_time:26721329ms step_avg:579.17ms +step:46138/57344 train_time:26721574ms step_avg:579.17ms +step:46139/57344 train_time:26722124ms step_avg:579.17ms +grad accum step:11535/14336 +step:46140/57344 train_time:26723650ms step_avg:579.19ms +step:46141/57344 train_time:26723674ms step_avg:579.17ms +step:46142/57344 train_time:26723903ms step_avg:579.17ms +step:46143/57344 train_time:26724476ms step_avg:579.17ms +grad accum step:11536/14336 +step:46144/57344 train_time:26725890ms step_avg:579.18ms +step:46144/57344 val_loss:5.738195 train_time:26725908ms step_avg:579.18ms +step:46145/57344 train_time:26725920ms step_avg:579.17ms +step:46146/57344 train_time:26726147ms step_avg:579.16ms +step:46147/57344 train_time:26726712ms step_avg:579.16ms +grad accum step:11537/14336 +step:46148/57344 train_time:26728067ms step_avg:579.18ms +step:46149/57344 train_time:26728093ms step_avg:579.17ms +step:46150/57344 train_time:26728327ms step_avg:579.16ms +step:46151/57344 train_time:26728909ms step_avg:579.16ms +grad accum step:11538/14336 +step:46152/57344 train_time:26730247ms step_avg:579.18ms +step:46153/57344 train_time:26730265ms step_avg:579.17ms +step:46154/57344 train_time:26730511ms step_avg:579.16ms +step:46155/57344 train_time:26731072ms step_avg:579.16ms +grad accum step:11539/14336 +step:46156/57344 train_time:26732431ms step_avg:579.18ms +step:46157/57344 train_time:26732445ms step_avg:579.16ms +step:46158/57344 train_time:26732679ms step_avg:579.16ms +step:46159/57344 train_time:26733228ms step_avg:579.16ms +grad accum step:11540/14336 +step:46160/57344 train_time:26734678ms step_avg:579.17ms +step:46161/57344 train_time:26734698ms step_avg:579.16ms +step:46162/57344 train_time:26734912ms step_avg:579.15ms +step:46163/57344 train_time:26735446ms step_avg:579.15ms +grad accum step:11541/14336 +step:46164/57344 train_time:26736776ms step_avg:579.17ms +step:46165/57344 train_time:26736796ms step_avg:579.16ms +step:46166/57344 train_time:26737024ms step_avg:579.15ms +step:46167/57344 train_time:26737599ms step_avg:579.15ms +grad accum step:11542/14336 +step:46168/57344 train_time:26738946ms step_avg:579.17ms +step:46169/57344 train_time:26738958ms step_avg:579.15ms +step:46170/57344 train_time:26739198ms step_avg:579.15ms +step:46171/57344 train_time:26739759ms step_avg:579.15ms +grad accum step:11543/14336 +step:46172/57344 train_time:26741175ms step_avg:579.16ms +step:46173/57344 train_time:26741193ms step_avg:579.15ms +step:46174/57344 train_time:26741409ms step_avg:579.14ms +step:46175/57344 train_time:26741945ms step_avg:579.14ms +grad accum step:11544/14336 +step:46176/57344 train_time:26743285ms step_avg:579.16ms +step:46177/57344 train_time:26743301ms step_avg:579.15ms +step:46178/57344 train_time:26743554ms step_avg:579.14ms +step:46179/57344 train_time:26744118ms step_avg:579.14ms +grad accum step:11545/14336 +step:46180/57344 train_time:26745518ms step_avg:579.16ms +step:46181/57344 train_time:26745534ms step_avg:579.15ms +step:46182/57344 train_time:26745788ms step_avg:579.14ms +step:46183/57344 train_time:26746377ms step_avg:579.14ms +grad accum step:11546/14336 +step:46184/57344 train_time:26747691ms step_avg:579.15ms +step:46185/57344 train_time:26747708ms step_avg:579.14ms +step:46186/57344 train_time:26747964ms step_avg:579.14ms +step:46187/57344 train_time:26748535ms step_avg:579.14ms +grad accum step:11547/14336 +step:46188/57344 train_time:26749877ms step_avg:579.15ms +step:46189/57344 train_time:26749892ms step_avg:579.14ms +step:46190/57344 train_time:26750138ms step_avg:579.13ms +step:46191/57344 train_time:26750685ms step_avg:579.13ms +grad accum step:11548/14336 +step:46192/57344 train_time:26752006ms step_avg:579.15ms +step:46193/57344 train_time:26752021ms step_avg:579.14ms +step:46194/57344 train_time:26752274ms step_avg:579.13ms +step:46195/57344 train_time:26752842ms step_avg:579.13ms +grad accum step:11549/14336 +step:46196/57344 train_time:26754168ms step_avg:579.14ms +step:46197/57344 train_time:26754182ms step_avg:579.13ms +step:46198/57344 train_time:26754430ms step_avg:579.13ms +step:46199/57344 train_time:26754983ms step_avg:579.12ms +grad accum step:11550/14336 +step:46200/57344 train_time:26756326ms step_avg:579.14ms +step:46201/57344 train_time:26756341ms step_avg:579.13ms +step:46202/57344 train_time:26756585ms step_avg:579.12ms +step:46203/57344 train_time:26757146ms step_avg:579.12ms +grad accum step:11551/14336 +step:46204/57344 train_time:26758565ms step_avg:579.14ms +step:46205/57344 train_time:26758580ms step_avg:579.13ms +step:46206/57344 train_time:26758843ms step_avg:579.12ms +step:46207/57344 train_time:26759470ms step_avg:579.12ms +grad accum step:11552/14336 +step:46208/57344 train_time:26760875ms step_avg:579.14ms +step:46208/57344 val_loss:5.756293 train_time:26760879ms step_avg:579.14ms +step:46209/57344 train_time:26760891ms step_avg:579.13ms +step:46210/57344 train_time:26761120ms step_avg:579.12ms +step:46211/57344 train_time:26761681ms step_avg:579.12ms +grad accum step:11553/14336 +step:46212/57344 train_time:26762988ms step_avg:579.14ms +step:46213/57344 train_time:26763005ms step_avg:579.12ms +step:46214/57344 train_time:26763250ms step_avg:579.12ms +step:46215/57344 train_time:26763803ms step_avg:579.12ms +grad accum step:11554/14336 +step:46216/57344 train_time:26765126ms step_avg:579.13ms +step:46217/57344 train_time:26765149ms step_avg:579.12ms +step:46218/57344 train_time:26765380ms step_avg:579.11ms +step:46219/57344 train_time:26765933ms step_avg:579.11ms +grad accum step:11555/14336 +step:46220/57344 train_time:26767280ms step_avg:579.13ms +step:46221/57344 train_time:26767297ms step_avg:579.12ms +step:46222/57344 train_time:26767544ms step_avg:579.11ms +step:46223/57344 train_time:26768091ms step_avg:579.11ms +grad accum step:11556/14336 +step:46224/57344 train_time:26769381ms step_avg:579.12ms +step:46225/57344 train_time:26769397ms step_avg:579.11ms +step:46226/57344 train_time:26769651ms step_avg:579.10ms +step:46227/57344 train_time:26770212ms step_avg:579.10ms +grad accum step:11557/14336 +step:46228/57344 train_time:26771543ms step_avg:579.12ms +step:46229/57344 train_time:26771585ms step_avg:579.11ms +step:46230/57344 train_time:26771806ms step_avg:579.10ms +step:46231/57344 train_time:26772361ms step_avg:579.10ms +grad accum step:11558/14336 +step:46232/57344 train_time:26773734ms step_avg:579.12ms +step:46233/57344 train_time:26773753ms step_avg:579.10ms +step:46234/57344 train_time:26773992ms step_avg:579.10ms +step:46235/57344 train_time:26774563ms step_avg:579.10ms +grad accum step:11559/14336 +step:46236/57344 train_time:26775887ms step_avg:579.11ms +step:46237/57344 train_time:26775900ms step_avg:579.10ms +step:46238/57344 train_time:26776152ms step_avg:579.09ms +step:46239/57344 train_time:26776711ms step_avg:579.09ms +grad accum step:11560/14336 +step:46240/57344 train_time:26778033ms step_avg:579.11ms +step:46241/57344 train_time:26778045ms step_avg:579.10ms +step:46242/57344 train_time:26778288ms step_avg:579.09ms +step:46243/57344 train_time:26778851ms step_avg:579.09ms +grad accum step:11561/14336 +step:46244/57344 train_time:26780247ms step_avg:579.11ms +step:46245/57344 train_time:26780261ms step_avg:579.10ms +step:46246/57344 train_time:26780516ms step_avg:579.09ms +step:46247/57344 train_time:26781091ms step_avg:579.09ms +grad accum step:11562/14336 +step:46248/57344 train_time:26782418ms step_avg:579.10ms +step:46249/57344 train_time:26782438ms step_avg:579.09ms +step:46250/57344 train_time:26782658ms step_avg:579.08ms +step:46251/57344 train_time:26783197ms step_avg:579.08ms +grad accum step:11563/14336 +step:46252/57344 train_time:26784546ms step_avg:579.10ms +step:46253/57344 train_time:26784581ms step_avg:579.09ms +step:46254/57344 train_time:26784801ms step_avg:579.08ms +step:46255/57344 train_time:26785354ms step_avg:579.08ms +grad accum step:11564/14336 +step:46256/57344 train_time:26786694ms step_avg:579.10ms +step:46257/57344 train_time:26786705ms step_avg:579.08ms +step:46258/57344 train_time:26786941ms step_avg:579.08ms +step:46259/57344 train_time:26787500ms step_avg:579.08ms +grad accum step:11565/14336 +step:46260/57344 train_time:26788808ms step_avg:579.09ms +step:46261/57344 train_time:26788825ms step_avg:579.08ms +step:46262/57344 train_time:26789080ms step_avg:579.07ms +step:46263/57344 train_time:26789645ms step_avg:579.07ms +grad accum step:11566/14336 +step:46264/57344 train_time:26790978ms step_avg:579.09ms +step:46265/57344 train_time:26790989ms step_avg:579.08ms +step:46266/57344 train_time:26791250ms step_avg:579.07ms +step:46267/57344 train_time:26791844ms step_avg:579.07ms +grad accum step:11567/14336 +step:46268/57344 train_time:26793259ms step_avg:579.09ms +step:46269/57344 train_time:26793276ms step_avg:579.08ms +step:46270/57344 train_time:26793528ms step_avg:579.07ms +step:46271/57344 train_time:26794091ms step_avg:579.07ms +grad accum step:11568/14336 +step:46272/57344 train_time:26795414ms step_avg:579.08ms +step:46272/57344 val_loss:5.767856 train_time:26795415ms step_avg:579.08ms +step:46273/57344 train_time:26795426ms step_avg:579.07ms +step:46274/57344 train_time:26795656ms step_avg:579.07ms +step:46275/57344 train_time:26796229ms step_avg:579.06ms +grad accum step:11569/14336 +step:46276/57344 train_time:26797566ms step_avg:579.08ms +step:46277/57344 train_time:26797579ms step_avg:579.07ms +step:46278/57344 train_time:26797829ms step_avg:579.06ms +step:46279/57344 train_time:26798386ms step_avg:579.06ms +grad accum step:11570/14336 +step:46280/57344 train_time:26799751ms step_avg:579.08ms +step:46281/57344 train_time:26799768ms step_avg:579.07ms +step:46282/57344 train_time:26800017ms step_avg:579.06ms +step:46283/57344 train_time:26800572ms step_avg:579.06ms +grad accum step:11571/14336 +step:46284/57344 train_time:26801896ms step_avg:579.07ms +step:46285/57344 train_time:26801913ms step_avg:579.06ms +step:46286/57344 train_time:26802168ms step_avg:579.06ms +step:46287/57344 train_time:26802733ms step_avg:579.06ms +grad accum step:11572/14336 +step:46288/57344 train_time:26804043ms step_avg:579.07ms +step:46289/57344 train_time:26804055ms step_avg:579.06ms +step:46290/57344 train_time:26804296ms step_avg:579.05ms +step:46291/57344 train_time:26804849ms step_avg:579.05ms +grad accum step:11573/14336 +step:46292/57344 train_time:26806217ms step_avg:579.07ms +step:46293/57344 train_time:26806232ms step_avg:579.06ms +step:46294/57344 train_time:26806476ms step_avg:579.05ms +step:46295/57344 train_time:26807042ms step_avg:579.05ms +grad accum step:11574/14336 +step:46296/57344 train_time:26808401ms step_avg:579.07ms +step:46297/57344 train_time:26808417ms step_avg:579.05ms +step:46298/57344 train_time:26808665ms step_avg:579.05ms +step:46299/57344 train_time:26809217ms step_avg:579.05ms +grad accum step:11575/14336 +step:46300/57344 train_time:26810551ms step_avg:579.06ms +step:46301/57344 train_time:26810566ms step_avg:579.05ms +step:46302/57344 train_time:26810818ms step_avg:579.04ms +step:46303/57344 train_time:26811383ms step_avg:579.04ms +grad accum step:11576/14336 +step:46304/57344 train_time:26812687ms step_avg:579.06ms +step:46305/57344 train_time:26812702ms step_avg:579.05ms +step:46306/57344 train_time:26812948ms step_avg:579.04ms +step:46307/57344 train_time:26813496ms step_avg:579.04ms +grad accum step:11577/14336 +step:46308/57344 train_time:26814811ms step_avg:579.05ms +step:46309/57344 train_time:26814827ms step_avg:579.04ms +step:46310/57344 train_time:26815076ms step_avg:579.03ms +step:46311/57344 train_time:26815635ms step_avg:579.03ms +grad accum step:11578/14336 +step:46312/57344 train_time:26816943ms step_avg:579.05ms +step:46313/57344 train_time:26816957ms step_avg:579.04ms +step:46314/57344 train_time:26817205ms step_avg:579.03ms +step:46315/57344 train_time:26817753ms step_avg:579.03ms +grad accum step:11579/14336 +step:46316/57344 train_time:26819098ms step_avg:579.05ms +step:46317/57344 train_time:26819110ms step_avg:579.03ms +step:46318/57344 train_time:26819342ms step_avg:579.03ms +step:46319/57344 train_time:26819906ms step_avg:579.03ms +grad accum step:11580/14336 +step:46320/57344 train_time:26821247ms step_avg:579.04ms +step:46321/57344 train_time:26821263ms step_avg:579.03ms +step:46322/57344 train_time:26821517ms step_avg:579.02ms +step:46323/57344 train_time:26822080ms step_avg:579.02ms +grad accum step:11581/14336 +step:46324/57344 train_time:26823444ms step_avg:579.04ms +step:46325/57344 train_time:26823460ms step_avg:579.03ms +step:46326/57344 train_time:26823709ms step_avg:579.02ms +step:46327/57344 train_time:26824278ms step_avg:579.02ms +grad accum step:11582/14336 +step:46328/57344 train_time:26825645ms step_avg:579.04ms +step:46329/57344 train_time:26825661ms step_avg:579.03ms +step:46330/57344 train_time:26825903ms step_avg:579.02ms +step:46331/57344 train_time:26826458ms step_avg:579.02ms +grad accum step:11583/14336 +step:46332/57344 train_time:26827816ms step_avg:579.03ms +step:46333/57344 train_time:26827855ms step_avg:579.02ms +step:46334/57344 train_time:26828079ms step_avg:579.01ms +step:46335/57344 train_time:26828635ms step_avg:579.01ms +grad accum step:11584/14336 +step:46336/57344 train_time:26829940ms step_avg:579.03ms +step:46336/57344 val_loss:5.775228 train_time:26829942ms step_avg:579.03ms +step:46337/57344 train_time:26829953ms step_avg:579.02ms +step:46338/57344 train_time:26830175ms step_avg:579.01ms +step:46339/57344 train_time:26830733ms step_avg:579.01ms +grad accum step:11585/14336 +step:46340/57344 train_time:26832111ms step_avg:579.03ms +step:46341/57344 train_time:26832127ms step_avg:579.01ms +step:46342/57344 train_time:26832376ms step_avg:579.01ms +step:46343/57344 train_time:26832940ms step_avg:579.01ms +grad accum step:11586/14336 +step:46344/57344 train_time:26834294ms step_avg:579.02ms +step:46345/57344 train_time:26834310ms step_avg:579.01ms +step:46346/57344 train_time:26834552ms step_avg:579.00ms +step:46347/57344 train_time:26835106ms step_avg:579.00ms +grad accum step:11587/14336 +step:46348/57344 train_time:26836425ms step_avg:579.02ms +step:46349/57344 train_time:26836437ms step_avg:579.01ms +step:46350/57344 train_time:26836681ms step_avg:579.00ms +step:46351/57344 train_time:26837233ms step_avg:579.00ms +grad accum step:11588/14336 +step:46352/57344 train_time:26838552ms step_avg:579.02ms +step:46353/57344 train_time:26838563ms step_avg:579.00ms +step:46354/57344 train_time:26838812ms step_avg:579.00ms +step:46355/57344 train_time:26839374ms step_avg:579.00ms +grad accum step:11589/14336 +step:46356/57344 train_time:26840756ms step_avg:579.01ms +step:46357/57344 train_time:26840773ms step_avg:579.00ms +step:46358/57344 train_time:26841025ms step_avg:578.99ms +step:46359/57344 train_time:26841586ms step_avg:578.99ms +grad accum step:11590/14336 +step:46360/57344 train_time:26842949ms step_avg:579.01ms +step:46361/57344 train_time:26842963ms step_avg:579.00ms +step:46362/57344 train_time:26843219ms step_avg:578.99ms +step:46363/57344 train_time:26843842ms step_avg:578.99ms +grad accum step:11591/14336 +step:46364/57344 train_time:26845201ms step_avg:579.01ms +step:46365/57344 train_time:26845215ms step_avg:579.00ms +step:46366/57344 train_time:26845460ms step_avg:578.99ms +step:46367/57344 train_time:26846004ms step_avg:578.99ms +grad accum step:11592/14336 +step:46368/57344 train_time:26847320ms step_avg:579.01ms +step:46369/57344 train_time:26847342ms step_avg:578.99ms +step:46370/57344 train_time:26847564ms step_avg:578.99ms +step:46371/57344 train_time:26848112ms step_avg:578.98ms +grad accum step:11593/14336 +step:46372/57344 train_time:26849443ms step_avg:579.00ms +step:46373/57344 train_time:26849466ms step_avg:578.99ms +step:46374/57344 train_time:26849703ms step_avg:578.98ms +step:46375/57344 train_time:26850238ms step_avg:578.98ms +grad accum step:11594/14336 +step:46376/57344 train_time:26851554ms step_avg:579.00ms +step:46377/57344 train_time:26851571ms step_avg:578.98ms +step:46378/57344 train_time:26851811ms step_avg:578.98ms +step:46379/57344 train_time:26852367ms step_avg:578.98ms +grad accum step:11595/14336 +step:46380/57344 train_time:26853739ms step_avg:578.99ms +step:46381/57344 train_time:26853754ms step_avg:578.98ms +step:46382/57344 train_time:26853993ms step_avg:578.97ms +step:46383/57344 train_time:26854545ms step_avg:578.97ms +grad accum step:11596/14336 +step:46384/57344 train_time:26855856ms step_avg:578.99ms +step:46385/57344 train_time:26855872ms step_avg:578.98ms +step:46386/57344 train_time:26856120ms step_avg:578.97ms +step:46387/57344 train_time:26856687ms step_avg:578.97ms +grad accum step:11597/14336 +step:46388/57344 train_time:26857979ms step_avg:578.99ms +step:46389/57344 train_time:26857994ms step_avg:578.97ms +step:46390/57344 train_time:26858243ms step_avg:578.97ms +step:46391/57344 train_time:26858800ms step_avg:578.97ms +grad accum step:11598/14336 +step:46392/57344 train_time:26860144ms step_avg:578.98ms +step:46393/57344 train_time:26860162ms step_avg:578.97ms +step:46394/57344 train_time:26860425ms step_avg:578.96ms +step:46395/57344 train_time:26861027ms step_avg:578.96ms +grad accum step:11599/14336 +step:46396/57344 train_time:26862344ms step_avg:578.98ms +step:46397/57344 train_time:26862361ms step_avg:578.97ms +step:46398/57344 train_time:26862597ms step_avg:578.96ms +step:46399/57344 train_time:26863151ms step_avg:578.96ms +grad accum step:11600/14336 +step:46400/57344 train_time:26864483ms step_avg:578.98ms +step:46400/57344 val_loss:5.789692 train_time:26864486ms step_avg:578.98ms +step:46401/57344 train_time:26864497ms step_avg:578.96ms +step:46402/57344 train_time:26864721ms step_avg:578.96ms +step:46403/57344 train_time:26865280ms step_avg:578.96ms +grad accum step:11601/14336 +step:46404/57344 train_time:26866646ms step_avg:578.97ms +step:46405/57344 train_time:26866661ms step_avg:578.96ms +step:46406/57344 train_time:26866918ms step_avg:578.95ms +step:46407/57344 train_time:26867496ms step_avg:578.95ms +grad accum step:11602/14336 +step:46408/57344 train_time:26868803ms step_avg:578.97ms +step:46409/57344 train_time:26869013ms step_avg:578.96ms +step:46410/57344 train_time:26869239ms step_avg:578.95ms +step:46411/57344 train_time:26869810ms step_avg:578.95ms +grad accum step:11603/14336 +step:46412/57344 train_time:26871155ms step_avg:578.97ms +step:46413/57344 train_time:26871172ms step_avg:578.96ms +step:46414/57344 train_time:26871410ms step_avg:578.95ms +step:46415/57344 train_time:26871955ms step_avg:578.95ms +grad accum step:11604/14336 +step:46416/57344 train_time:26873309ms step_avg:578.97ms +step:46417/57344 train_time:26873326ms step_avg:578.95ms +step:46418/57344 train_time:26873576ms step_avg:578.95ms +step:46419/57344 train_time:26874146ms step_avg:578.95ms +grad accum step:11605/14336 +step:46420/57344 train_time:26875488ms step_avg:578.96ms +step:46421/57344 train_time:26875502ms step_avg:578.95ms +step:46422/57344 train_time:26875749ms step_avg:578.94ms +step:46423/57344 train_time:26876291ms step_avg:578.94ms +grad accum step:11606/14336 +step:46424/57344 train_time:26877622ms step_avg:578.96ms +step:46425/57344 train_time:26877639ms step_avg:578.95ms +step:46426/57344 train_time:26877881ms step_avg:578.94ms +step:46427/57344 train_time:26878439ms step_avg:578.94ms +grad accum step:11607/14336 +step:46428/57344 train_time:26879798ms step_avg:578.96ms +step:46429/57344 train_time:26879813ms step_avg:578.94ms +step:46430/57344 train_time:26880055ms step_avg:578.94ms +step:46431/57344 train_time:26880613ms step_avg:578.94ms +grad accum step:11608/14336 +step:46432/57344 train_time:26882189ms step_avg:578.96ms +step:46433/57344 train_time:26882208ms step_avg:578.95ms +step:46434/57344 train_time:26882431ms step_avg:578.94ms +step:46435/57344 train_time:26882994ms step_avg:578.94ms +grad accum step:11609/14336 +step:46436/57344 train_time:26884330ms step_avg:578.95ms +step:46437/57344 train_time:26884352ms step_avg:578.94ms +step:46438/57344 train_time:26884580ms step_avg:578.93ms +step:46439/57344 train_time:26885122ms step_avg:578.93ms +grad accum step:11610/14336 +step:46440/57344 train_time:26886457ms step_avg:578.95ms +step:46441/57344 train_time:26886475ms step_avg:578.94ms +step:46442/57344 train_time:26886726ms step_avg:578.93ms +step:46443/57344 train_time:26887327ms step_avg:578.93ms +grad accum step:11611/14336 +step:46444/57344 train_time:26888685ms step_avg:578.95ms +step:46445/57344 train_time:26888715ms step_avg:578.94ms +step:46446/57344 train_time:26888939ms step_avg:578.93ms +step:46447/57344 train_time:26889501ms step_avg:578.93ms +grad accum step:11612/14336 +step:46448/57344 train_time:26890845ms step_avg:578.95ms +step:46449/57344 train_time:26890863ms step_avg:578.93ms +step:46450/57344 train_time:26891102ms step_avg:578.93ms +step:46451/57344 train_time:26891651ms step_avg:578.93ms +grad accum step:11613/14336 +step:46452/57344 train_time:26893016ms step_avg:578.94ms +step:46453/57344 train_time:26893036ms step_avg:578.93ms +step:46454/57344 train_time:26893269ms step_avg:578.92ms +step:46455/57344 train_time:26893823ms step_avg:578.92ms +grad accum step:11614/14336 +step:46456/57344 train_time:26895145ms step_avg:578.94ms +step:46457/57344 train_time:26895162ms step_avg:578.93ms +step:46458/57344 train_time:26895408ms step_avg:578.92ms +step:46459/57344 train_time:26895973ms step_avg:578.92ms +grad accum step:11615/14336 +step:46460/57344 train_time:26897307ms step_avg:578.93ms +step:46461/57344 train_time:26897323ms step_avg:578.92ms +step:46462/57344 train_time:26897574ms step_avg:578.92ms +step:46463/57344 train_time:26898140ms step_avg:578.92ms +grad accum step:11616/14336 +step:46464/57344 train_time:26899526ms step_avg:578.93ms +step:46464/57344 val_loss:5.799087 train_time:26899528ms step_avg:578.93ms +step:46465/57344 train_time:26899540ms step_avg:578.92ms +step:46466/57344 train_time:26899765ms step_avg:578.91ms +step:46467/57344 train_time:26900318ms step_avg:578.91ms +grad accum step:11617/14336 +step:46468/57344 train_time:26901628ms step_avg:578.93ms +step:46469/57344 train_time:26901644ms step_avg:578.92ms +step:46470/57344 train_time:26901886ms step_avg:578.91ms +step:46471/57344 train_time:26902439ms step_avg:578.91ms +grad accum step:11618/14336 +step:46472/57344 train_time:26903761ms step_avg:578.92ms +step:46473/57344 train_time:26903777ms step_avg:578.91ms +step:46474/57344 train_time:26904025ms step_avg:578.90ms +step:46475/57344 train_time:26904577ms step_avg:578.90ms +grad accum step:11619/14336 +step:46476/57344 train_time:26905938ms step_avg:578.92ms +step:46477/57344 train_time:26905958ms step_avg:578.91ms +step:46478/57344 train_time:26906202ms step_avg:578.90ms +step:46479/57344 train_time:26906780ms step_avg:578.90ms +grad accum step:11620/14336 +step:46480/57344 train_time:26908096ms step_avg:578.92ms +step:46481/57344 train_time:26908112ms step_avg:578.91ms +step:46482/57344 train_time:26908362ms step_avg:578.90ms +step:46483/57344 train_time:26908932ms step_avg:578.90ms +grad accum step:11621/14336 +step:46484/57344 train_time:26910293ms step_avg:578.92ms +step:46485/57344 train_time:26910311ms step_avg:578.90ms +step:46486/57344 train_time:26910552ms step_avg:578.90ms +step:46487/57344 train_time:26911108ms step_avg:578.90ms +grad accum step:11622/14336 +step:46488/57344 train_time:26912474ms step_avg:578.91ms +step:46489/57344 train_time:26912494ms step_avg:578.90ms +step:46490/57344 train_time:26912713ms step_avg:578.89ms +step:46491/57344 train_time:26913268ms step_avg:578.89ms +grad accum step:11623/14336 +step:46492/57344 train_time:26914694ms step_avg:578.91ms +step:46493/57344 train_time:26914716ms step_avg:578.90ms +step:46494/57344 train_time:26914932ms step_avg:578.89ms +step:46495/57344 train_time:26915484ms step_avg:578.89ms +grad accum step:11624/14336 +step:46496/57344 train_time:26916837ms step_avg:578.91ms +step:46497/57344 train_time:26916857ms step_avg:578.89ms +step:46498/57344 train_time:26917088ms step_avg:578.89ms +step:46499/57344 train_time:26917635ms step_avg:578.89ms +grad accum step:11625/14336 +step:46500/57344 train_time:26919032ms step_avg:578.90ms +step:46501/57344 train_time:26919352ms step_avg:578.90ms +step:46502/57344 train_time:26919571ms step_avg:578.89ms +step:46503/57344 train_time:26920131ms step_avg:578.89ms +grad accum step:11626/14336 +step:46504/57344 train_time:26921450ms step_avg:578.91ms +step:46505/57344 train_time:26921467ms step_avg:578.89ms +step:46506/57344 train_time:26921715ms step_avg:578.89ms +step:46507/57344 train_time:26922267ms step_avg:578.89ms +grad accum step:11627/14336 +step:46508/57344 train_time:26923585ms step_avg:578.90ms +step:46509/57344 train_time:26923601ms step_avg:578.89ms +step:46510/57344 train_time:26923840ms step_avg:578.88ms +step:46511/57344 train_time:26924390ms step_avg:578.88ms +grad accum step:11628/14336 +step:46512/57344 train_time:26925759ms step_avg:578.90ms +step:46513/57344 train_time:26925771ms step_avg:578.89ms +step:46514/57344 train_time:26926018ms step_avg:578.88ms +step:46515/57344 train_time:26926571ms step_avg:578.88ms +grad accum step:11629/14336 +step:46516/57344 train_time:26927915ms step_avg:578.90ms +step:46517/57344 train_time:26927931ms step_avg:578.88ms +step:46518/57344 train_time:26928206ms step_avg:578.88ms +step:46519/57344 train_time:26928831ms step_avg:578.88ms +grad accum step:11630/14336 +step:46520/57344 train_time:26930143ms step_avg:578.89ms +step:46521/57344 train_time:26930165ms step_avg:578.88ms +step:46522/57344 train_time:26930400ms step_avg:578.87ms +step:46523/57344 train_time:26930965ms step_avg:578.87ms +grad accum step:11631/14336 +step:46524/57344 train_time:26932468ms step_avg:578.89ms +step:46525/57344 train_time:26932492ms step_avg:578.88ms +step:46526/57344 train_time:26932719ms step_avg:578.87ms +step:46527/57344 train_time:26933311ms step_avg:578.87ms +grad accum step:11632/14336 +step:46528/57344 train_time:26934705ms step_avg:578.89ms +step:46528/57344 val_loss:5.813019 train_time:26934711ms step_avg:578.89ms +step:46529/57344 train_time:26934722ms step_avg:578.88ms +step:46530/57344 train_time:26934945ms step_avg:578.87ms +step:46531/57344 train_time:26935498ms step_avg:578.87ms +grad accum step:11633/14336 +step:46532/57344 train_time:26936837ms step_avg:578.89ms +step:46533/57344 train_time:26936861ms step_avg:578.88ms +step:46534/57344 train_time:26937093ms step_avg:578.87ms +step:46535/57344 train_time:26937654ms step_avg:578.87ms +grad accum step:11634/14336 +step:46536/57344 train_time:26938991ms step_avg:578.88ms +step:46537/57344 train_time:26939020ms step_avg:578.87ms +step:46538/57344 train_time:26939255ms step_avg:578.87ms +step:46539/57344 train_time:26939807ms step_avg:578.87ms +grad accum step:11635/14336 +step:46540/57344 train_time:26941156ms step_avg:578.88ms +step:46541/57344 train_time:26941170ms step_avg:578.87ms +step:46542/57344 train_time:26941391ms step_avg:578.86ms +step:46543/57344 train_time:26941930ms step_avg:578.86ms +grad accum step:11636/14336 +step:46544/57344 train_time:26943293ms step_avg:578.88ms +step:46545/57344 train_time:26943310ms step_avg:578.87ms +step:46546/57344 train_time:26943535ms step_avg:578.86ms +step:46547/57344 train_time:26944094ms step_avg:578.86ms +grad accum step:11637/14336 +step:46548/57344 train_time:26945408ms step_avg:578.87ms +step:46549/57344 train_time:26945421ms step_avg:578.86ms +step:46550/57344 train_time:26945667ms step_avg:578.85ms +step:46551/57344 train_time:26946224ms step_avg:578.85ms +grad accum step:11638/14336 +step:46552/57344 train_time:26947553ms step_avg:578.87ms +step:46553/57344 train_time:26947597ms step_avg:578.86ms +step:46554/57344 train_time:26947823ms step_avg:578.85ms +step:46555/57344 train_time:26948391ms step_avg:578.85ms +grad accum step:11639/14336 +step:46556/57344 train_time:26949756ms step_avg:578.87ms +step:46557/57344 train_time:26949780ms step_avg:578.86ms +step:46558/57344 train_time:26950004ms step_avg:578.85ms +step:46559/57344 train_time:26950576ms step_avg:578.85ms +grad accum step:11640/14336 +step:46560/57344 train_time:26951939ms step_avg:578.86ms +step:46561/57344 train_time:26951979ms step_avg:578.85ms +step:46562/57344 train_time:26952198ms step_avg:578.85ms +step:46563/57344 train_time:26952739ms step_avg:578.84ms +grad accum step:11641/14336 +step:46564/57344 train_time:26954069ms step_avg:578.86ms +step:46565/57344 train_time:26954080ms step_avg:578.85ms +step:46566/57344 train_time:26954321ms step_avg:578.84ms +step:46567/57344 train_time:26954867ms step_avg:578.84ms +grad accum step:11642/14336 +step:46568/57344 train_time:26956209ms step_avg:578.86ms +step:46569/57344 train_time:26956226ms step_avg:578.84ms +step:46570/57344 train_time:26956486ms step_avg:578.84ms +step:46571/57344 train_time:26957074ms step_avg:578.84ms +grad accum step:11643/14336 +step:46572/57344 train_time:26958427ms step_avg:578.85ms +step:46573/57344 train_time:26958440ms step_avg:578.84ms +step:46574/57344 train_time:26958677ms step_avg:578.84ms +step:46575/57344 train_time:26959223ms step_avg:578.83ms +grad accum step:11644/14336 +step:46576/57344 train_time:26960581ms step_avg:578.85ms +step:46577/57344 train_time:26960596ms step_avg:578.84ms +step:46578/57344 train_time:26960843ms step_avg:578.83ms +step:46579/57344 train_time:26961410ms step_avg:578.83ms +grad accum step:11645/14336 +step:46580/57344 train_time:26962793ms step_avg:578.85ms +step:46581/57344 train_time:26962809ms step_avg:578.84ms +step:46582/57344 train_time:26963056ms step_avg:578.83ms +step:46583/57344 train_time:26963595ms step_avg:578.83ms +grad accum step:11646/14336 +step:46584/57344 train_time:26964891ms step_avg:578.84ms +step:46585/57344 train_time:26964906ms step_avg:578.83ms +step:46586/57344 train_time:26965153ms step_avg:578.83ms +step:46587/57344 train_time:26965696ms step_avg:578.82ms +grad accum step:11647/14336 +step:46588/57344 train_time:26966988ms step_avg:578.84ms +step:46589/57344 train_time:26967005ms step_avg:578.83ms +step:46590/57344 train_time:26967252ms step_avg:578.82ms +step:46591/57344 train_time:26967805ms step_avg:578.82ms +grad accum step:11648/14336 +step:46592/57344 train_time:26969183ms step_avg:578.84ms +step:46592/57344 val_loss:5.820201 train_time:26969200ms step_avg:578.84ms +step:46593/57344 train_time:26969211ms step_avg:578.83ms +step:46594/57344 train_time:26969443ms step_avg:578.82ms +step:46595/57344 train_time:26970013ms step_avg:578.82ms +grad accum step:11649/14336 +step:46596/57344 train_time:26971374ms step_avg:578.83ms +step:46597/57344 train_time:26971394ms step_avg:578.82ms +step:46598/57344 train_time:26971638ms step_avg:578.82ms +step:46599/57344 train_time:26972204ms step_avg:578.82ms +grad accum step:11650/14336 +step:46600/57344 train_time:26973566ms step_avg:578.83ms +step:46601/57344 train_time:26973596ms step_avg:578.82ms +step:46602/57344 train_time:26973830ms step_avg:578.81ms +step:46603/57344 train_time:26974427ms step_avg:578.81ms +grad accum step:11651/14336 +step:46604/57344 train_time:26975814ms step_avg:578.83ms +step:46605/57344 train_time:26975831ms step_avg:578.82ms +step:46606/57344 train_time:26976075ms step_avg:578.81ms +step:46607/57344 train_time:26976617ms step_avg:578.81ms +grad accum step:11652/14336 +step:46608/57344 train_time:26977957ms step_avg:578.83ms +step:46609/57344 train_time:26977980ms step_avg:578.81ms +step:46610/57344 train_time:26978217ms step_avg:578.81ms +step:46611/57344 train_time:26978791ms step_avg:578.81ms +grad accum step:11653/14336 +step:46612/57344 train_time:26980088ms step_avg:578.82ms +step:46613/57344 train_time:26980101ms step_avg:578.81ms +step:46614/57344 train_time:26980358ms step_avg:578.80ms +step:46615/57344 train_time:26980934ms step_avg:578.80ms +grad accum step:11654/14336 +step:46616/57344 train_time:26982256ms step_avg:578.82ms +step:46617/57344 train_time:26982272ms step_avg:578.81ms +step:46618/57344 train_time:26982522ms step_avg:578.80ms +step:46619/57344 train_time:26983075ms step_avg:578.80ms +grad accum step:11655/14336 +step:46620/57344 train_time:26984411ms step_avg:578.82ms +step:46621/57344 train_time:26984432ms step_avg:578.80ms +step:46622/57344 train_time:26984670ms step_avg:578.80ms +step:46623/57344 train_time:26985213ms step_avg:578.80ms +grad accum step:11656/14336 +step:46624/57344 train_time:26986526ms step_avg:578.81ms +step:46625/57344 train_time:26986542ms step_avg:578.80ms +step:46626/57344 train_time:26986787ms step_avg:578.79ms +step:46627/57344 train_time:26987337ms step_avg:578.79ms +grad accum step:11657/14336 +step:46628/57344 train_time:26988671ms step_avg:578.81ms +step:46629/57344 train_time:26988692ms step_avg:578.80ms +step:46630/57344 train_time:26988934ms step_avg:578.79ms +step:46631/57344 train_time:26989510ms step_avg:578.79ms +grad accum step:11658/14336 +step:46632/57344 train_time:26990872ms step_avg:578.81ms +step:46633/57344 train_time:26990892ms step_avg:578.79ms +step:46634/57344 train_time:26991120ms step_avg:578.79ms +step:46635/57344 train_time:26991677ms step_avg:578.79ms +grad accum step:11659/14336 +step:46636/57344 train_time:26992994ms step_avg:578.80ms +step:46637/57344 train_time:26993022ms step_avg:578.79ms +step:46638/57344 train_time:26993250ms step_avg:578.78ms +step:46639/57344 train_time:26993802ms step_avg:578.78ms +grad accum step:11660/14336 +step:46640/57344 train_time:26995113ms step_avg:578.80ms +step:46641/57344 train_time:26995133ms step_avg:578.79ms +step:46642/57344 train_time:26995371ms step_avg:578.78ms +step:46643/57344 train_time:26995937ms step_avg:578.78ms +grad accum step:11661/14336 +step:46644/57344 train_time:27029475ms step_avg:579.48ms +step:46645/57344 train_time:27029499ms step_avg:579.47ms +step:46646/57344 train_time:27029799ms step_avg:579.47ms +step:46647/57344 train_time:27030355ms step_avg:579.47ms +grad accum step:11662/14336 +step:46648/57344 train_time:27031721ms step_avg:579.48ms +step:46649/57344 train_time:27031733ms step_avg:579.47ms +step:46650/57344 train_time:27031978ms step_avg:579.46ms +step:46651/57344 train_time:27032536ms step_avg:579.46ms +grad accum step:11663/14336 +step:46652/57344 train_time:27033970ms step_avg:579.48ms +step:46653/57344 train_time:27034014ms step_avg:579.47ms +step:46654/57344 train_time:27034232ms step_avg:579.46ms +step:46655/57344 train_time:27034775ms step_avg:579.46ms +grad accum step:11664/14336 +step:46656/57344 train_time:27036107ms step_avg:579.48ms +step:46656/57344 val_loss:5.835791 train_time:27036107ms step_avg:579.48ms +step:46657/57344 train_time:27036119ms step_avg:579.47ms +step:46658/57344 train_time:27036387ms step_avg:579.46ms +step:46659/57344 train_time:27036938ms step_avg:579.46ms +grad accum step:11665/14336 +step:46660/57344 train_time:27038349ms step_avg:579.48ms +step:46661/57344 train_time:27038366ms step_avg:579.46ms +step:46662/57344 train_time:27038610ms step_avg:579.46ms +step:46663/57344 train_time:27039168ms step_avg:579.46ms +grad accum step:11666/14336 +step:46664/57344 train_time:27040552ms step_avg:579.47ms +step:46665/57344 train_time:27040571ms step_avg:579.46ms +step:46666/57344 train_time:27040803ms step_avg:579.45ms +step:46667/57344 train_time:27041366ms step_avg:579.45ms +grad accum step:11667/14336 +step:46668/57344 train_time:27042665ms step_avg:579.47ms +step:46669/57344 train_time:27042682ms step_avg:579.46ms +step:46670/57344 train_time:27042923ms step_avg:579.45ms +step:46671/57344 train_time:27043470ms step_avg:579.45ms +grad accum step:11668/14336 +step:46672/57344 train_time:27044773ms step_avg:579.46ms +step:46673/57344 train_time:27044784ms step_avg:579.45ms +step:46674/57344 train_time:27045020ms step_avg:579.45ms +step:46675/57344 train_time:27045569ms step_avg:579.44ms +grad accum step:11669/14336 +step:46676/57344 train_time:27046862ms step_avg:579.46ms +step:46677/57344 train_time:27046879ms step_avg:579.45ms +step:46678/57344 train_time:27047119ms step_avg:579.44ms +step:46679/57344 train_time:27047651ms step_avg:579.44ms +grad accum step:11670/14336 +step:46680/57344 train_time:27048982ms step_avg:579.46ms +step:46681/57344 train_time:27048993ms step_avg:579.44ms +step:46682/57344 train_time:27049234ms step_avg:579.44ms +step:46683/57344 train_time:27049781ms step_avg:579.44ms +grad accum step:11671/14336 +step:46684/57344 train_time:27051120ms step_avg:579.45ms +step:46685/57344 train_time:27051158ms step_avg:579.44ms +step:46686/57344 train_time:27051383ms step_avg:579.43ms +step:46687/57344 train_time:27051942ms step_avg:579.43ms +grad accum step:11672/14336 +step:46688/57344 train_time:27053234ms step_avg:579.45ms +step:46689/57344 train_time:27053263ms step_avg:579.44ms +step:46690/57344 train_time:27053479ms step_avg:579.43ms +step:46691/57344 train_time:27054025ms step_avg:579.43ms +grad accum step:11673/14336 +step:46692/57344 train_time:27055364ms step_avg:579.44ms +step:46693/57344 train_time:27055375ms step_avg:579.43ms +step:46694/57344 train_time:27055603ms step_avg:579.42ms +step:46695/57344 train_time:27056147ms step_avg:579.42ms +grad accum step:11674/14336 +step:46696/57344 train_time:27057454ms step_avg:579.44ms +step:46697/57344 train_time:27057470ms step_avg:579.43ms +step:46698/57344 train_time:27057716ms step_avg:579.42ms +step:46699/57344 train_time:27058265ms step_avg:579.42ms +grad accum step:11675/14336 +step:46700/57344 train_time:27059607ms step_avg:579.43ms +step:46701/57344 train_time:27059624ms step_avg:579.42ms +step:46702/57344 train_time:27059872ms step_avg:579.42ms +step:46703/57344 train_time:27060425ms step_avg:579.42ms +grad accum step:11676/14336 +step:46704/57344 train_time:27061739ms step_avg:579.43ms +step:46705/57344 train_time:27061756ms step_avg:579.42ms +step:46706/57344 train_time:27062010ms step_avg:579.41ms +step:46707/57344 train_time:27062571ms step_avg:579.41ms +grad accum step:11677/14336 +step:46708/57344 train_time:27063875ms step_avg:579.43ms +step:46709/57344 train_time:27063892ms step_avg:579.41ms +step:46710/57344 train_time:27064139ms step_avg:579.41ms +step:46711/57344 train_time:27064697ms step_avg:579.41ms +grad accum step:11678/14336 +step:46712/57344 train_time:27065993ms step_avg:579.42ms +step:46713/57344 train_time:27066010ms step_avg:579.41ms +step:46714/57344 train_time:27066254ms step_avg:579.40ms +step:46715/57344 train_time:27066810ms step_avg:579.40ms +grad accum step:11679/14336 +step:46716/57344 train_time:27068139ms step_avg:579.42ms +step:46717/57344 train_time:27068155ms step_avg:579.41ms +step:46718/57344 train_time:27068400ms step_avg:579.40ms +step:46719/57344 train_time:27085286ms step_avg:579.75ms +grad accum step:11680/14336 +step:46720/57344 train_time:27086392ms step_avg:579.76ms +step:46720/57344 val_loss:5.844876 train_time:27086393ms step_avg:579.76ms +step:46721/57344 train_time:27086404ms step_avg:579.75ms +step:46722/57344 train_time:27086692ms step_avg:579.74ms +step:46723/57344 train_time:27087238ms step_avg:579.74ms +grad accum step:11681/14336 +step:46724/57344 train_time:27088593ms step_avg:579.76ms +step:46725/57344 train_time:27088607ms step_avg:579.75ms +step:46726/57344 train_time:27088858ms step_avg:579.74ms +step:46727/57344 train_time:27089434ms step_avg:579.74ms +grad accum step:11682/14336 +step:46728/57344 train_time:27090797ms step_avg:579.76ms +step:46729/57344 train_time:27090812ms step_avg:579.74ms +step:46730/57344 train_time:27091056ms step_avg:579.74ms +step:46731/57344 train_time:27091599ms step_avg:579.74ms +grad accum step:11683/14336 +step:46732/57344 train_time:27092911ms step_avg:579.75ms +step:46733/57344 train_time:27092924ms step_avg:579.74ms +step:46734/57344 train_time:27093169ms step_avg:579.73ms +step:46735/57344 train_time:27093719ms step_avg:579.73ms +grad accum step:11684/14336 +step:46736/57344 train_time:27095035ms step_avg:579.75ms +step:46737/57344 train_time:27095049ms step_avg:579.73ms +step:46738/57344 train_time:27095296ms step_avg:579.73ms +step:46739/57344 train_time:27095846ms step_avg:579.73ms +grad accum step:11685/14336 +step:46740/57344 train_time:27097139ms step_avg:579.74ms +step:46741/57344 train_time:27097156ms step_avg:579.73ms +step:46742/57344 train_time:27097400ms step_avg:579.72ms +step:46743/57344 train_time:27097937ms step_avg:579.72ms +grad accum step:11686/14336 +step:46744/57344 train_time:27099244ms step_avg:579.74ms +step:46745/57344 train_time:27099256ms step_avg:579.73ms +step:46746/57344 train_time:27099494ms step_avg:579.72ms +step:46747/57344 train_time:27100034ms step_avg:579.72ms +grad accum step:11687/14336 +step:46748/57344 train_time:27101348ms step_avg:579.73ms +step:46749/57344 train_time:27101363ms step_avg:579.72ms +step:46750/57344 train_time:27101607ms step_avg:579.71ms +step:46751/57344 train_time:27102162ms step_avg:579.71ms +grad accum step:11688/14336 +step:46752/57344 train_time:27103477ms step_avg:579.73ms +step:46753/57344 train_time:27103491ms step_avg:579.72ms +step:46754/57344 train_time:27103734ms step_avg:579.71ms +step:46755/57344 train_time:27104289ms step_avg:579.71ms +grad accum step:11689/14336 +step:46756/57344 train_time:27105589ms step_avg:579.72ms +step:46757/57344 train_time:27105607ms step_avg:579.71ms +step:46758/57344 train_time:27105855ms step_avg:579.71ms +step:46759/57344 train_time:27106413ms step_avg:579.70ms +grad accum step:11690/14336 +step:46760/57344 train_time:27107717ms step_avg:579.72ms +step:46761/57344 train_time:27107733ms step_avg:579.71ms +step:46762/57344 train_time:27107988ms step_avg:579.70ms +step:46763/57344 train_time:27108554ms step_avg:579.70ms +grad accum step:11691/14336 +step:46764/57344 train_time:27109868ms step_avg:579.72ms +step:46765/57344 train_time:27109884ms step_avg:579.70ms +step:46766/57344 train_time:27110132ms step_avg:579.70ms +step:46767/57344 train_time:27110683ms step_avg:579.70ms +grad accum step:11692/14336 +step:46768/57344 train_time:27112027ms step_avg:579.71ms +step:46769/57344 train_time:27112049ms step_avg:579.70ms +step:46770/57344 train_time:27112287ms step_avg:579.69ms +step:46771/57344 train_time:27112849ms step_avg:579.69ms +grad accum step:11693/14336 +step:46772/57344 train_time:27114154ms step_avg:579.71ms +step:46773/57344 train_time:27114176ms step_avg:579.70ms +step:46774/57344 train_time:27114413ms step_avg:579.69ms +step:46775/57344 train_time:27114968ms step_avg:579.69ms +grad accum step:11694/14336 +step:46776/57344 train_time:27116321ms step_avg:579.71ms +step:46777/57344 train_time:27116335ms step_avg:579.69ms +step:46778/57344 train_time:27116574ms step_avg:579.69ms +step:46779/57344 train_time:27117139ms step_avg:579.69ms +grad accum step:11695/14336 +step:46780/57344 train_time:27118491ms step_avg:579.70ms +step:46781/57344 train_time:27118506ms step_avg:579.69ms +step:46782/57344 train_time:27118747ms step_avg:579.68ms +step:46783/57344 train_time:27119292ms step_avg:579.68ms +grad accum step:11696/14336 +step:46784/57344 train_time:27120646ms step_avg:579.70ms +step:46784/57344 val_loss:5.853851 train_time:27120656ms step_avg:579.70ms +step:46785/57344 train_time:27120668ms step_avg:579.69ms +step:46786/57344 train_time:27120884ms step_avg:579.68ms +step:46787/57344 train_time:27121421ms step_avg:579.68ms +grad accum step:11697/14336 +step:46788/57344 train_time:27122760ms step_avg:579.69ms +step:46789/57344 train_time:27122775ms step_avg:579.68ms +step:46790/57344 train_time:27123030ms step_avg:579.68ms +step:46791/57344 train_time:27123596ms step_avg:579.68ms +grad accum step:11698/14336 +step:46792/57344 train_time:27124948ms step_avg:579.69ms +step:46793/57344 train_time:27124967ms step_avg:579.68ms +step:46794/57344 train_time:27125203ms step_avg:579.67ms +step:46795/57344 train_time:27125744ms step_avg:579.67ms +grad accum step:11699/14336 +step:46796/57344 train_time:27127081ms step_avg:579.69ms +step:46797/57344 train_time:27127096ms step_avg:579.68ms +step:46798/57344 train_time:27127345ms step_avg:579.67ms +step:46799/57344 train_time:27127910ms step_avg:579.67ms +grad accum step:11700/14336 +step:46800/57344 train_time:27129273ms step_avg:579.69ms +step:46801/57344 train_time:27129287ms step_avg:579.67ms +step:46802/57344 train_time:27129533ms step_avg:579.67ms +step:46803/57344 train_time:27130080ms step_avg:579.67ms +grad accum step:11701/14336 +step:46804/57344 train_time:27131396ms step_avg:579.68ms +step:46805/57344 train_time:27131416ms step_avg:579.67ms +step:46806/57344 train_time:27131662ms step_avg:579.66ms +step:46807/57344 train_time:27132236ms step_avg:579.66ms +grad accum step:11702/14336 +step:46808/57344 train_time:27133568ms step_avg:579.68ms +step:46809/57344 train_time:27133583ms step_avg:579.67ms +step:46810/57344 train_time:27133835ms step_avg:579.66ms +step:46811/57344 train_time:27134401ms step_avg:579.66ms +grad accum step:11703/14336 +step:46812/57344 train_time:27135933ms step_avg:579.68ms +step:46813/57344 train_time:27136058ms step_avg:579.67ms +step:46814/57344 train_time:27136273ms step_avg:579.66ms +step:46815/57344 train_time:27136834ms step_avg:579.66ms +grad accum step:11704/14336 +step:46816/57344 train_time:27138192ms step_avg:579.68ms +step:46817/57344 train_time:27138207ms step_avg:579.67ms +step:46818/57344 train_time:27138456ms step_avg:579.66ms +step:46819/57344 train_time:27139025ms step_avg:579.66ms +grad accum step:11705/14336 +step:46820/57344 train_time:27140372ms step_avg:579.67ms +step:46821/57344 train_time:27140388ms step_avg:579.66ms +step:46822/57344 train_time:27140637ms step_avg:579.66ms +step:46823/57344 train_time:27141191ms step_avg:579.66ms +grad accum step:11706/14336 +step:46824/57344 train_time:27142527ms step_avg:579.67ms +step:46825/57344 train_time:27142542ms step_avg:579.66ms +step:46826/57344 train_time:27142790ms step_avg:579.65ms +step:46827/57344 train_time:27143347ms step_avg:579.65ms +grad accum step:11707/14336 +step:46828/57344 train_time:27144654ms step_avg:579.67ms +step:46829/57344 train_time:27144681ms step_avg:579.66ms +step:46830/57344 train_time:27144912ms step_avg:579.65ms +step:46831/57344 train_time:27145458ms step_avg:579.65ms +grad accum step:11708/14336 +step:46832/57344 train_time:27146790ms step_avg:579.66ms +step:46833/57344 train_time:27146806ms step_avg:579.65ms +step:46834/57344 train_time:27147065ms step_avg:579.64ms +step:46835/57344 train_time:27147683ms step_avg:579.65ms +grad accum step:11709/14336 +step:46836/57344 train_time:27149075ms step_avg:579.66ms +step:46837/57344 train_time:27149090ms step_avg:579.65ms +step:46838/57344 train_time:27149346ms step_avg:579.64ms +step:46839/57344 train_time:27149925ms step_avg:579.64ms +grad accum step:11710/14336 +step:46840/57344 train_time:27151437ms step_avg:579.66ms +step:46841/57344 train_time:27151729ms step_avg:579.66ms +step:46842/57344 train_time:27151966ms step_avg:579.65ms +step:46843/57344 train_time:27152565ms step_avg:579.65ms +grad accum step:11711/14336 +step:46844/57344 train_time:27153963ms step_avg:579.67ms +step:46845/57344 train_time:27153985ms step_avg:579.66ms +step:46846/57344 train_time:27154204ms step_avg:579.65ms +step:46847/57344 train_time:27154752ms step_avg:579.65ms +grad accum step:11712/14336 +step:46848/57344 train_time:27157446ms step_avg:579.69ms +step:46848/57344 val_loss:5.871704 train_time:27157591ms step_avg:579.70ms +step:46849/57344 train_time:27157678ms step_avg:579.69ms +step:46850/57344 train_time:27157840ms step_avg:579.68ms +step:46851/57344 train_time:27158385ms step_avg:579.68ms +grad accum step:11713/14336 +step:46852/57344 train_time:27159706ms step_avg:579.69ms +step:46853/57344 train_time:27159724ms step_avg:579.68ms +step:46854/57344 train_time:27159966ms step_avg:579.67ms +step:46855/57344 train_time:27160524ms step_avg:579.67ms +grad accum step:11714/14336 +step:46856/57344 train_time:27161889ms step_avg:579.69ms +step:46857/57344 train_time:27161908ms step_avg:579.68ms +step:46858/57344 train_time:27162151ms step_avg:579.67ms +step:46859/57344 train_time:27162710ms step_avg:579.67ms +grad accum step:11715/14336 +step:46860/57344 train_time:27164027ms step_avg:579.68ms +step:46861/57344 train_time:27164040ms step_avg:579.67ms +step:46862/57344 train_time:27164289ms step_avg:579.67ms +step:46863/57344 train_time:27164845ms step_avg:579.67ms +grad accum step:11716/14336 +step:46864/57344 train_time:27166206ms step_avg:579.68ms +step:46865/57344 train_time:27166222ms step_avg:579.67ms +step:46866/57344 train_time:27166444ms step_avg:579.66ms +step:46867/57344 train_time:27166993ms step_avg:579.66ms +grad accum step:11717/14336 +step:46868/57344 train_time:27168297ms step_avg:579.68ms +step:46869/57344 train_time:27168318ms step_avg:579.66ms +step:46870/57344 train_time:27168557ms step_avg:579.66ms +step:46871/57344 train_time:27169123ms step_avg:579.66ms +grad accum step:11718/14336 +step:46872/57344 train_time:27170489ms step_avg:579.67ms +step:46873/57344 train_time:27170506ms step_avg:579.66ms +step:46874/57344 train_time:27170727ms step_avg:579.65ms +step:46875/57344 train_time:27171272ms step_avg:579.65ms +grad accum step:11719/14336 +step:46876/57344 train_time:27172888ms step_avg:579.68ms +step:46877/57344 train_time:27172899ms step_avg:579.66ms +step:46878/57344 train_time:27173109ms step_avg:579.66ms +step:46879/57344 train_time:27173660ms step_avg:579.66ms +grad accum step:11720/14336 +step:46880/57344 train_time:27175046ms step_avg:579.67ms +step:46881/57344 train_time:27175064ms step_avg:579.66ms +step:46882/57344 train_time:27175303ms step_avg:579.65ms +step:46883/57344 train_time:27175872ms step_avg:579.65ms +grad accum step:11721/14336 +step:46884/57344 train_time:27177215ms step_avg:579.67ms +step:46885/57344 train_time:27177242ms step_avg:579.66ms +step:46886/57344 train_time:27177478ms step_avg:579.65ms +step:46887/57344 train_time:27178073ms step_avg:579.65ms +grad accum step:11722/14336 +step:46888/57344 train_time:27179548ms step_avg:579.67ms +step:46889/57344 train_time:27179563ms step_avg:579.66ms +step:46890/57344 train_time:27179785ms step_avg:579.65ms +step:46891/57344 train_time:27180345ms step_avg:579.65ms +grad accum step:11723/14336 +step:46892/57344 train_time:27181745ms step_avg:579.67ms +step:46893/57344 train_time:27181759ms step_avg:579.65ms +step:46894/57344 train_time:27182001ms step_avg:579.65ms +step:46895/57344 train_time:27182546ms step_avg:579.65ms +grad accum step:11724/14336 +step:46896/57344 train_time:27183892ms step_avg:579.66ms +step:46897/57344 train_time:27183909ms step_avg:579.65ms +step:46898/57344 train_time:27184142ms step_avg:579.64ms +step:46899/57344 train_time:27184713ms step_avg:579.64ms +grad accum step:11725/14336 +step:46900/57344 train_time:27186075ms step_avg:579.66ms +step:46901/57344 train_time:27186097ms step_avg:579.65ms +step:46902/57344 train_time:27186345ms step_avg:579.64ms +step:46903/57344 train_time:27186914ms step_avg:579.64ms +grad accum step:11726/14336 +step:46904/57344 train_time:27188258ms step_avg:579.66ms +step:46905/57344 train_time:27188272ms step_avg:579.65ms +step:46906/57344 train_time:27188508ms step_avg:579.64ms +step:46907/57344 train_time:27189057ms step_avg:579.64ms +grad accum step:11727/14336 +step:46908/57344 train_time:27190383ms step_avg:579.65ms +step:46909/57344 train_time:27190398ms step_avg:579.64ms +step:46910/57344 train_time:27190642ms step_avg:579.63ms +step:46911/57344 train_time:27191200ms step_avg:579.63ms +grad accum step:11728/14336 +step:46912/57344 train_time:27192557ms step_avg:579.65ms +step:46912/57344 val_loss:5.877258 train_time:27192560ms step_avg:579.65ms +step:46913/57344 train_time:27192572ms step_avg:579.64ms +step:46914/57344 train_time:27192796ms step_avg:579.63ms +step:46915/57344 train_time:27193348ms step_avg:579.63ms +grad accum step:11729/14336 +step:46916/57344 train_time:27194771ms step_avg:579.65ms +step:46917/57344 train_time:27194824ms step_avg:579.64ms +step:46918/57344 train_time:27195045ms step_avg:579.63ms +step:46919/57344 train_time:27195595ms step_avg:579.63ms +grad accum step:11730/14336 +step:46920/57344 train_time:27197156ms step_avg:579.65ms +step:46921/57344 train_time:27197173ms step_avg:579.64ms +step:46922/57344 train_time:27197403ms step_avg:579.63ms +step:46923/57344 train_time:27197978ms step_avg:579.63ms +grad accum step:11731/14336 +step:46924/57344 train_time:27199340ms step_avg:579.65ms +step:46925/57344 train_time:27199357ms step_avg:579.63ms +step:46926/57344 train_time:27199598ms step_avg:579.63ms +step:46927/57344 train_time:27200147ms step_avg:579.63ms +grad accum step:11732/14336 +step:46928/57344 train_time:27201596ms step_avg:579.65ms +step:46929/57344 train_time:27201616ms step_avg:579.63ms +step:46930/57344 train_time:27201837ms step_avg:579.63ms +step:46931/57344 train_time:27202394ms step_avg:579.63ms +grad accum step:11733/14336 +step:46932/57344 train_time:27203730ms step_avg:579.64ms +step:46933/57344 train_time:27203746ms step_avg:579.63ms +step:46934/57344 train_time:27203994ms step_avg:579.62ms +step:46935/57344 train_time:27204545ms step_avg:579.62ms +grad accum step:11734/14336 +step:46936/57344 train_time:27205967ms step_avg:579.64ms +step:46937/57344 train_time:27205982ms step_avg:579.63ms +step:46938/57344 train_time:27206247ms step_avg:579.62ms +step:46939/57344 train_time:27206867ms step_avg:579.62ms +grad accum step:11735/14336 +step:46940/57344 train_time:27208277ms step_avg:579.64ms +step:46941/57344 train_time:27208297ms step_avg:579.63ms +step:46942/57344 train_time:27208531ms step_avg:579.62ms +step:46943/57344 train_time:27209106ms step_avg:579.62ms +grad accum step:11736/14336 +step:46944/57344 train_time:27210449ms step_avg:579.64ms +step:46945/57344 train_time:27210467ms step_avg:579.62ms +step:46946/57344 train_time:27210711ms step_avg:579.62ms +step:46947/57344 train_time:27211256ms step_avg:579.62ms +grad accum step:11737/14336 +step:46948/57344 train_time:27212565ms step_avg:579.63ms +step:46949/57344 train_time:27212585ms step_avg:579.62ms +step:46950/57344 train_time:27212822ms step_avg:579.61ms +step:46951/57344 train_time:27213374ms step_avg:579.61ms +grad accum step:11738/14336 +step:46952/57344 train_time:27214712ms step_avg:579.63ms +step:46953/57344 train_time:27214727ms step_avg:579.62ms +step:46954/57344 train_time:27214979ms step_avg:579.61ms +step:46955/57344 train_time:27215540ms step_avg:579.61ms +grad accum step:11739/14336 +step:46956/57344 train_time:27216969ms step_avg:579.63ms +step:46957/57344 train_time:27217112ms step_avg:579.62ms +step:46958/57344 train_time:27217337ms step_avg:579.61ms +step:46959/57344 train_time:27217906ms step_avg:579.61ms +grad accum step:11740/14336 +step:46960/57344 train_time:27219203ms step_avg:579.63ms +step:46961/57344 train_time:27219218ms step_avg:579.61ms +step:46962/57344 train_time:27219468ms step_avg:579.61ms +step:46963/57344 train_time:27220025ms step_avg:579.61ms +grad accum step:11741/14336 +step:46964/57344 train_time:27221536ms step_avg:579.63ms +step:46965/57344 train_time:27221732ms step_avg:579.62ms +step:46966/57344 train_time:27221955ms step_avg:579.61ms +step:46967/57344 train_time:27222563ms step_avg:579.61ms +grad accum step:11742/14336 +step:46968/57344 train_time:27224187ms step_avg:579.63ms +step:46969/57344 train_time:27224207ms step_avg:579.62ms +step:46970/57344 train_time:27224436ms step_avg:579.61ms +step:46971/57344 train_time:27225007ms step_avg:579.61ms +grad accum step:11743/14336 +step:46972/57344 train_time:27226466ms step_avg:579.63ms +step:46973/57344 train_time:27226494ms step_avg:579.62ms +step:46974/57344 train_time:27226715ms step_avg:579.61ms +step:46975/57344 train_time:27227275ms step_avg:579.61ms +grad accum step:11744/14336 +step:46976/57344 train_time:27228638ms step_avg:579.63ms +step:46976/57344 val_loss:5.897797 train_time:27228642ms step_avg:579.63ms +step:46977/57344 train_time:27228654ms step_avg:579.62ms +step:46978/57344 train_time:27228880ms step_avg:579.61ms +step:46979/57344 train_time:27229450ms step_avg:579.61ms +grad accum step:11745/14336 +step:46980/57344 train_time:27230912ms step_avg:579.63ms +step:46981/57344 train_time:27230931ms step_avg:579.62ms +step:46982/57344 train_time:27231169ms step_avg:579.61ms +step:46983/57344 train_time:27231723ms step_avg:579.61ms +grad accum step:11746/14336 +step:46984/57344 train_time:27233071ms step_avg:579.62ms +step:46985/57344 train_time:27233092ms step_avg:579.61ms +step:46986/57344 train_time:27233328ms step_avg:579.61ms +step:46987/57344 train_time:27233897ms step_avg:579.60ms +grad accum step:11747/14336 +step:46988/57344 train_time:27235279ms step_avg:579.62ms +step:46989/57344 train_time:27235300ms step_avg:579.61ms +step:46990/57344 train_time:27235533ms step_avg:579.60ms +step:46991/57344 train_time:27236093ms step_avg:579.60ms +grad accum step:11748/14336 +step:46992/57344 train_time:27237406ms step_avg:579.62ms +step:46993/57344 train_time:27237422ms step_avg:579.61ms +step:46994/57344 train_time:27237669ms step_avg:579.60ms +step:46995/57344 train_time:27238235ms step_avg:579.60ms +grad accum step:11749/14336 +step:46996/57344 train_time:27239571ms step_avg:579.61ms +step:46997/57344 train_time:27239588ms step_avg:579.60ms +step:46998/57344 train_time:27239835ms step_avg:579.60ms +step:46999/57344 train_time:27240398ms step_avg:579.60ms +grad accum step:11750/14336 +step:47000/57344 train_time:27241746ms step_avg:579.61ms +step:47001/57344 train_time:27241762ms step_avg:579.60ms +step:47002/57344 train_time:27242013ms step_avg:579.59ms +step:47003/57344 train_time:27242587ms step_avg:579.59ms +grad accum step:11751/14336 +step:47004/57344 train_time:27244027ms step_avg:579.61ms +step:47005/57344 train_time:27244041ms step_avg:579.60ms +step:47006/57344 train_time:27244296ms step_avg:579.59ms +step:47007/57344 train_time:27244901ms step_avg:579.59ms +grad accum step:11752/14336 +step:47008/57344 train_time:27246254ms step_avg:579.61ms +step:47009/57344 train_time:27246271ms step_avg:579.60ms +step:47010/57344 train_time:27246511ms step_avg:579.59ms +step:47011/57344 train_time:27247050ms step_avg:579.59ms +grad accum step:11753/14336 +step:47012/57344 train_time:27248403ms step_avg:579.61ms +step:47013/57344 train_time:27248417ms step_avg:579.59ms +step:47014/57344 train_time:27248656ms step_avg:579.59ms +step:47015/57344 train_time:27249202ms step_avg:579.59ms +grad accum step:11754/14336 +step:47016/57344 train_time:27250541ms step_avg:579.60ms +step:47017/57344 train_time:27250559ms step_avg:579.59ms +step:47018/57344 train_time:27250791ms step_avg:579.58ms +step:47019/57344 train_time:27251366ms step_avg:579.58ms +grad accum step:11755/14336 +step:47020/57344 train_time:27252723ms step_avg:579.60ms +step:47021/57344 train_time:27252742ms step_avg:579.59ms +step:47022/57344 train_time:27252978ms step_avg:579.58ms +step:47023/57344 train_time:27253539ms step_avg:579.58ms +grad accum step:11756/14336 +step:47024/57344 train_time:27254888ms step_avg:579.60ms +step:47025/57344 train_time:27254907ms step_avg:579.58ms +step:47026/57344 train_time:27255143ms step_avg:579.58ms +step:47027/57344 train_time:27255699ms step_avg:579.58ms +grad accum step:11757/14336 +step:47028/57344 train_time:27257084ms step_avg:579.59ms +step:47029/57344 train_time:27257098ms step_avg:579.58ms +step:47030/57344 train_time:27257340ms step_avg:579.57ms +step:47031/57344 train_time:27257894ms step_avg:579.57ms +grad accum step:11758/14336 +step:47032/57344 train_time:27259223ms step_avg:579.59ms +step:47033/57344 train_time:27259235ms step_avg:579.58ms +step:47034/57344 train_time:27259476ms step_avg:579.57ms +step:47035/57344 train_time:27260032ms step_avg:579.57ms +grad accum step:11759/14336 +step:47036/57344 train_time:27261375ms step_avg:579.59ms +step:47037/57344 train_time:27261391ms step_avg:579.57ms +step:47038/57344 train_time:27261649ms step_avg:579.57ms +step:47039/57344 train_time:27262246ms step_avg:579.57ms +grad accum step:11760/14336 +step:47040/57344 train_time:27263638ms step_avg:579.58ms +step:47040/57344 val_loss:5.894917 train_time:27263643ms step_avg:579.58ms +step:47041/57344 train_time:27263655ms step_avg:579.57ms +step:47042/57344 train_time:27263886ms step_avg:579.56ms +step:47043/57344 train_time:27264456ms step_avg:579.56ms +grad accum step:11761/14336 +step:47044/57344 train_time:27265801ms step_avg:579.58ms +step:47045/57344 train_time:27265822ms step_avg:579.57ms +step:47046/57344 train_time:27266058ms step_avg:579.56ms +step:47047/57344 train_time:27266642ms step_avg:579.56ms +grad accum step:11762/14336 +step:47048/57344 train_time:27268199ms step_avg:579.58ms +step:47049/57344 train_time:27268337ms step_avg:579.57ms +step:47050/57344 train_time:27268552ms step_avg:579.57ms +step:47051/57344 train_time:27269118ms step_avg:579.57ms +grad accum step:11763/14336 +step:47052/57344 train_time:27270467ms step_avg:579.58ms +step:47053/57344 train_time:27270485ms step_avg:579.57ms +step:47054/57344 train_time:27270722ms step_avg:579.56ms +step:47055/57344 train_time:27271281ms step_avg:579.56ms +grad accum step:11764/14336 +step:47056/57344 train_time:27272669ms step_avg:579.58ms +step:47057/57344 train_time:27272683ms step_avg:579.57ms +step:47058/57344 train_time:27272931ms step_avg:579.56ms +step:47059/57344 train_time:27273495ms step_avg:579.56ms +grad accum step:11765/14336 +step:47060/57344 train_time:27274838ms step_avg:579.58ms +step:47061/57344 train_time:27274856ms step_avg:579.56ms +step:47062/57344 train_time:27275109ms step_avg:579.56ms +step:47063/57344 train_time:27275689ms step_avg:579.56ms +grad accum step:11766/14336 +step:47064/57344 train_time:27277026ms step_avg:579.57ms +step:47065/57344 train_time:27277042ms step_avg:579.56ms +step:47066/57344 train_time:27277292ms step_avg:579.55ms +step:47067/57344 train_time:27277887ms step_avg:579.55ms +grad accum step:11767/14336 +step:47068/57344 train_time:27279360ms step_avg:579.57ms +step:47069/57344 train_time:27279376ms step_avg:579.56ms +step:47070/57344 train_time:27279620ms step_avg:579.55ms +step:47071/57344 train_time:27280184ms step_avg:579.55ms +grad accum step:11768/14336 +step:47072/57344 train_time:27281538ms step_avg:579.57ms +step:47073/57344 train_time:27281553ms step_avg:579.56ms +step:47074/57344 train_time:27281796ms step_avg:579.55ms +step:47075/57344 train_time:27282362ms step_avg:579.55ms +grad accum step:11769/14336 +step:47076/57344 train_time:27283767ms step_avg:579.57ms +step:47077/57344 train_time:27283782ms step_avg:579.56ms +step:47078/57344 train_time:27284019ms step_avg:579.55ms +step:47079/57344 train_time:27284567ms step_avg:579.55ms +grad accum step:11770/14336 +step:47080/57344 train_time:27285897ms step_avg:579.56ms +step:47081/57344 train_time:27285913ms step_avg:579.55ms +step:47082/57344 train_time:27286172ms step_avg:579.55ms +step:47083/57344 train_time:27286757ms step_avg:579.55ms +grad accum step:11771/14336 +step:47084/57344 train_time:27288111ms step_avg:579.56ms +step:47085/57344 train_time:27288137ms step_avg:579.55ms +step:47086/57344 train_time:27288367ms step_avg:579.54ms +step:47087/57344 train_time:27288944ms step_avg:579.54ms +grad accum step:11772/14336 +step:47088/57344 train_time:27290285ms step_avg:579.56ms +step:47089/57344 train_time:27290300ms step_avg:579.55ms +step:47090/57344 train_time:27290553ms step_avg:579.54ms +step:47091/57344 train_time:27291125ms step_avg:579.54ms +grad accum step:11773/14336 +step:47092/57344 train_time:27292483ms step_avg:579.56ms +step:47093/57344 train_time:27292499ms step_avg:579.54ms +step:47094/57344 train_time:27292743ms step_avg:579.54ms +step:47095/57344 train_time:27293288ms step_avg:579.54ms +grad accum step:11774/14336 +step:47096/57344 train_time:27294624ms step_avg:579.55ms +step:47097/57344 train_time:27294639ms step_avg:579.54ms +step:47098/57344 train_time:27294889ms step_avg:579.53ms +step:47099/57344 train_time:27295451ms step_avg:579.53ms +grad accum step:11775/14336 +step:47100/57344 train_time:27296848ms step_avg:579.55ms +step:47101/57344 train_time:27296868ms step_avg:579.54ms +step:47102/57344 train_time:27297090ms step_avg:579.53ms +step:47103/57344 train_time:27297641ms step_avg:579.53ms +grad accum step:11776/14336 +step:47104/57344 train_time:27299032ms step_avg:579.55ms +step:47104/57344 val_loss:5.899468 train_time:27299037ms step_avg:579.55ms +step:47105/57344 train_time:27299048ms step_avg:579.54ms +step:47106/57344 train_time:27299278ms step_avg:579.53ms +step:47107/57344 train_time:27299841ms step_avg:579.53ms +grad accum step:11777/14336 +step:47108/57344 train_time:27301142ms step_avg:579.54ms +step:47109/57344 train_time:27301170ms step_avg:579.53ms +step:47110/57344 train_time:27301401ms step_avg:579.52ms +step:47111/57344 train_time:27301960ms step_avg:579.52ms +grad accum step:11778/14336 +step:47112/57344 train_time:27303419ms step_avg:579.54ms +step:47113/57344 train_time:27303433ms step_avg:579.53ms +step:47114/57344 train_time:27303654ms step_avg:579.52ms +step:47115/57344 train_time:27304209ms step_avg:579.52ms +grad accum step:11779/14336 +step:47116/57344 train_time:27305549ms step_avg:579.54ms +step:47117/57344 train_time:27305569ms step_avg:579.53ms +step:47118/57344 train_time:27305805ms step_avg:579.52ms +step:47119/57344 train_time:27306367ms step_avg:579.52ms +grad accum step:11780/14336 +step:47120/57344 train_time:27307704ms step_avg:579.54ms +step:47121/57344 train_time:27307719ms step_avg:579.52ms +step:47122/57344 train_time:27307966ms step_avg:579.52ms +step:47123/57344 train_time:27308539ms step_avg:579.52ms +grad accum step:11781/14336 +step:47124/57344 train_time:27309953ms step_avg:579.53ms +step:47125/57344 train_time:27309982ms step_avg:579.52ms +step:47126/57344 train_time:27310205ms step_avg:579.51ms +step:47127/57344 train_time:27310758ms step_avg:579.51ms +grad accum step:11782/14336 +step:47128/57344 train_time:27312127ms step_avg:579.53ms +step:47129/57344 train_time:27312148ms step_avg:579.52ms +step:47130/57344 train_time:27312387ms step_avg:579.51ms +step:47131/57344 train_time:27312989ms step_avg:579.51ms +grad accum step:11783/14336 +step:47132/57344 train_time:27314359ms step_avg:579.53ms +step:47133/57344 train_time:27314371ms step_avg:579.52ms +step:47134/57344 train_time:27314630ms step_avg:579.51ms +step:47135/57344 train_time:27315239ms step_avg:579.51ms +grad accum step:11784/14336 +step:47136/57344 train_time:27316693ms step_avg:579.53ms +step:47137/57344 train_time:27316714ms step_avg:579.52ms +step:47138/57344 train_time:27316965ms step_avg:579.51ms +step:47139/57344 train_time:27317555ms step_avg:579.51ms +grad accum step:11785/14336 +step:47140/57344 train_time:27318916ms step_avg:579.53ms +step:47141/57344 train_time:27318935ms step_avg:579.52ms +step:47142/57344 train_time:27319158ms step_avg:579.51ms +step:47143/57344 train_time:27319703ms step_avg:579.51ms +grad accum step:11786/14336 +step:47144/57344 train_time:27321052ms step_avg:579.52ms +step:47145/57344 train_time:27321067ms step_avg:579.51ms +step:47146/57344 train_time:27321319ms step_avg:579.50ms +step:47147/57344 train_time:27321894ms step_avg:579.50ms +grad accum step:11787/14336 +step:47148/57344 train_time:27323269ms step_avg:579.52ms +step:47149/57344 train_time:27323286ms step_avg:579.51ms +step:47150/57344 train_time:27323544ms step_avg:579.50ms +step:47151/57344 train_time:27324117ms step_avg:579.50ms +grad accum step:11788/14336 +step:47152/57344 train_time:27325473ms step_avg:579.52ms +step:47153/57344 train_time:27325489ms step_avg:579.51ms +step:47154/57344 train_time:27325742ms step_avg:579.50ms +step:47155/57344 train_time:27326312ms step_avg:579.50ms +grad accum step:11789/14336 +step:47156/57344 train_time:27327643ms step_avg:579.52ms +step:47157/57344 train_time:27327679ms step_avg:579.50ms +step:47158/57344 train_time:27327905ms step_avg:579.50ms +step:47159/57344 train_time:27328465ms step_avg:579.50ms +grad accum step:11790/14336 +step:47160/57344 train_time:27329829ms step_avg:579.51ms +step:47161/57344 train_time:27329881ms step_avg:579.50ms +step:47162/57344 train_time:27330101ms step_avg:579.49ms +step:47163/57344 train_time:27330665ms step_avg:579.49ms +grad accum step:11791/14336 +step:47164/57344 train_time:27332042ms step_avg:579.51ms +step:47165/57344 train_time:27332058ms step_avg:579.50ms +step:47166/57344 train_time:27332311ms step_avg:579.49ms +step:47167/57344 train_time:27332872ms step_avg:579.49ms +grad accum step:11792/14336 +step:47168/57344 train_time:27334196ms step_avg:579.51ms +step:47168/57344 val_loss:5.901907 train_time:27334201ms step_avg:579.51ms +step:47169/57344 train_time:27334213ms step_avg:579.50ms +step:47170/57344 train_time:27334437ms step_avg:579.49ms +step:47171/57344 train_time:27334985ms step_avg:579.49ms +grad accum step:11793/14336 +step:47172/57344 train_time:27336275ms step_avg:579.50ms +step:47173/57344 train_time:27336296ms step_avg:579.49ms +step:47174/57344 train_time:27336544ms step_avg:579.48ms +step:47175/57344 train_time:27337109ms step_avg:579.48ms +grad accum step:11794/14336 +step:47176/57344 train_time:27338450ms step_avg:579.50ms +step:47177/57344 train_time:27338466ms step_avg:579.49ms +step:47178/57344 train_time:27338719ms step_avg:579.48ms +step:47179/57344 train_time:27339298ms step_avg:579.48ms +grad accum step:11795/14336 +step:47180/57344 train_time:27340656ms step_avg:579.50ms +step:47181/57344 train_time:27340680ms step_avg:579.48ms +step:47182/57344 train_time:27340908ms step_avg:579.48ms +step:47183/57344 train_time:27341457ms step_avg:579.48ms +grad accum step:11796/14336 +step:47184/57344 train_time:27342865ms step_avg:579.49ms +step:47185/57344 train_time:27342881ms step_avg:579.48ms +step:47186/57344 train_time:27343128ms step_avg:579.48ms +step:47187/57344 train_time:27343680ms step_avg:579.47ms +grad accum step:11797/14336 +step:47188/57344 train_time:27345034ms step_avg:579.49ms +step:47189/57344 train_time:27345056ms step_avg:579.48ms +step:47190/57344 train_time:27345289ms step_avg:579.47ms +step:47191/57344 train_time:27345850ms step_avg:579.47ms +grad accum step:11798/14336 +step:47192/57344 train_time:27347326ms step_avg:579.49ms +step:47193/57344 train_time:27347341ms step_avg:579.48ms +step:47194/57344 train_time:27347587ms step_avg:579.47ms +step:47195/57344 train_time:27348145ms step_avg:579.47ms +grad accum step:11799/14336 +step:47196/57344 train_time:27349524ms step_avg:579.49ms +step:47197/57344 train_time:27349558ms step_avg:579.48ms +step:47198/57344 train_time:27349777ms step_avg:579.47ms +step:47199/57344 train_time:27350335ms step_avg:579.47ms +grad accum step:11800/14336 +step:47200/57344 train_time:27351696ms step_avg:579.49ms +step:47201/57344 train_time:27351711ms step_avg:579.47ms +step:47202/57344 train_time:27351964ms step_avg:579.47ms +step:47203/57344 train_time:27352543ms step_avg:579.47ms +grad accum step:11801/14336 +step:47204/57344 train_time:27353961ms step_avg:579.48ms +step:47205/57344 train_time:27353977ms step_avg:579.47ms +step:47206/57344 train_time:27354222ms step_avg:579.46ms +step:47207/57344 train_time:27354766ms step_avg:579.46ms +grad accum step:11802/14336 +step:47208/57344 train_time:27356176ms step_avg:579.48ms +step:47209/57344 train_time:27356191ms step_avg:579.47ms +step:47210/57344 train_time:27356439ms step_avg:579.46ms +step:47211/57344 train_time:27357013ms step_avg:579.46ms +grad accum step:11803/14336 +step:47212/57344 train_time:27358459ms step_avg:579.48ms +step:47213/57344 train_time:27358477ms step_avg:579.47ms +step:47214/57344 train_time:27358721ms step_avg:579.46ms +step:47215/57344 train_time:27359283ms step_avg:579.46ms +grad accum step:11804/14336 +step:47216/57344 train_time:27360630ms step_avg:579.48ms +step:47217/57344 train_time:27360649ms step_avg:579.47ms +step:47218/57344 train_time:27360882ms step_avg:579.46ms +step:47219/57344 train_time:27361417ms step_avg:579.46ms +grad accum step:11805/14336 +step:47220/57344 train_time:27362813ms step_avg:579.48ms +step:47221/57344 train_time:27362830ms step_avg:579.46ms +step:47222/57344 train_time:27363080ms step_avg:579.46ms +step:47223/57344 train_time:27363666ms step_avg:579.46ms +grad accum step:11806/14336 +step:47224/57344 train_time:27365029ms step_avg:579.47ms +step:47225/57344 train_time:27365051ms step_avg:579.46ms +step:47226/57344 train_time:27365287ms step_avg:579.45ms +step:47227/57344 train_time:27365836ms step_avg:579.45ms +grad accum step:11807/14336 +step:47228/57344 train_time:27367171ms step_avg:579.47ms +step:47229/57344 train_time:27367188ms step_avg:579.46ms +step:47230/57344 train_time:27367431ms step_avg:579.45ms +step:47231/57344 train_time:27367992ms step_avg:579.45ms +grad accum step:11808/14336 +step:47232/57344 train_time:27369372ms step_avg:579.47ms +step:47232/57344 val_loss:5.902164 train_time:27369375ms step_avg:579.47ms +step:47233/57344 train_time:27369386ms step_avg:579.45ms +step:47234/57344 train_time:27369620ms step_avg:579.45ms +step:47235/57344 train_time:27370192ms step_avg:579.45ms +grad accum step:11809/14336 +step:47236/57344 train_time:27371516ms step_avg:579.46ms +step:47237/57344 train_time:27371534ms step_avg:579.45ms +step:47238/57344 train_time:27371774ms step_avg:579.44ms +step:47239/57344 train_time:27372320ms step_avg:579.44ms +grad accum step:11810/14336 +step:47240/57344 train_time:27373685ms step_avg:579.46ms +step:47241/57344 train_time:27373701ms step_avg:579.45ms +step:47242/57344 train_time:27373955ms step_avg:579.44ms +step:47243/57344 train_time:27374523ms step_avg:579.44ms +grad accum step:11811/14336 +step:47244/57344 train_time:27375860ms step_avg:579.46ms +step:47245/57344 train_time:27375876ms step_avg:579.44ms +step:47246/57344 train_time:27376123ms step_avg:579.44ms +step:47247/57344 train_time:27376677ms step_avg:579.44ms +grad accum step:11812/14336 +step:47248/57344 train_time:27378060ms step_avg:579.45ms +step:47249/57344 train_time:27378076ms step_avg:579.44ms +step:47250/57344 train_time:27378320ms step_avg:579.44ms +step:47251/57344 train_time:27378891ms step_avg:579.44ms +grad accum step:11813/14336 +step:47252/57344 train_time:27380451ms step_avg:579.46ms +step:47253/57344 train_time:27380471ms step_avg:579.44ms +step:47254/57344 train_time:27380706ms step_avg:579.44ms +step:47255/57344 train_time:27381254ms step_avg:579.44ms +grad accum step:11814/14336 +step:47256/57344 train_time:27382564ms step_avg:579.45ms +step:47257/57344 train_time:27382581ms step_avg:579.44ms +step:47258/57344 train_time:27382831ms step_avg:579.43ms +step:47259/57344 train_time:27383383ms step_avg:579.43ms +grad accum step:11815/14336 +step:47260/57344 train_time:27384711ms step_avg:579.45ms +step:47261/57344 train_time:27384725ms step_avg:579.44ms +step:47262/57344 train_time:27384995ms step_avg:579.43ms +step:47263/57344 train_time:27385605ms step_avg:579.43ms +grad accum step:11816/14336 +step:47264/57344 train_time:27386982ms step_avg:579.45ms +step:47265/57344 train_time:27386995ms step_avg:579.43ms +step:47266/57344 train_time:27387249ms step_avg:579.43ms +step:47267/57344 train_time:27387833ms step_avg:579.43ms +grad accum step:11817/14336 +step:47268/57344 train_time:27389211ms step_avg:579.45ms +step:47269/57344 train_time:27389225ms step_avg:579.43ms +step:47270/57344 train_time:27389465ms step_avg:579.43ms +step:47271/57344 train_time:27390023ms step_avg:579.43ms +grad accum step:11818/14336 +step:47272/57344 train_time:27391369ms step_avg:579.44ms +step:47273/57344 train_time:27391385ms step_avg:579.43ms +step:47274/57344 train_time:27391638ms step_avg:579.42ms +step:47275/57344 train_time:27392206ms step_avg:579.42ms +grad accum step:11819/14336 +step:47276/57344 train_time:27393535ms step_avg:579.44ms +step:47277/57344 train_time:27393556ms step_avg:579.43ms +step:47278/57344 train_time:27393776ms step_avg:579.42ms +step:47279/57344 train_time:27394322ms step_avg:579.42ms +grad accum step:11820/14336 +step:47280/57344 train_time:27395650ms step_avg:579.43ms +step:47281/57344 train_time:27395661ms step_avg:579.42ms +step:47282/57344 train_time:27395904ms step_avg:579.42ms +step:47283/57344 train_time:27396467ms step_avg:579.41ms +grad accum step:11821/14336 +step:47284/57344 train_time:27397835ms step_avg:579.43ms +step:47285/57344 train_time:27397848ms step_avg:579.42ms +step:47286/57344 train_time:27398100ms step_avg:579.41ms +step:47287/57344 train_time:27398664ms step_avg:579.41ms +grad accum step:11822/14336 +step:47288/57344 train_time:27399972ms step_avg:579.43ms +step:47289/57344 train_time:27399989ms step_avg:579.42ms +step:47290/57344 train_time:27400248ms step_avg:579.41ms +step:47291/57344 train_time:27400837ms step_avg:579.41ms +grad accum step:11823/14336 +step:47292/57344 train_time:27402207ms step_avg:579.43ms +step:47293/57344 train_time:27402225ms step_avg:579.41ms +step:47294/57344 train_time:27402459ms step_avg:579.41ms +step:47295/57344 train_time:27403003ms step_avg:579.41ms +grad accum step:11824/14336 +step:47296/57344 train_time:27404300ms step_avg:579.42ms +step:47296/57344 val_loss:5.892847 train_time:27404301ms step_avg:579.42ms +step:47297/57344 train_time:27404312ms step_avg:579.41ms +step:47298/57344 train_time:27404532ms step_avg:579.40ms +step:47299/57344 train_time:27405084ms step_avg:579.40ms +grad accum step:11825/14336 +step:47300/57344 train_time:27406447ms step_avg:579.42ms +step:47301/57344 train_time:27406464ms step_avg:579.41ms +step:47302/57344 train_time:27406691ms step_avg:579.40ms +step:47303/57344 train_time:27407256ms step_avg:579.40ms +grad accum step:11826/14336 +step:47304/57344 train_time:27408610ms step_avg:579.41ms +step:47305/57344 train_time:27408625ms step_avg:579.40ms +step:47306/57344 train_time:27408873ms step_avg:579.40ms +step:47307/57344 train_time:27409422ms step_avg:579.39ms +grad accum step:11827/14336 +step:47308/57344 train_time:27410774ms step_avg:579.41ms +step:47309/57344 train_time:27410790ms step_avg:579.40ms +step:47310/57344 train_time:27411036ms step_avg:579.39ms +step:47311/57344 train_time:27411593ms step_avg:579.39ms +grad accum step:11828/14336 +step:47312/57344 train_time:27412957ms step_avg:579.41ms +step:47313/57344 train_time:27412986ms step_avg:579.40ms +step:47314/57344 train_time:27413209ms step_avg:579.39ms +step:47315/57344 train_time:27413768ms step_avg:579.39ms +grad accum step:11829/14336 +step:47316/57344 train_time:27415093ms step_avg:579.40ms +step:47317/57344 train_time:27415130ms step_avg:579.39ms +step:47318/57344 train_time:27415360ms step_avg:579.39ms +step:47319/57344 train_time:27415960ms step_avg:579.39ms +grad accum step:11830/14336 +step:47320/57344 train_time:27417350ms step_avg:579.40ms +step:47321/57344 train_time:27417366ms step_avg:579.39ms +step:47322/57344 train_time:27417613ms step_avg:579.38ms +step:47323/57344 train_time:27418166ms step_avg:579.38ms +grad accum step:11831/14336 +step:47324/57344 train_time:27419497ms step_avg:579.40ms +step:47325/57344 train_time:27419513ms step_avg:579.39ms +step:47326/57344 train_time:27419770ms step_avg:579.38ms +step:47327/57344 train_time:27420354ms step_avg:579.38ms +grad accum step:11832/14336 +step:47328/57344 train_time:27421728ms step_avg:579.40ms +step:47329/57344 train_time:27421742ms step_avg:579.39ms +step:47330/57344 train_time:27422001ms step_avg:579.38ms +step:47331/57344 train_time:27422584ms step_avg:579.38ms +grad accum step:11833/14336 +step:47332/57344 train_time:27423946ms step_avg:579.40ms +step:47333/57344 train_time:27423963ms step_avg:579.38ms +step:47334/57344 train_time:27424219ms step_avg:579.38ms +step:47335/57344 train_time:27424797ms step_avg:579.38ms +grad accum step:11834/14336 +step:47336/57344 train_time:27426110ms step_avg:579.39ms +step:47337/57344 train_time:27426134ms step_avg:579.38ms +step:47338/57344 train_time:27426353ms step_avg:579.37ms +step:47339/57344 train_time:27426896ms step_avg:579.37ms +grad accum step:11835/14336 +step:47340/57344 train_time:27428222ms step_avg:579.39ms +step:47341/57344 train_time:27428239ms step_avg:579.38ms +step:47342/57344 train_time:27428490ms step_avg:579.37ms +step:47343/57344 train_time:27429048ms step_avg:579.37ms +grad accum step:11836/14336 +step:47344/57344 train_time:27430419ms step_avg:579.39ms +step:47345/57344 train_time:27430435ms step_avg:579.37ms +step:47346/57344 train_time:27430695ms step_avg:579.37ms +step:47347/57344 train_time:27431280ms step_avg:579.37ms +grad accum step:11837/14336 +step:47348/57344 train_time:27432672ms step_avg:579.38ms +step:47349/57344 train_time:27432693ms step_avg:579.37ms +step:47350/57344 train_time:27432917ms step_avg:579.36ms +step:47351/57344 train_time:27433482ms step_avg:579.36ms +grad accum step:11838/14336 +step:47352/57344 train_time:27434803ms step_avg:579.38ms +step:47353/57344 train_time:27434827ms step_avg:579.37ms +step:47354/57344 train_time:27435065ms step_avg:579.36ms +step:47355/57344 train_time:27435607ms step_avg:579.36ms +grad accum step:11839/14336 +step:47356/57344 train_time:27436964ms step_avg:579.38ms +step:47357/57344 train_time:27436976ms step_avg:579.36ms +step:47358/57344 train_time:27437206ms step_avg:579.36ms +step:47359/57344 train_time:27437754ms step_avg:579.36ms +grad accum step:11840/14336 +step:47360/57344 train_time:27461852ms step_avg:579.85ms +step:47360/57344 val_loss:5.879344 train_time:27461853ms step_avg:579.85ms +step:47361/57344 train_time:27461865ms step_avg:579.84ms +step:47362/57344 train_time:27480150ms step_avg:580.22ms +step:47363/57344 train_time:27508581ms step_avg:580.80ms +grad accum step:11841/14336 +step:47364/57344 train_time:27509705ms step_avg:580.81ms +step:47365/57344 train_time:27509721ms step_avg:580.80ms +step:47366/57344 train_time:27509959ms step_avg:580.80ms +step:47367/57344 train_time:27510504ms step_avg:580.79ms +grad accum step:11842/14336 +step:47368/57344 train_time:27511823ms step_avg:580.81ms +step:47369/57344 train_time:27511838ms step_avg:580.80ms +step:47370/57344 train_time:27512074ms step_avg:580.79ms +step:47371/57344 train_time:27512633ms step_avg:580.79ms +grad accum step:11843/14336 +step:47372/57344 train_time:27566162ms step_avg:581.91ms +step:47373/57344 train_time:27587863ms step_avg:582.35ms +step:47374/57344 train_time:27588166ms step_avg:582.35ms +step:47375/57344 train_time:27588734ms step_avg:582.35ms +grad accum step:11844/14336 +step:47376/57344 train_time:27590093ms step_avg:582.36ms +step:47377/57344 train_time:27590104ms step_avg:582.35ms +step:47378/57344 train_time:27590350ms step_avg:582.35ms +step:47379/57344 train_time:27590909ms step_avg:582.34ms +grad accum step:11845/14336 +step:47380/57344 train_time:27592226ms step_avg:582.36ms +step:47381/57344 train_time:27592250ms step_avg:582.35ms +step:47382/57344 train_time:27592466ms step_avg:582.34ms +step:47383/57344 train_time:27593001ms step_avg:582.34ms +grad accum step:11846/14336 +step:47384/57344 train_time:27594295ms step_avg:582.35ms +step:47385/57344 train_time:27594327ms step_avg:582.34ms +step:47386/57344 train_time:27594548ms step_avg:582.34ms +step:47387/57344 train_time:27595091ms step_avg:582.33ms +grad accum step:11847/14336 +step:47388/57344 train_time:27596372ms step_avg:582.35ms +step:47389/57344 train_time:27596388ms step_avg:582.34ms +step:47390/57344 train_time:27596634ms step_avg:582.33ms +step:47391/57344 train_time:27597196ms step_avg:582.33ms +grad accum step:11848/14336 +step:47392/57344 train_time:27598519ms step_avg:582.35ms +step:47393/57344 train_time:27598536ms step_avg:582.33ms +step:47394/57344 train_time:27598780ms step_avg:582.33ms +step:47395/57344 train_time:27599318ms step_avg:582.33ms +grad accum step:11849/14336 +step:47396/57344 train_time:27615274ms step_avg:582.65ms +step:47397/57344 train_time:27615286ms step_avg:582.64ms +step:47398/57344 train_time:27615574ms step_avg:582.63ms +step:47399/57344 train_time:27616115ms step_avg:582.63ms +grad accum step:11850/14336 +step:47400/57344 train_time:27617415ms step_avg:582.65ms +step:47401/57344 train_time:27617431ms step_avg:582.63ms +step:47402/57344 train_time:27617689ms step_avg:582.63ms +step:47403/57344 train_time:27618279ms step_avg:582.63ms +grad accum step:11851/14336 +step:47404/57344 train_time:27619664ms step_avg:582.64ms +step:47405/57344 train_time:27619681ms step_avg:582.63ms +step:47406/57344 train_time:27619915ms step_avg:582.62ms +step:47407/57344 train_time:27620467ms step_avg:582.62ms +grad accum step:11852/14336 +step:47408/57344 train_time:27621824ms step_avg:582.64ms +step:47409/57344 train_time:27621835ms step_avg:582.63ms +step:47410/57344 train_time:27622059ms step_avg:582.62ms +step:47411/57344 train_time:27622608ms step_avg:582.62ms +grad accum step:11853/14336 +step:47412/57344 train_time:27623935ms step_avg:582.64ms +step:47413/57344 train_time:27623947ms step_avg:582.62ms +step:47414/57344 train_time:27624190ms step_avg:582.62ms +step:47415/57344 train_time:27624735ms step_avg:582.62ms +grad accum step:11854/14336 +step:47416/57344 train_time:27626092ms step_avg:582.63ms +step:47417/57344 train_time:27626107ms step_avg:582.62ms +step:47418/57344 train_time:27626335ms step_avg:582.61ms +step:47419/57344 train_time:27626897ms step_avg:582.61ms +grad accum step:11855/14336 +step:47420/57344 train_time:27628221ms step_avg:582.63ms +step:47421/57344 train_time:27628233ms step_avg:582.62ms +step:47422/57344 train_time:27628482ms step_avg:582.61ms +step:47423/57344 train_time:27629047ms step_avg:582.61ms +grad accum step:11856/14336 +step:47424/57344 train_time:27630393ms step_avg:582.62ms +step:47424/57344 val_loss:5.857492 train_time:27630393ms step_avg:582.62ms +step:47425/57344 train_time:27630405ms step_avg:582.61ms +step:47426/57344 train_time:27630629ms step_avg:582.61ms +step:47427/57344 train_time:27631186ms step_avg:582.60ms +grad accum step:11857/14336 +step:47428/57344 train_time:27632498ms step_avg:582.62ms +step:47429/57344 train_time:27632515ms step_avg:582.61ms +step:47430/57344 train_time:27632760ms step_avg:582.60ms +step:47431/57344 train_time:27633320ms step_avg:582.60ms +grad accum step:11858/14336 +step:47432/57344 train_time:27634656ms step_avg:582.62ms +step:47433/57344 train_time:27634678ms step_avg:582.60ms +step:47434/57344 train_time:27634911ms step_avg:582.60ms +step:47435/57344 train_time:27635474ms step_avg:582.60ms +grad accum step:11859/14336 +step:47436/57344 train_time:27636798ms step_avg:582.61ms +step:47437/57344 train_time:27636809ms step_avg:582.60ms +step:47438/57344 train_time:27637063ms step_avg:582.59ms +step:47439/57344 train_time:27637631ms step_avg:582.59ms +grad accum step:11860/14336 +step:47440/57344 train_time:27638984ms step_avg:582.61ms +step:47441/57344 train_time:27639001ms step_avg:582.60ms +step:47442/57344 train_time:27639249ms step_avg:582.59ms +step:47443/57344 train_time:27639819ms step_avg:582.59ms +grad accum step:11861/14336 +step:47444/57344 train_time:27641238ms step_avg:582.61ms +step:47445/57344 train_time:27641277ms step_avg:582.60ms +step:47446/57344 train_time:27641504ms step_avg:582.59ms +step:47447/57344 train_time:27642073ms step_avg:582.59ms +grad accum step:11862/14336 +step:47448/57344 train_time:27643416ms step_avg:582.60ms +step:47449/57344 train_time:27643443ms step_avg:582.59ms +step:47450/57344 train_time:27643666ms step_avg:582.59ms +step:47451/57344 train_time:27644222ms step_avg:582.58ms +grad accum step:11863/14336 +step:47452/57344 train_time:27645553ms step_avg:582.60ms +step:47453/57344 train_time:27645567ms step_avg:582.59ms +step:47454/57344 train_time:27645824ms step_avg:582.58ms +step:47455/57344 train_time:27646397ms step_avg:582.58ms +grad accum step:11864/14336 +step:47456/57344 train_time:27647789ms step_avg:582.60ms +step:47457/57344 train_time:27647806ms step_avg:582.59ms +step:47458/57344 train_time:27648054ms step_avg:582.58ms +step:47459/57344 train_time:27648609ms step_avg:582.58ms +grad accum step:11865/14336 +step:47460/57344 train_time:27649962ms step_avg:582.60ms +step:47461/57344 train_time:27649979ms step_avg:582.58ms +step:47462/57344 train_time:27650230ms step_avg:582.58ms +step:47463/57344 train_time:27650803ms step_avg:582.58ms +grad accum step:11866/14336 +step:47464/57344 train_time:27652138ms step_avg:582.59ms +step:47465/57344 train_time:27652192ms step_avg:582.58ms +step:47466/57344 train_time:27652420ms step_avg:582.57ms +step:47467/57344 train_time:27652995ms step_avg:582.57ms +grad accum step:11867/14336 +step:47468/57344 train_time:27654330ms step_avg:582.59ms +step:47469/57344 train_time:27654347ms step_avg:582.58ms +step:47470/57344 train_time:27654599ms step_avg:582.57ms +step:47471/57344 train_time:27655161ms step_avg:582.57ms +grad accum step:11868/14336 +step:47472/57344 train_time:27656486ms step_avg:582.59ms +step:47473/57344 train_time:27656497ms step_avg:582.57ms +step:47474/57344 train_time:27656740ms step_avg:582.57ms +step:47475/57344 train_time:27657282ms step_avg:582.57ms +grad accum step:11869/14336 +step:47476/57344 train_time:27658630ms step_avg:582.58ms +step:47477/57344 train_time:27658647ms step_avg:582.57ms +step:47478/57344 train_time:27658893ms step_avg:582.56ms +step:47479/57344 train_time:27659444ms step_avg:582.56ms +grad accum step:11870/14336 +step:47480/57344 train_time:27660757ms step_avg:582.58ms +step:47481/57344 train_time:27660774ms step_avg:582.57ms +step:47482/57344 train_time:27661027ms step_avg:582.56ms +step:47483/57344 train_time:27661584ms step_avg:582.56ms +grad accum step:11871/14336 +step:47484/57344 train_time:27662942ms step_avg:582.57ms +step:47485/57344 train_time:27662959ms step_avg:582.56ms +step:47486/57344 train_time:27663206ms step_avg:582.55ms +step:47487/57344 train_time:27663752ms step_avg:582.55ms +grad accum step:11872/14336 +step:47488/57344 train_time:27665087ms step_avg:582.57ms +step:47488/57344 val_loss:5.831481 train_time:27665088ms step_avg:582.57ms +step:47489/57344 train_time:27665099ms step_avg:582.56ms +step:47490/57344 train_time:27665393ms step_avg:582.55ms +step:47491/57344 train_time:27666032ms step_avg:582.55ms +grad accum step:11873/14336 +step:47492/57344 train_time:27667392ms step_avg:582.57ms +step:47493/57344 train_time:27667408ms step_avg:582.56ms +step:47494/57344 train_time:27667657ms step_avg:582.55ms +step:47495/57344 train_time:27668222ms step_avg:582.55ms +grad accum step:11874/14336 +step:47496/57344 train_time:27669546ms step_avg:582.57ms +step:47497/57344 train_time:27669561ms step_avg:582.55ms +step:47498/57344 train_time:27669808ms step_avg:582.55ms +step:47499/57344 train_time:27670358ms step_avg:582.55ms +grad accum step:11875/14336 +step:47500/57344 train_time:27671755ms step_avg:582.56ms +step:47501/57344 train_time:27671766ms step_avg:582.55ms +step:47502/57344 train_time:27672007ms step_avg:582.54ms +step:47503/57344 train_time:27672570ms step_avg:582.54ms +grad accum step:11876/14336 +step:47504/57344 train_time:27673913ms step_avg:582.56ms +step:47505/57344 train_time:27673925ms step_avg:582.55ms +step:47506/57344 train_time:27674168ms step_avg:582.54ms +step:47507/57344 train_time:27674735ms step_avg:582.54ms +grad accum step:11877/14336 +step:47508/57344 train_time:27676113ms step_avg:582.56ms +step:47509/57344 train_time:27676127ms step_avg:582.54ms +step:47510/57344 train_time:27676381ms step_avg:582.54ms +step:47511/57344 train_time:27676954ms step_avg:582.54ms +grad accum step:11878/14336 +step:47512/57344 train_time:27678286ms step_avg:582.55ms +step:47513/57344 train_time:27678303ms step_avg:582.54ms +step:47514/57344 train_time:27678552ms step_avg:582.53ms +step:47515/57344 train_time:27679108ms step_avg:582.53ms +grad accum step:11879/14336 +step:47516/57344 train_time:27680432ms step_avg:582.55ms +step:47517/57344 train_time:27680449ms step_avg:582.54ms +step:47518/57344 train_time:27680699ms step_avg:582.53ms +step:47519/57344 train_time:27681291ms step_avg:582.53ms +grad accum step:11880/14336 +step:47520/57344 train_time:27682748ms step_avg:582.55ms +step:47521/57344 train_time:27682780ms step_avg:582.54ms +step:47522/57344 train_time:27683005ms step_avg:582.53ms +step:47523/57344 train_time:27683575ms step_avg:582.53ms +grad accum step:11881/14336 +step:47524/57344 train_time:27684972ms step_avg:582.55ms +step:47525/57344 train_time:27685002ms step_avg:582.54ms +step:47526/57344 train_time:27685233ms step_avg:582.53ms +step:47527/57344 train_time:27686313ms step_avg:582.54ms +grad accum step:11882/14336 +step:47528/57344 train_time:27687232ms step_avg:582.55ms +step:47529/57344 train_time:27687266ms step_avg:582.53ms +step:47530/57344 train_time:27687487ms step_avg:582.53ms +step:47531/57344 train_time:27728121ms step_avg:583.37ms +grad accum step:11883/14336 +step:47532/57344 train_time:27729289ms step_avg:583.38ms +step:47533/57344 train_time:27729307ms step_avg:583.37ms +step:47534/57344 train_time:27729547ms step_avg:583.36ms +step:47535/57344 train_time:27730095ms step_avg:583.36ms +grad accum step:11884/14336 +step:47536/57344 train_time:27731398ms step_avg:583.38ms +step:47537/57344 train_time:27731415ms step_avg:583.36ms +step:47538/57344 train_time:27731660ms step_avg:583.36ms +step:47539/57344 train_time:27732217ms step_avg:583.36ms +grad accum step:11885/14336 +step:47540/57344 train_time:27733550ms step_avg:583.37ms +step:47541/57344 train_time:27733567ms step_avg:583.36ms +step:47542/57344 train_time:27733817ms step_avg:583.35ms +step:47543/57344 train_time:27734376ms step_avg:583.35ms +grad accum step:11886/14336 +step:47544/57344 train_time:27735696ms step_avg:583.37ms +step:47545/57344 train_time:27735713ms step_avg:583.36ms +step:47546/57344 train_time:27735964ms step_avg:583.35ms +step:47547/57344 train_time:27736525ms step_avg:583.35ms +grad accum step:11887/14336 +step:47548/57344 train_time:27737838ms step_avg:583.36ms +step:47549/57344 train_time:27737855ms step_avg:583.35ms +step:47550/57344 train_time:27738096ms step_avg:583.35ms +step:47551/57344 train_time:27738644ms step_avg:583.35ms +grad accum step:11888/14336 +step:47552/57344 train_time:27739992ms step_avg:583.36ms +step:47552/57344 val_loss:5.802512 train_time:27740002ms step_avg:583.36ms +step:47553/57344 train_time:27740014ms step_avg:583.35ms +step:47554/57344 train_time:27740237ms step_avg:583.34ms +step:47555/57344 train_time:27740791ms step_avg:583.34ms +grad accum step:11889/14336 +step:47556/57344 train_time:27742148ms step_avg:583.36ms +step:47557/57344 train_time:27742163ms step_avg:583.35ms +step:47558/57344 train_time:27742415ms step_avg:583.34ms +step:47559/57344 train_time:27742990ms step_avg:583.34ms +grad accum step:11890/14336 +step:47560/57344 train_time:27744354ms step_avg:583.35ms +step:47561/57344 train_time:27744391ms step_avg:583.34ms +step:47562/57344 train_time:27744612ms step_avg:583.34ms +step:47563/57344 train_time:27745164ms step_avg:583.34ms +grad accum step:11891/14336 +step:47564/57344 train_time:27746477ms step_avg:583.35ms +step:47565/57344 train_time:27746492ms step_avg:583.34ms +step:47566/57344 train_time:27746745ms step_avg:583.33ms +step:47567/57344 train_time:27747309ms step_avg:583.33ms +grad accum step:11892/14336 +step:47568/57344 train_time:27748637ms step_avg:583.35ms +step:47569/57344 train_time:27748655ms step_avg:583.33ms +step:47570/57344 train_time:27748904ms step_avg:583.33ms +step:47571/57344 train_time:27749482ms step_avg:583.33ms +grad accum step:11893/14336 +step:47572/57344 train_time:27750797ms step_avg:583.34ms +step:47573/57344 train_time:27750811ms step_avg:583.33ms +step:47574/57344 train_time:27751077ms step_avg:583.32ms +step:47575/57344 train_time:27751677ms step_avg:583.32ms +grad accum step:11894/14336 +step:47576/57344 train_time:27753027ms step_avg:583.34ms +step:47577/57344 train_time:27753042ms step_avg:583.33ms +step:47578/57344 train_time:27753285ms step_avg:583.32ms +step:47579/57344 train_time:27753833ms step_avg:583.32ms +grad accum step:11895/14336 +step:47580/57344 train_time:27755239ms step_avg:583.34ms +step:47581/57344 train_time:27755257ms step_avg:583.33ms +step:47582/57344 train_time:27755480ms step_avg:583.32ms +step:47583/57344 train_time:27756226ms step_avg:583.32ms +grad accum step:11896/14336 +step:47584/57344 train_time:27757361ms step_avg:583.33ms +step:47585/57344 train_time:27757378ms step_avg:583.32ms +step:47586/57344 train_time:27757617ms step_avg:583.31ms +step:47587/57344 train_time:27758161ms step_avg:583.31ms +grad accum step:11897/14336 +step:47588/57344 train_time:27759475ms step_avg:583.33ms +step:47589/57344 train_time:27759492ms step_avg:583.32ms +step:47590/57344 train_time:27759737ms step_avg:583.31ms +step:47591/57344 train_time:27760289ms step_avg:583.31ms +grad accum step:11898/14336 +step:47592/57344 train_time:27761652ms step_avg:583.33ms +step:47593/57344 train_time:27761666ms step_avg:583.31ms +step:47594/57344 train_time:27761904ms step_avg:583.31ms +step:47595/57344 train_time:27762466ms step_avg:583.31ms +grad accum step:11899/14336 +step:47596/57344 train_time:27763815ms step_avg:583.32ms +step:47597/57344 train_time:27763831ms step_avg:583.31ms +step:47598/57344 train_time:27764088ms step_avg:583.30ms +step:47599/57344 train_time:27764666ms step_avg:583.30ms +grad accum step:11900/14336 +step:47600/57344 train_time:27766023ms step_avg:583.32ms +step:47601/57344 train_time:27766037ms step_avg:583.31ms +step:47602/57344 train_time:27766292ms step_avg:583.30ms +step:47603/57344 train_time:27766864ms step_avg:583.30ms +grad accum step:11901/14336 +step:47604/57344 train_time:27768211ms step_avg:583.32ms +step:47605/57344 train_time:27768235ms step_avg:583.31ms +step:47606/57344 train_time:27768473ms step_avg:583.30ms +step:47607/57344 train_time:27769016ms step_avg:583.30ms +grad accum step:11902/14336 +step:47608/57344 train_time:27770344ms step_avg:583.31ms +step:47609/57344 train_time:27770373ms step_avg:583.30ms +step:47610/57344 train_time:27770618ms step_avg:583.29ms +step:47611/57344 train_time:27771207ms step_avg:583.29ms +grad accum step:11903/14336 +step:47612/57344 train_time:27772695ms step_avg:583.31ms +step:47613/57344 train_time:27772708ms step_avg:583.30ms +step:47614/57344 train_time:27772930ms step_avg:583.29ms +step:47615/57344 train_time:27773490ms step_avg:583.29ms +grad accum step:11904/14336 +step:47616/57344 train_time:27774843ms step_avg:583.31ms +step:47616/57344 val_loss:5.777002 train_time:27774848ms step_avg:583.31ms +step:47617/57344 train_time:27774860ms step_avg:583.30ms +step:47618/57344 train_time:27775080ms step_avg:583.29ms +step:47619/57344 train_time:27775631ms step_avg:583.29ms +grad accum step:11905/14336 +step:47620/57344 train_time:27776933ms step_avg:583.30ms +step:47621/57344 train_time:27776950ms step_avg:583.29ms +step:47622/57344 train_time:27777194ms step_avg:583.28ms +step:47623/57344 train_time:27777749ms step_avg:583.28ms +grad accum step:11906/14336 +step:47624/57344 train_time:27779073ms step_avg:583.30ms +step:47625/57344 train_time:27779095ms step_avg:583.29ms +step:47626/57344 train_time:27779317ms step_avg:583.28ms +step:47627/57344 train_time:27779862ms step_avg:583.28ms +grad accum step:11907/14336 +step:47628/57344 train_time:27781232ms step_avg:583.30ms +step:47629/57344 train_time:27781250ms step_avg:583.28ms +step:47630/57344 train_time:27781475ms step_avg:583.28ms +step:47631/57344 train_time:27782038ms step_avg:583.28ms +grad accum step:11908/14336 +step:47632/57344 train_time:27783344ms step_avg:583.29ms +step:47633/57344 train_time:27783368ms step_avg:583.28ms +step:47634/57344 train_time:27783602ms step_avg:583.27ms +step:47635/57344 train_time:27784176ms step_avg:583.27ms +grad accum step:11909/14336 +step:47636/57344 train_time:27785578ms step_avg:583.29ms +step:47637/57344 train_time:27785595ms step_avg:583.28ms +step:47638/57344 train_time:27785849ms step_avg:583.27ms +step:47639/57344 train_time:27786432ms step_avg:583.27ms +grad accum step:11910/14336 +step:47640/57344 train_time:27788037ms step_avg:583.29ms +step:47641/57344 train_time:27788481ms step_avg:583.29ms +step:47642/57344 train_time:27788708ms step_avg:583.28ms +step:47643/57344 train_time:27789289ms step_avg:583.28ms +grad accum step:11911/14336 +step:47644/57344 train_time:27790649ms step_avg:583.30ms +step:47645/57344 train_time:27790664ms step_avg:583.29ms +step:47646/57344 train_time:27790893ms step_avg:583.28ms +step:47647/57344 train_time:27791462ms step_avg:583.28ms +grad accum step:11912/14336 +step:47648/57344 train_time:27792802ms step_avg:583.29ms +step:47649/57344 train_time:27792818ms step_avg:583.28ms +step:47650/57344 train_time:27793060ms step_avg:583.28ms +step:47651/57344 train_time:27793604ms step_avg:583.27ms +grad accum step:11913/14336 +step:47652/57344 train_time:27794950ms step_avg:583.29ms +step:47653/57344 train_time:27794967ms step_avg:583.28ms +step:47654/57344 train_time:27795209ms step_avg:583.27ms +step:47655/57344 train_time:27795775ms step_avg:583.27ms +grad accum step:11914/14336 +step:47656/57344 train_time:27797124ms step_avg:583.29ms +step:47657/57344 train_time:27797139ms step_avg:583.28ms +step:47658/57344 train_time:27797387ms step_avg:583.27ms +step:47659/57344 train_time:27797964ms step_avg:583.27ms +grad accum step:11915/14336 +step:47660/57344 train_time:27799338ms step_avg:583.28ms +step:47661/57344 train_time:27799355ms step_avg:583.27ms +step:47662/57344 train_time:27799590ms step_avg:583.27ms +step:47663/57344 train_time:27800165ms step_avg:583.27ms +grad accum step:11916/14336 +step:47664/57344 train_time:27801490ms step_avg:583.28ms +step:47665/57344 train_time:27801505ms step_avg:583.27ms +step:47666/57344 train_time:27801755ms step_avg:583.26ms +step:47667/57344 train_time:27802319ms step_avg:583.26ms +grad accum step:11917/14336 +step:47668/57344 train_time:27803675ms step_avg:583.28ms +step:47669/57344 train_time:27803692ms step_avg:583.27ms +step:47670/57344 train_time:27803934ms step_avg:583.26ms +step:47671/57344 train_time:27804484ms step_avg:583.26ms +grad accum step:11918/14336 +step:47672/57344 train_time:27805821ms step_avg:583.27ms +step:47673/57344 train_time:27805836ms step_avg:583.26ms +step:47674/57344 train_time:27806103ms step_avg:583.26ms +step:47675/57344 train_time:27806714ms step_avg:583.26ms +grad accum step:11919/14336 +step:47676/57344 train_time:27808094ms step_avg:583.27ms +step:47677/57344 train_time:27808109ms step_avg:583.26ms +step:47678/57344 train_time:27808354ms step_avg:583.25ms +step:47679/57344 train_time:27808905ms step_avg:583.25ms +grad accum step:11920/14336 +step:47680/57344 train_time:27810251ms step_avg:583.27ms +step:47680/57344 val_loss:5.752609 train_time:27810257ms step_avg:583.27ms +step:47681/57344 train_time:27810268ms step_avg:583.26ms +step:47682/57344 train_time:27810489ms step_avg:583.25ms +step:47683/57344 train_time:27811059ms step_avg:583.25ms +grad accum step:11921/14336 +step:47684/57344 train_time:27812613ms step_avg:583.27ms +step:47685/57344 train_time:27812631ms step_avg:583.26ms +step:47686/57344 train_time:27812858ms step_avg:583.25ms +step:47687/57344 train_time:27813440ms step_avg:583.25ms +grad accum step:11922/14336 +step:47688/57344 train_time:27814799ms step_avg:583.27ms +step:47689/57344 train_time:27814822ms step_avg:583.25ms +step:47690/57344 train_time:27815059ms step_avg:583.25ms +step:47691/57344 train_time:27815630ms step_avg:583.25ms +grad accum step:11923/14336 +step:47692/57344 train_time:27817038ms step_avg:583.26ms +step:47693/57344 train_time:27817053ms step_avg:583.25ms +step:47694/57344 train_time:27817275ms step_avg:583.24ms +step:47695/57344 train_time:27817839ms step_avg:583.24ms +grad accum step:11924/14336 +step:47696/57344 train_time:27819423ms step_avg:583.27ms +step:47697/57344 train_time:27819449ms step_avg:583.25ms +step:47698/57344 train_time:27819671ms step_avg:583.25ms +step:47699/57344 train_time:27820240ms step_avg:583.25ms +grad accum step:11925/14336 +step:47700/57344 train_time:27821532ms step_avg:583.26ms +step:47701/57344 train_time:27821545ms step_avg:583.25ms +step:47702/57344 train_time:27821794ms step_avg:583.24ms +step:47703/57344 train_time:27822351ms step_avg:583.24ms +grad accum step:11926/14336 +step:47704/57344 train_time:27823688ms step_avg:583.26ms +step:47705/57344 train_time:27823707ms step_avg:583.25ms +step:47706/57344 train_time:27823953ms step_avg:583.24ms +step:47707/57344 train_time:27824527ms step_avg:583.24ms +grad accum step:11927/14336 +step:47708/57344 train_time:27825879ms step_avg:583.25ms +step:47709/57344 train_time:27825895ms step_avg:583.24ms +step:47710/57344 train_time:27826133ms step_avg:583.23ms +step:47711/57344 train_time:27826680ms step_avg:583.23ms +grad accum step:11928/14336 +step:47712/57344 train_time:27828059ms step_avg:583.25ms +step:47713/57344 train_time:27828076ms step_avg:583.24ms +step:47714/57344 train_time:27828319ms step_avg:583.23ms +step:47715/57344 train_time:27828882ms step_avg:583.23ms +grad accum step:11929/14336 +step:47716/57344 train_time:27830259ms step_avg:583.25ms +step:47717/57344 train_time:27830278ms step_avg:583.24ms +step:47718/57344 train_time:27830515ms step_avg:583.23ms +step:47719/57344 train_time:27831057ms step_avg:583.23ms +grad accum step:11930/14336 +step:47720/57344 train_time:27832383ms step_avg:583.24ms +step:47721/57344 train_time:27832399ms step_avg:583.23ms +step:47722/57344 train_time:27832645ms step_avg:583.22ms +step:47723/57344 train_time:27833208ms step_avg:583.22ms +grad accum step:11931/14336 +step:47724/57344 train_time:27834582ms step_avg:583.24ms +step:47725/57344 train_time:27834597ms step_avg:583.23ms +step:47726/57344 train_time:27834846ms step_avg:583.22ms +step:47727/57344 train_time:27835397ms step_avg:583.22ms +grad accum step:11932/14336 +step:47728/57344 train_time:27836766ms step_avg:583.24ms +step:47729/57344 train_time:27836948ms step_avg:583.23ms +step:47730/57344 train_time:27837178ms step_avg:583.22ms +step:47731/57344 train_time:27837725ms step_avg:583.22ms +grad accum step:11933/14336 +step:47732/57344 train_time:27839102ms step_avg:583.24ms +step:47733/57344 train_time:27839138ms step_avg:583.23ms +step:47734/57344 train_time:27839366ms step_avg:583.22ms +step:47735/57344 train_time:27839955ms step_avg:583.22ms +grad accum step:11934/14336 +step:47736/57344 train_time:27841307ms step_avg:583.24ms +step:47737/57344 train_time:27841329ms step_avg:583.22ms +step:47738/57344 train_time:27841551ms step_avg:583.22ms +step:47739/57344 train_time:27842107ms step_avg:583.22ms +grad accum step:11935/14336 +step:47740/57344 train_time:27843433ms step_avg:583.23ms +step:47741/57344 train_time:27843450ms step_avg:583.22ms +step:47742/57344 train_time:27843694ms step_avg:583.21ms +step:47743/57344 train_time:27844257ms step_avg:583.21ms +grad accum step:11936/14336 +step:47744/57344 train_time:27845657ms step_avg:583.23ms +step:47744/57344 val_loss:5.728514 train_time:27845662ms step_avg:583.23ms +step:47745/57344 train_time:27845673ms step_avg:583.22ms +step:47746/57344 train_time:27845900ms step_avg:583.21ms +step:47747/57344 train_time:27846463ms step_avg:583.21ms +grad accum step:11937/14336 +step:47748/57344 train_time:27847801ms step_avg:583.22ms +step:47749/57344 train_time:27847823ms step_avg:583.21ms +step:47750/57344 train_time:27848066ms step_avg:583.21ms +step:47751/57344 train_time:27848650ms step_avg:583.21ms +grad accum step:11938/14336 +step:47752/57344 train_time:27849997ms step_avg:583.22ms +step:47753/57344 train_time:27850022ms step_avg:583.21ms +step:47754/57344 train_time:27850275ms step_avg:583.20ms +step:47755/57344 train_time:27850886ms step_avg:583.20ms +grad accum step:11939/14336 +step:47756/57344 train_time:27852241ms step_avg:583.22ms +step:47757/57344 train_time:27852256ms step_avg:583.21ms +step:47758/57344 train_time:27852512ms step_avg:583.20ms +step:47759/57344 train_time:27853089ms step_avg:583.20ms +grad accum step:11940/14336 +step:47760/57344 train_time:27854462ms step_avg:583.22ms +step:47761/57344 train_time:27854481ms step_avg:583.21ms +step:47762/57344 train_time:27854731ms step_avg:583.20ms +step:47763/57344 train_time:27855343ms step_avg:583.20ms +grad accum step:11941/14336 +step:47764/57344 train_time:27856862ms step_avg:583.22ms +step:47765/57344 train_time:27856876ms step_avg:583.21ms +step:47766/57344 train_time:27857123ms step_avg:583.20ms +step:47767/57344 train_time:27857692ms step_avg:583.20ms +grad accum step:11942/14336 +step:47768/57344 train_time:27859132ms step_avg:583.22ms +step:47769/57344 train_time:27859151ms step_avg:583.21ms +step:47770/57344 train_time:27859371ms step_avg:583.20ms +step:47771/57344 train_time:27859937ms step_avg:583.20ms +grad accum step:11943/14336 +step:47772/57344 train_time:27861310ms step_avg:583.21ms +step:47773/57344 train_time:27861326ms step_avg:583.20ms +step:47774/57344 train_time:27861579ms step_avg:583.20ms +step:47775/57344 train_time:27862151ms step_avg:583.20ms +grad accum step:11944/14336 +step:47776/57344 train_time:27863527ms step_avg:583.21ms +step:47777/57344 train_time:27863554ms step_avg:583.20ms +step:47778/57344 train_time:27863774ms step_avg:583.19ms +step:47779/57344 train_time:27864313ms step_avg:583.19ms +grad accum step:11945/14336 +step:47780/57344 train_time:27865708ms step_avg:583.21ms +step:47781/57344 train_time:27865722ms step_avg:583.20ms +step:47782/57344 train_time:27865966ms step_avg:583.19ms +step:47783/57344 train_time:27866516ms step_avg:583.19ms +grad accum step:11946/14336 +step:47784/57344 train_time:27867812ms step_avg:583.20ms +step:47785/57344 train_time:27867826ms step_avg:583.19ms +step:47786/57344 train_time:27868081ms step_avg:583.19ms +step:47787/57344 train_time:27868657ms step_avg:583.18ms +grad accum step:11947/14336 +step:47788/57344 train_time:27870008ms step_avg:583.20ms +step:47789/57344 train_time:27870038ms step_avg:583.19ms +step:47790/57344 train_time:27870268ms step_avg:583.18ms +step:47791/57344 train_time:27870831ms step_avg:583.18ms +grad accum step:11948/14336 +step:47792/57344 train_time:27872193ms step_avg:583.20ms +step:47793/57344 train_time:27872208ms step_avg:583.19ms +step:47794/57344 train_time:27872457ms step_avg:583.18ms +step:47795/57344 train_time:27873027ms step_avg:583.18ms +grad accum step:11949/14336 +step:47796/57344 train_time:27874432ms step_avg:583.20ms +step:47797/57344 train_time:27874447ms step_avg:583.18ms +step:47798/57344 train_time:27874700ms step_avg:583.18ms +step:47799/57344 train_time:27875266ms step_avg:583.18ms +grad accum step:11950/14336 +step:47800/57344 train_time:27876537ms step_avg:583.19ms +step:47801/57344 train_time:27876553ms step_avg:583.18ms +step:47802/57344 train_time:27876801ms step_avg:583.17ms +step:47803/57344 train_time:27877365ms step_avg:583.17ms +grad accum step:11951/14336 +step:47804/57344 train_time:27878705ms step_avg:583.19ms +step:47805/57344 train_time:27878728ms step_avg:583.18ms +step:47806/57344 train_time:27878963ms step_avg:583.17ms +step:47807/57344 train_time:27879516ms step_avg:583.17ms +grad accum step:11952/14336 +step:47808/57344 train_time:27880861ms step_avg:583.18ms +step:47808/57344 val_loss:5.709937 train_time:27880867ms step_avg:583.18ms +step:47809/57344 train_time:27880879ms step_avg:583.17ms +step:47810/57344 train_time:27881107ms step_avg:583.16ms +step:47811/57344 train_time:27881694ms step_avg:583.16ms +grad accum step:11953/14336 +step:47812/57344 train_time:27883132ms step_avg:583.18ms +step:47813/57344 train_time:27883156ms step_avg:583.17ms +step:47814/57344 train_time:27883377ms step_avg:583.16ms +step:47815/57344 train_time:27883937ms step_avg:583.16ms +grad accum step:11954/14336 +step:47816/57344 train_time:27885293ms step_avg:583.18ms +step:47817/57344 train_time:27885307ms step_avg:583.17ms +step:47818/57344 train_time:27885557ms step_avg:583.16ms +step:47819/57344 train_time:27886120ms step_avg:583.16ms +grad accum step:11955/14336 +step:47820/57344 train_time:27887457ms step_avg:583.18ms +step:47821/57344 train_time:27887469ms step_avg:583.16ms +step:47822/57344 train_time:27887722ms step_avg:583.16ms +step:47823/57344 train_time:27888292ms step_avg:583.16ms +grad accum step:11956/14336 +step:47824/57344 train_time:27889686ms step_avg:583.17ms +step:47825/57344 train_time:27889702ms step_avg:583.16ms +step:47826/57344 train_time:27889947ms step_avg:583.15ms +step:47827/57344 train_time:27890498ms step_avg:583.15ms +grad accum step:11957/14336 +step:47828/57344 train_time:27891881ms step_avg:583.17ms +step:47829/57344 train_time:27891894ms step_avg:583.16ms +step:47830/57344 train_time:27892147ms step_avg:583.15ms +step:47831/57344 train_time:27892715ms step_avg:583.15ms +grad accum step:11958/14336 +step:47832/57344 train_time:27894052ms step_avg:583.17ms +step:47833/57344 train_time:27894074ms step_avg:583.16ms +step:47834/57344 train_time:27894304ms step_avg:583.15ms +step:47835/57344 train_time:27894844ms step_avg:583.15ms +grad accum step:11959/14336 +step:47836/57344 train_time:27896182ms step_avg:583.16ms +step:47837/57344 train_time:27896197ms step_avg:583.15ms +step:47838/57344 train_time:27896446ms step_avg:583.14ms +step:47839/57344 train_time:27896999ms step_avg:583.14ms +grad accum step:11960/14336 +step:47840/57344 train_time:27898397ms step_avg:583.16ms +step:47841/57344 train_time:27898414ms step_avg:583.15ms +step:47842/57344 train_time:27898655ms step_avg:583.14ms +step:47843/57344 train_time:27899224ms step_avg:583.14ms +grad accum step:11961/14336 +step:47844/57344 train_time:27900615ms step_avg:583.16ms +step:47845/57344 train_time:27900632ms step_avg:583.15ms +step:47846/57344 train_time:27900902ms step_avg:583.14ms +step:47847/57344 train_time:27901524ms step_avg:583.14ms +grad accum step:11962/14336 +step:47848/57344 train_time:27902910ms step_avg:583.16ms +step:47849/57344 train_time:27902929ms step_avg:583.15ms +step:47850/57344 train_time:27903152ms step_avg:583.14ms +step:47851/57344 train_time:27903721ms step_avg:583.14ms +grad accum step:11963/14336 +step:47852/57344 train_time:27905080ms step_avg:583.15ms +step:47853/57344 train_time:27905093ms step_avg:583.14ms +step:47854/57344 train_time:27905338ms step_avg:583.13ms +step:47855/57344 train_time:27905886ms step_avg:583.13ms +grad accum step:11964/14336 +step:47856/57344 train_time:27907231ms step_avg:583.15ms +step:47857/57344 train_time:27907316ms step_avg:583.14ms +step:47858/57344 train_time:27907542ms step_avg:583.13ms +step:47859/57344 train_time:27908115ms step_avg:583.13ms +grad accum step:11965/14336 +step:47860/57344 train_time:27909457ms step_avg:583.15ms +step:47861/57344 train_time:27909468ms step_avg:583.14ms +step:47862/57344 train_time:27909719ms step_avg:583.13ms +step:47863/57344 train_time:27910278ms step_avg:583.13ms +grad accum step:11966/14336 +step:47864/57344 train_time:27911645ms step_avg:583.14ms +step:47865/57344 train_time:27911666ms step_avg:583.13ms +step:47866/57344 train_time:27911910ms step_avg:583.13ms +step:47867/57344 train_time:27912510ms step_avg:583.13ms +grad accum step:11967/14336 +step:47868/57344 train_time:27913920ms step_avg:583.14ms +step:47869/57344 train_time:27913939ms step_avg:583.13ms +step:47870/57344 train_time:27914170ms step_avg:583.12ms +step:47871/57344 train_time:27914728ms step_avg:583.12ms +grad accum step:11968/14336 +step:47872/57344 train_time:27916097ms step_avg:583.14ms +step:47872/57344 val_loss:5.692632 train_time:27916102ms step_avg:583.14ms +step:47873/57344 train_time:27916397ms step_avg:583.13ms +step:47874/57344 train_time:27916458ms step_avg:583.12ms +step:47875/57344 train_time:27917011ms step_avg:583.12ms +grad accum step:11969/14336 +step:47876/57344 train_time:27918515ms step_avg:583.14ms +step:47877/57344 train_time:27918530ms step_avg:583.13ms +step:47878/57344 train_time:27918745ms step_avg:583.12ms +step:47879/57344 train_time:27919292ms step_avg:583.12ms +grad accum step:11970/14336 +step:47880/57344 train_time:27920635ms step_avg:583.14ms +step:47881/57344 train_time:27920663ms step_avg:583.13ms +step:47882/57344 train_time:27920897ms step_avg:583.12ms +step:47883/57344 train_time:27921447ms step_avg:583.12ms +grad accum step:11971/14336 +step:47884/57344 train_time:27922777ms step_avg:583.13ms +step:47885/57344 train_time:27922792ms step_avg:583.12ms +step:47886/57344 train_time:27923041ms step_avg:583.11ms +step:47887/57344 train_time:27923603ms step_avg:583.11ms +grad accum step:11972/14336 +step:47888/57344 train_time:27924928ms step_avg:583.13ms +step:47889/57344 train_time:27924952ms step_avg:583.12ms +step:47890/57344 train_time:27925190ms step_avg:583.11ms +step:47891/57344 train_time:27925760ms step_avg:583.11ms +grad accum step:11973/14336 +step:47892/57344 train_time:27927073ms step_avg:583.13ms +step:47893/57344 train_time:27927090ms step_avg:583.11ms +step:47894/57344 train_time:27927350ms step_avg:583.11ms +step:47895/57344 train_time:27927950ms step_avg:583.11ms +grad accum step:11974/14336 +step:47896/57344 train_time:27929275ms step_avg:583.12ms +step:47897/57344 train_time:27929289ms step_avg:583.11ms +step:47898/57344 train_time:27929547ms step_avg:583.10ms +step:47899/57344 train_time:27930131ms step_avg:583.10ms +grad accum step:11975/14336 +step:47900/57344 train_time:27931496ms step_avg:583.12ms +step:47901/57344 train_time:27931519ms step_avg:583.11ms +step:47902/57344 train_time:27931748ms step_avg:583.10ms +step:47903/57344 train_time:27932329ms step_avg:583.10ms +grad accum step:11976/14336 +step:47904/57344 train_time:27933837ms step_avg:583.12ms +step:47905/57344 train_time:27933859ms step_avg:583.11ms +step:47906/57344 train_time:27934112ms step_avg:583.10ms +step:47907/57344 train_time:27934782ms step_avg:583.10ms +grad accum step:11977/14336 +step:47908/57344 train_time:27936489ms step_avg:583.13ms +step:47909/57344 train_time:27936505ms step_avg:583.12ms +step:47910/57344 train_time:27936726ms step_avg:583.11ms +step:47911/57344 train_time:27937289ms step_avg:583.11ms +grad accum step:11978/14336 +step:47912/57344 train_time:27938677ms step_avg:583.12ms +step:47913/57344 train_time:27938693ms step_avg:583.11ms +step:47914/57344 train_time:27938917ms step_avg:583.11ms +step:47915/57344 train_time:27939482ms step_avg:583.11ms +grad accum step:11979/14336 +step:47916/57344 train_time:27940875ms step_avg:583.12ms +step:47917/57344 train_time:27940888ms step_avg:583.11ms +step:47918/57344 train_time:27941146ms step_avg:583.10ms +step:47919/57344 train_time:27941731ms step_avg:583.10ms +grad accum step:11980/14336 +step:47920/57344 train_time:27943086ms step_avg:583.12ms +step:47921/57344 train_time:27943099ms step_avg:583.11ms +step:47922/57344 train_time:27943359ms step_avg:583.10ms +step:47923/57344 train_time:27943938ms step_avg:583.10ms +grad accum step:11981/14336 +step:47924/57344 train_time:27945267ms step_avg:583.12ms +step:47925/57344 train_time:27945287ms step_avg:583.10ms +step:47926/57344 train_time:27945520ms step_avg:583.10ms +step:47927/57344 train_time:27946091ms step_avg:583.10ms +grad accum step:11982/14336 +step:47928/57344 train_time:27947443ms step_avg:583.11ms +step:47929/57344 train_time:27947461ms step_avg:583.10ms +step:47930/57344 train_time:27947700ms step_avg:583.09ms +step:47931/57344 train_time:27948277ms step_avg:583.09ms +grad accum step:11983/14336 +step:47932/57344 train_time:27949609ms step_avg:583.11ms +step:47933/57344 train_time:27949624ms step_avg:583.10ms +step:47934/57344 train_time:27949878ms step_avg:583.09ms +step:47935/57344 train_time:27950449ms step_avg:583.09ms +grad accum step:11984/14336 +step:47936/57344 train_time:27951767ms step_avg:583.11ms +step:47936/57344 val_loss:5.681762 train_time:27951777ms step_avg:583.11ms +step:47937/57344 train_time:27951789ms step_avg:583.09ms +step:47938/57344 train_time:27952032ms step_avg:583.09ms +step:47939/57344 train_time:27952678ms step_avg:583.09ms +grad accum step:11985/14336 +step:47940/57344 train_time:27954073ms step_avg:583.11ms +step:47941/57344 train_time:27954089ms step_avg:583.09ms +step:47942/57344 train_time:27954331ms step_avg:583.09ms +step:47943/57344 train_time:27954875ms step_avg:583.09ms +grad accum step:11986/14336 +step:47944/57344 train_time:27956223ms step_avg:583.10ms +step:47945/57344 train_time:27956244ms step_avg:583.09ms +step:47946/57344 train_time:27956478ms step_avg:583.08ms +step:47947/57344 train_time:27957075ms step_avg:583.08ms +grad accum step:11987/14336 +step:47948/57344 train_time:27958428ms step_avg:583.10ms +step:47949/57344 train_time:27958446ms step_avg:583.09ms +step:47950/57344 train_time:27958674ms step_avg:583.08ms +step:47951/57344 train_time:27959219ms step_avg:583.08ms +grad accum step:11988/14336 +step:47952/57344 train_time:27960573ms step_avg:583.10ms +step:47953/57344 train_time:27960589ms step_avg:583.08ms +step:47954/57344 train_time:27960834ms step_avg:583.08ms +step:47955/57344 train_time:27961396ms step_avg:583.08ms +grad accum step:11989/14336 +step:47956/57344 train_time:27962803ms step_avg:583.09ms +step:47957/57344 train_time:27962820ms step_avg:583.08ms +step:47958/57344 train_time:27963054ms step_avg:583.07ms +step:47959/57344 train_time:27963631ms step_avg:583.07ms +grad accum step:11990/14336 +step:47960/57344 train_time:27964991ms step_avg:583.09ms +step:47961/57344 train_time:27965006ms step_avg:583.08ms +step:47962/57344 train_time:27965261ms step_avg:583.07ms +step:47963/57344 train_time:27965842ms step_avg:583.07ms +grad accum step:11991/14336 +step:47964/57344 train_time:27967157ms step_avg:583.09ms +step:47965/57344 train_time:27967171ms step_avg:583.07ms +step:47966/57344 train_time:27967418ms step_avg:583.07ms +step:47967/57344 train_time:27967971ms step_avg:583.07ms +grad accum step:11992/14336 +step:47968/57344 train_time:27969323ms step_avg:583.08ms +step:47969/57344 train_time:27969352ms step_avg:583.07ms +step:47970/57344 train_time:27969587ms step_avg:583.06ms +step:47971/57344 train_time:27970168ms step_avg:583.06ms +grad accum step:11993/14336 +step:47972/57344 train_time:27971526ms step_avg:583.08ms +step:47973/57344 train_time:27971539ms step_avg:583.07ms +step:47974/57344 train_time:27971789ms step_avg:583.06ms +step:47975/57344 train_time:27972343ms step_avg:583.06ms +grad accum step:11994/14336 +step:47976/57344 train_time:27973675ms step_avg:583.08ms +step:47977/57344 train_time:27973702ms step_avg:583.06ms +step:47978/57344 train_time:27973932ms step_avg:583.06ms +step:47979/57344 train_time:27974501ms step_avg:583.06ms +grad accum step:11995/14336 +step:47980/57344 train_time:27975818ms step_avg:583.07ms +step:47981/57344 train_time:27975833ms step_avg:583.06ms +step:47982/57344 train_time:27976079ms step_avg:583.05ms +step:47983/57344 train_time:27976635ms step_avg:583.05ms +grad accum step:11996/14336 +step:47984/57344 train_time:27977989ms step_avg:583.07ms +step:47985/57344 train_time:27978005ms step_avg:583.06ms +step:47986/57344 train_time:27978255ms step_avg:583.05ms +step:47987/57344 train_time:27978814ms step_avg:583.05ms +grad accum step:11997/14336 +step:47988/57344 train_time:27980160ms step_avg:583.07ms +step:47989/57344 train_time:27980177ms step_avg:583.05ms +step:47990/57344 train_time:27980423ms step_avg:583.05ms +step:47991/57344 train_time:27980984ms step_avg:583.05ms +grad accum step:11998/14336 +step:47992/57344 train_time:27982426ms step_avg:583.06ms +step:47993/57344 train_time:27982443ms step_avg:583.05ms +step:47994/57344 train_time:27982692ms step_avg:583.05ms +step:47995/57344 train_time:27983246ms step_avg:583.05ms +grad accum step:11999/14336 +step:47996/57344 train_time:27984590ms step_avg:583.06ms +step:47997/57344 train_time:27984606ms step_avg:583.05ms +step:47998/57344 train_time:27984849ms step_avg:583.04ms +step:47999/57344 train_time:27985401ms step_avg:583.04ms +grad accum step:12000/14336 +step:48000/57344 train_time:27986787ms step_avg:583.06ms +step:48000/57344 val_loss:5.659550 train_time:27986794ms step_avg:583.06ms +step:48001/57344 train_time:27986805ms step_avg:583.05ms +step:48002/57344 train_time:27987039ms step_avg:583.04ms +step:48003/57344 train_time:27987623ms step_avg:583.04ms +grad accum step:12001/14336 +step:48004/57344 train_time:27989003ms step_avg:583.06ms +step:48005/57344 train_time:27989021ms step_avg:583.04ms +step:48006/57344 train_time:27989267ms step_avg:583.04ms +step:48007/57344 train_time:27989825ms step_avg:583.04ms +grad accum step:12002/14336 +step:48008/57344 train_time:27991219ms step_avg:583.05ms +step:48009/57344 train_time:27991235ms step_avg:583.04ms +step:48010/57344 train_time:27991489ms step_avg:583.03ms +step:48011/57344 train_time:27992078ms step_avg:583.03ms +grad accum step:12003/14336 +step:48012/57344 train_time:27993458ms step_avg:583.05ms +step:48013/57344 train_time:27993470ms step_avg:583.04ms +step:48014/57344 train_time:27993717ms step_avg:583.03ms +step:48015/57344 train_time:27994284ms step_avg:583.03ms +grad accum step:12004/14336 +step:48016/57344 train_time:28024459ms step_avg:583.65ms +step:48017/57344 train_time:28024480ms step_avg:583.64ms +step:48018/57344 train_time:28024733ms step_avg:583.63ms +step:48019/57344 train_time:28025345ms step_avg:583.63ms +grad accum step:12005/14336 +step:48020/57344 train_time:28026745ms step_avg:583.65ms +step:48021/57344 train_time:28026756ms step_avg:583.64ms +step:48022/57344 train_time:28026988ms step_avg:583.63ms +step:48023/57344 train_time:28027537ms step_avg:583.63ms +grad accum step:12006/14336 +step:48024/57344 train_time:28028843ms step_avg:583.64ms +step:48025/57344 train_time:28028856ms step_avg:583.63ms +step:48026/57344 train_time:28029096ms step_avg:583.62ms +step:48027/57344 train_time:28029645ms step_avg:583.62ms +grad accum step:12007/14336 +step:48028/57344 train_time:28030974ms step_avg:583.64ms +step:48029/57344 train_time:28031005ms step_avg:583.63ms +step:48030/57344 train_time:28031239ms step_avg:583.62ms +step:48031/57344 train_time:28031825ms step_avg:583.62ms +grad accum step:12008/14336 +step:48032/57344 train_time:28033167ms step_avg:583.64ms +step:48033/57344 train_time:28033183ms step_avg:583.62ms +step:48034/57344 train_time:28033428ms step_avg:583.62ms +step:48035/57344 train_time:28033971ms step_avg:583.62ms +grad accum step:12009/14336 +step:48036/57344 train_time:28035314ms step_avg:583.63ms +step:48037/57344 train_time:28035331ms step_avg:583.62ms +step:48038/57344 train_time:28035581ms step_avg:583.61ms +step:48039/57344 train_time:28036143ms step_avg:583.61ms +grad accum step:12010/14336 +step:48040/57344 train_time:28037464ms step_avg:583.63ms +step:48041/57344 train_time:28037480ms step_avg:583.62ms +step:48042/57344 train_time:28037732ms step_avg:583.61ms +step:48043/57344 train_time:28038298ms step_avg:583.61ms +grad accum step:12011/14336 +step:48044/57344 train_time:28039605ms step_avg:583.62ms +step:48045/57344 train_time:28039622ms step_avg:583.61ms +step:48046/57344 train_time:28039870ms step_avg:583.60ms +step:48047/57344 train_time:28040421ms step_avg:583.60ms +grad accum step:12012/14336 +step:48048/57344 train_time:28041760ms step_avg:583.62ms +step:48049/57344 train_time:28041777ms step_avg:583.61ms +step:48050/57344 train_time:28042003ms step_avg:583.60ms +step:48051/57344 train_time:28042564ms step_avg:583.60ms +grad accum step:12013/14336 +step:48052/57344 train_time:28043907ms step_avg:583.62ms +step:48053/57344 train_time:28043922ms step_avg:583.60ms +step:48054/57344 train_time:28044156ms step_avg:583.60ms +step:48055/57344 train_time:28044714ms step_avg:583.60ms +grad accum step:12014/14336 +step:48056/57344 train_time:28046029ms step_avg:583.61ms +step:48057/57344 train_time:28046046ms step_avg:583.60ms +step:48058/57344 train_time:28046290ms step_avg:583.59ms +step:48059/57344 train_time:28046837ms step_avg:583.59ms +grad accum step:12015/14336 +step:48060/57344 train_time:28048167ms step_avg:583.61ms +step:48061/57344 train_time:28048188ms step_avg:583.60ms +step:48062/57344 train_time:28048427ms step_avg:583.59ms +step:48063/57344 train_time:28048997ms step_avg:583.59ms +grad accum step:12016/14336 +step:48064/57344 train_time:28050344ms step_avg:583.60ms +step:48064/57344 val_loss:5.645223 train_time:28050344ms step_avg:583.60ms +step:48065/57344 train_time:28050356ms step_avg:583.59ms +step:48066/57344 train_time:28050657ms step_avg:583.59ms +step:48067/57344 train_time:28051208ms step_avg:583.59ms +grad accum step:12017/14336 +step:48068/57344 train_time:28052524ms step_avg:583.60ms +step:48069/57344 train_time:28052541ms step_avg:583.59ms +step:48070/57344 train_time:28052801ms step_avg:583.58ms +step:48071/57344 train_time:28053396ms step_avg:583.58ms +grad accum step:12018/14336 +step:48072/57344 train_time:28054710ms step_avg:583.60ms +step:48073/57344 train_time:28054727ms step_avg:583.59ms +step:48074/57344 train_time:28054979ms step_avg:583.58ms +step:48075/57344 train_time:28055544ms step_avg:583.58ms +grad accum step:12019/14336 +step:48076/57344 train_time:28056874ms step_avg:583.59ms +step:48077/57344 train_time:28056891ms step_avg:583.58ms +step:48078/57344 train_time:28057135ms step_avg:583.58ms +step:48079/57344 train_time:28057683ms step_avg:583.57ms +grad accum step:12020/14336 +step:48080/57344 train_time:28058987ms step_avg:583.59ms +step:48081/57344 train_time:28059001ms step_avg:583.58ms +step:48082/57344 train_time:28059249ms step_avg:583.57ms +step:48083/57344 train_time:28059800ms step_avg:583.57ms +grad accum step:12021/14336 +step:48084/57344 train_time:28061147ms step_avg:583.59ms +step:48085/57344 train_time:28061164ms step_avg:583.57ms +step:48086/57344 train_time:28061413ms step_avg:583.57ms +step:48087/57344 train_time:28061972ms step_avg:583.57ms +grad accum step:12022/14336 +step:48088/57344 train_time:28063309ms step_avg:583.58ms +step:48089/57344 train_time:28063323ms step_avg:583.57ms +step:48090/57344 train_time:28063563ms step_avg:583.56ms +step:48091/57344 train_time:28064143ms step_avg:583.56ms +grad accum step:12023/14336 +step:48092/57344 train_time:28065498ms step_avg:583.58ms +step:48093/57344 train_time:28065515ms step_avg:583.57ms +step:48094/57344 train_time:28065761ms step_avg:583.56ms +step:48095/57344 train_time:28066305ms step_avg:583.56ms +grad accum step:12024/14336 +step:48096/57344 train_time:28067624ms step_avg:583.58ms +step:48097/57344 train_time:28067640ms step_avg:583.56ms +step:48098/57344 train_time:28067892ms step_avg:583.56ms +step:48099/57344 train_time:28068464ms step_avg:583.56ms +grad accum step:12025/14336 +step:48100/57344 train_time:28158900ms step_avg:585.42ms +step:48101/57344 train_time:28172330ms step_avg:585.69ms +step:48102/57344 train_time:28172593ms step_avg:585.68ms +step:48103/57344 train_time:28173146ms step_avg:585.68ms +grad accum step:12026/14336 +step:48104/57344 train_time:28174476ms step_avg:585.70ms +step:48105/57344 train_time:28174491ms step_avg:585.69ms +step:48106/57344 train_time:28174743ms step_avg:585.68ms +step:48107/57344 train_time:28175322ms step_avg:585.68ms +grad accum step:12027/14336 +step:48108/57344 train_time:28176688ms step_avg:585.70ms +step:48109/57344 train_time:28176703ms step_avg:585.68ms +step:48110/57344 train_time:28176961ms step_avg:585.68ms +step:48111/57344 train_time:28177540ms step_avg:585.68ms +grad accum step:12028/14336 +step:48112/57344 train_time:28178881ms step_avg:585.69ms +step:48113/57344 train_time:28178896ms step_avg:585.68ms +step:48114/57344 train_time:28179118ms step_avg:585.67ms +step:48115/57344 train_time:28179669ms step_avg:585.67ms +grad accum step:12029/14336 +step:48116/57344 train_time:28180962ms step_avg:585.69ms +step:48117/57344 train_time:28180979ms step_avg:585.68ms +step:48118/57344 train_time:28181225ms step_avg:585.67ms +step:48119/57344 train_time:28181781ms step_avg:585.67ms +grad accum step:12030/14336 +step:48120/57344 train_time:28183090ms step_avg:585.68ms +step:48121/57344 train_time:28183106ms step_avg:585.67ms +step:48122/57344 train_time:28183352ms step_avg:585.66ms +step:48123/57344 train_time:28183897ms step_avg:585.66ms +grad accum step:12031/14336 +step:48124/57344 train_time:28185205ms step_avg:585.68ms +step:48125/57344 train_time:28185215ms step_avg:585.67ms +step:48126/57344 train_time:28185466ms step_avg:585.66ms +step:48127/57344 train_time:28186029ms step_avg:585.66ms +grad accum step:12032/14336 +step:48128/57344 train_time:28187393ms step_avg:585.68ms +step:48128/57344 val_loss:5.632010 train_time:28187394ms step_avg:585.68ms +step:48129/57344 train_time:28187405ms step_avg:585.66ms +step:48130/57344 train_time:28187785ms step_avg:585.66ms +step:48131/57344 train_time:28188316ms step_avg:585.66ms +grad accum step:12033/14336 +step:48132/57344 train_time:28189712ms step_avg:585.68ms +step:48133/57344 train_time:28189728ms step_avg:585.66ms +step:48134/57344 train_time:28189970ms step_avg:585.66ms +step:48135/57344 train_time:28190520ms step_avg:585.66ms +grad accum step:12034/14336 +step:48136/57344 train_time:28191861ms step_avg:585.67ms +step:48137/57344 train_time:28191876ms step_avg:585.66ms +step:48138/57344 train_time:28192123ms step_avg:585.65ms +step:48139/57344 train_time:28192678ms step_avg:585.65ms +grad accum step:12035/14336 +step:48140/57344 train_time:28194004ms step_avg:585.67ms +step:48141/57344 train_time:28194022ms step_avg:585.66ms +step:48142/57344 train_time:28194255ms step_avg:585.65ms +step:48143/57344 train_time:28194815ms step_avg:585.65ms +grad accum step:12036/14336 +step:48144/57344 train_time:28196159ms step_avg:585.66ms +step:48145/57344 train_time:28196176ms step_avg:585.65ms +step:48146/57344 train_time:28196424ms step_avg:585.64ms +step:48147/57344 train_time:28196982ms step_avg:585.64ms +grad accum step:12037/14336 +step:48148/57344 train_time:28198367ms step_avg:585.66ms +step:48149/57344 train_time:28198382ms step_avg:585.65ms +step:48150/57344 train_time:28198637ms step_avg:585.64ms +step:48151/57344 train_time:28199237ms step_avg:585.64ms +grad accum step:12038/14336 +step:48152/57344 train_time:28200633ms step_avg:585.66ms +step:48153/57344 train_time:28200644ms step_avg:585.65ms +step:48154/57344 train_time:28200882ms step_avg:585.64ms +step:48155/57344 train_time:28201439ms step_avg:585.64ms +grad accum step:12039/14336 +step:48156/57344 train_time:28238103ms step_avg:586.39ms +step:48157/57344 train_time:28238120ms step_avg:586.38ms +step:48158/57344 train_time:28238363ms step_avg:586.37ms +step:48159/57344 train_time:28238903ms step_avg:586.37ms +grad accum step:12040/14336 +step:48160/57344 train_time:28240215ms step_avg:586.38ms +step:48161/57344 train_time:28240235ms step_avg:586.37ms +step:48162/57344 train_time:28240451ms step_avg:586.36ms +step:48163/57344 train_time:28240991ms step_avg:586.36ms +grad accum step:12041/14336 +step:48164/57344 train_time:28242316ms step_avg:586.38ms +step:48165/57344 train_time:28242329ms step_avg:586.37ms +step:48166/57344 train_time:28242574ms step_avg:586.36ms +step:48167/57344 train_time:28243134ms step_avg:586.36ms +grad accum step:12042/14336 +step:48168/57344 train_time:28244536ms step_avg:586.38ms +step:48169/57344 train_time:28244553ms step_avg:586.36ms +step:48170/57344 train_time:28244795ms step_avg:586.36ms +step:48171/57344 train_time:28245349ms step_avg:586.36ms +grad accum step:12043/14336 +step:48172/57344 train_time:28246670ms step_avg:586.37ms +step:48173/57344 train_time:28246686ms step_avg:586.36ms +step:48174/57344 train_time:28246938ms step_avg:586.35ms +step:48175/57344 train_time:28247509ms step_avg:586.35ms +grad accum step:12044/14336 +step:48176/57344 train_time:28248820ms step_avg:586.37ms +step:48177/57344 train_time:28248837ms step_avg:586.36ms +step:48178/57344 train_time:28249083ms step_avg:586.35ms +step:48179/57344 train_time:28249626ms step_avg:586.35ms +grad accum step:12045/14336 +step:48180/57344 train_time:28250980ms step_avg:586.36ms +step:48181/57344 train_time:28250998ms step_avg:586.35ms +step:48182/57344 train_time:28251226ms step_avg:586.34ms +step:48183/57344 train_time:28251784ms step_avg:586.34ms +grad accum step:12046/14336 +step:48184/57344 train_time:28253118ms step_avg:586.36ms +step:48185/57344 train_time:28253130ms step_avg:586.35ms +step:48186/57344 train_time:28253372ms step_avg:586.34ms +step:48187/57344 train_time:28253918ms step_avg:586.34ms +grad accum step:12047/14336 +step:48188/57344 train_time:28255310ms step_avg:586.36ms +step:48189/57344 train_time:28255327ms step_avg:586.34ms +step:48190/57344 train_time:28255577ms step_avg:586.34ms +step:48191/57344 train_time:28256141ms step_avg:586.34ms +grad accum step:12048/14336 +step:48192/57344 train_time:28257513ms step_avg:586.35ms +step:48192/57344 val_loss:5.621907 train_time:28257516ms step_avg:586.35ms +step:48193/57344 train_time:28257527ms step_avg:586.34ms +step:48194/57344 train_time:28257748ms step_avg:586.33ms +step:48195/57344 train_time:28258301ms step_avg:586.33ms +grad accum step:12049/14336 +step:48196/57344 train_time:28259666ms step_avg:586.35ms +step:48197/57344 train_time:28259684ms step_avg:586.34ms +step:48198/57344 train_time:28259940ms step_avg:586.33ms +step:48199/57344 train_time:28260529ms step_avg:586.33ms +grad accum step:12050/14336 +step:48200/57344 train_time:28261930ms step_avg:586.35ms +step:48201/57344 train_time:28261947ms step_avg:586.34ms +step:48202/57344 train_time:28262189ms step_avg:586.33ms +step:48203/57344 train_time:28262758ms step_avg:586.33ms +grad accum step:12051/14336 +step:48204/57344 train_time:28264118ms step_avg:586.34ms +step:48205/57344 train_time:28264135ms step_avg:586.33ms +step:48206/57344 train_time:28264375ms step_avg:586.32ms +step:48207/57344 train_time:28264925ms step_avg:586.32ms +grad accum step:12052/14336 +step:48208/57344 train_time:28266306ms step_avg:586.34ms +step:48209/57344 train_time:28266322ms step_avg:586.33ms +step:48210/57344 train_time:28266568ms step_avg:586.32ms +step:48211/57344 train_time:28267154ms step_avg:586.32ms +grad accum step:12053/14336 +step:48212/57344 train_time:28268618ms step_avg:586.34ms +step:48213/57344 train_time:28268635ms step_avg:586.33ms +step:48214/57344 train_time:28268879ms step_avg:586.32ms +step:48215/57344 train_time:28269425ms step_avg:586.32ms +grad accum step:12054/14336 +step:48216/57344 train_time:28270784ms step_avg:586.34ms +step:48217/57344 train_time:28270800ms step_avg:586.32ms +step:48218/57344 train_time:28271062ms step_avg:586.32ms +step:48219/57344 train_time:28271667ms step_avg:586.32ms +grad accum step:12055/14336 +step:48220/57344 train_time:28273024ms step_avg:586.33ms +step:48221/57344 train_time:28273038ms step_avg:586.32ms +step:48222/57344 train_time:28273297ms step_avg:586.32ms +step:48223/57344 train_time:28273876ms step_avg:586.32ms +grad accum step:12056/14336 +step:48224/57344 train_time:28275295ms step_avg:586.33ms +step:48225/57344 train_time:28275319ms step_avg:586.32ms +step:48226/57344 train_time:28275535ms step_avg:586.31ms +step:48227/57344 train_time:28276078ms step_avg:586.31ms +grad accum step:12057/14336 +step:48228/57344 train_time:28277422ms step_avg:586.33ms +step:48229/57344 train_time:28277442ms step_avg:586.32ms +step:48230/57344 train_time:28277667ms step_avg:586.31ms +step:48231/57344 train_time:28278233ms step_avg:586.31ms +grad accum step:12058/14336 +step:48232/57344 train_time:28279601ms step_avg:586.32ms +step:48233/57344 train_time:28279616ms step_avg:586.31ms +step:48234/57344 train_time:28279858ms step_avg:586.31ms +step:48235/57344 train_time:28280410ms step_avg:586.30ms +grad accum step:12059/14336 +step:48236/57344 train_time:28281727ms step_avg:586.32ms +step:48237/57344 train_time:28281750ms step_avg:586.31ms +step:48238/57344 train_time:28281979ms step_avg:586.30ms +step:48239/57344 train_time:28282541ms step_avg:586.30ms +grad accum step:12060/14336 +step:48240/57344 train_time:28283984ms step_avg:586.32ms +step:48241/57344 train_time:28284006ms step_avg:586.31ms +step:48242/57344 train_time:28284229ms step_avg:586.30ms +step:48243/57344 train_time:28284781ms step_avg:586.30ms +grad accum step:12061/14336 +step:48244/57344 train_time:28286306ms step_avg:586.32ms +step:48245/57344 train_time:28286327ms step_avg:586.31ms +step:48246/57344 train_time:28286550ms step_avg:586.30ms +step:48247/57344 train_time:28287132ms step_avg:586.30ms +grad accum step:12062/14336 +step:48248/57344 train_time:28288534ms step_avg:586.32ms +step:48249/57344 train_time:28288549ms step_avg:586.30ms +step:48250/57344 train_time:28288794ms step_avg:586.30ms +step:48251/57344 train_time:28289353ms step_avg:586.30ms +grad accum step:12063/14336 +step:48252/57344 train_time:28290760ms step_avg:586.31ms +step:48253/57344 train_time:28290777ms step_avg:586.30ms +step:48254/57344 train_time:28291021ms step_avg:586.29ms +step:48255/57344 train_time:28320120ms step_avg:586.88ms +grad accum step:12064/14336 +step:48256/57344 train_time:28321294ms step_avg:586.90ms +step:48256/57344 val_loss:5.611989 train_time:28321303ms step_avg:586.90ms +step:48257/57344 train_time:28321315ms step_avg:586.89ms +step:48258/57344 train_time:28356248ms step_avg:587.60ms +step:48259/57344 train_time:28360321ms step_avg:587.67ms +grad accum step:12065/14336 +step:48260/57344 train_time:28361449ms step_avg:587.68ms +step:48261/57344 train_time:28361463ms step_avg:587.67ms +step:48262/57344 train_time:28361680ms step_avg:587.66ms +step:48263/57344 train_time:28362235ms step_avg:587.66ms +grad accum step:12066/14336 +step:48264/57344 train_time:28363579ms step_avg:587.68ms +step:48265/57344 train_time:28363594ms step_avg:587.66ms +step:48266/57344 train_time:28363844ms step_avg:587.66ms +step:48267/57344 train_time:28364404ms step_avg:587.66ms +grad accum step:12067/14336 +step:48268/57344 train_time:28365746ms step_avg:587.67ms +step:48269/57344 train_time:28365771ms step_avg:587.66ms +step:48270/57344 train_time:28366007ms step_avg:587.65ms +step:48271/57344 train_time:28366571ms step_avg:587.65ms +grad accum step:12068/14336 +step:48272/57344 train_time:28367912ms step_avg:587.67ms +step:48273/57344 train_time:28367925ms step_avg:587.66ms +step:48274/57344 train_time:28368182ms step_avg:587.65ms +step:48275/57344 train_time:28368784ms step_avg:587.65ms +grad accum step:12069/14336 +step:48276/57344 train_time:28370157ms step_avg:587.67ms +step:48277/57344 train_time:28370187ms step_avg:587.65ms +step:48278/57344 train_time:28370414ms step_avg:587.65ms +step:48279/57344 train_time:28370994ms step_avg:587.65ms +grad accum step:12070/14336 +step:48280/57344 train_time:28372380ms step_avg:587.66ms +step:48281/57344 train_time:28372398ms step_avg:587.65ms +step:48282/57344 train_time:28372640ms step_avg:587.64ms +step:48283/57344 train_time:28373220ms step_avg:587.64ms +grad accum step:12071/14336 +step:48284/57344 train_time:28374660ms step_avg:587.66ms +step:48285/57344 train_time:28374680ms step_avg:587.65ms +step:48286/57344 train_time:28374922ms step_avg:587.64ms +step:48287/57344 train_time:28375480ms step_avg:587.64ms +grad accum step:12072/14336 +step:48288/57344 train_time:28376823ms step_avg:587.66ms +step:48289/57344 train_time:28376838ms step_avg:587.65ms +step:48290/57344 train_time:28377083ms step_avg:587.64ms +step:48291/57344 train_time:28377647ms step_avg:587.64ms +grad accum step:12073/14336 +step:48292/57344 train_time:28379022ms step_avg:587.65ms +step:48293/57344 train_time:28379042ms step_avg:587.64ms +step:48294/57344 train_time:28379278ms step_avg:587.64ms +step:48295/57344 train_time:28379832ms step_avg:587.64ms +grad accum step:12074/14336 +step:48296/57344 train_time:28381292ms step_avg:587.65ms +step:48297/57344 train_time:28381320ms step_avg:587.64ms +step:48298/57344 train_time:28381548ms step_avg:587.63ms +step:48299/57344 train_time:28382092ms step_avg:587.63ms +grad accum step:12075/14336 +step:48300/57344 train_time:28383417ms step_avg:587.65ms +step:48301/57344 train_time:28383434ms step_avg:587.64ms +step:48302/57344 train_time:28383682ms step_avg:587.63ms +step:48303/57344 train_time:28384257ms step_avg:587.63ms +grad accum step:12076/14336 +step:48304/57344 train_time:28385627ms step_avg:587.65ms +step:48305/57344 train_time:28385654ms step_avg:587.63ms +step:48306/57344 train_time:28385875ms step_avg:587.63ms +step:48307/57344 train_time:28386426ms step_avg:587.63ms +grad accum step:12077/14336 +step:48308/57344 train_time:28387858ms step_avg:587.64ms +step:48309/57344 train_time:28387876ms step_avg:587.63ms +step:48310/57344 train_time:28388093ms step_avg:587.62ms +step:48311/57344 train_time:28388659ms step_avg:587.62ms +grad accum step:12078/14336 +step:48312/57344 train_time:28390239ms step_avg:587.64ms +step:48313/57344 train_time:28390273ms step_avg:587.63ms +step:48314/57344 train_time:28390488ms step_avg:587.62ms +step:48315/57344 train_time:28391032ms step_avg:587.62ms +grad accum step:12079/14336 +step:48316/57344 train_time:28392369ms step_avg:587.64ms +step:48317/57344 train_time:28392389ms step_avg:587.63ms +step:48318/57344 train_time:28392634ms step_avg:587.62ms +step:48319/57344 train_time:28393197ms step_avg:587.62ms +grad accum step:12080/14336 +step:48320/57344 train_time:28394573ms step_avg:587.64ms +step:48320/57344 val_loss:5.599409 train_time:28394587ms step_avg:587.64ms +step:48321/57344 train_time:28394599ms step_avg:587.62ms +step:48322/57344 train_time:28394826ms step_avg:587.62ms +step:48323/57344 train_time:28395403ms step_avg:587.62ms +grad accum step:12081/14336 +step:48324/57344 train_time:28396760ms step_avg:587.63ms +step:48325/57344 train_time:28396783ms step_avg:587.62ms +step:48326/57344 train_time:28397024ms step_avg:587.61ms +step:48327/57344 train_time:28397597ms step_avg:587.61ms +grad accum step:12082/14336 +step:48328/57344 train_time:28398962ms step_avg:587.63ms +step:48329/57344 train_time:28398976ms step_avg:587.62ms +step:48330/57344 train_time:28399223ms step_avg:587.61ms +step:48331/57344 train_time:28399796ms step_avg:587.61ms +grad accum step:12083/14336 +step:48332/57344 train_time:28401121ms step_avg:587.63ms +step:48333/57344 train_time:28401136ms step_avg:587.61ms +step:48334/57344 train_time:28401386ms step_avg:587.61ms +step:48335/57344 train_time:28401949ms step_avg:587.61ms +grad accum step:12084/14336 +step:48336/57344 train_time:28403256ms step_avg:587.62ms +step:48337/57344 train_time:28403270ms step_avg:587.61ms +step:48338/57344 train_time:28403523ms step_avg:587.60ms +step:48339/57344 train_time:28404092ms step_avg:587.60ms +grad accum step:12085/14336 +step:48340/57344 train_time:28405598ms step_avg:587.62ms +step:48341/57344 train_time:28405636ms step_avg:587.61ms +step:48342/57344 train_time:28405858ms step_avg:587.60ms +step:48343/57344 train_time:28406404ms step_avg:587.60ms +grad accum step:12086/14336 +step:48344/57344 train_time:28407750ms step_avg:587.62ms +step:48345/57344 train_time:28407771ms step_avg:587.61ms +step:48346/57344 train_time:28408001ms step_avg:587.60ms +step:48347/57344 train_time:28408549ms step_avg:587.60ms +grad accum step:12087/14336 +step:48348/57344 train_time:28409882ms step_avg:587.61ms +step:48349/57344 train_time:28409897ms step_avg:587.60ms +step:48350/57344 train_time:28410138ms step_avg:587.59ms +step:48351/57344 train_time:28410700ms step_avg:587.59ms +grad accum step:12088/14336 +step:48352/57344 train_time:28412039ms step_avg:587.61ms +step:48353/57344 train_time:28412055ms step_avg:587.60ms +step:48354/57344 train_time:28412312ms step_avg:587.59ms +step:48355/57344 train_time:28412897ms step_avg:587.59ms +grad accum step:12089/14336 +step:48356/57344 train_time:28414239ms step_avg:587.61ms +step:48357/57344 train_time:28414255ms step_avg:587.59ms +step:48358/57344 train_time:28414511ms step_avg:587.59ms +step:48359/57344 train_time:28415104ms step_avg:587.59ms +grad accum step:12090/14336 +step:48360/57344 train_time:28416552ms step_avg:587.60ms +step:48361/57344 train_time:28416570ms step_avg:587.59ms +step:48362/57344 train_time:28416794ms step_avg:587.59ms +step:48363/57344 train_time:28417352ms step_avg:587.58ms +grad accum step:12091/14336 +step:48364/57344 train_time:28418726ms step_avg:587.60ms +step:48365/57344 train_time:28418738ms step_avg:587.59ms +step:48366/57344 train_time:28418969ms step_avg:587.58ms +step:48367/57344 train_time:28419544ms step_avg:587.58ms +grad accum step:12092/14336 +step:48368/57344 train_time:28420918ms step_avg:587.60ms +step:48369/57344 train_time:28420935ms step_avg:587.59ms +step:48370/57344 train_time:28421196ms step_avg:587.58ms +step:48371/57344 train_time:28421796ms step_avg:587.58ms +grad accum step:12093/14336 +step:48372/57344 train_time:28423270ms step_avg:587.60ms +step:48373/57344 train_time:28423286ms step_avg:587.59ms +step:48374/57344 train_time:28423542ms step_avg:587.58ms +step:48375/57344 train_time:28424126ms step_avg:587.58ms +grad accum step:12094/14336 +step:48376/57344 train_time:28425482ms step_avg:587.59ms +step:48377/57344 train_time:28425496ms step_avg:587.58ms +step:48378/57344 train_time:28425748ms step_avg:587.58ms +step:48379/57344 train_time:28426310ms step_avg:587.58ms +grad accum step:12095/14336 +step:48380/57344 train_time:28427675ms step_avg:587.59ms +step:48381/57344 train_time:28427689ms step_avg:587.58ms +step:48382/57344 train_time:28427947ms step_avg:587.57ms +step:48383/57344 train_time:28428521ms step_avg:587.57ms +grad accum step:12096/14336 +step:48384/57344 train_time:28429870ms step_avg:587.59ms +step:48384/57344 val_loss:5.596268 train_time:28429876ms step_avg:587.59ms +step:48385/57344 train_time:28429888ms step_avg:587.58ms +step:48386/57344 train_time:28430127ms step_avg:587.57ms +step:48387/57344 train_time:28430726ms step_avg:587.57ms +grad accum step:12097/14336 +step:48388/57344 train_time:28432114ms step_avg:587.59ms +step:48389/57344 train_time:28432130ms step_avg:587.57ms +step:48390/57344 train_time:28432378ms step_avg:587.57ms +step:48391/57344 train_time:28432938ms step_avg:587.57ms +grad accum step:12098/14336 +step:48392/57344 train_time:28434295ms step_avg:587.58ms +step:48393/57344 train_time:28434313ms step_avg:587.57ms +step:48394/57344 train_time:28434555ms step_avg:587.56ms +step:48395/57344 train_time:28435117ms step_avg:587.56ms +grad accum step:12099/14336 +step:48396/57344 train_time:28436455ms step_avg:587.58ms +step:48397/57344 train_time:28436471ms step_avg:587.57ms +step:48398/57344 train_time:28436723ms step_avg:587.56ms +step:48399/57344 train_time:28437295ms step_avg:587.56ms +grad accum step:12100/14336 +step:48400/57344 train_time:28438717ms step_avg:587.58ms +step:48401/57344 train_time:28438733ms step_avg:587.56ms +step:48402/57344 train_time:28438978ms step_avg:587.56ms +step:48403/57344 train_time:28439532ms step_avg:587.56ms +grad accum step:12101/14336 +step:48404/57344 train_time:28441084ms step_avg:587.58ms +step:48405/57344 train_time:28441105ms step_avg:587.57ms +step:48406/57344 train_time:28441339ms step_avg:587.56ms +step:48407/57344 train_time:28441939ms step_avg:587.56ms +grad accum step:12102/14336 +step:48408/57344 train_time:28443518ms step_avg:587.58ms +step:48409/57344 train_time:28443534ms step_avg:587.57ms +step:48410/57344 train_time:28443750ms step_avg:587.56ms +step:48411/57344 train_time:28444315ms step_avg:587.56ms +grad accum step:12103/14336 +step:48412/57344 train_time:28445666ms step_avg:587.57ms +step:48413/57344 train_time:28445682ms step_avg:587.56ms +step:48414/57344 train_time:28445956ms step_avg:587.56ms +step:48415/57344 train_time:28446621ms step_avg:587.56ms +grad accum step:12104/14336 +step:48416/57344 train_time:28448095ms step_avg:587.58ms +step:48417/57344 train_time:28448118ms step_avg:587.56ms +step:48418/57344 train_time:28448353ms step_avg:587.56ms +step:48419/57344 train_time:28448898ms step_avg:587.56ms +grad accum step:12105/14336 +step:48420/57344 train_time:28450311ms step_avg:587.57ms +step:48421/57344 train_time:28450326ms step_avg:587.56ms +step:48422/57344 train_time:28450568ms step_avg:587.55ms +step:48423/57344 train_time:28451149ms step_avg:587.55ms +grad accum step:12106/14336 +step:48424/57344 train_time:28452506ms step_avg:587.57ms +step:48425/57344 train_time:28452524ms step_avg:587.56ms +step:48426/57344 train_time:28452752ms step_avg:587.55ms +step:48427/57344 train_time:28453323ms step_avg:587.55ms +grad accum step:12107/14336 +step:48428/57344 train_time:28454663ms step_avg:587.57ms +step:48429/57344 train_time:28454676ms step_avg:587.55ms +step:48430/57344 train_time:28454924ms step_avg:587.55ms +step:48431/57344 train_time:28455485ms step_avg:587.55ms +grad accum step:12108/14336 +step:48432/57344 train_time:28456832ms step_avg:587.56ms +step:48433/57344 train_time:28456849ms step_avg:587.55ms +step:48434/57344 train_time:28457096ms step_avg:587.54ms +step:48435/57344 train_time:28457663ms step_avg:587.54ms +grad accum step:12109/14336 +step:48436/57344 train_time:28459089ms step_avg:587.56ms +step:48437/57344 train_time:28459132ms step_avg:587.55ms +step:48438/57344 train_time:28459357ms step_avg:587.54ms +step:48439/57344 train_time:28459917ms step_avg:587.54ms +grad accum step:12110/14336 +step:48440/57344 train_time:28461267ms step_avg:587.56ms +step:48441/57344 train_time:28461286ms step_avg:587.55ms +step:48442/57344 train_time:28461535ms step_avg:587.54ms +step:48443/57344 train_time:28462113ms step_avg:587.54ms +grad accum step:12111/14336 +step:48444/57344 train_time:28463450ms step_avg:587.55ms +step:48445/57344 train_time:28463465ms step_avg:587.54ms +step:48446/57344 train_time:28463720ms step_avg:587.53ms +step:48447/57344 train_time:28464292ms step_avg:587.53ms +grad accum step:12112/14336 +step:48448/57344 train_time:28465730ms step_avg:587.55ms +step:48448/57344 val_loss:5.582804 train_time:28465757ms step_avg:587.55ms +step:48449/57344 train_time:28465905ms step_avg:587.54ms +step:48450/57344 train_time:28466026ms step_avg:587.53ms +step:48451/57344 train_time:28466584ms step_avg:587.53ms +grad accum step:12113/14336 +step:48452/57344 train_time:28467922ms step_avg:587.55ms +step:48453/57344 train_time:28467946ms step_avg:587.54ms +step:48454/57344 train_time:28468186ms step_avg:587.53ms +step:48455/57344 train_time:28468753ms step_avg:587.53ms +grad accum step:12114/14336 +step:48456/57344 train_time:28470065ms step_avg:587.54ms +step:48457/57344 train_time:28470084ms step_avg:587.53ms +step:48458/57344 train_time:28470324ms step_avg:587.53ms +step:48459/57344 train_time:28470888ms step_avg:587.53ms +grad accum step:12115/14336 +step:48460/57344 train_time:28472224ms step_avg:587.54ms +step:48461/57344 train_time:28472240ms step_avg:587.53ms +step:48462/57344 train_time:28472496ms step_avg:587.52ms +step:48463/57344 train_time:28473075ms step_avg:587.52ms +grad accum step:12116/14336 +step:48464/57344 train_time:28474414ms step_avg:587.54ms +step:48465/57344 train_time:28474428ms step_avg:587.53ms +step:48466/57344 train_time:28474681ms step_avg:587.52ms +step:48467/57344 train_time:28475265ms step_avg:587.52ms +grad accum step:12117/14336 +step:48468/57344 train_time:28476610ms step_avg:587.53ms +step:48469/57344 train_time:28476623ms step_avg:587.52ms +step:48470/57344 train_time:28476875ms step_avg:587.52ms +step:48471/57344 train_time:28477443ms step_avg:587.52ms +grad accum step:12118/14336 +step:48472/57344 train_time:28478781ms step_avg:587.53ms +step:48473/57344 train_time:28478796ms step_avg:587.52ms +step:48474/57344 train_time:28479040ms step_avg:587.51ms +step:48475/57344 train_time:28479585ms step_avg:587.51ms +grad accum step:12119/14336 +step:48476/57344 train_time:28480983ms step_avg:587.53ms +step:48477/57344 train_time:28481080ms step_avg:587.52ms +step:48478/57344 train_time:28481302ms step_avg:587.51ms +step:48479/57344 train_time:28481870ms step_avg:587.51ms +grad accum step:12120/14336 +step:48480/57344 train_time:28483475ms step_avg:587.53ms +step:48481/57344 train_time:28483493ms step_avg:587.52ms +step:48482/57344 train_time:28483716ms step_avg:587.51ms +step:48483/57344 train_time:28484280ms step_avg:587.51ms +grad accum step:12121/14336 +step:48484/57344 train_time:28485647ms step_avg:587.53ms +step:48485/57344 train_time:28485669ms step_avg:587.52ms +step:48486/57344 train_time:28485899ms step_avg:587.51ms +step:48487/57344 train_time:28486463ms step_avg:587.51ms +grad accum step:12122/14336 +step:48488/57344 train_time:28487816ms step_avg:587.52ms +step:48489/57344 train_time:28487832ms step_avg:587.51ms +step:48490/57344 train_time:28488082ms step_avg:587.50ms +step:48491/57344 train_time:28488669ms step_avg:587.50ms +grad accum step:12123/14336 +step:48492/57344 train_time:28490020ms step_avg:587.52ms +step:48493/57344 train_time:28490041ms step_avg:587.51ms +step:48494/57344 train_time:28490272ms step_avg:587.50ms +step:48495/57344 train_time:28490835ms step_avg:587.50ms +grad accum step:12124/14336 +step:48496/57344 train_time:28492178ms step_avg:587.52ms +step:48497/57344 train_time:28492189ms step_avg:587.50ms +step:48498/57344 train_time:28492436ms step_avg:587.50ms +step:48499/57344 train_time:28492998ms step_avg:587.50ms +grad accum step:12125/14336 +step:48500/57344 train_time:28494386ms step_avg:587.51ms +step:48501/57344 train_time:28494411ms step_avg:587.50ms +step:48502/57344 train_time:28494640ms step_avg:587.49ms +step:48503/57344 train_time:28495196ms step_avg:587.49ms +grad accum step:12126/14336 +step:48504/57344 train_time:28496542ms step_avg:587.51ms +step:48505/57344 train_time:28496555ms step_avg:587.50ms +step:48506/57344 train_time:28496807ms step_avg:587.49ms +step:48507/57344 train_time:28497372ms step_avg:587.49ms +grad accum step:12127/14336 +step:48508/57344 train_time:28498720ms step_avg:587.51ms +step:48509/57344 train_time:28498755ms step_avg:587.49ms +step:48510/57344 train_time:28498981ms step_avg:587.49ms +step:48511/57344 train_time:28499543ms step_avg:587.49ms +grad accum step:12128/14336 +step:48512/57344 train_time:28500847ms step_avg:587.50ms +step:48512/57344 val_loss:5.570768 train_time:28500849ms step_avg:587.50ms +step:48513/57344 train_time:28501921ms step_avg:587.51ms +step:48514/57344 train_time:28502068ms step_avg:587.50ms +step:48515/57344 train_time:28502507ms step_avg:587.50ms +grad accum step:12129/14336 +step:48516/57344 train_time:28503953ms step_avg:587.52ms +step:48517/57344 train_time:28503967ms step_avg:587.50ms +step:48518/57344 train_time:28504191ms step_avg:587.50ms +step:48519/57344 train_time:28504766ms step_avg:587.50ms +grad accum step:12130/14336 +step:48520/57344 train_time:28506112ms step_avg:587.51ms +step:48521/57344 train_time:28506128ms step_avg:587.50ms +step:48522/57344 train_time:28506378ms step_avg:587.49ms +step:48523/57344 train_time:28506939ms step_avg:587.49ms +grad accum step:12131/14336 +step:48524/57344 train_time:28508264ms step_avg:587.51ms +step:48525/57344 train_time:28508282ms step_avg:587.50ms +step:48526/57344 train_time:28508520ms step_avg:587.49ms +step:48527/57344 train_time:28509082ms step_avg:587.49ms +grad accum step:12132/14336 +step:48528/57344 train_time:28510469ms step_avg:587.51ms +step:48529/57344 train_time:28510493ms step_avg:587.49ms +step:48530/57344 train_time:28510725ms step_avg:587.49ms +step:48531/57344 train_time:28511275ms step_avg:587.49ms +grad accum step:12133/14336 +step:48532/57344 train_time:28512671ms step_avg:587.50ms +step:48533/57344 train_time:28512688ms step_avg:587.49ms +step:48534/57344 train_time:28512932ms step_avg:587.48ms +step:48535/57344 train_time:28513502ms step_avg:587.48ms +grad accum step:12134/14336 +step:48536/57344 train_time:28514873ms step_avg:587.50ms +step:48537/57344 train_time:28514887ms step_avg:587.49ms +step:48538/57344 train_time:28515139ms step_avg:587.48ms +step:48539/57344 train_time:28515706ms step_avg:587.48ms +grad accum step:12135/14336 +step:48540/57344 train_time:28517057ms step_avg:587.50ms +step:48541/57344 train_time:28517073ms step_avg:587.48ms +step:48542/57344 train_time:28517327ms step_avg:587.48ms +step:48543/57344 train_time:28517897ms step_avg:587.48ms +grad accum step:12136/14336 +step:48544/57344 train_time:28519239ms step_avg:587.49ms +step:48545/57344 train_time:28519250ms step_avg:587.48ms +step:48546/57344 train_time:28519496ms step_avg:587.47ms +step:48547/57344 train_time:28520083ms step_avg:587.47ms +grad accum step:12137/14336 +step:48548/57344 train_time:28521466ms step_avg:587.49ms +step:48549/57344 train_time:28521483ms step_avg:587.48ms +step:48550/57344 train_time:28521730ms step_avg:587.47ms +step:48551/57344 train_time:28522279ms step_avg:587.47ms +grad accum step:12138/14336 +step:48552/57344 train_time:28523610ms step_avg:587.49ms +step:48553/57344 train_time:28523622ms step_avg:587.47ms +step:48554/57344 train_time:28523869ms step_avg:587.47ms +step:48555/57344 train_time:28524429ms step_avg:587.47ms +grad accum step:12139/14336 +step:48556/57344 train_time:28525768ms step_avg:587.48ms +step:48557/57344 train_time:28535894ms step_avg:587.68ms +step:48558/57344 train_time:28536754ms step_avg:587.68ms +step:48559/57344 train_time:28536915ms step_avg:587.68ms +grad accum step:12140/14336 +step:48560/57344 train_time:28538327ms step_avg:587.69ms +step:48561/57344 train_time:28538342ms step_avg:587.68ms +step:48562/57344 train_time:28538569ms step_avg:587.67ms +step:48563/57344 train_time:28539138ms step_avg:587.67ms +grad accum step:12141/14336 +step:48564/57344 train_time:28540489ms step_avg:587.69ms +step:48565/57344 train_time:28540520ms step_avg:587.68ms +step:48566/57344 train_time:28540741ms step_avg:587.67ms +step:48567/57344 train_time:28541284ms step_avg:587.67ms +grad accum step:12142/14336 +step:48568/57344 train_time:28542628ms step_avg:587.68ms +step:48569/57344 train_time:28542640ms step_avg:587.67ms +step:48570/57344 train_time:28542880ms step_avg:587.66ms +step:48571/57344 train_time:28543428ms step_avg:587.66ms +grad accum step:12143/14336 +step:48572/57344 train_time:28544797ms step_avg:587.68ms +step:48573/57344 train_time:28544813ms step_avg:587.67ms +step:48574/57344 train_time:28545058ms step_avg:587.66ms +step:48575/57344 train_time:28545593ms step_avg:587.66ms +grad accum step:12144/14336 +step:48576/57344 train_time:28546891ms step_avg:587.67ms +step:48576/57344 val_loss:5.563162 train_time:28546891ms step_avg:587.67ms +step:48577/57344 train_time:28546903ms step_avg:587.66ms +step:48578/57344 train_time:28547127ms step_avg:587.66ms +step:48579/57344 train_time:28547685ms step_avg:587.65ms +grad accum step:12145/14336 +step:48580/57344 train_time:28549049ms step_avg:587.67ms +step:48581/57344 train_time:28549064ms step_avg:587.66ms +step:48582/57344 train_time:28549310ms step_avg:587.65ms +step:48583/57344 train_time:28549871ms step_avg:587.65ms +grad accum step:12146/14336 +step:48584/57344 train_time:28551229ms step_avg:587.67ms +step:48585/57344 train_time:28551246ms step_avg:587.66ms +step:48586/57344 train_time:28551488ms step_avg:587.65ms +step:48587/57344 train_time:28552031ms step_avg:587.65ms +grad accum step:12147/14336 +step:48588/57344 train_time:28553397ms step_avg:587.66ms +step:48589/57344 train_time:28553411ms step_avg:587.65ms +step:48590/57344 train_time:28553661ms step_avg:587.64ms +step:48591/57344 train_time:28554226ms step_avg:587.64ms +grad accum step:12148/14336 +step:48592/57344 train_time:28555557ms step_avg:587.66ms +step:48593/57344 train_time:28555574ms step_avg:587.65ms +step:48594/57344 train_time:28555824ms step_avg:587.64ms +step:48595/57344 train_time:28556385ms step_avg:587.64ms +grad accum step:12149/14336 +step:48596/57344 train_time:28557717ms step_avg:587.66ms +step:48597/57344 train_time:28557730ms step_avg:587.64ms +step:48598/57344 train_time:28557973ms step_avg:587.64ms +step:48599/57344 train_time:28558568ms step_avg:587.64ms +grad accum step:12150/14336 +step:48600/57344 train_time:28559972ms step_avg:587.65ms +step:48601/57344 train_time:28559983ms step_avg:587.64ms +step:48602/57344 train_time:28560236ms step_avg:587.63ms +step:48603/57344 train_time:28560819ms step_avg:587.63ms +grad accum step:12151/14336 +step:48604/57344 train_time:28562206ms step_avg:587.65ms +step:48605/57344 train_time:28562223ms step_avg:587.64ms +step:48606/57344 train_time:28562470ms step_avg:587.63ms +step:48607/57344 train_time:28563021ms step_avg:587.63ms +grad accum step:12152/14336 +step:48608/57344 train_time:28564335ms step_avg:587.65ms +step:48609/57344 train_time:28564351ms step_avg:587.64ms +step:48610/57344 train_time:28564598ms step_avg:587.63ms +step:48611/57344 train_time:28565147ms step_avg:587.63ms +grad accum step:12153/14336 +step:48612/57344 train_time:28566465ms step_avg:587.64ms +step:48613/57344 train_time:28566477ms step_avg:587.63ms +step:48614/57344 train_time:28566725ms step_avg:587.62ms +step:48615/57344 train_time:28567285ms step_avg:587.62ms +grad accum step:12154/14336 +step:48616/57344 train_time:28568626ms step_avg:587.64ms +step:48617/57344 train_time:28568641ms step_avg:587.63ms +step:48618/57344 train_time:28568899ms step_avg:587.62ms +step:48619/57344 train_time:28569489ms step_avg:587.62ms +grad accum step:12155/14336 +step:48620/57344 train_time:28570840ms step_avg:587.64ms +step:48621/57344 train_time:28570856ms step_avg:587.62ms +step:48622/57344 train_time:28571110ms step_avg:587.62ms +step:48623/57344 train_time:28571683ms step_avg:587.62ms +grad accum step:12156/14336 +step:48624/57344 train_time:28573014ms step_avg:587.63ms +step:48625/57344 train_time:28573028ms step_avg:587.62ms +step:48626/57344 train_time:28573263ms step_avg:587.61ms +step:48627/57344 train_time:28573832ms step_avg:587.61ms +grad accum step:12157/14336 +step:48628/57344 train_time:28575155ms step_avg:587.63ms +step:48629/57344 train_time:28575172ms step_avg:587.62ms +step:48630/57344 train_time:28575419ms step_avg:587.61ms +step:48631/57344 train_time:28575978ms step_avg:587.61ms +grad accum step:12158/14336 +step:48632/57344 train_time:28577338ms step_avg:587.62ms +step:48633/57344 train_time:28577354ms step_avg:587.61ms +step:48634/57344 train_time:28577606ms step_avg:587.61ms +step:48635/57344 train_time:28578174ms step_avg:587.61ms +grad accum step:12159/14336 +step:48636/57344 train_time:28579510ms step_avg:587.62ms +step:48637/57344 train_time:28579527ms step_avg:587.61ms +step:48638/57344 train_time:28579773ms step_avg:587.60ms +step:48639/57344 train_time:28580322ms step_avg:587.60ms +grad accum step:12160/14336 +step:48640/57344 train_time:28581635ms step_avg:587.62ms +step:48640/57344 val_loss:5.555359 train_time:28581636ms step_avg:587.62ms +step:48641/57344 train_time:28581648ms step_avg:587.60ms +step:48642/57344 train_time:28581872ms step_avg:587.60ms +step:48643/57344 train_time:28582441ms step_avg:587.60ms +grad accum step:12161/14336 +step:48644/57344 train_time:28583876ms step_avg:587.61ms +step:48645/57344 train_time:28583892ms step_avg:587.60ms +step:48646/57344 train_time:28584145ms step_avg:587.59ms +step:48647/57344 train_time:28584706ms step_avg:587.59ms +grad accum step:12162/14336 +step:48648/57344 train_time:28586014ms step_avg:587.61ms +step:48649/57344 train_time:28586031ms step_avg:587.60ms +step:48650/57344 train_time:28586286ms step_avg:587.59ms +step:48651/57344 train_time:28586862ms step_avg:587.59ms +grad accum step:12163/14336 +step:48652/57344 train_time:28588218ms step_avg:587.61ms +step:48653/57344 train_time:28588235ms step_avg:587.59ms +step:48654/57344 train_time:28588488ms step_avg:587.59ms +step:48655/57344 train_time:28589070ms step_avg:587.59ms +grad accum step:12164/14336 +step:48656/57344 train_time:28590463ms step_avg:587.60ms +step:48657/57344 train_time:28590487ms step_avg:587.59ms +step:48658/57344 train_time:28590717ms step_avg:587.59ms +step:48659/57344 train_time:28591274ms step_avg:587.58ms +grad accum step:12165/14336 +step:48660/57344 train_time:28592658ms step_avg:587.60ms +step:48661/57344 train_time:28592675ms step_avg:587.59ms +step:48662/57344 train_time:28592930ms step_avg:587.58ms +step:48663/57344 train_time:28593500ms step_avg:587.58ms +grad accum step:12166/14336 +step:48664/57344 train_time:28594819ms step_avg:587.60ms +step:48665/57344 train_time:28594836ms step_avg:587.59ms +step:48666/57344 train_time:28595086ms step_avg:587.58ms +step:48667/57344 train_time:28595645ms step_avg:587.58ms +grad accum step:12167/14336 +step:48668/57344 train_time:28596945ms step_avg:587.59ms +step:48669/57344 train_time:28596962ms step_avg:587.58ms +step:48670/57344 train_time:28597209ms step_avg:587.57ms +step:48671/57344 train_time:28597761ms step_avg:587.57ms +grad accum step:12168/14336 +step:48672/57344 train_time:28599102ms step_avg:587.59ms +step:48673/57344 train_time:28599118ms step_avg:587.58ms +step:48674/57344 train_time:28599371ms step_avg:587.57ms +step:48675/57344 train_time:28599936ms step_avg:587.57ms +grad accum step:12169/14336 +step:48676/57344 train_time:28601278ms step_avg:587.58ms +step:48677/57344 train_time:28601295ms step_avg:587.57ms +step:48678/57344 train_time:28601546ms step_avg:587.57ms +step:48679/57344 train_time:28602105ms step_avg:587.57ms +grad accum step:12170/14336 +step:48680/57344 train_time:28603604ms step_avg:587.58ms +step:48681/57344 train_time:28603639ms step_avg:587.57ms +step:48682/57344 train_time:28603861ms step_avg:587.57ms +step:48683/57344 train_time:28604417ms step_avg:587.56ms +grad accum step:12171/14336 +step:48684/57344 train_time:28605882ms step_avg:587.58ms +step:48685/57344 train_time:28606145ms step_avg:587.58ms +step:48686/57344 train_time:28606365ms step_avg:587.57ms +step:48687/57344 train_time:28606925ms step_avg:587.57ms +grad accum step:12172/14336 +step:48688/57344 train_time:28608283ms step_avg:587.58ms +step:48689/57344 train_time:28608302ms step_avg:587.57ms +step:48690/57344 train_time:28608556ms step_avg:587.57ms +step:48691/57344 train_time:28609142ms step_avg:587.57ms +grad accum step:12173/14336 +step:48692/57344 train_time:28610490ms step_avg:587.58ms +step:48693/57344 train_time:28610506ms step_avg:587.57ms +step:48694/57344 train_time:28610741ms step_avg:587.56ms +step:48695/57344 train_time:28611307ms step_avg:587.56ms +grad accum step:12174/14336 +step:48696/57344 train_time:28612775ms step_avg:587.58ms +step:48697/57344 train_time:28612794ms step_avg:587.57ms +step:48698/57344 train_time:28613017ms step_avg:587.56ms +step:48699/57344 train_time:28613581ms step_avg:587.56ms +grad accum step:12175/14336 +step:48700/57344 train_time:28614889ms step_avg:587.57ms +step:48701/57344 train_time:28614907ms step_avg:587.56ms +step:48702/57344 train_time:28615150ms step_avg:587.56ms +step:48703/57344 train_time:28615713ms step_avg:587.56ms +grad accum step:12176/14336 +step:48704/57344 train_time:28617104ms step_avg:587.57ms +step:48704/57344 val_loss:5.550089 train_time:28617105ms step_avg:587.57ms +step:48705/57344 train_time:28617117ms step_avg:587.56ms +step:48706/57344 train_time:28617339ms step_avg:587.55ms +step:48707/57344 train_time:28617900ms step_avg:587.55ms +grad accum step:12177/14336 +step:48708/57344 train_time:28619291ms step_avg:587.57ms +step:48709/57344 train_time:28619308ms step_avg:587.56ms +step:48710/57344 train_time:28619551ms step_avg:587.55ms +step:48711/57344 train_time:28620119ms step_avg:587.55ms +grad accum step:12178/14336 +step:48712/57344 train_time:28621473ms step_avg:587.57ms +step:48713/57344 train_time:28621489ms step_avg:587.55ms +step:48714/57344 train_time:28621727ms step_avg:587.55ms +step:48715/57344 train_time:28622287ms step_avg:587.55ms +grad accum step:12179/14336 +step:48716/57344 train_time:28623632ms step_avg:587.56ms +step:48717/57344 train_time:28623650ms step_avg:587.55ms +step:48718/57344 train_time:28623889ms step_avg:587.54ms +step:48719/57344 train_time:28624442ms step_avg:587.54ms +grad accum step:12180/14336 +step:48720/57344 train_time:28625765ms step_avg:587.56ms +step:48721/57344 train_time:28625811ms step_avg:587.55ms +step:48722/57344 train_time:28626044ms step_avg:587.54ms +step:48723/57344 train_time:28626624ms step_avg:587.54ms +grad accum step:12181/14336 +step:48724/57344 train_time:28628116ms step_avg:587.56ms +step:48725/57344 train_time:28628162ms step_avg:587.55ms +step:48726/57344 train_time:28628385ms step_avg:587.54ms +step:48727/57344 train_time:28628951ms step_avg:587.54ms +grad accum step:12182/14336 +step:48728/57344 train_time:28630377ms step_avg:587.55ms +step:48729/57344 train_time:28630397ms step_avg:587.54ms +step:48730/57344 train_time:28630637ms step_avg:587.54ms +step:48731/57344 train_time:28631200ms step_avg:587.54ms +grad accum step:12183/14336 +step:48732/57344 train_time:28632503ms step_avg:587.55ms +step:48733/57344 train_time:28632517ms step_avg:587.54ms +step:48734/57344 train_time:28632764ms step_avg:587.53ms +step:48735/57344 train_time:28633311ms step_avg:587.53ms +grad accum step:12184/14336 +step:48736/57344 train_time:28634662ms step_avg:587.55ms +step:48737/57344 train_time:28634682ms step_avg:587.53ms +step:48738/57344 train_time:28634922ms step_avg:587.53ms +step:48739/57344 train_time:28635532ms step_avg:587.53ms +grad accum step:12185/14336 +step:48740/57344 train_time:28637002ms step_avg:587.55ms +step:48741/57344 train_time:28637025ms step_avg:587.53ms +step:48742/57344 train_time:28637246ms step_avg:587.53ms +step:48743/57344 train_time:28637805ms step_avg:587.53ms +grad accum step:12186/14336 +step:48744/57344 train_time:28639178ms step_avg:587.54ms +step:48745/57344 train_time:28639202ms step_avg:587.53ms +step:48746/57344 train_time:28639445ms step_avg:587.52ms +step:48747/57344 train_time:28640044ms step_avg:587.52ms +grad accum step:12187/14336 +step:48748/57344 train_time:28641398ms step_avg:587.54ms +step:48749/57344 train_time:28641418ms step_avg:587.53ms +step:48750/57344 train_time:28641659ms step_avg:587.52ms +step:48751/57344 train_time:28642238ms step_avg:587.52ms +grad accum step:12188/14336 +step:48752/57344 train_time:28643571ms step_avg:587.54ms +step:48753/57344 train_time:28643595ms step_avg:587.52ms +step:48754/57344 train_time:28643830ms step_avg:587.52ms +step:48755/57344 train_time:28644390ms step_avg:587.52ms +grad accum step:12189/14336 +step:48756/57344 train_time:28645743ms step_avg:587.53ms +step:48757/57344 train_time:28645763ms step_avg:587.52ms +step:48758/57344 train_time:28646005ms step_avg:587.51ms +step:48759/57344 train_time:28646572ms step_avg:587.51ms +grad accum step:12190/14336 +step:48760/57344 train_time:28647933ms step_avg:587.53ms +step:48761/57344 train_time:28647949ms step_avg:587.52ms +step:48762/57344 train_time:28648175ms step_avg:587.51ms +step:48763/57344 train_time:28648731ms step_avg:587.51ms +grad accum step:12191/14336 +step:48764/57344 train_time:28650054ms step_avg:587.52ms +step:48765/57344 train_time:28650076ms step_avg:587.51ms +step:48766/57344 train_time:28650309ms step_avg:587.51ms +step:48767/57344 train_time:28650866ms step_avg:587.51ms +grad accum step:12192/14336 +step:48768/57344 train_time:28652226ms step_avg:587.52ms +step:48768/57344 val_loss:5.544346 train_time:28652230ms step_avg:587.52ms +step:48769/57344 train_time:28652241ms step_avg:587.51ms +step:48770/57344 train_time:28652467ms step_avg:587.50ms +step:48771/57344 train_time:28653039ms step_avg:587.50ms +grad accum step:12193/14336 +step:48772/57344 train_time:28654394ms step_avg:587.52ms +step:48773/57344 train_time:28654415ms step_avg:587.51ms +step:48774/57344 train_time:28654651ms step_avg:587.50ms +step:48775/57344 train_time:28655195ms step_avg:587.50ms +grad accum step:12194/14336 +step:48776/57344 train_time:28656540ms step_avg:587.51ms +step:48777/57344 train_time:28656555ms step_avg:587.50ms +step:48778/57344 train_time:28656802ms step_avg:587.49ms +step:48779/57344 train_time:28657356ms step_avg:587.49ms +grad accum step:12195/14336 +step:48780/57344 train_time:28658778ms step_avg:587.51ms +step:48781/57344 train_time:28658794ms step_avg:587.50ms +step:48782/57344 train_time:28659029ms step_avg:587.49ms +step:48783/57344 train_time:28659620ms step_avg:587.49ms +grad accum step:12196/14336 +step:48784/57344 train_time:28661004ms step_avg:587.51ms +step:48785/57344 train_time:28661022ms step_avg:587.50ms +step:48786/57344 train_time:28661250ms step_avg:587.49ms +step:48787/57344 train_time:28661817ms step_avg:587.49ms +grad accum step:12197/14336 +step:48788/57344 train_time:28663138ms step_avg:587.50ms +step:48789/57344 train_time:28663154ms step_avg:587.49ms +step:48790/57344 train_time:28663409ms step_avg:587.49ms +step:48791/57344 train_time:28663986ms step_avg:587.49ms +grad accum step:12198/14336 +step:48792/57344 train_time:28665398ms step_avg:587.50ms +step:48793/57344 train_time:28665423ms step_avg:587.49ms +step:48794/57344 train_time:28665647ms step_avg:587.48ms +step:48795/57344 train_time:28666196ms step_avg:587.48ms +grad accum step:12199/14336 +step:48796/57344 train_time:28667498ms step_avg:587.50ms +step:48797/57344 train_time:28667520ms step_avg:587.49ms +step:48798/57344 train_time:28667760ms step_avg:587.48ms +step:48799/57344 train_time:28668336ms step_avg:587.48ms +grad accum step:12200/14336 +step:48800/57344 train_time:28669707ms step_avg:587.49ms +step:48801/57344 train_time:28669727ms step_avg:587.48ms +step:48802/57344 train_time:28669971ms step_avg:587.48ms +step:48803/57344 train_time:28670554ms step_avg:587.48ms +grad accum step:12201/14336 +step:48804/57344 train_time:28671904ms step_avg:587.49ms +step:48805/57344 train_time:28671923ms step_avg:587.48ms +step:48806/57344 train_time:28672172ms step_avg:587.47ms +step:48807/57344 train_time:28672747ms step_avg:587.47ms +grad accum step:12202/14336 +step:48808/57344 train_time:28674100ms step_avg:587.49ms +step:48809/57344 train_time:28674116ms step_avg:587.48ms +step:48810/57344 train_time:28674364ms step_avg:587.47ms +step:48811/57344 train_time:28674920ms step_avg:587.47ms +grad accum step:12203/14336 +step:48812/57344 train_time:28676388ms step_avg:587.49ms +step:48813/57344 train_time:28676412ms step_avg:587.47ms +step:48814/57344 train_time:28676632ms step_avg:587.47ms +step:48815/57344 train_time:28677177ms step_avg:587.47ms +grad accum step:12204/14336 +step:48816/57344 train_time:28678594ms step_avg:587.48ms +step:48817/57344 train_time:28678610ms step_avg:587.47ms +step:48818/57344 train_time:28678864ms step_avg:587.46ms +step:48819/57344 train_time:28679442ms step_avg:587.46ms +grad accum step:12205/14336 +step:48820/57344 train_time:28680783ms step_avg:587.48ms +step:48821/57344 train_time:28680798ms step_avg:587.47ms +step:48822/57344 train_time:28681050ms step_avg:587.46ms +step:48823/57344 train_time:28681624ms step_avg:587.46ms +grad accum step:12206/14336 +step:48824/57344 train_time:28682971ms step_avg:587.48ms +step:48825/57344 train_time:28682986ms step_avg:587.47ms +step:48826/57344 train_time:28683423ms step_avg:587.46ms +step:48827/57344 train_time:28683824ms step_avg:587.46ms +grad accum step:12207/14336 +step:48828/57344 train_time:28685175ms step_avg:587.47ms +step:48829/57344 train_time:28685189ms step_avg:587.46ms +step:48830/57344 train_time:28685443ms step_avg:587.46ms +step:48831/57344 train_time:28686016ms step_avg:587.46ms +grad accum step:12208/14336 +step:48832/57344 train_time:28687496ms step_avg:587.47ms +step:48832/57344 val_loss:5.536995 train_time:28687497ms step_avg:587.47ms +step:48833/57344 train_time:28687509ms step_avg:587.46ms +step:48834/57344 train_time:28687735ms step_avg:587.45ms +step:48835/57344 train_time:28688287ms step_avg:587.45ms +grad accum step:12209/14336 +step:48836/57344 train_time:28689638ms step_avg:587.47ms +step:48837/57344 train_time:28689653ms step_avg:587.46ms +step:48838/57344 train_time:28689904ms step_avg:587.45ms +step:48839/57344 train_time:28690456ms step_avg:587.45ms +grad accum step:12210/14336 +step:48840/57344 train_time:28691741ms step_avg:587.46ms +step:48841/57344 train_time:28691754ms step_avg:587.45ms +step:48842/57344 train_time:28692006ms step_avg:587.45ms +step:48843/57344 train_time:28692589ms step_avg:587.45ms +grad accum step:12211/14336 +step:48844/57344 train_time:28693937ms step_avg:587.46ms +step:48845/57344 train_time:28693958ms step_avg:587.45ms +step:48846/57344 train_time:28694205ms step_avg:587.44ms +step:48847/57344 train_time:28694780ms step_avg:587.44ms +grad accum step:12212/14336 +step:48848/57344 train_time:28696179ms step_avg:587.46ms +step:48849/57344 train_time:28696196ms step_avg:587.45ms +step:48850/57344 train_time:28696447ms step_avg:587.44ms +step:48851/57344 train_time:28697031ms step_avg:587.44ms +grad accum step:12213/14336 +step:48852/57344 train_time:28698544ms step_avg:587.46ms +step:48853/57344 train_time:28698647ms step_avg:587.45ms +step:48854/57344 train_time:28698864ms step_avg:587.44ms +step:48855/57344 train_time:28699422ms step_avg:587.44ms +grad accum step:12214/14336 +step:48856/57344 train_time:28700777ms step_avg:587.46ms +step:48857/57344 train_time:28700804ms step_avg:587.45ms +step:48858/57344 train_time:28701032ms step_avg:587.44ms +step:48859/57344 train_time:28701585ms step_avg:587.44ms +grad accum step:12215/14336 +step:48860/57344 train_time:28702969ms step_avg:587.45ms +step:48861/57344 train_time:28702984ms step_avg:587.44ms +step:48862/57344 train_time:28703239ms step_avg:587.43ms +step:48863/57344 train_time:28703807ms step_avg:587.43ms +grad accum step:12216/14336 +step:48864/57344 train_time:28705147ms step_avg:587.45ms +step:48865/57344 train_time:28705163ms step_avg:587.44ms +step:48866/57344 train_time:28705414ms step_avg:587.43ms +step:48867/57344 train_time:28705981ms step_avg:587.43ms +grad accum step:12217/14336 +step:48868/57344 train_time:28707314ms step_avg:587.45ms +step:48869/57344 train_time:28707329ms step_avg:587.43ms +step:48870/57344 train_time:28707568ms step_avg:587.43ms +step:48871/57344 train_time:28708130ms step_avg:587.43ms +grad accum step:12218/14336 +step:48872/57344 train_time:28709548ms step_avg:587.44ms +step:48873/57344 train_time:28709564ms step_avg:587.43ms +step:48874/57344 train_time:28709796ms step_avg:587.42ms +step:48875/57344 train_time:28710360ms step_avg:587.42ms +grad accum step:12219/14336 +step:48876/57344 train_time:28711705ms step_avg:587.44ms +step:48877/57344 train_time:28711718ms step_avg:587.43ms +step:48878/57344 train_time:28711971ms step_avg:587.42ms +step:48879/57344 train_time:28712535ms step_avg:587.42ms +grad accum step:12220/14336 +step:48880/57344 train_time:28713925ms step_avg:587.44ms +step:48881/57344 train_time:28713948ms step_avg:587.43ms +step:48882/57344 train_time:28714198ms step_avg:587.42ms +step:48883/57344 train_time:28714799ms step_avg:587.42ms +grad accum step:12221/14336 +step:48884/57344 train_time:28716167ms step_avg:587.43ms +step:48885/57344 train_time:28716188ms step_avg:587.42ms +step:48886/57344 train_time:28716428ms step_avg:587.42ms +step:48887/57344 train_time:28716992ms step_avg:587.42ms +grad accum step:12222/14336 +step:48888/57344 train_time:28718416ms step_avg:587.43ms +step:48889/57344 train_time:28718438ms step_avg:587.42ms +step:48890/57344 train_time:28718669ms step_avg:587.41ms +step:48891/57344 train_time:28719223ms step_avg:587.41ms +grad accum step:12223/14336 +step:48892/57344 train_time:28720586ms step_avg:587.43ms +step:48893/57344 train_time:28720603ms step_avg:587.42ms +step:48894/57344 train_time:28720855ms step_avg:587.41ms +step:48895/57344 train_time:28721430ms step_avg:587.41ms +grad accum step:12224/14336 +step:48896/57344 train_time:28722801ms step_avg:587.43ms +step:48896/57344 val_loss:5.531327 train_time:28722824ms step_avg:587.43ms +step:48897/57344 train_time:28722836ms step_avg:587.42ms +step:48898/57344 train_time:28723057ms step_avg:587.41ms +step:48899/57344 train_time:28723612ms step_avg:587.41ms +grad accum step:12225/14336 +step:48900/57344 train_time:28724978ms step_avg:587.42ms +step:48901/57344 train_time:28725002ms step_avg:587.41ms +step:48902/57344 train_time:28725233ms step_avg:587.40ms +step:48903/57344 train_time:28725799ms step_avg:587.40ms +grad accum step:12226/14336 +step:48904/57344 train_time:28727160ms step_avg:587.42ms +step:48905/57344 train_time:28727176ms step_avg:587.41ms +step:48906/57344 train_time:28727426ms step_avg:587.40ms +step:48907/57344 train_time:28727994ms step_avg:587.40ms +grad accum step:12227/14336 +step:48908/57344 train_time:28729306ms step_avg:587.42ms +step:48909/57344 train_time:28729324ms step_avg:587.40ms +step:48910/57344 train_time:28729572ms step_avg:587.40ms +step:48911/57344 train_time:28730148ms step_avg:587.40ms +grad accum step:12228/14336 +step:48912/57344 train_time:28731536ms step_avg:587.41ms +step:48913/57344 train_time:28731559ms step_avg:587.40ms +step:48914/57344 train_time:28731781ms step_avg:587.39ms +step:48915/57344 train_time:28732337ms step_avg:587.39ms +grad accum step:12229/14336 +step:48916/57344 train_time:28733691ms step_avg:587.41ms +step:48917/57344 train_time:28733706ms step_avg:587.40ms +step:48918/57344 train_time:28733927ms step_avg:587.39ms +step:48919/57344 train_time:28734475ms step_avg:587.39ms +grad accum step:12230/14336 +step:48920/57344 train_time:28735814ms step_avg:587.40ms +step:48921/57344 train_time:28735828ms step_avg:587.39ms +step:48922/57344 train_time:28736075ms step_avg:587.39ms +step:48923/57344 train_time:28736636ms step_avg:587.38ms +grad accum step:12231/14336 +step:48924/57344 train_time:28738111ms step_avg:587.40ms +step:48925/57344 train_time:28738127ms step_avg:587.39ms +step:48926/57344 train_time:28738381ms step_avg:587.38ms +step:48927/57344 train_time:28738958ms step_avg:587.38ms +grad accum step:12232/14336 +step:48928/57344 train_time:28740272ms step_avg:587.40ms +step:48929/57344 train_time:28740289ms step_avg:587.39ms +step:48930/57344 train_time:28740536ms step_avg:587.38ms +step:48931/57344 train_time:28741085ms step_avg:587.38ms +grad accum step:12233/14336 +step:48932/57344 train_time:28742463ms step_avg:587.40ms +step:48933/57344 train_time:28742485ms step_avg:587.38ms +step:48934/57344 train_time:28742706ms step_avg:587.38ms +step:48935/57344 train_time:28743260ms step_avg:587.38ms +grad accum step:12234/14336 +step:48936/57344 train_time:28744591ms step_avg:587.39ms +step:48937/57344 train_time:28744615ms step_avg:587.38ms +step:48938/57344 train_time:28744839ms step_avg:587.37ms +step:48939/57344 train_time:28745392ms step_avg:587.37ms +grad accum step:12235/14336 +step:48940/57344 train_time:28746769ms step_avg:587.39ms +step:48941/57344 train_time:28746787ms step_avg:587.38ms +step:48942/57344 train_time:28747037ms step_avg:587.37ms +step:48943/57344 train_time:28747626ms step_avg:587.37ms +grad accum step:12236/14336 +step:48944/57344 train_time:28748952ms step_avg:587.38ms +step:48945/57344 train_time:28748967ms step_avg:587.37ms +step:48946/57344 train_time:28749235ms step_avg:587.37ms +step:48947/57344 train_time:28749847ms step_avg:587.37ms +grad accum step:12237/14336 +step:48948/57344 train_time:28751248ms step_avg:587.38ms +step:48949/57344 train_time:28751262ms step_avg:587.37ms +step:48950/57344 train_time:28751517ms step_avg:587.37ms +step:48951/57344 train_time:28752099ms step_avg:587.36ms +grad accum step:12238/14336 +step:48952/57344 train_time:28753471ms step_avg:587.38ms +step:48953/57344 train_time:28753492ms step_avg:587.37ms +step:48954/57344 train_time:28753733ms step_avg:587.36ms +step:48955/57344 train_time:28754306ms step_avg:587.36ms +grad accum step:12239/14336 +step:48956/57344 train_time:28755718ms step_avg:587.38ms +step:48957/57344 train_time:28755734ms step_avg:587.37ms +step:48958/57344 train_time:28755984ms step_avg:587.36ms +step:48959/57344 train_time:28756560ms step_avg:587.36ms +grad accum step:12240/14336 +step:48960/57344 train_time:28757900ms step_avg:587.38ms +step:48960/57344 val_loss:5.526346 train_time:28757926ms step_avg:587.38ms +step:48961/57344 train_time:28757937ms step_avg:587.36ms +step:48962/57344 train_time:28758155ms step_avg:587.36ms +step:48963/57344 train_time:28758699ms step_avg:587.36ms +grad accum step:12241/14336 +step:48964/57344 train_time:28760010ms step_avg:587.37ms +step:48965/57344 train_time:28760031ms step_avg:587.36ms +step:48966/57344 train_time:28760281ms step_avg:587.35ms +step:48967/57344 train_time:28760870ms step_avg:587.35ms +grad accum step:12242/14336 +step:48968/57344 train_time:28762250ms step_avg:587.37ms +step:48969/57344 train_time:28762268ms step_avg:587.36ms +step:48970/57344 train_time:28762494ms step_avg:587.35ms +step:48971/57344 train_time:28763044ms step_avg:587.35ms +grad accum step:12243/14336 +step:48972/57344 train_time:28764375ms step_avg:587.36ms +step:48973/57344 train_time:28764390ms step_avg:587.35ms +step:48974/57344 train_time:28764647ms step_avg:587.35ms +step:48975/57344 train_time:28765227ms step_avg:587.35ms +grad accum step:12244/14336 +step:48976/57344 train_time:28766568ms step_avg:587.36ms +step:48977/57344 train_time:28766584ms step_avg:587.35ms +step:48978/57344 train_time:28766834ms step_avg:587.34ms +step:48979/57344 train_time:28767411ms step_avg:587.34ms +grad accum step:12245/14336 +step:48980/57344 train_time:28768753ms step_avg:587.36ms +step:48981/57344 train_time:28768772ms step_avg:587.35ms +step:48982/57344 train_time:28769021ms step_avg:587.34ms +step:48983/57344 train_time:28769628ms step_avg:587.34ms +grad accum step:12246/14336 +step:48984/57344 train_time:28771102ms step_avg:587.36ms +step:48985/57344 train_time:28771116ms step_avg:587.35ms +step:48986/57344 train_time:28771330ms step_avg:587.34ms +step:48987/57344 train_time:28771881ms step_avg:587.34ms +grad accum step:12247/14336 +step:48988/57344 train_time:28773397ms step_avg:587.36ms +step:48989/57344 train_time:28773414ms step_avg:587.34ms +step:48990/57344 train_time:28773636ms step_avg:587.34ms +step:48991/57344 train_time:28774201ms step_avg:587.34ms +grad accum step:12248/14336 +step:48992/57344 train_time:28775530ms step_avg:587.35ms +step:48993/57344 train_time:28775548ms step_avg:587.34ms +step:48994/57344 train_time:28775793ms step_avg:587.33ms +step:48995/57344 train_time:28776365ms step_avg:587.33ms +grad accum step:12249/14336 +step:48996/57344 train_time:28777746ms step_avg:587.35ms +step:48997/57344 train_time:28777762ms step_avg:587.34ms +step:48998/57344 train_time:28778012ms step_avg:587.33ms +step:48999/57344 train_time:28778575ms step_avg:587.33ms +grad accum step:12250/14336 +step:49000/57344 train_time:28779918ms step_avg:587.35ms +step:49001/57344 train_time:28779935ms step_avg:587.33ms +step:49002/57344 train_time:28780175ms step_avg:587.33ms +step:49003/57344 train_time:28780760ms step_avg:587.33ms +grad accum step:12251/14336 +step:49004/57344 train_time:28782294ms step_avg:587.35ms +step:49005/57344 train_time:28782312ms step_avg:587.33ms +step:49006/57344 train_time:28782553ms step_avg:587.33ms +step:49007/57344 train_time:28783111ms step_avg:587.33ms +grad accum step:12252/14336 +step:49008/57344 train_time:28784449ms step_avg:587.34ms +step:49009/57344 train_time:28784467ms step_avg:587.33ms +step:49010/57344 train_time:28784713ms step_avg:587.32ms +step:49011/57344 train_time:28785300ms step_avg:587.32ms +grad accum step:12253/14336 +step:49012/57344 train_time:28786670ms step_avg:587.34ms +step:49013/57344 train_time:28786688ms step_avg:587.33ms +step:49014/57344 train_time:28786931ms step_avg:587.32ms +step:49015/57344 train_time:28787499ms step_avg:587.32ms +grad accum step:12254/14336 +step:49016/57344 train_time:28788856ms step_avg:587.34ms +step:49017/57344 train_time:28788874ms step_avg:587.32ms +step:49018/57344 train_time:28789114ms step_avg:587.32ms +step:49019/57344 train_time:28789673ms step_avg:587.32ms +grad accum step:12255/14336 +step:49020/57344 train_time:28791078ms step_avg:587.33ms +step:49021/57344 train_time:28791098ms step_avg:587.32ms +step:49022/57344 train_time:28791342ms step_avg:587.31ms +step:49023/57344 train_time:28791908ms step_avg:587.31ms +grad accum step:12256/14336 +step:49024/57344 train_time:28793251ms step_avg:587.33ms +step:49024/57344 val_loss:5.524518 train_time:28793260ms step_avg:587.33ms +step:49025/57344 train_time:28793272ms step_avg:587.32ms +step:49026/57344 train_time:28793497ms step_avg:587.31ms +step:49027/57344 train_time:28794062ms step_avg:587.31ms +grad accum step:12257/14336 +step:49028/57344 train_time:28795400ms step_avg:587.33ms +step:49029/57344 train_time:28795415ms step_avg:587.31ms +step:49030/57344 train_time:28795667ms step_avg:587.31ms +step:49031/57344 train_time:28796232ms step_avg:587.31ms +grad accum step:12258/14336 +step:49032/57344 train_time:28797564ms step_avg:587.32ms +step:49033/57344 train_time:28797581ms step_avg:587.31ms +step:49034/57344 train_time:28797834ms step_avg:587.30ms +step:49035/57344 train_time:28798401ms step_avg:587.30ms +grad accum step:12259/14336 +step:49036/57344 train_time:28799841ms step_avg:587.32ms +step:49037/57344 train_time:28799873ms step_avg:587.31ms +step:49038/57344 train_time:28800102ms step_avg:587.30ms +step:49039/57344 train_time:28800685ms step_avg:587.30ms +grad accum step:12260/14336 +step:49040/57344 train_time:28802072ms step_avg:587.32ms +step:49041/57344 train_time:28802087ms step_avg:587.31ms +step:49042/57344 train_time:28802321ms step_avg:587.30ms +step:49043/57344 train_time:28802865ms step_avg:587.30ms +grad accum step:12261/14336 +step:49044/57344 train_time:28804183ms step_avg:587.31ms +step:49045/57344 train_time:28804198ms step_avg:587.30ms +step:49046/57344 train_time:28804421ms step_avg:587.29ms +step:49047/57344 train_time:28804969ms step_avg:587.29ms +grad accum step:12262/14336 +step:49048/57344 train_time:28806285ms step_avg:587.31ms +step:49049/57344 train_time:28806300ms step_avg:587.30ms +step:49050/57344 train_time:28806551ms step_avg:587.29ms +step:49051/57344 train_time:28807113ms step_avg:587.29ms +grad accum step:12263/14336 +step:49052/57344 train_time:28808447ms step_avg:587.30ms +step:49053/57344 train_time:28808464ms step_avg:587.29ms +step:49054/57344 train_time:28808716ms step_avg:587.29ms +step:49055/57344 train_time:28809288ms step_avg:587.29ms +grad accum step:12264/14336 +step:49056/57344 train_time:28810666ms step_avg:587.30ms +step:49057/57344 train_time:28810682ms step_avg:587.29ms +step:49058/57344 train_time:28810928ms step_avg:587.28ms +step:49059/57344 train_time:28811489ms step_avg:587.28ms +grad accum step:12265/14336 +step:49060/57344 train_time:28812868ms step_avg:587.30ms +step:49061/57344 train_time:28812885ms step_avg:587.29ms +step:49062/57344 train_time:28813132ms step_avg:587.28ms +step:49063/57344 train_time:28813675ms step_avg:587.28ms +grad accum step:12266/14336 +step:49064/57344 train_time:28815019ms step_avg:587.29ms +step:49065/57344 train_time:28815031ms step_avg:587.28ms +step:49066/57344 train_time:28815300ms step_avg:587.28ms +step:49067/57344 train_time:28815903ms step_avg:587.28ms +grad accum step:12267/14336 +step:49068/57344 train_time:28817262ms step_avg:587.29ms +step:49069/57344 train_time:28817277ms step_avg:587.28ms +step:49070/57344 train_time:28817524ms step_avg:587.27ms +step:49071/57344 train_time:28818071ms step_avg:587.27ms +grad accum step:12268/14336 +step:49072/57344 train_time:28819418ms step_avg:587.29ms +step:49073/57344 train_time:28819434ms step_avg:587.28ms +step:49074/57344 train_time:28819696ms step_avg:587.27ms +step:49075/57344 train_time:28820309ms step_avg:587.27ms +grad accum step:12269/14336 +step:49076/57344 train_time:28821733ms step_avg:587.29ms +step:49077/57344 train_time:28821749ms step_avg:587.28ms +step:49078/57344 train_time:28821996ms step_avg:587.27ms +step:49079/57344 train_time:28822560ms step_avg:587.27ms +grad accum step:12270/14336 +step:49080/57344 train_time:28826091ms step_avg:587.33ms +step:49081/57344 train_time:28826105ms step_avg:587.32ms +step:49082/57344 train_time:28826405ms step_avg:587.31ms +step:49083/57344 train_time:28826961ms step_avg:587.31ms +grad accum step:12271/14336 +step:49084/57344 train_time:28828282ms step_avg:587.33ms +step:49085/57344 train_time:28828294ms step_avg:587.31ms +step:49086/57344 train_time:28828533ms step_avg:587.31ms +step:49087/57344 train_time:28829084ms step_avg:587.31ms +grad accum step:12272/14336 +step:49088/57344 train_time:28830603ms step_avg:587.32ms +step:49088/57344 val_loss:5.516452 train_time:28830618ms step_avg:587.33ms +step:49089/57344 train_time:28830630ms step_avg:587.31ms +step:49090/57344 train_time:28830857ms step_avg:587.31ms +step:49091/57344 train_time:28831431ms step_avg:587.31ms +grad accum step:12273/14336 +step:49092/57344 train_time:28832798ms step_avg:587.32ms +step:49093/57344 train_time:28832814ms step_avg:587.31ms +step:49094/57344 train_time:28833061ms step_avg:587.30ms +step:49095/57344 train_time:28833606ms step_avg:587.30ms +grad accum step:12274/14336 +step:49096/57344 train_time:28834979ms step_avg:587.32ms +step:49097/57344 train_time:28834991ms step_avg:587.31ms +step:49098/57344 train_time:28835229ms step_avg:587.30ms +step:49099/57344 train_time:28835793ms step_avg:587.30ms +grad accum step:12275/14336 +step:49100/57344 train_time:28837170ms step_avg:587.32ms +step:49101/57344 train_time:28837182ms step_avg:587.30ms +step:49102/57344 train_time:28837428ms step_avg:587.30ms +step:49103/57344 train_time:28837995ms step_avg:587.30ms +grad accum step:12276/14336 +step:49104/57344 train_time:28839311ms step_avg:587.31ms +step:49105/57344 train_time:28839327ms step_avg:587.30ms +step:49106/57344 train_time:28839576ms step_avg:587.29ms +step:49107/57344 train_time:28840127ms step_avg:587.29ms +grad accum step:12277/14336 +step:49108/57344 train_time:28841459ms step_avg:587.31ms +step:49109/57344 train_time:28841474ms step_avg:587.30ms +step:49110/57344 train_time:28841721ms step_avg:587.29ms +step:49111/57344 train_time:28842266ms step_avg:587.29ms +grad accum step:12278/14336 +step:49112/57344 train_time:28843610ms step_avg:587.30ms +step:49113/57344 train_time:28843628ms step_avg:587.29ms +step:49114/57344 train_time:28843888ms step_avg:587.28ms +step:49115/57344 train_time:28844482ms step_avg:587.28ms +grad accum step:12279/14336 +step:49116/57344 train_time:28845803ms step_avg:587.30ms +step:49117/57344 train_time:28845820ms step_avg:587.29ms +step:49118/57344 train_time:28846072ms step_avg:587.28ms +step:49119/57344 train_time:28846631ms step_avg:587.28ms +grad accum step:12280/14336 +step:49120/57344 train_time:28851043ms step_avg:587.36ms +step:49121/57344 train_time:28851067ms step_avg:587.35ms +step:49122/57344 train_time:28851332ms step_avg:587.34ms +step:49123/57344 train_time:28851898ms step_avg:587.34ms +grad accum step:12281/14336 +step:49124/57344 train_time:28853240ms step_avg:587.36ms +step:49125/57344 train_time:28853257ms step_avg:587.34ms +step:49126/57344 train_time:28853502ms step_avg:587.34ms +step:49127/57344 train_time:28854053ms step_avg:587.34ms +grad accum step:12282/14336 +step:49128/57344 train_time:28855437ms step_avg:587.35ms +step:49129/57344 train_time:28855450ms step_avg:587.34ms +step:49130/57344 train_time:28855698ms step_avg:587.33ms +step:49131/57344 train_time:28856267ms step_avg:587.33ms +grad accum step:12283/14336 +step:49132/57344 train_time:28857684ms step_avg:587.35ms +step:49133/57344 train_time:28861462ms step_avg:587.42ms +step:49134/57344 train_time:28861773ms step_avg:587.41ms +step:49135/57344 train_time:28862335ms step_avg:587.41ms +grad accum step:12284/14336 +step:49136/57344 train_time:28863631ms step_avg:587.42ms +step:49137/57344 train_time:28863647ms step_avg:587.41ms +step:49138/57344 train_time:28863892ms step_avg:587.40ms +step:49139/57344 train_time:28864438ms step_avg:587.40ms +grad accum step:12285/14336 +step:49140/57344 train_time:28865757ms step_avg:587.42ms +step:49141/57344 train_time:28865773ms step_avg:587.41ms +step:49142/57344 train_time:28866017ms step_avg:587.40ms +step:49143/57344 train_time:28866573ms step_avg:587.40ms +grad accum step:12286/14336 +step:49144/57344 train_time:28867921ms step_avg:587.41ms +step:49145/57344 train_time:28867937ms step_avg:587.40ms +step:49146/57344 train_time:28868185ms step_avg:587.40ms +step:49147/57344 train_time:28868750ms step_avg:587.40ms +grad accum step:12287/14336 +step:49148/57344 train_time:28870125ms step_avg:587.41ms +step:49149/57344 train_time:28870143ms step_avg:587.40ms +step:49150/57344 train_time:28870385ms step_avg:587.39ms +step:49151/57344 train_time:28870952ms step_avg:587.39ms +grad accum step:12288/14336 +step:49152/57344 train_time:28872344ms step_avg:587.41ms +step:49152/57344 val_loss:5.509875 train_time:28872350ms step_avg:587.41ms +step:49153/57344 train_time:28872362ms step_avg:587.40ms +step:49154/57344 train_time:28872586ms step_avg:587.39ms +step:49155/57344 train_time:28873133ms step_avg:587.39ms +grad accum step:12289/14336 +step:49156/57344 train_time:28874498ms step_avg:587.41ms +step:49157/57344 train_time:28874516ms step_avg:587.39ms +step:49158/57344 train_time:28874755ms step_avg:587.39ms +step:49159/57344 train_time:28875347ms step_avg:587.39ms +grad accum step:12290/14336 +step:49160/57344 train_time:28876777ms step_avg:587.40ms +step:49161/57344 train_time:28876811ms step_avg:587.39ms +step:49162/57344 train_time:28877035ms step_avg:587.39ms +step:49163/57344 train_time:28877592ms step_avg:587.38ms +grad accum step:12291/14336 +step:49164/57344 train_time:28878920ms step_avg:587.40ms +step:49165/57344 train_time:28878936ms step_avg:587.39ms +step:49166/57344 train_time:28879199ms step_avg:587.38ms +step:49167/57344 train_time:28879800ms step_avg:587.38ms +grad accum step:12292/14336 +step:49168/57344 train_time:28881168ms step_avg:587.40ms +step:49169/57344 train_time:28881185ms step_avg:587.39ms +step:49170/57344 train_time:28881429ms step_avg:587.38ms +step:49171/57344 train_time:28881994ms step_avg:587.38ms +grad accum step:12293/14336 +step:49172/57344 train_time:28883365ms step_avg:587.39ms +step:49173/57344 train_time:28883380ms step_avg:587.38ms +step:49174/57344 train_time:28883614ms step_avg:587.38ms +step:49175/57344 train_time:28884172ms step_avg:587.38ms +grad accum step:12294/14336 +step:49176/57344 train_time:28885496ms step_avg:587.39ms +step:49177/57344 train_time:28885514ms step_avg:587.38ms +step:49178/57344 train_time:28885755ms step_avg:587.37ms +step:49179/57344 train_time:28886318ms step_avg:587.37ms +grad accum step:12295/14336 +step:49180/57344 train_time:28887679ms step_avg:587.39ms +step:49181/57344 train_time:28887695ms step_avg:587.38ms +step:49182/57344 train_time:28887935ms step_avg:587.37ms +step:49183/57344 train_time:28888485ms step_avg:587.37ms +grad accum step:12296/14336 +step:49184/57344 train_time:28889846ms step_avg:587.38ms +step:49185/57344 train_time:28889861ms step_avg:587.37ms +step:49186/57344 train_time:28890110ms step_avg:587.36ms +step:49187/57344 train_time:28890691ms step_avg:587.36ms +grad accum step:12297/14336 +step:49188/57344 train_time:28892070ms step_avg:587.38ms +step:49189/57344 train_time:28892086ms step_avg:587.37ms +step:49190/57344 train_time:28892340ms step_avg:587.36ms +step:49191/57344 train_time:28892916ms step_avg:587.36ms +grad accum step:12298/14336 +step:49192/57344 train_time:28894323ms step_avg:587.38ms +step:49193/57344 train_time:28894340ms step_avg:587.37ms +step:49194/57344 train_time:28894585ms step_avg:587.36ms +step:49195/57344 train_time:28895177ms step_avg:587.36ms +grad accum step:12299/14336 +step:49196/57344 train_time:28896569ms step_avg:587.38ms +step:49197/57344 train_time:28896586ms step_avg:587.36ms +step:49198/57344 train_time:28896825ms step_avg:587.36ms +step:49199/57344 train_time:28897384ms step_avg:587.36ms +grad accum step:12300/14336 +step:49200/57344 train_time:28898707ms step_avg:587.37ms +step:49201/57344 train_time:28898721ms step_avg:587.36ms +step:49202/57344 train_time:28898969ms step_avg:587.35ms +step:49203/57344 train_time:28899528ms step_avg:587.35ms +grad accum step:12301/14336 +step:49204/57344 train_time:28900857ms step_avg:587.37ms +step:49205/57344 train_time:28900875ms step_avg:587.36ms +step:49206/57344 train_time:28901113ms step_avg:587.35ms +step:49207/57344 train_time:28901678ms step_avg:587.35ms +grad accum step:12302/14336 +step:49208/57344 train_time:28903058ms step_avg:587.37ms +step:49209/57344 train_time:28903075ms step_avg:587.35ms +step:49210/57344 train_time:28903323ms step_avg:587.35ms +step:49211/57344 train_time:28903898ms step_avg:587.35ms +grad accum step:12303/14336 +step:49212/57344 train_time:28905240ms step_avg:587.36ms +step:49213/57344 train_time:28905255ms step_avg:587.35ms +step:49214/57344 train_time:28905505ms step_avg:587.34ms +step:49215/57344 train_time:28906064ms step_avg:587.34ms +grad accum step:12304/14336 +step:49216/57344 train_time:28907424ms step_avg:587.36ms +step:49216/57344 val_loss:5.505832 train_time:28907428ms step_avg:587.36ms +step:49217/57344 train_time:28907440ms step_avg:587.35ms +step:49218/57344 train_time:28907666ms step_avg:587.34ms +step:49219/57344 train_time:28908223ms step_avg:587.34ms +grad accum step:12305/14336 +step:49220/57344 train_time:28909589ms step_avg:587.35ms +step:49221/57344 train_time:28909606ms step_avg:587.34ms +step:49222/57344 train_time:28909854ms step_avg:587.34ms +step:49223/57344 train_time:28910425ms step_avg:587.34ms +grad accum step:12306/14336 +step:49224/57344 train_time:28911805ms step_avg:587.35ms +step:49225/57344 train_time:28911821ms step_avg:587.34ms +step:49226/57344 train_time:28912064ms step_avg:587.33ms +step:49227/57344 train_time:28912607ms step_avg:587.33ms +grad accum step:12307/14336 +step:49228/57344 train_time:28913964ms step_avg:587.35ms +step:49229/57344 train_time:28913976ms step_avg:587.34ms +step:49230/57344 train_time:28914229ms step_avg:587.33ms +step:49231/57344 train_time:28914798ms step_avg:587.33ms +grad accum step:12308/14336 +step:49232/57344 train_time:28916133ms step_avg:587.34ms +step:49233/57344 train_time:28916148ms step_avg:587.33ms +step:49234/57344 train_time:28916400ms step_avg:587.33ms +step:49235/57344 train_time:28916957ms step_avg:587.33ms +grad accum step:12309/14336 +step:49236/57344 train_time:28918277ms step_avg:587.34ms +step:49237/57344 train_time:28918294ms step_avg:587.33ms +step:49238/57344 train_time:28918543ms step_avg:587.32ms +step:49239/57344 train_time:28919095ms step_avg:587.32ms +grad accum step:12310/14336 +step:49240/57344 train_time:28920427ms step_avg:587.34ms +step:49241/57344 train_time:28920440ms step_avg:587.32ms +step:49242/57344 train_time:28920691ms step_avg:587.32ms +step:49243/57344 train_time:28921255ms step_avg:587.32ms +grad accum step:12311/14336 +step:49244/57344 train_time:28922569ms step_avg:587.33ms +step:49245/57344 train_time:28922583ms step_avg:587.32ms +step:49246/57344 train_time:28922829ms step_avg:587.31ms +step:49247/57344 train_time:28923374ms step_avg:587.31ms +grad accum step:12312/14336 +step:49248/57344 train_time:28924838ms step_avg:587.33ms +step:49249/57344 train_time:28924852ms step_avg:587.32ms +step:49250/57344 train_time:28925127ms step_avg:587.31ms +step:49251/57344 train_time:28925741ms step_avg:587.31ms +grad accum step:12313/14336 +step:49252/57344 train_time:28927113ms step_avg:587.33ms +step:49253/57344 train_time:28927130ms step_avg:587.32ms +step:49254/57344 train_time:28927383ms step_avg:587.31ms +step:49255/57344 train_time:28927952ms step_avg:587.31ms +grad accum step:12314/14336 +step:49256/57344 train_time:28929270ms step_avg:587.32ms +step:49257/57344 train_time:28929287ms step_avg:587.31ms +step:49258/57344 train_time:28929538ms step_avg:587.31ms +step:49259/57344 train_time:28930098ms step_avg:587.31ms +grad accum step:12315/14336 +step:49260/57344 train_time:28931436ms step_avg:587.32ms +step:49261/57344 train_time:28931454ms step_avg:587.31ms +step:49262/57344 train_time:28931701ms step_avg:587.30ms +step:49263/57344 train_time:28932260ms step_avg:587.30ms +grad accum step:12316/14336 +step:49264/57344 train_time:28933677ms step_avg:587.32ms +step:49265/57344 train_time:28933688ms step_avg:587.31ms +step:49266/57344 train_time:28933928ms step_avg:587.30ms +step:49267/57344 train_time:28934492ms step_avg:587.30ms +grad accum step:12317/14336 +step:49268/57344 train_time:28935851ms step_avg:587.32ms +step:49269/57344 train_time:28935867ms step_avg:587.30ms +step:49270/57344 train_time:28936114ms step_avg:587.30ms +step:49271/57344 train_time:28936680ms step_avg:587.30ms +grad accum step:12318/14336 +step:49272/57344 train_time:28938048ms step_avg:587.31ms +step:49273/57344 train_time:28938065ms step_avg:587.30ms +step:49274/57344 train_time:28938309ms step_avg:587.29ms +step:49275/57344 train_time:28938855ms step_avg:587.29ms +grad accum step:12319/14336 +step:49276/57344 train_time:28940231ms step_avg:587.31ms +step:49277/57344 train_time:28940247ms step_avg:587.30ms +step:49278/57344 train_time:28940477ms step_avg:587.29ms +step:49279/57344 train_time:28941064ms step_avg:587.29ms +grad accum step:12320/14336 +step:49280/57344 train_time:28942449ms step_avg:587.31ms +step:49280/57344 val_loss:5.502330 train_time:28942453ms step_avg:587.31ms +step:49281/57344 train_time:28942465ms step_avg:587.29ms +step:49282/57344 train_time:28942690ms step_avg:587.29ms +step:49283/57344 train_time:28943245ms step_avg:587.29ms +grad accum step:12321/14336 +step:49284/57344 train_time:28944588ms step_avg:587.30ms +step:49285/57344 train_time:28944599ms step_avg:587.29ms +step:49286/57344 train_time:28944851ms step_avg:587.28ms +step:49287/57344 train_time:28945432ms step_avg:587.28ms +grad accum step:12322/14336 +step:49288/57344 train_time:28946790ms step_avg:587.30ms +step:49289/57344 train_time:28946805ms step_avg:587.29ms +step:49290/57344 train_time:28947060ms step_avg:587.28ms +step:49291/57344 train_time:28947631ms step_avg:587.28ms +grad accum step:12323/14336 +step:49292/57344 train_time:28948968ms step_avg:587.30ms +step:49293/57344 train_time:28948984ms step_avg:587.28ms +step:49294/57344 train_time:28949221ms step_avg:587.28ms +step:49295/57344 train_time:28949769ms step_avg:587.28ms +grad accum step:12324/14336 +step:49296/57344 train_time:28951138ms step_avg:587.29ms +step:49297/57344 train_time:28951152ms step_avg:587.28ms +step:49298/57344 train_time:28951402ms step_avg:587.27ms +step:49299/57344 train_time:28951964ms step_avg:587.27ms +grad accum step:12325/14336 +step:49300/57344 train_time:28953276ms step_avg:587.29ms +step:49301/57344 train_time:28953293ms step_avg:587.28ms +step:49302/57344 train_time:28953537ms step_avg:587.27ms +step:49303/57344 train_time:28954090ms step_avg:587.27ms +grad accum step:12326/14336 +step:49304/57344 train_time:28955458ms step_avg:587.28ms +step:49305/57344 train_time:28955532ms step_avg:587.27ms +step:49306/57344 train_time:28955756ms step_avg:587.27ms +step:49307/57344 train_time:28956324ms step_avg:587.27ms +grad accum step:12327/14336 +step:49308/57344 train_time:28957677ms step_avg:587.28ms +step:49309/57344 train_time:28957695ms step_avg:587.27ms +step:49310/57344 train_time:28957945ms step_avg:587.26ms +step:49311/57344 train_time:28958517ms step_avg:587.26ms +grad accum step:12328/14336 +step:49312/57344 train_time:28959842ms step_avg:587.28ms +step:49313/57344 train_time:28959858ms step_avg:587.27ms +step:49314/57344 train_time:28960109ms step_avg:587.26ms +step:49315/57344 train_time:28960670ms step_avg:587.26ms +grad accum step:12329/14336 +step:49316/57344 train_time:28962057ms step_avg:587.28ms +step:49317/57344 train_time:28962083ms step_avg:587.26ms +step:49318/57344 train_time:28962306ms step_avg:587.26ms +step:49319/57344 train_time:28962864ms step_avg:587.26ms +grad accum step:12330/14336 +step:49320/57344 train_time:28964227ms step_avg:587.27ms +step:49321/57344 train_time:28964243ms step_avg:587.26ms +step:49322/57344 train_time:28964496ms step_avg:587.25ms +step:49323/57344 train_time:28965064ms step_avg:587.25ms +grad accum step:12331/14336 +step:49324/57344 train_time:28966382ms step_avg:587.27ms +step:49325/57344 train_time:28966397ms step_avg:587.26ms +step:49326/57344 train_time:28966641ms step_avg:587.25ms +step:49327/57344 train_time:28967186ms step_avg:587.25ms +grad accum step:12332/14336 +step:49328/57344 train_time:28968711ms step_avg:587.27ms +step:49329/57344 train_time:28968727ms step_avg:587.26ms +step:49330/57344 train_time:28968946ms step_avg:587.25ms +step:49331/57344 train_time:28969513ms step_avg:587.25ms +grad accum step:12333/14336 +step:49332/57344 train_time:28970857ms step_avg:587.26ms +step:49333/57344 train_time:28970877ms step_avg:587.25ms +step:49334/57344 train_time:28971117ms step_avg:587.24ms +step:49335/57344 train_time:28971666ms step_avg:587.24ms +grad accum step:12334/14336 +step:49336/57344 train_time:28973007ms step_avg:587.26ms +step:49337/57344 train_time:28973030ms step_avg:587.25ms +step:49338/57344 train_time:28973250ms step_avg:587.24ms +step:49339/57344 train_time:28973796ms step_avg:587.24ms +grad accum step:12335/14336 +step:49340/57344 train_time:28975143ms step_avg:587.25ms +step:49341/57344 train_time:28975159ms step_avg:587.24ms +step:49342/57344 train_time:28975407ms step_avg:587.24ms +step:49343/57344 train_time:28975960ms step_avg:587.24ms +grad accum step:12336/14336 +step:49344/57344 train_time:28977391ms step_avg:587.25ms +step:49344/57344 val_loss:5.494663 train_time:28977407ms step_avg:587.25ms +step:49345/57344 train_time:28977419ms step_avg:587.24ms +step:49346/57344 train_time:28977646ms step_avg:587.23ms +step:49347/57344 train_time:28978208ms step_avg:587.23ms +grad accum step:12337/14336 +step:49348/57344 train_time:28979526ms step_avg:587.25ms +step:49349/57344 train_time:28979555ms step_avg:587.24ms +step:49350/57344 train_time:28979787ms step_avg:587.23ms +step:49351/57344 train_time:28980349ms step_avg:587.23ms +grad accum step:12338/14336 +step:49352/57344 train_time:28981704ms step_avg:587.24ms +step:49353/57344 train_time:28981731ms step_avg:587.23ms +step:49354/57344 train_time:28981963ms step_avg:587.23ms +step:49355/57344 train_time:28982547ms step_avg:587.23ms +grad accum step:12339/14336 +step:49356/57344 train_time:28983925ms step_avg:587.24ms +step:49357/57344 train_time:28983941ms step_avg:587.23ms +step:49358/57344 train_time:28984185ms step_avg:587.22ms +step:49359/57344 train_time:28984762ms step_avg:587.22ms +grad accum step:12340/14336 +step:49360/57344 train_time:28986084ms step_avg:587.24ms +step:49361/57344 train_time:28986100ms step_avg:587.23ms +step:49362/57344 train_time:28986342ms step_avg:587.22ms +step:49363/57344 train_time:28986897ms step_avg:587.22ms +grad accum step:12341/14336 +step:49364/57344 train_time:28988311ms step_avg:587.24ms +step:49365/57344 train_time:28988327ms step_avg:587.22ms +step:49366/57344 train_time:28988581ms step_avg:587.22ms +step:49367/57344 train_time:28989163ms step_avg:587.22ms +grad accum step:12342/14336 +step:49368/57344 train_time:28990597ms step_avg:587.23ms +step:49369/57344 train_time:28990622ms step_avg:587.22ms +step:49370/57344 train_time:28990842ms step_avg:587.22ms +step:49371/57344 train_time:28991403ms step_avg:587.22ms +grad accum step:12343/14336 +step:49372/57344 train_time:28992741ms step_avg:587.23ms +step:49373/57344 train_time:28992758ms step_avg:587.22ms +step:49374/57344 train_time:28992999ms step_avg:587.21ms +step:49375/57344 train_time:28993563ms step_avg:587.21ms +grad accum step:12344/14336 +step:49376/57344 train_time:28995002ms step_avg:587.23ms +step:49377/57344 train_time:28995032ms step_avg:587.22ms +step:49378/57344 train_time:28995261ms step_avg:587.21ms +step:49379/57344 train_time:28995831ms step_avg:587.21ms +grad accum step:12345/14336 +step:49380/57344 train_time:28997212ms step_avg:587.23ms +step:49381/57344 train_time:28997232ms step_avg:587.21ms +step:49382/57344 train_time:28997468ms step_avg:587.21ms +step:49383/57344 train_time:28998010ms step_avg:587.21ms +grad accum step:12346/14336 +step:49384/57344 train_time:28999347ms step_avg:587.22ms +step:49385/57344 train_time:28999363ms step_avg:587.21ms +step:49386/57344 train_time:28999608ms step_avg:587.20ms +step:49387/57344 train_time:29000154ms step_avg:587.20ms +grad accum step:12347/14336 +step:49388/57344 train_time:29001475ms step_avg:587.22ms +step:49389/57344 train_time:29001490ms step_avg:587.21ms +step:49390/57344 train_time:29001755ms step_avg:587.20ms +step:49391/57344 train_time:29002363ms step_avg:587.20ms +grad accum step:12348/14336 +step:49392/57344 train_time:29003684ms step_avg:587.21ms +step:49393/57344 train_time:29003700ms step_avg:587.20ms +step:49394/57344 train_time:29003948ms step_avg:587.20ms +step:49395/57344 train_time:29004508ms step_avg:587.20ms +grad accum step:12349/14336 +step:49396/57344 train_time:29005861ms step_avg:587.21ms +step:49397/57344 train_time:29005877ms step_avg:587.20ms +step:49398/57344 train_time:29006098ms step_avg:587.19ms +step:49399/57344 train_time:29006656ms step_avg:587.19ms +grad accum step:12350/14336 +step:49400/57344 train_time:29007990ms step_avg:587.21ms +step:49401/57344 train_time:29008006ms step_avg:587.19ms +step:49402/57344 train_time:29008253ms step_avg:587.19ms +step:49403/57344 train_time:29008816ms step_avg:587.19ms +grad accum step:12351/14336 +step:49404/57344 train_time:29010158ms step_avg:587.20ms +step:49405/57344 train_time:29010173ms step_avg:587.19ms +step:49406/57344 train_time:29010428ms step_avg:587.18ms +step:49407/57344 train_time:29010999ms step_avg:587.18ms +grad accum step:12352/14336 +step:49408/57344 train_time:29012682ms step_avg:587.21ms +step:49408/57344 val_loss:5.492581 train_time:29012687ms step_avg:587.21ms +step:49409/57344 train_time:29012699ms step_avg:587.19ms +step:49410/57344 train_time:29012925ms step_avg:587.19ms +step:49411/57344 train_time:29013500ms step_avg:587.19ms +grad accum step:12353/14336 +step:49412/57344 train_time:29014851ms step_avg:587.20ms +step:49413/57344 train_time:29014870ms step_avg:587.19ms +step:49414/57344 train_time:29015124ms step_avg:587.18ms +step:49415/57344 train_time:29015721ms step_avg:587.18ms +grad accum step:12354/14336 +step:49416/57344 train_time:29017061ms step_avg:587.20ms +step:49417/57344 train_time:29017075ms step_avg:587.19ms +step:49418/57344 train_time:29017321ms step_avg:587.18ms +step:49419/57344 train_time:29017869ms step_avg:587.18ms +grad accum step:12355/14336 +step:49420/57344 train_time:29019247ms step_avg:587.20ms +step:49421/57344 train_time:29019268ms step_avg:587.18ms +step:49422/57344 train_time:29019507ms step_avg:587.18ms +step:49423/57344 train_time:29020076ms step_avg:587.18ms +grad accum step:12356/14336 +step:49424/57344 train_time:29021420ms step_avg:587.19ms +step:49425/57344 train_time:29021434ms step_avg:587.18ms +step:49426/57344 train_time:29021685ms step_avg:587.17ms +step:49427/57344 train_time:29022245ms step_avg:587.17ms +grad accum step:12357/14336 +step:49428/57344 train_time:29023573ms step_avg:587.19ms +step:49429/57344 train_time:29023589ms step_avg:587.18ms +step:49430/57344 train_time:29023835ms step_avg:587.17ms +step:49431/57344 train_time:29024384ms step_avg:587.17ms +grad accum step:12358/14336 +step:49432/57344 train_time:29025760ms step_avg:587.19ms +step:49433/57344 train_time:29025780ms step_avg:587.17ms +step:49434/57344 train_time:29026014ms step_avg:587.17ms +step:49435/57344 train_time:29026561ms step_avg:587.17ms +grad accum step:12359/14336 +step:49436/57344 train_time:29027905ms step_avg:587.18ms +step:49437/57344 train_time:29027921ms step_avg:587.17ms +step:49438/57344 train_time:29028183ms step_avg:587.16ms +step:49439/57344 train_time:29028775ms step_avg:587.16ms +grad accum step:12360/14336 +step:49440/57344 train_time:29030112ms step_avg:587.18ms +step:49441/57344 train_time:29030129ms step_avg:587.17ms +step:49442/57344 train_time:29030367ms step_avg:587.16ms +step:49443/57344 train_time:29030953ms step_avg:587.16ms +grad accum step:12361/14336 +step:49444/57344 train_time:29032315ms step_avg:587.18ms +step:49445/57344 train_time:29032328ms step_avg:587.16ms +step:49446/57344 train_time:29032582ms step_avg:587.16ms +step:49447/57344 train_time:29033149ms step_avg:587.16ms +grad accum step:12362/14336 +step:49448/57344 train_time:29034660ms step_avg:587.18ms +step:49449/57344 train_time:29034691ms step_avg:587.16ms +step:49450/57344 train_time:29034924ms step_avg:587.16ms +step:49451/57344 train_time:29035514ms step_avg:587.16ms +grad accum step:12363/14336 +step:49452/57344 train_time:29036812ms step_avg:587.17ms +step:49453/57344 train_time:29036828ms step_avg:587.16ms +step:49454/57344 train_time:29037085ms step_avg:587.15ms +step:49455/57344 train_time:29037662ms step_avg:587.15ms +grad accum step:12364/14336 +step:49456/57344 train_time:29038987ms step_avg:587.17ms +step:49457/57344 train_time:29039009ms step_avg:587.16ms +step:49458/57344 train_time:29039247ms step_avg:587.15ms +step:49459/57344 train_time:29039803ms step_avg:587.15ms +grad accum step:12365/14336 +step:49460/57344 train_time:29041352ms step_avg:587.17ms +step:49461/57344 train_time:29041367ms step_avg:587.16ms +step:49462/57344 train_time:29041607ms step_avg:587.15ms +step:49463/57344 train_time:29042210ms step_avg:587.15ms +grad accum step:12366/14336 +step:49464/57344 train_time:29043555ms step_avg:587.17ms +step:49465/57344 train_time:29043571ms step_avg:587.15ms +step:49466/57344 train_time:29043808ms step_avg:587.15ms +step:49467/57344 train_time:29044357ms step_avg:587.15ms +grad accum step:12367/14336 +step:49468/57344 train_time:29045708ms step_avg:587.16ms +step:49469/57344 train_time:29045726ms step_avg:587.15ms +step:49470/57344 train_time:29045964ms step_avg:587.14ms +step:49471/57344 train_time:29046517ms step_avg:587.14ms +grad accum step:12368/14336 +step:49472/57344 train_time:29047868ms step_avg:587.16ms +step:49472/57344 val_loss:5.487586 train_time:29047871ms step_avg:587.16ms +step:49473/57344 train_time:29047883ms step_avg:587.15ms +step:49474/57344 train_time:29048103ms step_avg:587.14ms +step:49475/57344 train_time:29048640ms step_avg:587.14ms +grad accum step:12369/14336 +step:49476/57344 train_time:29050006ms step_avg:587.15ms +step:49477/57344 train_time:29050050ms step_avg:587.14ms +step:49478/57344 train_time:29050279ms step_avg:587.14ms +step:49479/57344 train_time:29050857ms step_avg:587.14ms +grad accum step:12370/14336 +step:49480/57344 train_time:29052200ms step_avg:587.15ms +step:49481/57344 train_time:29052215ms step_avg:587.14ms +step:49482/57344 train_time:29052468ms step_avg:587.13ms +step:49483/57344 train_time:29053039ms step_avg:587.13ms +grad accum step:12371/14336 +step:49484/57344 train_time:29054395ms step_avg:587.15ms +step:49485/57344 train_time:29054415ms step_avg:587.14ms +step:49486/57344 train_time:29054686ms step_avg:587.13ms +step:49487/57344 train_time:29055320ms step_avg:587.13ms +grad accum step:12372/14336 +step:49488/57344 train_time:29056864ms step_avg:587.15ms +step:49489/57344 train_time:29056883ms step_avg:587.14ms +step:49490/57344 train_time:29057106ms step_avg:587.13ms +step:49491/57344 train_time:29057680ms step_avg:587.13ms +grad accum step:12373/14336 +step:49492/57344 train_time:29059041ms step_avg:587.15ms +step:49493/57344 train_time:29059056ms step_avg:587.13ms +step:49494/57344 train_time:29059305ms step_avg:587.13ms +step:49495/57344 train_time:29059861ms step_avg:587.13ms +grad accum step:12374/14336 +step:49496/57344 train_time:29061194ms step_avg:587.14ms +step:49497/57344 train_time:29061211ms step_avg:587.13ms +step:49498/57344 train_time:29061456ms step_avg:587.12ms +step:49499/57344 train_time:29062011ms step_avg:587.12ms +grad accum step:12375/14336 +step:49500/57344 train_time:29063396ms step_avg:587.14ms +step:49501/57344 train_time:29063412ms step_avg:587.13ms +step:49502/57344 train_time:29063650ms step_avg:587.12ms +step:49503/57344 train_time:29064224ms step_avg:587.12ms +grad accum step:12376/14336 +step:49504/57344 train_time:29065571ms step_avg:587.14ms +step:49505/57344 train_time:29065585ms step_avg:587.12ms +step:49506/57344 train_time:29065828ms step_avg:587.12ms +step:49507/57344 train_time:29066377ms step_avg:587.12ms +grad accum step:12377/14336 +step:49508/57344 train_time:29067717ms step_avg:587.13ms +step:49509/57344 train_time:29067735ms step_avg:587.12ms +step:49510/57344 train_time:29067976ms step_avg:587.11ms +step:49511/57344 train_time:29068542ms step_avg:587.11ms +grad accum step:12378/14336 +step:49512/57344 train_time:29069877ms step_avg:587.13ms +step:49513/57344 train_time:29069894ms step_avg:587.12ms +step:49514/57344 train_time:29070142ms step_avg:587.11ms +step:49515/57344 train_time:29070699ms step_avg:587.11ms +grad accum step:12379/14336 +step:49516/57344 train_time:29072042ms step_avg:587.12ms +step:49517/57344 train_time:29072055ms step_avg:587.11ms +step:49518/57344 train_time:29072305ms step_avg:587.11ms +step:49519/57344 train_time:29072862ms step_avg:587.11ms +grad accum step:12380/14336 +step:49520/57344 train_time:29074223ms step_avg:587.12ms +step:49521/57344 train_time:29074244ms step_avg:587.11ms +step:49522/57344 train_time:29074469ms step_avg:587.10ms +step:49523/57344 train_time:29075030ms step_avg:587.10ms +grad accum step:12381/14336 +step:49524/57344 train_time:29076415ms step_avg:587.12ms +step:49525/57344 train_time:29076428ms step_avg:587.11ms +step:49526/57344 train_time:29076680ms step_avg:587.10ms +step:49527/57344 train_time:29077250ms step_avg:587.10ms +grad accum step:12382/14336 +step:49528/57344 train_time:29078550ms step_avg:587.11ms +step:49529/57344 train_time:29078566ms step_avg:587.10ms +step:49530/57344 train_time:29078814ms step_avg:587.09ms +step:49531/57344 train_time:29079385ms step_avg:587.09ms +grad accum step:12383/14336 +step:49532/57344 train_time:29080732ms step_avg:587.11ms +step:49533/57344 train_time:29080746ms step_avg:587.10ms +step:49534/57344 train_time:29080992ms step_avg:587.09ms +step:49535/57344 train_time:29081551ms step_avg:587.09ms +grad accum step:12384/14336 +step:49536/57344 train_time:29082940ms step_avg:587.11ms +step:49536/57344 val_loss:5.484352 train_time:29082944ms step_avg:587.11ms +step:49537/57344 train_time:29082956ms step_avg:587.10ms +step:49538/57344 train_time:29083188ms step_avg:587.09ms +step:49539/57344 train_time:29083759ms step_avg:587.09ms +grad accum step:12385/14336 +step:49540/57344 train_time:29085081ms step_avg:587.10ms +step:49541/57344 train_time:29085097ms step_avg:587.09ms +step:49542/57344 train_time:29085349ms step_avg:587.08ms +step:49543/57344 train_time:29085909ms step_avg:587.08ms +grad accum step:12386/14336 +step:49544/57344 train_time:29087283ms step_avg:587.10ms +step:49545/57344 train_time:29087298ms step_avg:587.09ms +step:49546/57344 train_time:29087560ms step_avg:587.08ms +step:49547/57344 train_time:29088148ms step_avg:587.08ms +grad accum step:12387/14336 +step:49548/57344 train_time:29089482ms step_avg:587.10ms +step:49549/57344 train_time:29089499ms step_avg:587.09ms +step:49550/57344 train_time:29089746ms step_avg:587.08ms +step:49551/57344 train_time:29090293ms step_avg:587.08ms +grad accum step:12388/14336 +step:49552/57344 train_time:29091698ms step_avg:587.09ms +step:49553/57344 train_time:29091715ms step_avg:587.08ms +step:49554/57344 train_time:29091971ms step_avg:587.08ms +step:49555/57344 train_time:29092545ms step_avg:587.08ms +grad accum step:12389/14336 +step:49556/57344 train_time:29093909ms step_avg:587.09ms +step:49557/57344 train_time:29093926ms step_avg:587.08ms +step:49558/57344 train_time:29094188ms step_avg:587.07ms +step:49559/57344 train_time:29094787ms step_avg:587.07ms +grad accum step:12390/14336 +step:49560/57344 train_time:29096148ms step_avg:587.09ms +step:49561/57344 train_time:29096162ms step_avg:587.08ms +step:49562/57344 train_time:29096416ms step_avg:587.07ms +step:49563/57344 train_time:29096993ms step_avg:587.07ms +grad accum step:12391/14336 +step:49564/57344 train_time:29098388ms step_avg:587.09ms +step:49565/57344 train_time:29098404ms step_avg:587.08ms +step:49566/57344 train_time:29098653ms step_avg:587.07ms +step:49567/57344 train_time:29099232ms step_avg:587.07ms +grad accum step:12392/14336 +step:49568/57344 train_time:29100604ms step_avg:587.08ms +step:49569/57344 train_time:29100635ms step_avg:587.07ms +step:49570/57344 train_time:29100871ms step_avg:587.07ms +step:49571/57344 train_time:29101438ms step_avg:587.07ms +grad accum step:12393/14336 +step:49572/57344 train_time:29102722ms step_avg:587.08ms +step:49573/57344 train_time:29102739ms step_avg:587.07ms +step:49574/57344 train_time:29102992ms step_avg:587.06ms +step:49575/57344 train_time:29103558ms step_avg:587.06ms +grad accum step:12394/14336 +step:49576/57344 train_time:29104899ms step_avg:587.08ms +step:49577/57344 train_time:29104913ms step_avg:587.06ms +step:49578/57344 train_time:29105144ms step_avg:587.06ms +step:49579/57344 train_time:29105690ms step_avg:587.06ms +grad accum step:12395/14336 +step:49580/57344 train_time:29107021ms step_avg:587.07ms +step:49581/57344 train_time:29107038ms step_avg:587.06ms +step:49582/57344 train_time:29107288ms step_avg:587.05ms +step:49583/57344 train_time:29107854ms step_avg:587.05ms +grad accum step:12396/14336 +step:49584/57344 train_time:29109231ms step_avg:587.07ms +step:49585/57344 train_time:29109247ms step_avg:587.06ms +step:49586/57344 train_time:29109496ms step_avg:587.05ms +step:49587/57344 train_time:29110046ms step_avg:587.05ms +grad accum step:12397/14336 +step:49588/57344 train_time:29111367ms step_avg:587.06ms +step:49589/57344 train_time:29111384ms step_avg:587.05ms +step:49590/57344 train_time:29111632ms step_avg:587.05ms +step:49591/57344 train_time:29112179ms step_avg:587.05ms +grad accum step:12398/14336 +step:49592/57344 train_time:29124655ms step_avg:587.29ms +step:49593/57344 train_time:29124675ms step_avg:587.27ms +step:49594/57344 train_time:29124940ms step_avg:587.27ms +step:49595/57344 train_time:29125493ms step_avg:587.27ms +grad accum step:12399/14336 +step:49596/57344 train_time:29126848ms step_avg:587.28ms +step:49597/57344 train_time:29126870ms step_avg:587.27ms +step:49598/57344 train_time:29127119ms step_avg:587.26ms +step:49599/57344 train_time:29127693ms step_avg:587.26ms +grad accum step:12400/14336 +step:49600/57344 train_time:29184379ms step_avg:588.39ms +step:49600/57344 val_loss:5.480047 train_time:29184381ms step_avg:588.39ms +step:49601/57344 train_time:29184393ms step_avg:588.38ms +step:49602/57344 train_time:29184704ms step_avg:588.38ms +step:49603/57344 train_time:29185235ms step_avg:588.38ms +grad accum step:12401/14336 +step:49604/57344 train_time:29186545ms step_avg:588.39ms +step:49605/57344 train_time:29186560ms step_avg:588.38ms +step:49606/57344 train_time:29186803ms step_avg:588.37ms +step:49607/57344 train_time:29187350ms step_avg:588.37ms +grad accum step:12402/14336 +step:49608/57344 train_time:29188717ms step_avg:588.39ms +step:49609/57344 train_time:29188734ms step_avg:588.38ms +step:49610/57344 train_time:29188981ms step_avg:588.37ms +step:49611/57344 train_time:29189537ms step_avg:588.37ms +grad accum step:12403/14336 +step:49612/57344 train_time:29190870ms step_avg:588.38ms +step:49613/57344 train_time:29190883ms step_avg:588.37ms +step:49614/57344 train_time:29191131ms step_avg:588.36ms +step:49615/57344 train_time:29191697ms step_avg:588.36ms +grad accum step:12404/14336 +step:49616/57344 train_time:29193052ms step_avg:588.38ms +step:49617/57344 train_time:29193068ms step_avg:588.37ms +step:49618/57344 train_time:29193310ms step_avg:588.36ms +step:49619/57344 train_time:29193855ms step_avg:588.36ms +grad accum step:12405/14336 +step:49620/57344 train_time:29195191ms step_avg:588.38ms +step:49621/57344 train_time:29195208ms step_avg:588.36ms +step:49622/57344 train_time:29195456ms step_avg:588.36ms +step:49623/57344 train_time:29196008ms step_avg:588.36ms +grad accum step:12406/14336 +step:49624/57344 train_time:29197292ms step_avg:588.37ms +step:49625/57344 train_time:29197309ms step_avg:588.36ms +step:49626/57344 train_time:29197559ms step_avg:588.35ms +step:49627/57344 train_time:29198127ms step_avg:588.35ms +grad accum step:12407/14336 +step:49628/57344 train_time:29199440ms step_avg:588.37ms +step:49629/57344 train_time:29199457ms step_avg:588.35ms +step:49630/57344 train_time:29199704ms step_avg:588.35ms +step:49631/57344 train_time:29200253ms step_avg:588.35ms +grad accum step:12408/14336 +step:49632/57344 train_time:29201559ms step_avg:588.36ms +step:49633/57344 train_time:29201576ms step_avg:588.35ms +step:49634/57344 train_time:29201828ms step_avg:588.34ms +step:49635/57344 train_time:29202387ms step_avg:588.34ms +grad accum step:12409/14336 +step:49636/57344 train_time:29203692ms step_avg:588.36ms +step:49637/57344 train_time:29203708ms step_avg:588.35ms +step:49638/57344 train_time:29203961ms step_avg:588.34ms +step:49639/57344 train_time:29204543ms step_avg:588.34ms +grad accum step:12410/14336 +step:49640/57344 train_time:29205895ms step_avg:588.35ms +step:49641/57344 train_time:29205915ms step_avg:588.34ms +step:49642/57344 train_time:29206159ms step_avg:588.34ms +step:49643/57344 train_time:29206727ms step_avg:588.34ms +grad accum step:12411/14336 +step:49644/57344 train_time:29208108ms step_avg:588.35ms +step:49645/57344 train_time:29208125ms step_avg:588.34ms +step:49646/57344 train_time:29208381ms step_avg:588.33ms +step:49647/57344 train_time:29208955ms step_avg:588.33ms +grad accum step:12412/14336 +step:49648/57344 train_time:29210261ms step_avg:588.35ms +step:49649/57344 train_time:29210272ms step_avg:588.34ms +step:49650/57344 train_time:29210522ms step_avg:588.33ms +step:49651/57344 train_time:29211094ms step_avg:588.33ms +grad accum step:12413/14336 +step:49652/57344 train_time:29212423ms step_avg:588.34ms +step:49653/57344 train_time:29212435ms step_avg:588.33ms +step:49654/57344 train_time:29212674ms step_avg:588.32ms +step:49655/57344 train_time:29213235ms step_avg:588.32ms +grad accum step:12414/14336 +step:49656/57344 train_time:29214532ms step_avg:588.34ms +step:49657/57344 train_time:29214548ms step_avg:588.33ms +step:49658/57344 train_time:29214822ms step_avg:588.32ms +step:49659/57344 train_time:29215445ms step_avg:588.32ms +grad accum step:12415/14336 +step:49660/57344 train_time:29216746ms step_avg:588.34ms +step:49661/57344 train_time:29216764ms step_avg:588.32ms +step:49662/57344 train_time:29217011ms step_avg:588.32ms +step:49663/57344 train_time:29225764ms step_avg:588.48ms +grad accum step:12416/14336 +step:49664/57344 train_time:29226906ms step_avg:588.49ms +step:49664/57344 val_loss:5.477690 train_time:29226907ms step_avg:588.49ms +step:49665/57344 train_time:29226919ms step_avg:588.48ms +step:49666/57344 train_time:29227144ms step_avg:588.47ms +step:49667/57344 train_time:29227698ms step_avg:588.47ms +grad accum step:12417/14336 +step:49668/57344 train_time:29228995ms step_avg:588.49ms +step:49669/57344 train_time:29229011ms step_avg:588.48ms +step:49670/57344 train_time:29229263ms step_avg:588.47ms +step:49671/57344 train_time:29229834ms step_avg:588.47ms +grad accum step:12418/14336 +step:49672/57344 train_time:29231157ms step_avg:588.48ms +step:49673/57344 train_time:29231174ms step_avg:588.47ms +step:49674/57344 train_time:29231428ms step_avg:588.47ms +step:49675/57344 train_time:29231998ms step_avg:588.46ms +grad accum step:12419/14336 +step:49676/57344 train_time:29233323ms step_avg:588.48ms +step:49677/57344 train_time:29233336ms step_avg:588.47ms +step:49678/57344 train_time:29233593ms step_avg:588.46ms +step:49679/57344 train_time:29234161ms step_avg:588.46ms +grad accum step:12420/14336 +step:49680/57344 train_time:29235493ms step_avg:588.48ms +step:49681/57344 train_time:29235509ms step_avg:588.46ms +step:49682/57344 train_time:29235769ms step_avg:588.46ms +step:49683/57344 train_time:29236354ms step_avg:588.46ms +grad accum step:12421/14336 +step:49684/57344 train_time:29237673ms step_avg:588.47ms +step:49685/57344 train_time:29237689ms step_avg:588.46ms +step:49686/57344 train_time:29237935ms step_avg:588.45ms +step:49687/57344 train_time:29238484ms step_avg:588.45ms +grad accum step:12422/14336 +step:49688/57344 train_time:29239811ms step_avg:588.47ms +step:49689/57344 train_time:29239828ms step_avg:588.46ms +step:49690/57344 train_time:29240074ms step_avg:588.45ms +step:49691/57344 train_time:29240632ms step_avg:588.45ms +grad accum step:12423/14336 +step:49692/57344 train_time:29242078ms step_avg:588.47ms +step:49693/57344 train_time:29242111ms step_avg:588.46ms +step:49694/57344 train_time:29242350ms step_avg:588.45ms +step:49695/57344 train_time:29242949ms step_avg:588.45ms +grad accum step:12424/14336 +step:49696/57344 train_time:29244271ms step_avg:588.46ms +step:49697/57344 train_time:29244287ms step_avg:588.45ms +step:49698/57344 train_time:29244541ms step_avg:588.45ms +step:49699/57344 train_time:29245131ms step_avg:588.45ms +grad accum step:12425/14336 +step:49700/57344 train_time:29246463ms step_avg:588.46ms +step:49701/57344 train_time:29246479ms step_avg:588.45ms +step:49702/57344 train_time:29246730ms step_avg:588.44ms +step:49703/57344 train_time:29247293ms step_avg:588.44ms +grad accum step:12426/14336 +step:49704/57344 train_time:29248726ms step_avg:588.46ms +step:49705/57344 train_time:29248741ms step_avg:588.45ms +step:49706/57344 train_time:29248988ms step_avg:588.44ms +step:49707/57344 train_time:29249555ms step_avg:588.44ms +grad accum step:12427/14336 +step:49708/57344 train_time:29250904ms step_avg:588.45ms +step:49709/57344 train_time:29250915ms step_avg:588.44ms +step:49710/57344 train_time:29251163ms step_avg:588.44ms +step:49711/57344 train_time:29251722ms step_avg:588.44ms +grad accum step:12428/14336 +step:49712/57344 train_time:29253063ms step_avg:588.45ms +step:49713/57344 train_time:29253075ms step_avg:588.44ms +step:49714/57344 train_time:29253316ms step_avg:588.43ms +step:49715/57344 train_time:29253896ms step_avg:588.43ms +grad accum step:12429/14336 +step:49716/57344 train_time:29255248ms step_avg:588.45ms +step:49717/57344 train_time:29255264ms step_avg:588.44ms +step:49718/57344 train_time:29255515ms step_avg:588.43ms +step:49719/57344 train_time:29256095ms step_avg:588.43ms +grad accum step:12430/14336 +step:49720/57344 train_time:29257481ms step_avg:588.44ms +step:49721/57344 train_time:29257498ms step_avg:588.43ms +step:49722/57344 train_time:29257745ms step_avg:588.43ms +step:49723/57344 train_time:29258307ms step_avg:588.43ms +grad accum step:12431/14336 +step:49724/57344 train_time:29259708ms step_avg:588.44ms +step:49725/57344 train_time:29259721ms step_avg:588.43ms +step:49726/57344 train_time:29259976ms step_avg:588.42ms +step:49727/57344 train_time:29260555ms step_avg:588.42ms +grad accum step:12432/14336 +step:49728/57344 train_time:29261889ms step_avg:588.44ms +step:49728/57344 val_loss:5.474648 train_time:29261890ms step_avg:588.44ms +step:49729/57344 train_time:29261902ms step_avg:588.43ms +step:49730/57344 train_time:29262133ms step_avg:588.42ms +step:49731/57344 train_time:29262712ms step_avg:588.42ms +grad accum step:12433/14336 +step:49732/57344 train_time:29264087ms step_avg:588.44ms +step:49733/57344 train_time:29264104ms step_avg:588.42ms +step:49734/57344 train_time:29264345ms step_avg:588.42ms +step:49735/57344 train_time:29264916ms step_avg:588.42ms +grad accum step:12434/14336 +step:49736/57344 train_time:29266332ms step_avg:588.43ms +step:49737/57344 train_time:29266346ms step_avg:588.42ms +step:49738/57344 train_time:29266593ms step_avg:588.42ms +step:49739/57344 train_time:29267142ms step_avg:588.41ms +grad accum step:12435/14336 +step:49740/57344 train_time:29268552ms step_avg:588.43ms +step:49741/57344 train_time:29268573ms step_avg:588.42ms +step:49742/57344 train_time:29268804ms step_avg:588.41ms +step:49743/57344 train_time:29269348ms step_avg:588.41ms +grad accum step:12436/14336 +step:49744/57344 train_time:29270690ms step_avg:588.43ms +step:49745/57344 train_time:29270712ms step_avg:588.42ms +step:49746/57344 train_time:29270938ms step_avg:588.41ms +step:49747/57344 train_time:29271486ms step_avg:588.41ms +grad accum step:12437/14336 +step:49748/57344 train_time:29272857ms step_avg:588.42ms +step:49749/57344 train_time:29272872ms step_avg:588.41ms +step:49750/57344 train_time:29273125ms step_avg:588.40ms +step:49751/57344 train_time:29273696ms step_avg:588.40ms +grad accum step:12438/14336 +step:49752/57344 train_time:29275139ms step_avg:588.42ms +step:49753/57344 train_time:29275156ms step_avg:588.41ms +step:49754/57344 train_time:29275411ms step_avg:588.40ms +step:49755/57344 train_time:29276003ms step_avg:588.40ms +grad accum step:12439/14336 +step:49756/57344 train_time:29277384ms step_avg:588.42ms +step:49757/57344 train_time:29277401ms step_avg:588.41ms +step:49758/57344 train_time:29277648ms step_avg:588.40ms +step:49759/57344 train_time:29278209ms step_avg:588.40ms +grad accum step:12440/14336 +step:49760/57344 train_time:29279540ms step_avg:588.42ms +step:49761/57344 train_time:29279556ms step_avg:588.40ms +step:49762/57344 train_time:29279787ms step_avg:588.40ms +step:49763/57344 train_time:29280337ms step_avg:588.40ms +grad accum step:12441/14336 +step:49764/57344 train_time:29281690ms step_avg:588.41ms +step:49765/57344 train_time:29281745ms step_avg:588.40ms +step:49766/57344 train_time:29281968ms step_avg:588.39ms +step:49767/57344 train_time:29282532ms step_avg:588.39ms +grad accum step:12442/14336 +step:49768/57344 train_time:29283897ms step_avg:588.41ms +step:49769/57344 train_time:29283913ms step_avg:588.40ms +step:49770/57344 train_time:29284157ms step_avg:588.39ms +step:49771/57344 train_time:29284717ms step_avg:588.39ms +grad accum step:12443/14336 +step:49772/57344 train_time:29286050ms step_avg:588.40ms +step:49773/57344 train_time:29286069ms step_avg:588.39ms +step:49774/57344 train_time:29286317ms step_avg:588.39ms +step:49775/57344 train_time:29286893ms step_avg:588.39ms +grad accum step:12444/14336 +step:49776/57344 train_time:29288260ms step_avg:588.40ms +step:49777/57344 train_time:29288274ms step_avg:588.39ms +step:49778/57344 train_time:29288521ms step_avg:588.38ms +step:49779/57344 train_time:29289067ms step_avg:588.38ms +grad accum step:12445/14336 +step:49780/57344 train_time:29290424ms step_avg:588.40ms +step:49781/57344 train_time:29290438ms step_avg:588.39ms +step:49782/57344 train_time:29290686ms step_avg:588.38ms +step:49783/57344 train_time:29291241ms step_avg:588.38ms +grad accum step:12446/14336 +step:49784/57344 train_time:29292556ms step_avg:588.39ms +step:49785/57344 train_time:29292574ms step_avg:588.38ms +step:49786/57344 train_time:29292816ms step_avg:588.37ms +step:49787/57344 train_time:29293402ms step_avg:588.37ms +grad accum step:12447/14336 +step:49788/57344 train_time:29294783ms step_avg:588.39ms +step:49789/57344 train_time:29294798ms step_avg:588.38ms +step:49790/57344 train_time:29295050ms step_avg:588.37ms +step:49791/57344 train_time:29295625ms step_avg:588.37ms +grad accum step:12448/14336 +step:49792/57344 train_time:29297002ms step_avg:588.39ms +step:49792/57344 val_loss:5.469898 train_time:29297009ms step_avg:588.39ms +step:49793/57344 train_time:29297021ms step_avg:588.38ms +step:49794/57344 train_time:29297242ms step_avg:588.37ms +step:49795/57344 train_time:29297780ms step_avg:588.37ms +grad accum step:12449/14336 +step:49796/57344 train_time:29299224ms step_avg:588.39ms +step:49797/57344 train_time:29299245ms step_avg:588.37ms +step:49798/57344 train_time:29299464ms step_avg:588.37ms +step:49799/57344 train_time:29300017ms step_avg:588.37ms +grad accum step:12450/14336 +step:49800/57344 train_time:29301548ms step_avg:588.38ms +step:49801/57344 train_time:29301564ms step_avg:588.37ms +step:49802/57344 train_time:29301790ms step_avg:588.37ms +step:49803/57344 train_time:29302361ms step_avg:588.37ms +grad accum step:12451/14336 +step:49804/57344 train_time:29303737ms step_avg:588.38ms +step:49805/57344 train_time:29303762ms step_avg:588.37ms +step:49806/57344 train_time:29303984ms step_avg:588.36ms +step:49807/57344 train_time:29304541ms step_avg:588.36ms +grad accum step:12452/14336 +step:49808/57344 train_time:29305898ms step_avg:588.38ms +step:49809/57344 train_time:29305921ms step_avg:588.37ms +step:49810/57344 train_time:29306172ms step_avg:588.36ms +step:49811/57344 train_time:29306801ms step_avg:588.36ms +grad accum step:12453/14336 +step:49812/57344 train_time:29308183ms step_avg:588.38ms +step:49813/57344 train_time:29308200ms step_avg:588.36ms +step:49814/57344 train_time:29308439ms step_avg:588.36ms +step:49815/57344 train_time:29308986ms step_avg:588.36ms +grad accum step:12454/14336 +step:49816/57344 train_time:29310333ms step_avg:588.37ms +step:49817/57344 train_time:29310355ms step_avg:588.36ms +step:49818/57344 train_time:29310586ms step_avg:588.35ms +step:49819/57344 train_time:29311128ms step_avg:588.35ms +grad accum step:12455/14336 +step:49820/57344 train_time:29312468ms step_avg:588.37ms +step:49821/57344 train_time:29312480ms step_avg:588.36ms +step:49822/57344 train_time:29312723ms step_avg:588.35ms +step:49823/57344 train_time:29313300ms step_avg:588.35ms +grad accum step:12456/14336 +step:49824/57344 train_time:29314638ms step_avg:588.36ms +step:49825/57344 train_time:29314655ms step_avg:588.35ms +step:49826/57344 train_time:29314890ms step_avg:588.35ms +step:49827/57344 train_time:29315455ms step_avg:588.34ms +grad accum step:12457/14336 +step:49828/57344 train_time:29316867ms step_avg:588.36ms +step:49829/57344 train_time:29316884ms step_avg:588.35ms +step:49830/57344 train_time:29317131ms step_avg:588.34ms +step:49831/57344 train_time:29317701ms step_avg:588.34ms +grad accum step:12458/14336 +step:49832/57344 train_time:29319071ms step_avg:588.36ms +step:49833/57344 train_time:29319087ms step_avg:588.35ms +step:49834/57344 train_time:29319322ms step_avg:588.34ms +step:49835/57344 train_time:29319884ms step_avg:588.34ms +grad accum step:12459/14336 +step:49836/57344 train_time:29321242ms step_avg:588.35ms +step:49837/57344 train_time:29321258ms step_avg:588.34ms +step:49838/57344 train_time:29321506ms step_avg:588.34ms +step:49839/57344 train_time:29322058ms step_avg:588.34ms +grad accum step:12460/14336 +step:49840/57344 train_time:29323388ms step_avg:588.35ms +step:49841/57344 train_time:29323433ms step_avg:588.34ms +step:49842/57344 train_time:29323656ms step_avg:588.33ms +step:49843/57344 train_time:29324219ms step_avg:588.33ms +grad accum step:12461/14336 +step:49844/57344 train_time:29325598ms step_avg:588.35ms +step:49845/57344 train_time:29325616ms step_avg:588.34ms +step:49846/57344 train_time:29325860ms step_avg:588.33ms +step:49847/57344 train_time:29326426ms step_avg:588.33ms +grad accum step:12462/14336 +step:49848/57344 train_time:29327736ms step_avg:588.34ms +step:49849/57344 train_time:29327750ms step_avg:588.33ms +step:49850/57344 train_time:29328114ms step_avg:588.33ms +step:49851/57344 train_time:29328779ms step_avg:588.33ms +grad accum step:12463/14336 +step:49852/57344 train_time:29330233ms step_avg:588.35ms +step:49853/57344 train_time:29330253ms step_avg:588.33ms +step:49854/57344 train_time:29330477ms step_avg:588.33ms +step:49855/57344 train_time:29331050ms step_avg:588.33ms +grad accum step:12464/14336 +step:49856/57344 train_time:29332421ms step_avg:588.34ms +step:49856/57344 val_loss:5.466804 train_time:29332427ms step_avg:588.34ms +step:49857/57344 train_time:29332439ms step_avg:588.33ms +step:49858/57344 train_time:29332657ms step_avg:588.32ms +step:49859/57344 train_time:29333215ms step_avg:588.32ms +grad accum step:12465/14336 +step:49860/57344 train_time:29334569ms step_avg:588.34ms +step:49861/57344 train_time:29334583ms step_avg:588.33ms +step:49862/57344 train_time:29334823ms step_avg:588.32ms +step:49863/57344 train_time:29335365ms step_avg:588.32ms +grad accum step:12466/14336 +step:49864/57344 train_time:29336684ms step_avg:588.33ms +step:49865/57344 train_time:29336701ms step_avg:588.32ms +step:49866/57344 train_time:29336962ms step_avg:588.32ms +step:49867/57344 train_time:29337554ms step_avg:588.32ms +grad accum step:12467/14336 +step:49868/57344 train_time:29338872ms step_avg:588.33ms +step:49869/57344 train_time:29338887ms step_avg:588.32ms +step:49870/57344 train_time:29339132ms step_avg:588.31ms +step:49871/57344 train_time:29339686ms step_avg:588.31ms +grad accum step:12468/14336 +step:49872/57344 train_time:29341039ms step_avg:588.33ms +step:49873/57344 train_time:29341057ms step_avg:588.32ms +step:49874/57344 train_time:29341304ms step_avg:588.31ms +step:49875/57344 train_time:29341885ms step_avg:588.31ms +grad accum step:12469/14336 +step:49876/57344 train_time:29343246ms step_avg:588.32ms +step:49877/57344 train_time:29343269ms step_avg:588.31ms +step:49878/57344 train_time:29343504ms step_avg:588.31ms +step:49879/57344 train_time:29344061ms step_avg:588.30ms +grad accum step:12470/14336 +step:49880/57344 train_time:29345408ms step_avg:588.32ms +step:49881/57344 train_time:29345425ms step_avg:588.31ms +step:49882/57344 train_time:29345665ms step_avg:588.30ms +step:49883/57344 train_time:29346225ms step_avg:588.30ms +grad accum step:12471/14336 +step:49884/57344 train_time:29347542ms step_avg:588.32ms +step:49885/57344 train_time:29347559ms step_avg:588.30ms +step:49886/57344 train_time:29347809ms step_avg:588.30ms +step:49887/57344 train_time:29348363ms step_avg:588.30ms +grad accum step:12472/14336 +step:49888/57344 train_time:29349677ms step_avg:588.31ms +step:49889/57344 train_time:29349698ms step_avg:588.30ms +step:49890/57344 train_time:29349939ms step_avg:588.29ms +step:49891/57344 train_time:29350500ms step_avg:588.29ms +grad accum step:12473/14336 +step:49892/57344 train_time:29351863ms step_avg:588.31ms +step:49893/57344 train_time:29351881ms step_avg:588.30ms +step:49894/57344 train_time:29352124ms step_avg:588.29ms +step:49895/57344 train_time:29352682ms step_avg:588.29ms +grad accum step:12474/14336 +step:49896/57344 train_time:29354029ms step_avg:588.30ms +step:49897/57344 train_time:29354052ms step_avg:588.29ms +step:49898/57344 train_time:29354307ms step_avg:588.29ms +step:49899/57344 train_time:29354907ms step_avg:588.29ms +grad accum step:12475/14336 +step:49900/57344 train_time:29356659ms step_avg:588.31ms +step:49901/57344 train_time:29356674ms step_avg:588.30ms +step:49902/57344 train_time:29356894ms step_avg:588.29ms +step:49903/57344 train_time:29357464ms step_avg:588.29ms +grad accum step:12476/14336 +step:49904/57344 train_time:29358813ms step_avg:588.31ms +step:49905/57344 train_time:29358828ms step_avg:588.29ms +step:49906/57344 train_time:29359062ms step_avg:588.29ms +step:49907/57344 train_time:29359648ms step_avg:588.29ms +grad accum step:12477/14336 +step:49908/57344 train_time:29360978ms step_avg:588.30ms +step:49909/57344 train_time:29360998ms step_avg:588.29ms +step:49910/57344 train_time:29361247ms step_avg:588.28ms +step:49911/57344 train_time:29361838ms step_avg:588.28ms +grad accum step:12478/14336 +step:49912/57344 train_time:29363300ms step_avg:588.30ms +step:49913/57344 train_time:29363319ms step_avg:588.29ms +step:49914/57344 train_time:29363545ms step_avg:588.28ms +step:49915/57344 train_time:29364114ms step_avg:588.28ms +grad accum step:12479/14336 +step:49916/57344 train_time:29365488ms step_avg:588.30ms +step:49917/57344 train_time:29365501ms step_avg:588.29ms +step:49918/57344 train_time:29365748ms step_avg:588.28ms +step:49919/57344 train_time:29366311ms step_avg:588.28ms +grad accum step:12480/14336 +step:49920/57344 train_time:29367795ms step_avg:588.30ms +step:49920/57344 val_loss:5.462715 train_time:29367801ms step_avg:588.30ms +step:49921/57344 train_time:29367813ms step_avg:588.29ms +step:49922/57344 train_time:29368041ms step_avg:588.28ms +step:49923/57344 train_time:29368600ms step_avg:588.28ms +grad accum step:12481/14336 +step:49924/57344 train_time:29369907ms step_avg:588.29ms +step:49925/57344 train_time:29369925ms step_avg:588.28ms +step:49926/57344 train_time:29370160ms step_avg:588.27ms +step:49927/57344 train_time:29370710ms step_avg:588.27ms +grad accum step:12482/14336 +step:49928/57344 train_time:29372072ms step_avg:588.29ms +step:49929/57344 train_time:29372094ms step_avg:588.28ms +step:49930/57344 train_time:29372322ms step_avg:588.27ms +step:49931/57344 train_time:29372893ms step_avg:588.27ms +grad accum step:12483/14336 +step:49932/57344 train_time:29374338ms step_avg:588.29ms +step:49933/57344 train_time:29374359ms step_avg:588.28ms +step:49934/57344 train_time:29374585ms step_avg:588.27ms +step:49935/57344 train_time:29375153ms step_avg:588.27ms +grad accum step:12484/14336 +step:49936/57344 train_time:29376472ms step_avg:588.28ms +step:49937/57344 train_time:29376488ms step_avg:588.27ms +step:49938/57344 train_time:29376743ms step_avg:588.26ms +step:49939/57344 train_time:29377309ms step_avg:588.26ms +grad accum step:12485/14336 +step:49940/57344 train_time:29378703ms step_avg:588.28ms +step:49941/57344 train_time:29378726ms step_avg:588.27ms +step:49942/57344 train_time:29378951ms step_avg:588.26ms +step:49943/57344 train_time:29379492ms step_avg:588.26ms +grad accum step:12486/14336 +step:49944/57344 train_time:29380794ms step_avg:588.27ms +step:49945/57344 train_time:29380811ms step_avg:588.26ms +step:49946/57344 train_time:29381067ms step_avg:588.26ms +step:49947/57344 train_time:29381646ms step_avg:588.26ms +grad accum step:12487/14336 +step:49948/57344 train_time:29382956ms step_avg:588.27ms +step:49949/57344 train_time:29382986ms step_avg:588.26ms +step:49950/57344 train_time:29383205ms step_avg:588.25ms +step:49951/57344 train_time:29383752ms step_avg:588.25ms +grad accum step:12488/14336 +step:49952/57344 train_time:29385118ms step_avg:588.27ms +step:49953/57344 train_time:29385134ms step_avg:588.26ms +step:49954/57344 train_time:29385384ms step_avg:588.25ms +step:49955/57344 train_time:29385958ms step_avg:588.25ms +grad accum step:12489/14336 +step:49956/57344 train_time:29387349ms step_avg:588.26ms +step:49957/57344 train_time:29387365ms step_avg:588.25ms +step:49958/57344 train_time:29387602ms step_avg:588.25ms +step:49959/57344 train_time:29388154ms step_avg:588.25ms +grad accum step:12490/14336 +step:49960/57344 train_time:29389506ms step_avg:588.26ms +step:49961/57344 train_time:29389529ms step_avg:588.25ms +step:49962/57344 train_time:29389761ms step_avg:588.24ms +step:49963/57344 train_time:29390331ms step_avg:588.24ms +grad accum step:12491/14336 +step:49964/57344 train_time:29391721ms step_avg:588.26ms +step:49965/57344 train_time:29391771ms step_avg:588.25ms +step:49966/57344 train_time:29391992ms step_avg:588.24ms +step:49967/57344 train_time:29392563ms step_avg:588.24ms +grad accum step:12492/14336 +step:49968/57344 train_time:29394015ms step_avg:588.26ms +step:49969/57344 train_time:29394041ms step_avg:588.25ms +step:49970/57344 train_time:29394273ms step_avg:588.24ms +step:49971/57344 train_time:29394838ms step_avg:588.24ms +grad accum step:12493/14336 +step:49972/57344 train_time:29396174ms step_avg:588.25ms +step:49973/57344 train_time:29396189ms step_avg:588.24ms +step:49974/57344 train_time:29396438ms step_avg:588.23ms +step:49975/57344 train_time:29397002ms step_avg:588.23ms +grad accum step:12494/14336 +step:49976/57344 train_time:29398469ms step_avg:588.25ms +step:49977/57344 train_time:29398495ms step_avg:588.24ms +step:49978/57344 train_time:29398720ms step_avg:588.23ms +step:49979/57344 train_time:29399295ms step_avg:588.23ms +grad accum step:12495/14336 +step:49980/57344 train_time:29400669ms step_avg:588.25ms +step:49981/57344 train_time:29400688ms step_avg:588.24ms +step:49982/57344 train_time:29400914ms step_avg:588.23ms +step:49983/57344 train_time:29401495ms step_avg:588.23ms +grad accum step:12496/14336 +step:49984/57344 train_time:29402935ms step_avg:588.25ms +step:49984/57344 val_loss:5.460168 train_time:29402956ms step_avg:588.25ms +step:49985/57344 train_time:29402968ms step_avg:588.24ms +step:49986/57344 train_time:29403196ms step_avg:588.23ms +step:49987/57344 train_time:29403765ms step_avg:588.23ms +grad accum step:12497/14336 +step:49988/57344 train_time:29405201ms step_avg:588.25ms +step:49989/57344 train_time:29405215ms step_avg:588.23ms +step:49990/57344 train_time:29405476ms step_avg:588.23ms +step:49991/57344 train_time:29406090ms step_avg:588.23ms +grad accum step:12498/14336 +step:49992/57344 train_time:29407462ms step_avg:588.24ms +step:49993/57344 train_time:29407478ms step_avg:588.23ms +step:49994/57344 train_time:29407727ms step_avg:588.23ms +step:49995/57344 train_time:29408290ms step_avg:588.22ms +grad accum step:12499/14336 +step:49996/57344 train_time:29409662ms step_avg:588.24ms +step:49997/57344 train_time:29409678ms step_avg:588.23ms +step:49998/57344 train_time:29409921ms step_avg:588.22ms +step:49999/57344 train_time:29410471ms step_avg:588.22ms +grad accum step:12500/14336 +step:50000/57344 train_time:29411803ms step_avg:588.24ms +step:50001/57344 train_time:29411820ms step_avg:588.22ms +step:50002/57344 train_time:29412066ms step_avg:588.22ms +step:50003/57344 train_time:29412640ms step_avg:588.22ms +grad accum step:12501/14336 +step:50004/57344 train_time:29414047ms step_avg:588.23ms +step:50005/57344 train_time:29414062ms step_avg:588.22ms +step:50006/57344 train_time:29414288ms step_avg:588.22ms +step:50007/57344 train_time:29414865ms step_avg:588.21ms +grad accum step:12502/14336 +step:50008/57344 train_time:29416233ms step_avg:588.23ms +step:50009/57344 train_time:29416248ms step_avg:588.22ms +step:50010/57344 train_time:29416495ms step_avg:588.21ms +step:50011/57344 train_time:29417051ms step_avg:588.21ms +grad accum step:12503/14336 +step:50012/57344 train_time:29418389ms step_avg:588.23ms +step:50013/57344 train_time:29418405ms step_avg:588.22ms +step:50014/57344 train_time:29418660ms step_avg:588.21ms +step:50015/57344 train_time:29419237ms step_avg:588.21ms +grad accum step:12504/14336 +step:50016/57344 train_time:29420595ms step_avg:588.22ms +step:50017/57344 train_time:29420611ms step_avg:588.21ms +step:50018/57344 train_time:29420856ms step_avg:588.21ms +step:50019/57344 train_time:29421433ms step_avg:588.21ms +grad accum step:12505/14336 +step:50020/57344 train_time:29422785ms step_avg:588.22ms +step:50021/57344 train_time:29422801ms step_avg:588.21ms +step:50022/57344 train_time:29423053ms step_avg:588.20ms +step:50023/57344 train_time:29423621ms step_avg:588.20ms +grad accum step:12506/14336 +step:50024/57344 train_time:29424979ms step_avg:588.22ms +step:50025/57344 train_time:29424996ms step_avg:588.21ms +step:50026/57344 train_time:29425252ms step_avg:588.20ms +step:50027/57344 train_time:29425838ms step_avg:588.20ms +grad accum step:12507/14336 +step:50028/57344 train_time:29427228ms step_avg:588.22ms +step:50029/57344 train_time:29427240ms step_avg:588.20ms +step:50030/57344 train_time:29427484ms step_avg:588.20ms +step:50031/57344 train_time:29428053ms step_avg:588.20ms +grad accum step:12508/14336 +step:50032/57344 train_time:29429494ms step_avg:588.21ms +step:50033/57344 train_time:29429509ms step_avg:588.20ms +step:50034/57344 train_time:29429756ms step_avg:588.20ms +step:50035/57344 train_time:29430302ms step_avg:588.19ms +grad accum step:12509/14336 +step:50036/57344 train_time:29431686ms step_avg:588.21ms +step:50037/57344 train_time:29431702ms step_avg:588.20ms +step:50038/57344 train_time:29431955ms step_avg:588.19ms +step:50039/57344 train_time:29432521ms step_avg:588.19ms +grad accum step:12510/14336 +step:50040/57344 train_time:29433858ms step_avg:588.21ms +step:50041/57344 train_time:29433875ms step_avg:588.20ms +step:50042/57344 train_time:29434131ms step_avg:588.19ms +step:50043/57344 train_time:29434709ms step_avg:588.19ms +grad accum step:12511/14336 +step:50044/57344 train_time:29436093ms step_avg:588.20ms +step:50045/57344 train_time:29436109ms step_avg:588.19ms +step:50046/57344 train_time:29436341ms step_avg:588.19ms +step:50047/57344 train_time:29436935ms step_avg:588.19ms +grad accum step:12512/14336 +step:50048/57344 train_time:29438270ms step_avg:588.20ms +step:50048/57344 val_loss:5.456926 train_time:29438270ms step_avg:588.20ms +step:50049/57344 train_time:29438282ms step_avg:588.19ms +step:50050/57344 train_time:29438520ms step_avg:588.18ms +step:50051/57344 train_time:29439123ms step_avg:588.18ms +grad accum step:12513/14336 +step:50052/57344 train_time:29440419ms step_avg:588.20ms +step:50053/57344 train_time:29440435ms step_avg:588.19ms +step:50054/57344 train_time:29440686ms step_avg:588.18ms +step:50055/57344 train_time:29441257ms step_avg:588.18ms +grad accum step:12514/14336 +step:50056/57344 train_time:29442664ms step_avg:588.19ms +step:50057/57344 train_time:29442678ms step_avg:588.18ms +step:50058/57344 train_time:29442927ms step_avg:588.18ms +step:50059/57344 train_time:29443491ms step_avg:588.18ms +grad accum step:12515/14336 +step:50060/57344 train_time:29444852ms step_avg:588.19ms +step:50061/57344 train_time:29444869ms step_avg:588.18ms +step:50062/57344 train_time:29445123ms step_avg:588.17ms +step:50063/57344 train_time:29445698ms step_avg:588.17ms +grad accum step:12516/14336 +step:50064/57344 train_time:29447083ms step_avg:588.19ms +step:50065/57344 train_time:29447103ms step_avg:588.18ms +step:50066/57344 train_time:29447348ms step_avg:588.17ms +step:50067/57344 train_time:29447936ms step_avg:588.17ms +grad accum step:12517/14336 +step:50068/57344 train_time:29449303ms step_avg:588.19ms +step:50069/57344 train_time:29449320ms step_avg:588.17ms +step:50070/57344 train_time:29449596ms step_avg:588.17ms +step:50071/57344 train_time:29450225ms step_avg:588.17ms +grad accum step:12518/14336 +step:50072/57344 train_time:29451618ms step_avg:588.19ms +step:50073/57344 train_time:29451632ms step_avg:588.17ms +step:50074/57344 train_time:29451861ms step_avg:588.17ms +step:50075/57344 train_time:29452422ms step_avg:588.17ms +grad accum step:12519/14336 +step:50076/57344 train_time:29453794ms step_avg:588.18ms +step:50077/57344 train_time:29453811ms step_avg:588.17ms +step:50078/57344 train_time:29454064ms step_avg:588.16ms +step:50079/57344 train_time:29454642ms step_avg:588.16ms +grad accum step:12520/14336 +step:50080/57344 train_time:29456026ms step_avg:588.18ms +step:50081/57344 train_time:29456042ms step_avg:588.17ms +step:50082/57344 train_time:29456299ms step_avg:588.16ms +step:50083/57344 train_time:29456879ms step_avg:588.16ms +grad accum step:12521/14336 +step:50084/57344 train_time:29458324ms step_avg:588.18ms +step:50085/57344 train_time:29458341ms step_avg:588.17ms +step:50086/57344 train_time:29458595ms step_avg:588.16ms +step:50087/57344 train_time:29459174ms step_avg:588.16ms +grad accum step:12522/14336 +step:50088/57344 train_time:29460577ms step_avg:588.18ms +step:50089/57344 train_time:29460599ms step_avg:588.17ms +step:50090/57344 train_time:29460833ms step_avg:588.16ms +step:50091/57344 train_time:29461429ms step_avg:588.16ms +grad accum step:12523/14336 +step:50092/57344 train_time:29462886ms step_avg:588.18ms +step:50093/57344 train_time:29462901ms step_avg:588.16ms +step:50094/57344 train_time:29463156ms step_avg:588.16ms +step:50095/57344 train_time:29463729ms step_avg:588.16ms +grad accum step:12524/14336 +step:50096/57344 train_time:29465020ms step_avg:588.17ms +step:50097/57344 train_time:29465037ms step_avg:588.16ms +step:50098/57344 train_time:29465287ms step_avg:588.15ms +step:50099/57344 train_time:29465853ms step_avg:588.15ms +grad accum step:12525/14336 +step:50100/57344 train_time:29467233ms step_avg:588.17ms +step:50101/57344 train_time:29467247ms step_avg:588.16ms +step:50102/57344 train_time:29467503ms step_avg:588.15ms +step:50103/57344 train_time:29468087ms step_avg:588.15ms +grad accum step:12526/14336 +step:50104/57344 train_time:29469476ms step_avg:588.17ms +step:50105/57344 train_time:29469493ms step_avg:588.15ms +step:50106/57344 train_time:29469747ms step_avg:588.15ms +step:50107/57344 train_time:29470333ms step_avg:588.15ms +grad accum step:12527/14336 +step:50108/57344 train_time:29471746ms step_avg:588.16ms +step:50109/57344 train_time:29471760ms step_avg:588.15ms +step:50110/57344 train_time:29472014ms step_avg:588.15ms +step:50111/57344 train_time:29472592ms step_avg:588.15ms +grad accum step:12528/14336 +step:50112/57344 train_time:29473900ms step_avg:588.16ms +step:50112/57344 val_loss:5.455181 train_time:29473902ms step_avg:588.16ms +step:50113/57344 train_time:29473914ms step_avg:588.15ms +step:50114/57344 train_time:29474144ms step_avg:588.14ms +step:50115/57344 train_time:29474709ms step_avg:588.14ms +grad accum step:12529/14336 +step:50116/57344 train_time:29476070ms step_avg:588.16ms +step:50117/57344 train_time:29476086ms step_avg:588.15ms +step:50118/57344 train_time:29476340ms step_avg:588.14ms +step:50119/57344 train_time:29476912ms step_avg:588.14ms +grad accum step:12530/14336 +step:50120/57344 train_time:29478276ms step_avg:588.15ms +step:50121/57344 train_time:29478302ms step_avg:588.14ms +step:50122/57344 train_time:29478537ms step_avg:588.14ms +step:50123/57344 train_time:29479104ms step_avg:588.14ms +grad accum step:12531/14336 +step:50124/57344 train_time:29480499ms step_avg:588.15ms +step:50125/57344 train_time:29480516ms step_avg:588.14ms +step:50126/57344 train_time:29480775ms step_avg:588.13ms +step:50127/57344 train_time:29481374ms step_avg:588.13ms +grad accum step:12532/14336 +step:50128/57344 train_time:29482900ms step_avg:588.15ms +step:50129/57344 train_time:29482914ms step_avg:588.14ms +step:50130/57344 train_time:29483180ms step_avg:588.13ms +step:50131/57344 train_time:29483780ms step_avg:588.13ms +grad accum step:12533/14336 +step:50132/57344 train_time:29485120ms step_avg:588.15ms +step:50133/57344 train_time:29485138ms step_avg:588.14ms +step:50134/57344 train_time:29485363ms step_avg:588.13ms +step:50135/57344 train_time:29485906ms step_avg:588.13ms +grad accum step:12534/14336 +step:50136/57344 train_time:29487268ms step_avg:588.15ms +step:50137/57344 train_time:29487282ms step_avg:588.13ms +step:50138/57344 train_time:29487540ms step_avg:588.13ms +step:50139/57344 train_time:29488130ms step_avg:588.13ms +grad accum step:12535/14336 +step:50140/57344 train_time:29489484ms step_avg:588.14ms +step:50141/57344 train_time:29489500ms step_avg:588.13ms +step:50142/57344 train_time:29489745ms step_avg:588.12ms +step:50143/57344 train_time:29490311ms step_avg:588.12ms +grad accum step:12536/14336 +step:50144/57344 train_time:29491699ms step_avg:588.14ms +step:50145/57344 train_time:29491716ms step_avg:588.13ms +step:50146/57344 train_time:29491956ms step_avg:588.12ms +step:50147/57344 train_time:29492510ms step_avg:588.12ms +grad accum step:12537/14336 +step:50148/57344 train_time:29493907ms step_avg:588.14ms +step:50149/57344 train_time:29493922ms step_avg:588.13ms +step:50150/57344 train_time:29494167ms step_avg:588.12ms +step:50151/57344 train_time:29494723ms step_avg:588.12ms +grad accum step:12538/14336 +step:50152/57344 train_time:29496075ms step_avg:588.13ms +step:50153/57344 train_time:29496091ms step_avg:588.12ms +step:50154/57344 train_time:29496334ms step_avg:588.12ms +step:50155/57344 train_time:29496875ms step_avg:588.11ms +grad accum step:12539/14336 +step:50156/57344 train_time:29498272ms step_avg:588.13ms +step:50157/57344 train_time:29498288ms step_avg:588.12ms +step:50158/57344 train_time:29498532ms step_avg:588.11ms +step:50159/57344 train_time:29499069ms step_avg:588.11ms +grad accum step:12540/14336 +step:50160/57344 train_time:29500394ms step_avg:588.13ms +step:50161/57344 train_time:29500408ms step_avg:588.11ms +step:50162/57344 train_time:29500672ms step_avg:588.11ms +step:50163/57344 train_time:29501260ms step_avg:588.11ms +grad accum step:12541/14336 +step:50164/57344 train_time:29502573ms step_avg:588.12ms +step:50165/57344 train_time:29502589ms step_avg:588.11ms +step:50166/57344 train_time:29502849ms step_avg:588.10ms +step:50167/57344 train_time:29503447ms step_avg:588.10ms +grad accum step:12542/14336 +step:50168/57344 train_time:29504799ms step_avg:588.12ms +step:50169/57344 train_time:29504816ms step_avg:588.11ms +step:50170/57344 train_time:29505067ms step_avg:588.10ms +step:50171/57344 train_time:29505653ms step_avg:588.10ms +grad accum step:12543/14336 +step:50172/57344 train_time:29507091ms step_avg:588.12ms +step:50173/57344 train_time:29507107ms step_avg:588.11ms +step:50174/57344 train_time:29507352ms step_avg:588.10ms +step:50175/57344 train_time:29507937ms step_avg:588.10ms +grad accum step:12544/14336 +step:50176/57344 train_time:29509304ms step_avg:588.12ms +step:50176/57344 val_loss:5.449932 train_time:29509305ms step_avg:588.12ms +step:50177/57344 train_time:29509317ms step_avg:588.10ms +step:50178/57344 train_time:29509553ms step_avg:588.10ms +step:50179/57344 train_time:29510147ms step_avg:588.10ms +grad accum step:12545/14336 +step:50180/57344 train_time:29511614ms step_avg:588.12ms +step:50181/57344 train_time:29511631ms step_avg:588.10ms +step:50182/57344 train_time:29511881ms step_avg:588.10ms +step:50183/57344 train_time:29512464ms step_avg:588.10ms +grad accum step:12546/14336 +step:50184/57344 train_time:29513852ms step_avg:588.11ms +step:50185/57344 train_time:29513873ms step_avg:588.10ms +step:50186/57344 train_time:29514093ms step_avg:588.09ms +step:50187/57344 train_time:29514640ms step_avg:588.09ms +grad accum step:12547/14336 +step:50188/57344 train_time:29515988ms step_avg:588.11ms +step:50189/57344 train_time:29515999ms step_avg:588.10ms +step:50190/57344 train_time:29516248ms step_avg:588.09ms +step:50191/57344 train_time:29516805ms step_avg:588.09ms +grad accum step:12548/14336 +step:50192/57344 train_time:29518187ms step_avg:588.11ms +step:50193/57344 train_time:29518203ms step_avg:588.09ms +step:50194/57344 train_time:29518440ms step_avg:588.09ms +step:50195/57344 train_time:29519027ms step_avg:588.09ms +grad accum step:12549/14336 +step:50196/57344 train_time:29520566ms step_avg:588.11ms +step:50197/57344 train_time:29520582ms step_avg:588.09ms +step:50198/57344 train_time:29520833ms step_avg:588.09ms +step:50199/57344 train_time:29521412ms step_avg:588.09ms +grad accum step:12550/14336 +step:50200/57344 train_time:29522850ms step_avg:588.10ms +step:50201/57344 train_time:29522867ms step_avg:588.09ms +step:50202/57344 train_time:29523125ms step_avg:588.09ms +step:50203/57344 train_time:29523709ms step_avg:588.09ms +grad accum step:12551/14336 +step:50204/57344 train_time:29525012ms step_avg:588.10ms +step:50205/57344 train_time:29525029ms step_avg:588.09ms +step:50206/57344 train_time:29525280ms step_avg:588.08ms +step:50207/57344 train_time:29525849ms step_avg:588.08ms +grad accum step:12552/14336 +step:50208/57344 train_time:29527195ms step_avg:588.10ms +step:50209/57344 train_time:29527216ms step_avg:588.09ms +step:50210/57344 train_time:29527453ms step_avg:588.08ms +step:50211/57344 train_time:29528028ms step_avg:588.08ms +grad accum step:12553/14336 +step:50212/57344 train_time:29529379ms step_avg:588.09ms +step:50213/57344 train_time:29529396ms step_avg:588.08ms +step:50214/57344 train_time:29529649ms step_avg:588.08ms +step:50215/57344 train_time:29530221ms step_avg:588.08ms +grad accum step:12554/14336 +step:50216/57344 train_time:29531577ms step_avg:588.09ms +step:50217/57344 train_time:29531594ms step_avg:588.08ms +step:50218/57344 train_time:29531852ms step_avg:588.07ms +step:50219/57344 train_time:29532423ms step_avg:588.07ms +grad accum step:12555/14336 +step:50220/57344 train_time:29533748ms step_avg:588.09ms +step:50221/57344 train_time:29533758ms step_avg:588.08ms +step:50222/57344 train_time:29533993ms step_avg:588.07ms +step:50223/57344 train_time:29534543ms step_avg:588.07ms +grad accum step:12556/14336 +step:50224/57344 train_time:29535906ms step_avg:588.08ms +step:50225/57344 train_time:29535918ms step_avg:588.07ms +step:50226/57344 train_time:29536159ms step_avg:588.07ms +step:50227/57344 train_time:29536748ms step_avg:588.07ms +grad accum step:12557/14336 +step:50228/57344 train_time:29538122ms step_avg:588.08ms +step:50229/57344 train_time:29538138ms step_avg:588.07ms +step:50230/57344 train_time:29538384ms step_avg:588.06ms +step:50231/57344 train_time:29538936ms step_avg:588.06ms +grad accum step:12558/14336 +step:50232/57344 train_time:29540335ms step_avg:588.08ms +step:50233/57344 train_time:29540352ms step_avg:588.07ms +step:50234/57344 train_time:29540599ms step_avg:588.06ms +step:50235/57344 train_time:29541158ms step_avg:588.06ms +grad accum step:12559/14336 +step:50236/57344 train_time:29542503ms step_avg:588.07ms +step:50237/57344 train_time:29542520ms step_avg:588.06ms +step:50238/57344 train_time:29542775ms step_avg:588.06ms +step:50239/57344 train_time:29543353ms step_avg:588.06ms +grad accum step:12560/14336 +step:50240/57344 train_time:29544695ms step_avg:588.07ms +step:50240/57344 val_loss:5.446966 train_time:29544709ms step_avg:588.07ms +step:50241/57344 train_time:29544721ms step_avg:588.06ms +step:50242/57344 train_time:29544953ms step_avg:588.05ms +step:50243/57344 train_time:29545539ms step_avg:588.05ms +grad accum step:12561/14336 +step:50244/57344 train_time:29546939ms step_avg:588.07ms +step:50245/57344 train_time:29546950ms step_avg:588.06ms +step:50246/57344 train_time:29547204ms step_avg:588.05ms +step:50247/57344 train_time:29547785ms step_avg:588.05ms +grad accum step:12562/14336 +step:50248/57344 train_time:29549124ms step_avg:588.07ms +step:50249/57344 train_time:29549141ms step_avg:588.05ms +step:50250/57344 train_time:29549392ms step_avg:588.05ms +step:50251/57344 train_time:29549966ms step_avg:588.05ms +grad accum step:12563/14336 +step:50252/57344 train_time:29551344ms step_avg:588.06ms +step:50253/57344 train_time:29551360ms step_avg:588.05ms +step:50254/57344 train_time:29551619ms step_avg:588.05ms +step:50255/57344 train_time:29552206ms step_avg:588.05ms +grad accum step:12564/14336 +step:50256/57344 train_time:29553555ms step_avg:588.06ms +step:50257/57344 train_time:29553571ms step_avg:588.05ms +step:50258/57344 train_time:29553830ms step_avg:588.04ms +step:50259/57344 train_time:29554420ms step_avg:588.04ms +grad accum step:12565/14336 +step:50260/57344 train_time:29555772ms step_avg:588.06ms +step:50261/57344 train_time:29555787ms step_avg:588.05ms +step:50262/57344 train_time:29556033ms step_avg:588.04ms +step:50263/57344 train_time:29556589ms step_avg:588.04ms +grad accum step:12566/14336 +step:50264/57344 train_time:29557927ms step_avg:588.05ms +step:50265/57344 train_time:29557944ms step_avg:588.04ms +step:50266/57344 train_time:29558190ms step_avg:588.04ms +step:50267/57344 train_time:29558757ms step_avg:588.04ms +grad accum step:12567/14336 +step:50268/57344 train_time:29560122ms step_avg:588.05ms +step:50269/57344 train_time:29560139ms step_avg:588.04ms +step:50270/57344 train_time:29560392ms step_avg:588.03ms +step:50271/57344 train_time:29560967ms step_avg:588.03ms +grad accum step:12568/14336 +step:50272/57344 train_time:29562334ms step_avg:588.05ms +step:50273/57344 train_time:29562350ms step_avg:588.04ms +step:50274/57344 train_time:29562595ms step_avg:588.03ms +step:50275/57344 train_time:29563144ms step_avg:588.03ms +grad accum step:12569/14336 +step:50276/57344 train_time:29564507ms step_avg:588.04ms +step:50277/57344 train_time:29564524ms step_avg:588.03ms +step:50278/57344 train_time:29564810ms step_avg:588.03ms +step:50279/57344 train_time:29565479ms step_avg:588.03ms +grad accum step:12570/14336 +step:50280/57344 train_time:29566822ms step_avg:588.04ms +step:50281/57344 train_time:29566839ms step_avg:588.03ms +step:50282/57344 train_time:29567090ms step_avg:588.03ms +step:50283/57344 train_time:29567662ms step_avg:588.03ms +grad accum step:12571/14336 +step:50284/57344 train_time:29568996ms step_avg:588.04ms +step:50285/57344 train_time:29569011ms step_avg:588.03ms +step:50286/57344 train_time:29569263ms step_avg:588.02ms +step:50287/57344 train_time:29569826ms step_avg:588.02ms +grad accum step:12572/14336 +step:50288/57344 train_time:29571189ms step_avg:588.04ms +step:50289/57344 train_time:29571205ms step_avg:588.03ms +step:50290/57344 train_time:29571465ms step_avg:588.02ms +step:50291/57344 train_time:29572056ms step_avg:588.02ms +grad accum step:12573/14336 +step:50292/57344 train_time:29573396ms step_avg:588.03ms +step:50293/57344 train_time:29573408ms step_avg:588.02ms +step:50294/57344 train_time:29573665ms step_avg:588.02ms +step:50295/57344 train_time:29574245ms step_avg:588.02ms +grad accum step:12574/14336 +step:50296/57344 train_time:29575609ms step_avg:588.03ms +step:50297/57344 train_time:29575625ms step_avg:588.02ms +step:50298/57344 train_time:29575876ms step_avg:588.01ms +step:50299/57344 train_time:29576467ms step_avg:588.01ms +grad accum step:12575/14336 +step:50300/57344 train_time:29577854ms step_avg:588.03ms +step:50301/57344 train_time:29577871ms step_avg:588.02ms +step:50302/57344 train_time:29578124ms step_avg:588.01ms +step:50303/57344 train_time:29578700ms step_avg:588.01ms +grad accum step:12576/14336 +step:50304/57344 train_time:29580104ms step_avg:588.03ms +step:50304/57344 val_loss:5.445749 train_time:29580105ms step_avg:588.03ms +step:50305/57344 train_time:29580116ms step_avg:588.02ms +step:50306/57344 train_time:29580345ms step_avg:588.01ms +step:50307/57344 train_time:29580907ms step_avg:588.01ms +grad accum step:12577/14336 +step:50308/57344 train_time:29582297ms step_avg:588.02ms +step:50309/57344 train_time:29582321ms step_avg:588.01ms +step:50310/57344 train_time:29582542ms step_avg:588.01ms +step:50311/57344 train_time:29583106ms step_avg:588.00ms +grad accum step:12578/14336 +step:50312/57344 train_time:29584526ms step_avg:588.02ms +step:50313/57344 train_time:29584542ms step_avg:588.01ms +step:50314/57344 train_time:29584814ms step_avg:588.00ms +step:50315/57344 train_time:29585430ms step_avg:588.00ms +grad accum step:12579/14336 +step:50316/57344 train_time:29586850ms step_avg:588.02ms +step:50317/57344 train_time:29586866ms step_avg:588.01ms +step:50318/57344 train_time:29587115ms step_avg:588.00ms +step:50319/57344 train_time:29587674ms step_avg:588.00ms +grad accum step:12580/14336 +step:50320/57344 train_time:29589039ms step_avg:588.02ms +step:50321/57344 train_time:29589060ms step_avg:588.01ms +step:50322/57344 train_time:29589281ms step_avg:588.00ms +step:50323/57344 train_time:29589831ms step_avg:588.00ms +grad accum step:12581/14336 +step:50324/57344 train_time:29591203ms step_avg:588.01ms +step:50325/57344 train_time:29591218ms step_avg:588.00ms +step:50326/57344 train_time:29591466ms step_avg:588.00ms +step:50327/57344 train_time:29592027ms step_avg:588.00ms +grad accum step:12582/14336 +step:50328/57344 train_time:29593394ms step_avg:588.01ms +step:50329/57344 train_time:29593411ms step_avg:588.00ms +step:50330/57344 train_time:29593675ms step_avg:587.99ms +step:50331/57344 train_time:29594277ms step_avg:587.99ms +grad accum step:12583/14336 +step:50332/57344 train_time:29595708ms step_avg:588.01ms +step:50333/57344 train_time:29595724ms step_avg:588.00ms +step:50334/57344 train_time:29595974ms step_avg:587.99ms +step:50335/57344 train_time:29596518ms step_avg:587.99ms +grad accum step:12584/14336 +step:50336/57344 train_time:29597898ms step_avg:588.01ms +step:50337/57344 train_time:29597909ms step_avg:588.00ms +step:50338/57344 train_time:29598147ms step_avg:587.99ms +step:50339/57344 train_time:29598709ms step_avg:587.99ms +grad accum step:12585/14336 +step:50340/57344 train_time:29600093ms step_avg:588.00ms +step:50341/57344 train_time:29600110ms step_avg:587.99ms +step:50342/57344 train_time:29600361ms step_avg:587.99ms +step:50343/57344 train_time:29600923ms step_avg:587.98ms +grad accum step:12586/14336 +step:50344/57344 train_time:29602267ms step_avg:588.00ms +step:50345/57344 train_time:29602279ms step_avg:587.99ms +step:50346/57344 train_time:29602533ms step_avg:587.98ms +step:50347/57344 train_time:29603102ms step_avg:587.98ms +grad accum step:12587/14336 +step:50348/57344 train_time:29604436ms step_avg:588.00ms +step:50349/57344 train_time:29604452ms step_avg:587.98ms +step:50350/57344 train_time:29604713ms step_avg:587.98ms +step:50351/57344 train_time:29605337ms step_avg:587.98ms +grad accum step:12588/14336 +step:50352/57344 train_time:29606799ms step_avg:588.00ms +step:50353/57344 train_time:29606833ms step_avg:587.99ms +step:50354/57344 train_time:29607055ms step_avg:587.98ms +step:50355/57344 train_time:29607630ms step_avg:587.98ms +grad accum step:12589/14336 +step:50356/57344 train_time:29609051ms step_avg:587.99ms +step:50357/57344 train_time:29609087ms step_avg:587.98ms +step:50358/57344 train_time:29609322ms step_avg:587.98ms +step:50359/57344 train_time:29609913ms step_avg:587.98ms +grad accum step:12590/14336 +step:50360/57344 train_time:29611248ms step_avg:587.99ms +step:50361/57344 train_time:29611259ms step_avg:587.98ms +step:50362/57344 train_time:29611506ms step_avg:587.97ms +step:50363/57344 train_time:29612058ms step_avg:587.97ms +grad accum step:12591/14336 +step:50364/57344 train_time:29613420ms step_avg:587.99ms +step:50365/57344 train_time:29613437ms step_avg:587.98ms +step:50366/57344 train_time:29613692ms step_avg:587.97ms +step:50367/57344 train_time:29614272ms step_avg:587.97ms +grad accum step:12592/14336 +step:50368/57344 train_time:29615648ms step_avg:587.99ms +step:50368/57344 val_loss:5.442234 train_time:29615652ms step_avg:587.99ms +step:50369/57344 train_time:29615664ms step_avg:587.97ms +step:50370/57344 train_time:29615896ms step_avg:587.97ms +step:50371/57344 train_time:29616484ms step_avg:587.97ms +grad accum step:12593/14336 +step:50372/57344 train_time:29617848ms step_avg:587.98ms +step:50373/57344 train_time:29617865ms step_avg:587.97ms +step:50374/57344 train_time:29618121ms step_avg:587.96ms +step:50375/57344 train_time:29618716ms step_avg:587.96ms +grad accum step:12594/14336 +step:50376/57344 train_time:29620125ms step_avg:587.98ms +step:50377/57344 train_time:29620142ms step_avg:587.97ms +step:50378/57344 train_time:29620401ms step_avg:587.96ms +step:50379/57344 train_time:29620990ms step_avg:587.96ms +grad accum step:12595/14336 +step:50380/57344 train_time:29623939ms step_avg:588.01ms +step:50381/57344 train_time:29624134ms step_avg:588.00ms +step:50382/57344 train_time:29624352ms step_avg:587.99ms +step:50383/57344 train_time:29624935ms step_avg:587.99ms +grad accum step:12596/14336 +step:50384/57344 train_time:29626306ms step_avg:588.01ms +step:50385/57344 train_time:29626323ms step_avg:588.00ms +step:50386/57344 train_time:29626572ms step_avg:587.99ms +step:50387/57344 train_time:29627138ms step_avg:587.99ms +grad accum step:12597/14336 +step:50388/57344 train_time:29628568ms step_avg:588.01ms +step:50389/57344 train_time:29628584ms step_avg:588.00ms +step:50390/57344 train_time:29628833ms step_avg:587.99ms +step:50391/57344 train_time:29629400ms step_avg:587.99ms +grad accum step:12598/14336 +step:50392/57344 train_time:29630781ms step_avg:588.01ms +step:50393/57344 train_time:29630797ms step_avg:587.99ms +step:50394/57344 train_time:29631048ms step_avg:587.99ms +step:50395/57344 train_time:29631617ms step_avg:587.99ms +grad accum step:12599/14336 +step:50396/57344 train_time:29632975ms step_avg:588.00ms +step:50397/57344 train_time:29632986ms step_avg:587.99ms +step:50398/57344 train_time:29633228ms step_avg:587.98ms +step:50399/57344 train_time:29633786ms step_avg:587.98ms +grad accum step:12600/14336 +step:50400/57344 train_time:29635197ms step_avg:588.00ms +step:50401/57344 train_time:29635209ms step_avg:587.99ms +step:50402/57344 train_time:29635454ms step_avg:587.98ms +step:50403/57344 train_time:29636017ms step_avg:587.98ms +grad accum step:12601/14336 +step:50404/57344 train_time:29637336ms step_avg:588.00ms +step:50405/57344 train_time:29637353ms step_avg:587.98ms +step:50406/57344 train_time:29637602ms step_avg:587.98ms +step:50407/57344 train_time:29638167ms step_avg:587.98ms +grad accum step:12602/14336 +step:50408/57344 train_time:29639539ms step_avg:587.99ms +step:50409/57344 train_time:29639552ms step_avg:587.98ms +step:50410/57344 train_time:29639804ms step_avg:587.97ms +step:50411/57344 train_time:29640370ms step_avg:587.97ms +grad accum step:12603/14336 +step:50412/57344 train_time:29641730ms step_avg:587.99ms +step:50413/57344 train_time:29641753ms step_avg:587.98ms +step:50414/57344 train_time:29641979ms step_avg:587.97ms +step:50415/57344 train_time:29642544ms step_avg:587.97ms +grad accum step:12604/14336 +step:50416/57344 train_time:29643911ms step_avg:587.99ms +step:50417/57344 train_time:29643948ms step_avg:587.98ms +step:50418/57344 train_time:29644180ms step_avg:587.97ms +step:50419/57344 train_time:29644757ms step_avg:587.97ms +grad accum step:12605/14336 +step:50420/57344 train_time:29646069ms step_avg:587.98ms +step:50421/57344 train_time:29646086ms step_avg:587.97ms +step:50422/57344 train_time:29646339ms step_avg:587.96ms +step:50423/57344 train_time:29646922ms step_avg:587.96ms +grad accum step:12606/14336 +step:50424/57344 train_time:29648351ms step_avg:587.98ms +step:50425/57344 train_time:29648363ms step_avg:587.97ms +step:50426/57344 train_time:29648619ms step_avg:587.96ms +step:50427/57344 train_time:29649198ms step_avg:587.96ms +grad accum step:12607/14336 +step:50428/57344 train_time:29650568ms step_avg:587.98ms +step:50429/57344 train_time:29650597ms step_avg:587.97ms +step:50430/57344 train_time:29650823ms step_avg:587.96ms +step:50431/57344 train_time:29651400ms step_avg:587.96ms +grad accum step:12608/14336 +step:50432/57344 train_time:29652728ms step_avg:587.97ms +step:50432/57344 val_loss:5.437803 train_time:29652729ms step_avg:587.97ms +step:50433/57344 train_time:29652741ms step_avg:587.96ms +step:50434/57344 train_time:29652969ms step_avg:587.96ms +step:50435/57344 train_time:29653541ms step_avg:587.96ms +grad accum step:12609/14336 +step:50436/57344 train_time:29654946ms step_avg:587.97ms +step:50437/57344 train_time:29654964ms step_avg:587.96ms +step:50438/57344 train_time:29655193ms step_avg:587.95ms +step:50439/57344 train_time:29655759ms step_avg:587.95ms +grad accum step:12610/14336 +step:50440/57344 train_time:29657095ms step_avg:587.97ms +step:50441/57344 train_time:29657111ms step_avg:587.96ms +step:50442/57344 train_time:29657359ms step_avg:587.95ms +step:50443/57344 train_time:29657928ms step_avg:587.95ms +grad accum step:12611/14336 +step:50444/57344 train_time:29659309ms step_avg:587.97ms +step:50445/57344 train_time:29659330ms step_avg:587.95ms +step:50446/57344 train_time:29659570ms step_avg:587.95ms +step:50447/57344 train_time:29660159ms step_avg:587.95ms +grad accum step:12612/14336 +step:50448/57344 train_time:29661565ms step_avg:587.96ms +step:50449/57344 train_time:29661588ms step_avg:587.95ms +step:50450/57344 train_time:29661824ms step_avg:587.94ms +step:50451/57344 train_time:29662381ms step_avg:587.94ms +grad accum step:12613/14336 +step:50452/57344 train_time:29663771ms step_avg:587.96ms +step:50453/57344 train_time:29663783ms step_avg:587.95ms +step:50454/57344 train_time:29664011ms step_avg:587.94ms +step:50455/57344 train_time:29664569ms step_avg:587.94ms +grad accum step:12614/14336 +step:50456/57344 train_time:29665929ms step_avg:587.96ms +step:50457/57344 train_time:29665945ms step_avg:587.95ms +step:50458/57344 train_time:29666196ms step_avg:587.94ms +step:50459/57344 train_time:29666769ms step_avg:587.94ms +grad accum step:12615/14336 +step:50460/57344 train_time:29668163ms step_avg:587.95ms +step:50461/57344 train_time:29668179ms step_avg:587.94ms +step:50462/57344 train_time:29668424ms step_avg:587.94ms +step:50463/57344 train_time:29668966ms step_avg:587.94ms +grad accum step:12616/14336 +step:50464/57344 train_time:29670311ms step_avg:587.95ms +step:50465/57344 train_time:29670328ms step_avg:587.94ms +step:50466/57344 train_time:29670572ms step_avg:587.93ms +step:50467/57344 train_time:29671113ms step_avg:587.93ms +grad accum step:12617/14336 +step:50468/57344 train_time:29672608ms step_avg:587.95ms +step:50469/57344 train_time:29672620ms step_avg:587.94ms +step:50470/57344 train_time:29672863ms step_avg:587.93ms +step:50471/57344 train_time:29673413ms step_avg:587.93ms +grad accum step:12618/14336 +step:50472/57344 train_time:29674757ms step_avg:587.94ms +step:50473/57344 train_time:29674772ms step_avg:587.93ms +step:50474/57344 train_time:29675038ms step_avg:587.93ms +step:50475/57344 train_time:29675651ms step_avg:587.93ms +grad accum step:12619/14336 +step:50476/57344 train_time:29676987ms step_avg:587.94ms +step:50477/57344 train_time:29677004ms step_avg:587.93ms +step:50478/57344 train_time:29677252ms step_avg:587.92ms +step:50479/57344 train_time:29677817ms step_avg:587.92ms +grad accum step:12620/14336 +step:50480/57344 train_time:29679177ms step_avg:587.94ms +step:50481/57344 train_time:29679191ms step_avg:587.93ms +step:50482/57344 train_time:29679445ms step_avg:587.92ms +step:50483/57344 train_time:29680017ms step_avg:587.92ms +grad accum step:12621/14336 +step:50484/57344 train_time:29681362ms step_avg:587.94ms +step:50485/57344 train_time:29681377ms step_avg:587.92ms +step:50486/57344 train_time:29681621ms step_avg:587.92ms +step:50487/57344 train_time:29682197ms step_avg:587.92ms +grad accum step:12622/14336 +step:50488/57344 train_time:29683538ms step_avg:587.93ms +step:50489/57344 train_time:29683554ms step_avg:587.92ms +step:50490/57344 train_time:29683800ms step_avg:587.91ms +step:50491/57344 train_time:29684352ms step_avg:587.91ms +grad accum step:12623/14336 +step:50492/57344 train_time:29685702ms step_avg:587.93ms +step:50493/57344 train_time:29685718ms step_avg:587.92ms +step:50494/57344 train_time:29685982ms step_avg:587.91ms +step:50495/57344 train_time:29686588ms step_avg:587.91ms +grad accum step:12624/14336 +step:50496/57344 train_time:29687947ms step_avg:587.93ms +step:50496/57344 val_loss:5.436185 train_time:29687953ms step_avg:587.93ms +step:50497/57344 train_time:29687964ms step_avg:587.92ms +step:50498/57344 train_time:29688185ms step_avg:587.91ms +step:50499/57344 train_time:29688732ms step_avg:587.91ms +grad accum step:12625/14336 +step:50500/57344 train_time:29690069ms step_avg:587.92ms +step:50501/57344 train_time:29690087ms step_avg:587.91ms +step:50502/57344 train_time:29690314ms step_avg:587.90ms +step:50503/57344 train_time:29690877ms step_avg:587.90ms +grad accum step:12626/14336 +step:50504/57344 train_time:29692226ms step_avg:587.92ms +step:50505/57344 train_time:29692241ms step_avg:587.91ms +step:50506/57344 train_time:29692490ms step_avg:587.90ms +step:50507/57344 train_time:29693047ms step_avg:587.90ms +grad accum step:12627/14336 +step:50508/57344 train_time:29694460ms step_avg:587.92ms +step:50509/57344 train_time:29694490ms step_avg:587.90ms +step:50510/57344 train_time:29694715ms step_avg:587.90ms +step:50511/57344 train_time:29695272ms step_avg:587.90ms +grad accum step:12628/14336 +step:50512/57344 train_time:29696604ms step_avg:587.91ms +step:50513/57344 train_time:29696618ms step_avg:587.90ms +step:50514/57344 train_time:29696863ms step_avg:587.89ms +step:50515/57344 train_time:29697441ms step_avg:587.89ms +grad accum step:12629/14336 +step:50516/57344 train_time:29698790ms step_avg:587.91ms +step:50517/57344 train_time:29698813ms step_avg:587.90ms +step:50518/57344 train_time:29699056ms step_avg:587.89ms +step:50519/57344 train_time:29699643ms step_avg:587.89ms +grad accum step:12630/14336 +step:50520/57344 train_time:29701007ms step_avg:587.91ms +step:50521/57344 train_time:29701023ms step_avg:587.89ms +step:50522/57344 train_time:29701274ms step_avg:587.89ms +step:50523/57344 train_time:29701842ms step_avg:587.89ms +grad accum step:12631/14336 +step:50524/57344 train_time:29703162ms step_avg:587.90ms +step:50525/57344 train_time:29703174ms step_avg:587.89ms +step:50526/57344 train_time:29703417ms step_avg:587.88ms +step:50527/57344 train_time:29703980ms step_avg:587.88ms +grad accum step:12632/14336 +step:50528/57344 train_time:29705375ms step_avg:587.90ms +step:50529/57344 train_time:29705394ms step_avg:587.89ms +step:50530/57344 train_time:29705630ms step_avg:587.88ms +step:50531/57344 train_time:29706202ms step_avg:587.88ms +grad accum step:12633/14336 +step:50532/57344 train_time:29707557ms step_avg:587.90ms +step:50533/57344 train_time:29707573ms step_avg:587.88ms +step:50534/57344 train_time:29707825ms step_avg:587.88ms +step:50535/57344 train_time:29708390ms step_avg:587.88ms +grad accum step:12634/14336 +step:50536/57344 train_time:29709737ms step_avg:587.89ms +step:50537/57344 train_time:29709756ms step_avg:587.88ms +step:50538/57344 train_time:29710004ms step_avg:587.87ms +step:50539/57344 train_time:29710597ms step_avg:587.87ms +grad accum step:12635/14336 +step:50540/57344 train_time:29711952ms step_avg:587.89ms +step:50541/57344 train_time:29711973ms step_avg:587.88ms +step:50542/57344 train_time:29712202ms step_avg:587.87ms +step:50543/57344 train_time:29712761ms step_avg:587.87ms +grad accum step:12636/14336 +step:50544/57344 train_time:29714104ms step_avg:587.89ms +step:50545/57344 train_time:29714122ms step_avg:587.87ms +step:50546/57344 train_time:29714353ms step_avg:587.87ms +step:50547/57344 train_time:29714925ms step_avg:587.87ms +grad accum step:12637/14336 +step:50548/57344 train_time:29716281ms step_avg:587.88ms +step:50549/57344 train_time:29716296ms step_avg:587.87ms +step:50550/57344 train_time:29716542ms step_avg:587.86ms +step:50551/57344 train_time:29717099ms step_avg:587.86ms +grad accum step:12638/14336 +step:50552/57344 train_time:29718422ms step_avg:587.88ms +step:50553/57344 train_time:29718438ms step_avg:587.87ms +step:50554/57344 train_time:29718691ms step_avg:587.86ms +step:50555/57344 train_time:29719258ms step_avg:587.86ms +grad accum step:12639/14336 +step:50556/57344 train_time:29720592ms step_avg:587.87ms +step:50557/57344 train_time:29720611ms step_avg:587.86ms +step:50558/57344 train_time:29720847ms step_avg:587.86ms +step:50559/57344 train_time:29721415ms step_avg:587.86ms +grad accum step:12640/14336 +step:50560/57344 train_time:29722902ms step_avg:587.87ms +step:50560/57344 val_loss:5.431937 train_time:29722909ms step_avg:587.87ms +step:50561/57344 train_time:29722920ms step_avg:587.86ms +step:50562/57344 train_time:29723147ms step_avg:587.86ms +step:50563/57344 train_time:29723723ms step_avg:587.86ms +grad accum step:12641/14336 +step:50564/57344 train_time:29725094ms step_avg:587.87ms +step:50565/57344 train_time:29725109ms step_avg:587.86ms +step:50566/57344 train_time:29725365ms step_avg:587.85ms +step:50567/57344 train_time:29725961ms step_avg:587.85ms +grad accum step:12642/14336 +step:50568/57344 train_time:29727312ms step_avg:587.87ms +step:50569/57344 train_time:29727452ms step_avg:587.86ms +step:50570/57344 train_time:29727676ms step_avg:587.85ms +step:50571/57344 train_time:29728246ms step_avg:587.85ms +grad accum step:12643/14336 +step:50572/57344 train_time:29729572ms step_avg:587.87ms +step:50573/57344 train_time:29729589ms step_avg:587.85ms +step:50574/57344 train_time:29729833ms step_avg:587.85ms +step:50575/57344 train_time:29730394ms step_avg:587.85ms +grad accum step:12644/14336 +step:50576/57344 train_time:29731705ms step_avg:587.86ms +step:50577/57344 train_time:29731718ms step_avg:587.85ms +step:50578/57344 train_time:29731974ms step_avg:587.84ms +step:50579/57344 train_time:29732556ms step_avg:587.84ms +grad accum step:12645/14336 +step:50580/57344 train_time:29734000ms step_avg:587.86ms +step:50581/57344 train_time:29734014ms step_avg:587.85ms +step:50582/57344 train_time:29734265ms step_avg:587.84ms +step:50583/57344 train_time:29734829ms step_avg:587.84ms +grad accum step:12646/14336 +step:50584/57344 train_time:29736197ms step_avg:587.86ms +step:50585/57344 train_time:29736211ms step_avg:587.85ms +step:50586/57344 train_time:29736460ms step_avg:587.84ms +step:50587/57344 train_time:29737033ms step_avg:587.84ms +grad accum step:12647/14336 +step:50588/57344 train_time:29738368ms step_avg:587.85ms +step:50589/57344 train_time:29738389ms step_avg:587.84ms +step:50590/57344 train_time:29738624ms step_avg:587.84ms +step:50591/57344 train_time:29739189ms step_avg:587.84ms +grad accum step:12648/14336 +step:50592/57344 train_time:29740615ms step_avg:587.85ms +step:50593/57344 train_time:29740631ms step_avg:587.84ms +step:50594/57344 train_time:29740883ms step_avg:587.83ms +step:50595/57344 train_time:29741463ms step_avg:587.83ms +grad accum step:12649/14336 +step:50596/57344 train_time:29742856ms step_avg:587.85ms +step:50597/57344 train_time:29742871ms step_avg:587.84ms +step:50598/57344 train_time:29743121ms step_avg:587.83ms +step:50599/57344 train_time:29743689ms step_avg:587.83ms +grad accum step:12650/14336 +step:50600/57344 train_time:29745018ms step_avg:587.85ms +step:50601/57344 train_time:29745038ms step_avg:587.83ms +step:50602/57344 train_time:29745293ms step_avg:587.83ms +step:50603/57344 train_time:29745898ms step_avg:587.83ms +grad accum step:12651/14336 +step:50604/57344 train_time:29747261ms step_avg:587.84ms +step:50605/57344 train_time:29747275ms step_avg:587.83ms +step:50606/57344 train_time:29747554ms step_avg:587.83ms +step:50607/57344 train_time:29748189ms step_avg:587.83ms +grad accum step:12652/14336 +step:50608/57344 train_time:29749551ms step_avg:587.84ms +step:50609/57344 train_time:29749572ms step_avg:587.83ms +step:50610/57344 train_time:29749805ms step_avg:587.82ms +step:50611/57344 train_time:29750380ms step_avg:587.82ms +grad accum step:12653/14336 +step:50612/57344 train_time:29751714ms step_avg:587.84ms +step:50613/57344 train_time:29751730ms step_avg:587.83ms +step:50614/57344 train_time:29751978ms step_avg:587.82ms +step:50615/57344 train_time:29752522ms step_avg:587.82ms +grad accum step:12654/14336 +step:50616/57344 train_time:29753863ms step_avg:587.84ms +step:50617/57344 train_time:29753882ms step_avg:587.82ms +step:50618/57344 train_time:29754122ms step_avg:587.82ms +step:50619/57344 train_time:29754669ms step_avg:587.82ms +grad accum step:12655/14336 +step:50620/57344 train_time:29756356ms step_avg:587.84ms +step:50621/57344 train_time:29756376ms step_avg:587.83ms +step:50622/57344 train_time:29756605ms step_avg:587.82ms +step:50623/57344 train_time:29757190ms step_avg:587.82ms +grad accum step:12656/14336 +step:50624/57344 train_time:29758586ms step_avg:587.84ms +step:50624/57344 val_loss:5.430612 train_time:29758591ms step_avg:587.84ms +step:50625/57344 train_time:29758603ms step_avg:587.82ms +step:50626/57344 train_time:29758833ms step_avg:587.82ms +step:50627/57344 train_time:29759436ms step_avg:587.82ms +grad accum step:12657/14336 +step:50628/57344 train_time:29760875ms step_avg:587.83ms +step:50629/57344 train_time:29760900ms step_avg:587.82ms +step:50630/57344 train_time:29761126ms step_avg:587.82ms +step:50631/57344 train_time:29761678ms step_avg:587.82ms +grad accum step:12658/14336 +step:50632/57344 train_time:29763006ms step_avg:587.83ms +step:50633/57344 train_time:29763020ms step_avg:587.82ms +step:50634/57344 train_time:29763269ms step_avg:587.81ms +step:50635/57344 train_time:29763831ms step_avg:587.81ms +grad accum step:12659/14336 +step:50636/57344 train_time:29765284ms step_avg:587.83ms +step:50637/57344 train_time:29765312ms step_avg:587.82ms +step:50638/57344 train_time:29765542ms step_avg:587.81ms +step:50639/57344 train_time:29766121ms step_avg:587.81ms +grad accum step:12660/14336 +step:50640/57344 train_time:29767499ms step_avg:587.83ms +step:50641/57344 train_time:29767540ms step_avg:587.82ms +step:50642/57344 train_time:29767759ms step_avg:587.81ms +step:50643/57344 train_time:29768309ms step_avg:587.81ms +grad accum step:12661/14336 +step:50644/57344 train_time:29769702ms step_avg:587.82ms +step:50645/57344 train_time:29769719ms step_avg:587.81ms +step:50646/57344 train_time:29769973ms step_avg:587.81ms +step:50647/57344 train_time:29770576ms step_avg:587.81ms +grad accum step:12662/14336 +step:50648/57344 train_time:29771965ms step_avg:587.82ms +step:50649/57344 train_time:29772465ms step_avg:587.82ms +step:50650/57344 train_time:29772531ms step_avg:587.81ms +step:50651/57344 train_time:29773073ms step_avg:587.81ms +grad accum step:12663/14336 +step:50652/57344 train_time:29774471ms step_avg:587.82ms +step:50653/57344 train_time:29774570ms step_avg:587.81ms +step:50654/57344 train_time:29774789ms step_avg:587.81ms +step:50655/57344 train_time:29776614ms step_avg:587.83ms +grad accum step:12664/14336 +step:50656/57344 train_time:29777415ms step_avg:587.84ms +step:50657/57344 train_time:29777446ms step_avg:587.82ms +step:50658/57344 train_time:29777680ms step_avg:587.82ms +step:50659/57344 train_time:29778259ms step_avg:587.82ms +grad accum step:12665/14336 +step:50660/57344 train_time:29779640ms step_avg:587.83ms +step:50661/57344 train_time:29779658ms step_avg:587.82ms +step:50662/57344 train_time:29779910ms step_avg:587.82ms +step:50663/57344 train_time:29780488ms step_avg:587.82ms +grad accum step:12666/14336 +step:50664/57344 train_time:29781900ms step_avg:587.83ms +step:50665/57344 train_time:29781912ms step_avg:587.82ms +step:50666/57344 train_time:29782163ms step_avg:587.81ms +step:50667/57344 train_time:29782747ms step_avg:587.81ms +grad accum step:12667/14336 +step:50668/57344 train_time:29784076ms step_avg:587.83ms +step:50669/57344 train_time:29784095ms step_avg:587.82ms +step:50670/57344 train_time:29784338ms step_avg:587.81ms +step:50671/57344 train_time:29784915ms step_avg:587.81ms +grad accum step:12668/14336 +step:50672/57344 train_time:29787861ms step_avg:587.86ms +step:50673/57344 train_time:29788370ms step_avg:587.85ms +step:50674/57344 train_time:29788462ms step_avg:587.85ms +step:50675/57344 train_time:29789080ms step_avg:587.85ms +grad accum step:12669/14336 +step:50676/57344 train_time:29790464ms step_avg:587.86ms +step:50677/57344 train_time:29790482ms step_avg:587.85ms +step:50678/57344 train_time:29790715ms step_avg:587.84ms +step:50679/57344 train_time:29791269ms step_avg:587.84ms +grad accum step:12670/14336 +step:50680/57344 train_time:29792636ms step_avg:587.86ms +step:50681/57344 train_time:29792649ms step_avg:587.85ms +step:50682/57344 train_time:29792898ms step_avg:587.84ms +step:50683/57344 train_time:29793458ms step_avg:587.84ms +grad accum step:12671/14336 +step:50684/57344 train_time:29794963ms step_avg:587.86ms +step:50685/57344 train_time:29794977ms step_avg:587.85ms +step:50686/57344 train_time:29795210ms step_avg:587.84ms +step:50687/57344 train_time:29795800ms step_avg:587.84ms +grad accum step:12672/14336 +step:50688/57344 train_time:29797142ms step_avg:587.85ms +step:50688/57344 val_loss:5.428927 train_time:29797152ms step_avg:587.85ms +step:50689/57344 train_time:29797165ms step_avg:587.84ms +step:50690/57344 train_time:29797391ms step_avg:587.84ms +step:50691/57344 train_time:29797955ms step_avg:587.84ms +grad accum step:12673/14336 +step:50692/57344 train_time:29799317ms step_avg:587.85ms +step:50693/57344 train_time:29799335ms step_avg:587.84ms +step:50694/57344 train_time:29799557ms step_avg:587.83ms +step:50695/57344 train_time:29800122ms step_avg:587.83ms +grad accum step:12674/14336 +step:50696/57344 train_time:29801484ms step_avg:587.85ms +step:50697/57344 train_time:29801504ms step_avg:587.84ms +step:50698/57344 train_time:29801728ms step_avg:587.83ms +step:50699/57344 train_time:29802296ms step_avg:587.83ms +grad accum step:12675/14336 +step:50700/57344 train_time:29803615ms step_avg:587.84ms +step:50701/57344 train_time:29803632ms step_avg:587.83ms +step:50702/57344 train_time:29803881ms step_avg:587.82ms +step:50703/57344 train_time:29804436ms step_avg:587.82ms +grad accum step:12676/14336 +step:50704/57344 train_time:29805837ms step_avg:587.84ms +step:50705/57344 train_time:29805853ms step_avg:587.83ms +step:50706/57344 train_time:29806072ms step_avg:587.82ms +step:50707/57344 train_time:29806647ms step_avg:587.82ms +grad accum step:12677/14336 +step:50708/57344 train_time:29808092ms step_avg:587.84ms +step:50709/57344 train_time:29808132ms step_avg:587.83ms +step:50710/57344 train_time:29808360ms step_avg:587.82ms +step:50711/57344 train_time:29808929ms step_avg:587.82ms +grad accum step:12678/14336 +step:50712/57344 train_time:29810279ms step_avg:587.83ms +step:50713/57344 train_time:29810317ms step_avg:587.82ms +step:50714/57344 train_time:29810542ms step_avg:587.82ms +step:50715/57344 train_time:29811116ms step_avg:587.82ms +grad accum step:12679/14336 +step:50716/57344 train_time:29812544ms step_avg:587.83ms +step:50717/57344 train_time:29812568ms step_avg:587.82ms +step:50718/57344 train_time:29812806ms step_avg:587.82ms +step:50719/57344 train_time:29813368ms step_avg:587.81ms +grad accum step:12680/14336 +step:50720/57344 train_time:29814711ms step_avg:587.83ms +step:50721/57344 train_time:29814801ms step_avg:587.82ms +step:50722/57344 train_time:29815025ms step_avg:587.81ms +step:50723/57344 train_time:29815589ms step_avg:587.81ms +grad accum step:12681/14336 +step:50724/57344 train_time:29816931ms step_avg:587.83ms +step:50725/57344 train_time:29816948ms step_avg:587.82ms +step:50726/57344 train_time:29817184ms step_avg:587.81ms +step:50727/57344 train_time:29817743ms step_avg:587.81ms +grad accum step:12682/14336 +step:50728/57344 train_time:29819106ms step_avg:587.82ms +step:50729/57344 train_time:29819128ms step_avg:587.81ms +step:50730/57344 train_time:29819354ms step_avg:587.81ms +step:50731/57344 train_time:29819926ms step_avg:587.80ms +grad accum step:12683/14336 +step:50732/57344 train_time:29821275ms step_avg:587.82ms +step:50733/57344 train_time:29821292ms step_avg:587.81ms +step:50734/57344 train_time:29821547ms step_avg:587.80ms +step:50735/57344 train_time:29822153ms step_avg:587.80ms +grad accum step:12684/14336 +step:50736/57344 train_time:29823559ms step_avg:587.82ms +step:50737/57344 train_time:29823609ms step_avg:587.81ms +step:50738/57344 train_time:29823837ms step_avg:587.80ms +step:50739/57344 train_time:29824414ms step_avg:587.80ms +grad accum step:12685/14336 +step:50740/57344 train_time:29825806ms step_avg:587.82ms +step:50741/57344 train_time:29825821ms step_avg:587.81ms +step:50742/57344 train_time:29826073ms step_avg:587.80ms +step:50743/57344 train_time:29826645ms step_avg:587.80ms +grad accum step:12686/14336 +step:50744/57344 train_time:29828081ms step_avg:587.81ms +step:50745/57344 train_time:29828100ms step_avg:587.80ms +step:50746/57344 train_time:29828343ms step_avg:587.80ms +step:50747/57344 train_time:29828920ms step_avg:587.80ms +grad accum step:12687/14336 +step:50748/57344 train_time:29830272ms step_avg:587.81ms +step:50749/57344 train_time:29830292ms step_avg:587.80ms +step:50750/57344 train_time:29830530ms step_avg:587.79ms +step:50751/57344 train_time:29831082ms step_avg:587.79ms +grad accum step:12688/14336 +step:50752/57344 train_time:29832436ms step_avg:587.81ms +step:50752/57344 val_loss:5.425698 train_time:29832461ms step_avg:587.81ms +step:50753/57344 train_time:29832473ms step_avg:587.80ms +step:50754/57344 train_time:29832720ms step_avg:587.79ms +step:50755/57344 train_time:29833341ms step_avg:587.79ms +grad accum step:12689/14336 +step:50756/57344 train_time:29835021ms step_avg:587.81ms +step:50757/57344 train_time:29835042ms step_avg:587.80ms +step:50758/57344 train_time:29835259ms step_avg:587.79ms +step:50759/57344 train_time:29835823ms step_avg:587.79ms +grad accum step:12690/14336 +step:50760/57344 train_time:29837224ms step_avg:587.81ms +step:50761/57344 train_time:29837239ms step_avg:587.80ms +step:50762/57344 train_time:29837486ms step_avg:587.79ms +step:50763/57344 train_time:29838045ms step_avg:587.79ms +grad accum step:12691/14336 +step:50764/57344 train_time:29839446ms step_avg:587.81ms +step:50765/57344 train_time:29839465ms step_avg:587.80ms +step:50766/57344 train_time:29839696ms step_avg:587.79ms +step:50767/57344 train_time:29840291ms step_avg:587.79ms +grad accum step:12692/14336 +step:50768/57344 train_time:29841708ms step_avg:587.81ms +step:50769/57344 train_time:29841724ms step_avg:587.79ms +step:50770/57344 train_time:29841973ms step_avg:587.79ms +step:50771/57344 train_time:29842537ms step_avg:587.79ms +grad accum step:12693/14336 +step:50772/57344 train_time:29843877ms step_avg:587.80ms +step:50773/57344 train_time:29843904ms step_avg:587.79ms +step:50774/57344 train_time:29844139ms step_avg:587.78ms +step:50775/57344 train_time:29844686ms step_avg:587.78ms +grad accum step:12694/14336 +step:50776/57344 train_time:29846039ms step_avg:587.80ms +step:50777/57344 train_time:29846055ms step_avg:587.79ms +step:50778/57344 train_time:29846307ms step_avg:587.78ms +step:50779/57344 train_time:29846870ms step_avg:587.78ms +grad accum step:12695/14336 +step:50780/57344 train_time:29848208ms step_avg:587.79ms +step:50781/57344 train_time:29848221ms step_avg:587.78ms +step:50782/57344 train_time:29848471ms step_avg:587.78ms +step:50783/57344 train_time:29849034ms step_avg:587.78ms +grad accum step:12696/14336 +step:50784/57344 train_time:29850503ms step_avg:587.79ms +step:50785/57344 train_time:29850517ms step_avg:587.78ms +step:50786/57344 train_time:29850739ms step_avg:587.77ms +step:50787/57344 train_time:29851303ms step_avg:587.77ms +grad accum step:12697/14336 +step:50788/57344 train_time:29852682ms step_avg:587.79ms +step:50789/57344 train_time:29852710ms step_avg:587.78ms +step:50790/57344 train_time:29852943ms step_avg:587.77ms +step:50791/57344 train_time:29853522ms step_avg:587.77ms +grad accum step:12698/14336 +step:50792/57344 train_time:29854896ms step_avg:587.79ms +step:50793/57344 train_time:29854912ms step_avg:587.78ms +step:50794/57344 train_time:29855162ms step_avg:587.77ms +step:50795/57344 train_time:29855727ms step_avg:587.77ms +grad accum step:12699/14336 +step:50796/57344 train_time:29857118ms step_avg:587.78ms +step:50797/57344 train_time:29857139ms step_avg:587.77ms +step:50798/57344 train_time:29857357ms step_avg:587.77ms +step:50799/57344 train_time:29857908ms step_avg:587.77ms +grad accum step:12700/14336 +step:50800/57344 train_time:29859331ms step_avg:587.78ms +step:50801/57344 train_time:29859355ms step_avg:587.77ms +step:50802/57344 train_time:29859575ms step_avg:587.76ms +step:50803/57344 train_time:29860133ms step_avg:587.76ms +grad accum step:12701/14336 +step:50804/57344 train_time:29861455ms step_avg:587.78ms +step:50805/57344 train_time:29861471ms step_avg:587.77ms +step:50806/57344 train_time:29861713ms step_avg:587.76ms +step:50807/57344 train_time:29862272ms step_avg:587.76ms +grad accum step:12702/14336 +step:50808/57344 train_time:29863640ms step_avg:587.77ms +step:50809/57344 train_time:29863656ms step_avg:587.76ms +step:50810/57344 train_time:29863902ms step_avg:587.76ms +step:50811/57344 train_time:29864453ms step_avg:587.76ms +grad accum step:12703/14336 +step:50812/57344 train_time:29865803ms step_avg:587.77ms +step:50813/57344 train_time:29865819ms step_avg:587.76ms +step:50814/57344 train_time:29866068ms step_avg:587.75ms +step:50815/57344 train_time:29866630ms step_avg:587.75ms +grad accum step:12704/14336 +step:50816/57344 train_time:29868003ms step_avg:587.77ms +step:50816/57344 val_loss:5.422413 train_time:29868017ms step_avg:587.77ms +step:50817/57344 train_time:29868029ms step_avg:587.76ms +step:50818/57344 train_time:29868253ms step_avg:587.75ms +step:50819/57344 train_time:29868815ms step_avg:587.75ms +grad accum step:12705/14336 +step:50820/57344 train_time:29870226ms step_avg:587.77ms +step:50821/57344 train_time:29870248ms step_avg:587.75ms +step:50822/57344 train_time:29870485ms step_avg:587.75ms +step:50823/57344 train_time:29871059ms step_avg:587.75ms +grad accum step:12706/14336 +step:50824/57344 train_time:29872407ms step_avg:587.76ms +step:50825/57344 train_time:29872423ms step_avg:587.75ms +step:50826/57344 train_time:29872665ms step_avg:587.74ms +step:50827/57344 train_time:29873252ms step_avg:587.74ms +grad accum step:12707/14336 +step:50828/57344 train_time:29874729ms step_avg:587.76ms +step:50829/57344 train_time:29874747ms step_avg:587.75ms +step:50830/57344 train_time:29874988ms step_avg:587.74ms +step:50831/57344 train_time:29875567ms step_avg:587.74ms +grad accum step:12708/14336 +step:50832/57344 train_time:29876950ms step_avg:587.76ms +step:50833/57344 train_time:29876971ms step_avg:587.75ms +step:50834/57344 train_time:29877207ms step_avg:587.74ms +step:50835/57344 train_time:29877758ms step_avg:587.74ms +grad accum step:12709/14336 +step:50836/57344 train_time:29879326ms step_avg:587.76ms +step:50837/57344 train_time:29879350ms step_avg:587.75ms +step:50838/57344 train_time:29879608ms step_avg:587.74ms +step:50839/57344 train_time:29880290ms step_avg:587.74ms +grad accum step:12710/14336 +step:50840/57344 train_time:29881761ms step_avg:587.76ms +step:50841/57344 train_time:29881809ms step_avg:587.75ms +step:50842/57344 train_time:29882039ms step_avg:587.74ms +step:50843/57344 train_time:29882637ms step_avg:587.74ms +grad accum step:12711/14336 +step:50844/57344 train_time:29883989ms step_avg:587.76ms +step:50845/57344 train_time:29884005ms step_avg:587.75ms +step:50846/57344 train_time:29884259ms step_avg:587.74ms +step:50847/57344 train_time:29884832ms step_avg:587.74ms +grad accum step:12712/14336 +step:50848/57344 train_time:29886155ms step_avg:587.75ms +step:50849/57344 train_time:29886172ms step_avg:587.74ms +step:50850/57344 train_time:29886418ms step_avg:587.74ms +step:50851/57344 train_time:29886968ms step_avg:587.74ms +grad accum step:12713/14336 +step:50852/57344 train_time:29888463ms step_avg:587.75ms +step:50853/57344 train_time:29888482ms step_avg:587.74ms +step:50854/57344 train_time:29888710ms step_avg:587.74ms +step:50855/57344 train_time:29889267ms step_avg:587.74ms +grad accum step:12714/14336 +step:50856/57344 train_time:29890668ms step_avg:587.75ms +step:50857/57344 train_time:29890686ms step_avg:587.74ms +step:50858/57344 train_time:29890926ms step_avg:587.73ms +step:50859/57344 train_time:29891476ms step_avg:587.73ms +grad accum step:12715/14336 +step:50860/57344 train_time:29892819ms step_avg:587.75ms +step:50861/57344 train_time:29892841ms step_avg:587.74ms +step:50862/57344 train_time:29893080ms step_avg:587.73ms +step:50863/57344 train_time:29893648ms step_avg:587.73ms +grad accum step:12716/14336 +step:50864/57344 train_time:29895041ms step_avg:587.74ms +step:50865/57344 train_time:29895056ms step_avg:587.73ms +step:50866/57344 train_time:29895309ms step_avg:587.73ms +step:50867/57344 train_time:29895887ms step_avg:587.73ms +grad accum step:12717/14336 +step:50868/57344 train_time:29897273ms step_avg:587.74ms +step:50869/57344 train_time:29897286ms step_avg:587.73ms +step:50870/57344 train_time:29897545ms step_avg:587.72ms +step:50871/57344 train_time:29898164ms step_avg:587.73ms +grad accum step:12718/14336 +step:50872/57344 train_time:29899529ms step_avg:587.74ms +step:50873/57344 train_time:29899546ms step_avg:587.73ms +step:50874/57344 train_time:29899789ms step_avg:587.72ms +step:50875/57344 train_time:29900362ms step_avg:587.72ms +grad accum step:12719/14336 +step:50876/57344 train_time:29901795ms step_avg:587.74ms +step:50877/57344 train_time:29901815ms step_avg:587.73ms +step:50878/57344 train_time:29902051ms step_avg:587.72ms +step:50879/57344 train_time:29902633ms step_avg:587.72ms +grad accum step:12720/14336 +step:50880/57344 train_time:29904060ms step_avg:587.74ms +step:50880/57344 val_loss:5.421695 train_time:29904066ms step_avg:587.74ms +step:50881/57344 train_time:29904077ms step_avg:587.73ms +step:50882/57344 train_time:29904313ms step_avg:587.72ms +step:50883/57344 train_time:29904912ms step_avg:587.72ms +grad accum step:12721/14336 +step:50884/57344 train_time:29906257ms step_avg:587.73ms +step:50885/57344 train_time:29906274ms step_avg:587.72ms +step:50886/57344 train_time:29906539ms step_avg:587.72ms +step:50887/57344 train_time:29907147ms step_avg:587.72ms +grad accum step:12722/14336 +step:50888/57344 train_time:29908500ms step_avg:587.73ms +step:50889/57344 train_time:29908525ms step_avg:587.72ms +step:50890/57344 train_time:29908772ms step_avg:587.71ms +step:50891/57344 train_time:29909367ms step_avg:587.71ms +grad accum step:12723/14336 +step:50892/57344 train_time:29910791ms step_avg:587.73ms +step:50893/57344 train_time:29910806ms step_avg:587.72ms +step:50894/57344 train_time:29911028ms step_avg:587.71ms +step:50895/57344 train_time:29911590ms step_avg:587.71ms +grad accum step:12724/14336 +step:50896/57344 train_time:29912971ms step_avg:587.73ms +step:50897/57344 train_time:29913035ms step_avg:587.72ms +step:50898/57344 train_time:29913254ms step_avg:587.71ms +step:50899/57344 train_time:29913814ms step_avg:587.71ms +grad accum step:12725/14336 +step:50900/57344 train_time:29915200ms step_avg:587.72ms +step:50901/57344 train_time:29915219ms step_avg:587.71ms +step:50902/57344 train_time:29915465ms step_avg:587.71ms +step:50903/57344 train_time:29916051ms step_avg:587.71ms +grad accum step:12726/14336 +step:50904/57344 train_time:29917400ms step_avg:587.72ms +step:50905/57344 train_time:29917415ms step_avg:587.71ms +step:50906/57344 train_time:29917683ms step_avg:587.70ms +step:50907/57344 train_time:29918300ms step_avg:587.71ms +grad accum step:12727/14336 +step:50908/57344 train_time:29919852ms step_avg:587.72ms +step:50909/57344 train_time:29919870ms step_avg:587.71ms +step:50910/57344 train_time:29920095ms step_avg:587.71ms +step:50911/57344 train_time:29920693ms step_avg:587.71ms +grad accum step:12728/14336 +step:50912/57344 train_time:29922046ms step_avg:587.72ms +step:50913/57344 train_time:29922062ms step_avg:587.71ms +step:50914/57344 train_time:29922332ms step_avg:587.70ms +step:50915/57344 train_time:29922962ms step_avg:587.70ms +grad accum step:12729/14336 +step:50916/57344 train_time:29924386ms step_avg:587.72ms +step:50917/57344 train_time:29924400ms step_avg:587.71ms +step:50918/57344 train_time:29924671ms step_avg:587.70ms +step:50919/57344 train_time:29925290ms step_avg:587.70ms +grad accum step:12730/14336 +step:50920/57344 train_time:29926668ms step_avg:587.72ms +step:50921/57344 train_time:29926683ms step_avg:587.71ms +step:50922/57344 train_time:29926934ms step_avg:587.70ms +step:50923/57344 train_time:29927497ms step_avg:587.70ms +grad accum step:12731/14336 +step:50924/57344 train_time:29928974ms step_avg:587.72ms +step:50925/57344 train_time:29929016ms step_avg:587.71ms +step:50926/57344 train_time:29929243ms step_avg:587.70ms +step:50927/57344 train_time:29929819ms step_avg:587.70ms +grad accum step:12732/14336 +step:50928/57344 train_time:29931183ms step_avg:587.72ms +step:50929/57344 train_time:29931213ms step_avg:587.70ms +step:50930/57344 train_time:29931438ms step_avg:587.70ms +step:50931/57344 train_time:29931996ms step_avg:587.70ms +grad accum step:12733/14336 +step:50932/57344 train_time:29933372ms step_avg:587.71ms +step:50933/57344 train_time:29933397ms step_avg:587.70ms +step:50934/57344 train_time:29933636ms step_avg:587.69ms +step:50935/57344 train_time:29934228ms step_avg:587.69ms +grad accum step:12734/14336 +step:50936/57344 train_time:29935660ms step_avg:587.71ms +step:50937/57344 train_time:29935685ms step_avg:587.70ms +step:50938/57344 train_time:29935925ms step_avg:587.69ms +step:50939/57344 train_time:29936491ms step_avg:587.69ms +grad accum step:12735/14336 +step:50940/57344 train_time:29937838ms step_avg:587.71ms +step:50941/57344 train_time:29937856ms step_avg:587.70ms +step:50942/57344 train_time:29938100ms step_avg:587.69ms +step:50943/57344 train_time:29938665ms step_avg:587.69ms +grad accum step:12736/14336 +step:50944/57344 train_time:29940048ms step_avg:587.71ms +step:50944/57344 val_loss:5.418712 train_time:29940051ms step_avg:587.71ms +step:50945/57344 train_time:29940063ms step_avg:587.69ms +step:50946/57344 train_time:29940291ms step_avg:587.69ms +step:50947/57344 train_time:29940869ms step_avg:587.69ms +grad accum step:12737/14336 +step:50948/57344 train_time:29942216ms step_avg:587.70ms +step:50949/57344 train_time:29942232ms step_avg:587.69ms +step:50950/57344 train_time:29942483ms step_avg:587.68ms +step:50951/57344 train_time:29943056ms step_avg:587.68ms +grad accum step:12738/14336 +step:50952/57344 train_time:29944410ms step_avg:587.70ms +step:50953/57344 train_time:29944427ms step_avg:587.69ms +step:50954/57344 train_time:29944676ms step_avg:587.68ms +step:50955/57344 train_time:29945231ms step_avg:587.68ms +grad accum step:12739/14336 +step:50956/57344 train_time:29946580ms step_avg:587.69ms +step:50957/57344 train_time:29946598ms step_avg:587.68ms +step:50958/57344 train_time:29946851ms step_avg:587.68ms +step:50959/57344 train_time:29947445ms step_avg:587.68ms +grad accum step:12740/14336 +step:50960/57344 train_time:29948829ms step_avg:587.69ms +step:50961/57344 train_time:29948844ms step_avg:587.68ms +step:50962/57344 train_time:29949093ms step_avg:587.67ms +step:50963/57344 train_time:29949666ms step_avg:587.67ms +grad accum step:12741/14336 +step:50964/57344 train_time:29951065ms step_avg:587.69ms +step:50965/57344 train_time:29951081ms step_avg:587.68ms +step:50966/57344 train_time:29951301ms step_avg:587.67ms +step:50967/57344 train_time:29951845ms step_avg:587.67ms +grad accum step:12742/14336 +step:50968/57344 train_time:29953157ms step_avg:587.69ms +step:50969/57344 train_time:29953180ms step_avg:587.67ms +step:50970/57344 train_time:29953405ms step_avg:587.67ms +step:50971/57344 train_time:29953968ms step_avg:587.67ms +grad accum step:12743/14336 +step:50972/57344 train_time:29955308ms step_avg:587.68ms +step:50973/57344 train_time:29955324ms step_avg:587.67ms +step:50974/57344 train_time:29955572ms step_avg:587.66ms +step:50975/57344 train_time:29956143ms step_avg:587.66ms +grad accum step:12744/14336 +step:50976/57344 train_time:29957591ms step_avg:587.68ms +step:50977/57344 train_time:29957611ms step_avg:587.67ms +step:50978/57344 train_time:29957852ms step_avg:587.66ms +step:50979/57344 train_time:29958411ms step_avg:587.66ms +grad accum step:12745/14336 +step:50980/57344 train_time:29959794ms step_avg:587.68ms +step:50981/57344 train_time:29959809ms step_avg:587.67ms +step:50982/57344 train_time:29960051ms step_avg:587.66ms +step:50983/57344 train_time:29960604ms step_avg:587.66ms +grad accum step:12746/14336 +step:50984/57344 train_time:29961923ms step_avg:587.67ms +step:50985/57344 train_time:29961939ms step_avg:587.66ms +step:50986/57344 train_time:29962183ms step_avg:587.66ms +step:50987/57344 train_time:29962728ms step_avg:587.65ms +grad accum step:12747/14336 +step:50988/57344 train_time:29964077ms step_avg:587.67ms +step:50989/57344 train_time:29964092ms step_avg:587.66ms +step:50990/57344 train_time:29964339ms step_avg:587.65ms +step:50991/57344 train_time:29964887ms step_avg:587.65ms +grad accum step:12748/14336 +step:50992/57344 train_time:29966230ms step_avg:587.67ms +step:50993/57344 train_time:29966264ms step_avg:587.65ms +step:50994/57344 train_time:29966491ms step_avg:587.65ms +step:50995/57344 train_time:29967057ms step_avg:587.65ms +grad accum step:12749/14336 +step:50996/57344 train_time:29968423ms step_avg:587.66ms +step:50997/57344 train_time:29968439ms step_avg:587.65ms +step:50998/57344 train_time:29968683ms step_avg:587.64ms +step:50999/57344 train_time:29969238ms step_avg:587.64ms +grad accum step:12750/14336 +step:51000/57344 train_time:29970700ms step_avg:587.66ms +step:51001/57344 train_time:29970719ms step_avg:587.65ms +step:51002/57344 train_time:29970944ms step_avg:587.64ms +step:51003/57344 train_time:29971522ms step_avg:587.64ms +grad accum step:12751/14336 +step:51004/57344 train_time:29972951ms step_avg:587.66ms +step:51005/57344 train_time:29972966ms step_avg:587.65ms +step:51006/57344 train_time:29973222ms step_avg:587.64ms +step:51007/57344 train_time:29973808ms step_avg:587.64ms +grad accum step:12752/14336 +step:51008/57344 train_time:29975180ms step_avg:587.66ms +step:51008/57344 val_loss:5.415042 train_time:29975185ms step_avg:587.66ms +step:51009/57344 train_time:29975197ms step_avg:587.65ms +step:51010/57344 train_time:29975421ms step_avg:587.64ms +step:51011/57344 train_time:29975981ms step_avg:587.64ms +grad accum step:12753/14336 +step:51012/57344 train_time:29977336ms step_avg:587.65ms +step:51013/57344 train_time:29977353ms step_avg:587.64ms +step:51014/57344 train_time:29977606ms step_avg:587.63ms +step:51015/57344 train_time:29978181ms step_avg:587.63ms +grad accum step:12754/14336 +step:51016/57344 train_time:29979578ms step_avg:587.65ms +step:51017/57344 train_time:29979591ms step_avg:587.64ms +step:51018/57344 train_time:29979839ms step_avg:587.63ms +step:51019/57344 train_time:29980412ms step_avg:587.63ms +grad accum step:12755/14336 +step:51020/57344 train_time:29981765ms step_avg:587.65ms +step:51021/57344 train_time:29981778ms step_avg:587.64ms +step:51022/57344 train_time:29982024ms step_avg:587.63ms +step:51023/57344 train_time:29982576ms step_avg:587.63ms +grad accum step:12756/14336 +step:51024/57344 train_time:29983972ms step_avg:587.64ms +step:51025/57344 train_time:29983989ms step_avg:587.63ms +step:51026/57344 train_time:29984240ms step_avg:587.63ms +step:51027/57344 train_time:29984812ms step_avg:587.63ms +grad accum step:12757/14336 +step:51028/57344 train_time:29986151ms step_avg:587.64ms +step:51029/57344 train_time:29986168ms step_avg:587.63ms +step:51030/57344 train_time:29986416ms step_avg:587.62ms +step:51031/57344 train_time:29986980ms step_avg:587.62ms +grad accum step:12758/14336 +step:51032/57344 train_time:29988362ms step_avg:587.64ms +step:51033/57344 train_time:29988379ms step_avg:587.63ms +step:51034/57344 train_time:29988626ms step_avg:587.62ms +step:51035/57344 train_time:29989196ms step_avg:587.62ms +grad accum step:12759/14336 +step:51036/57344 train_time:29990607ms step_avg:587.64ms +step:51037/57344 train_time:29990620ms step_avg:587.63ms +step:51038/57344 train_time:29990874ms step_avg:587.62ms +step:51039/57344 train_time:29991485ms step_avg:587.62ms +grad accum step:12760/14336 +step:51040/57344 train_time:29992835ms step_avg:587.63ms +step:51041/57344 train_time:29992848ms step_avg:587.62ms +step:51042/57344 train_time:29993088ms step_avg:587.62ms +step:51043/57344 train_time:29993646ms step_avg:587.62ms +grad accum step:12761/14336 +step:51044/57344 train_time:29994935ms step_avg:587.63ms +step:51045/57344 train_time:29994952ms step_avg:587.62ms +step:51046/57344 train_time:29995200ms step_avg:587.61ms +step:51047/57344 train_time:29995772ms step_avg:587.61ms +grad accum step:12762/14336 +step:51048/57344 train_time:29997201ms step_avg:587.63ms +step:51049/57344 train_time:29997215ms step_avg:587.62ms +step:51050/57344 train_time:29997461ms step_avg:587.61ms +step:51051/57344 train_time:29998011ms step_avg:587.61ms +grad accum step:12763/14336 +step:51052/57344 train_time:29999364ms step_avg:587.62ms +step:51053/57344 train_time:29999379ms step_avg:587.61ms +step:51054/57344 train_time:29999633ms step_avg:587.61ms +step:51055/57344 train_time:30000209ms step_avg:587.61ms +grad accum step:12764/14336 +step:51056/57344 train_time:30001591ms step_avg:587.62ms +step:51057/57344 train_time:30001604ms step_avg:587.61ms +step:51058/57344 train_time:30001847ms step_avg:587.60ms +step:51059/57344 train_time:30002403ms step_avg:587.60ms +grad accum step:12765/14336 +step:51060/57344 train_time:30003743ms step_avg:587.62ms +step:51061/57344 train_time:30003760ms step_avg:587.61ms +step:51062/57344 train_time:30004011ms step_avg:587.60ms +step:51063/57344 train_time:30004583ms step_avg:587.60ms +grad accum step:12766/14336 +step:51064/57344 train_time:30005938ms step_avg:587.61ms +step:51065/57344 train_time:30005955ms step_avg:587.60ms +step:51066/57344 train_time:30006210ms step_avg:587.60ms +step:51067/57344 train_time:30006805ms step_avg:587.60ms +grad accum step:12767/14336 +step:51068/57344 train_time:30014094ms step_avg:587.73ms +step:51069/57344 train_time:30014109ms step_avg:587.72ms +step:51070/57344 train_time:30014360ms step_avg:587.71ms +step:51071/57344 train_time:30014936ms step_avg:587.71ms +grad accum step:12768/14336 +step:51072/57344 train_time:30016336ms step_avg:587.73ms +step:51072/57344 val_loss:5.411607 train_time:30016337ms step_avg:587.73ms +step:51073/57344 train_time:30016349ms step_avg:587.71ms +step:51074/57344 train_time:30016750ms step_avg:587.71ms +step:51075/57344 train_time:30017223ms step_avg:587.71ms +grad accum step:12769/14336 +step:51076/57344 train_time:30018589ms step_avg:587.72ms +step:51077/57344 train_time:30018617ms step_avg:587.71ms +step:51078/57344 train_time:30018838ms step_avg:587.71ms +step:51079/57344 train_time:30019384ms step_avg:587.71ms +grad accum step:12770/14336 +step:51080/57344 train_time:30020763ms step_avg:587.72ms +step:51081/57344 train_time:30020774ms step_avg:587.71ms +step:51082/57344 train_time:30021013ms step_avg:587.70ms +step:51083/57344 train_time:30021575ms step_avg:587.70ms +grad accum step:12771/14336 +step:51084/57344 train_time:30022929ms step_avg:587.72ms +step:51085/57344 train_time:30022956ms step_avg:587.71ms +step:51086/57344 train_time:30023185ms step_avg:587.70ms +step:51087/57344 train_time:30023764ms step_avg:587.70ms +grad accum step:12772/14336 +step:51088/57344 train_time:30025107ms step_avg:587.71ms +step:51089/57344 train_time:30025123ms step_avg:587.70ms +step:51090/57344 train_time:30025384ms step_avg:587.70ms +step:51091/57344 train_time:30025970ms step_avg:587.70ms +grad accum step:12773/14336 +step:51092/57344 train_time:30027395ms step_avg:587.71ms +step:51093/57344 train_time:30027411ms step_avg:587.70ms +step:51094/57344 train_time:30027672ms step_avg:587.69ms +step:51095/57344 train_time:30028259ms step_avg:587.69ms +grad accum step:12774/14336 +step:51096/57344 train_time:30029624ms step_avg:587.71ms +step:51097/57344 train_time:30029645ms step_avg:587.70ms +step:51098/57344 train_time:30029864ms step_avg:587.69ms +step:51099/57344 train_time:30030409ms step_avg:587.69ms +grad accum step:12775/14336 +step:51100/57344 train_time:30031766ms step_avg:587.71ms +step:51101/57344 train_time:30031828ms step_avg:587.70ms +step:51102/57344 train_time:30032049ms step_avg:587.69ms +step:51103/57344 train_time:30032592ms step_avg:587.69ms +grad accum step:12776/14336 +step:51104/57344 train_time:30033954ms step_avg:587.70ms +step:51105/57344 train_time:30033971ms step_avg:587.69ms +step:51106/57344 train_time:30034219ms step_avg:587.68ms +step:51107/57344 train_time:30034780ms step_avg:587.68ms +grad accum step:12777/14336 +step:51108/57344 train_time:30036132ms step_avg:587.70ms +step:51109/57344 train_time:30036146ms step_avg:587.69ms +step:51110/57344 train_time:30036391ms step_avg:587.68ms +step:51111/57344 train_time:30036951ms step_avg:587.68ms +grad accum step:12778/14336 +step:51112/57344 train_time:30081211ms step_avg:588.54ms +step:51113/57344 train_time:30081228ms step_avg:588.52ms +step:51114/57344 train_time:30081455ms step_avg:588.52ms +step:51115/57344 train_time:30082018ms step_avg:588.52ms +grad accum step:12779/14336 +step:51116/57344 train_time:30083317ms step_avg:588.53ms +step:51117/57344 train_time:30083328ms step_avg:588.52ms +step:51118/57344 train_time:30083556ms step_avg:588.51ms +step:51119/57344 train_time:30084100ms step_avg:588.51ms +grad accum step:12780/14336 +step:51120/57344 train_time:30085430ms step_avg:588.53ms +step:51121/57344 train_time:30085443ms step_avg:588.51ms +step:51122/57344 train_time:30085689ms step_avg:588.51ms +step:51123/57344 train_time:30086249ms step_avg:588.51ms +grad accum step:12781/14336 +step:51124/57344 train_time:30087599ms step_avg:588.52ms +step:51125/57344 train_time:30087616ms step_avg:588.51ms +step:51126/57344 train_time:30087863ms step_avg:588.50ms +step:51127/57344 train_time:30088425ms step_avg:588.50ms +grad accum step:12782/14336 +step:51128/57344 train_time:30089791ms step_avg:588.52ms +step:51129/57344 train_time:30089808ms step_avg:588.51ms +step:51130/57344 train_time:30090051ms step_avg:588.50ms +step:51131/57344 train_time:30090595ms step_avg:588.50ms +grad accum step:12783/14336 +step:51132/57344 train_time:30091920ms step_avg:588.51ms +step:51133/57344 train_time:30091954ms step_avg:588.50ms +step:51134/57344 train_time:30092170ms step_avg:588.50ms +step:51135/57344 train_time:30092710ms step_avg:588.50ms +grad accum step:12784/14336 +step:51136/57344 train_time:30094027ms step_avg:588.51ms +step:51136/57344 val_loss:5.409361 train_time:30094028ms step_avg:588.51ms +step:51137/57344 train_time:30094040ms step_avg:588.50ms +step:51138/57344 train_time:30094348ms step_avg:588.49ms +step:51139/57344 train_time:30094926ms step_avg:588.49ms +grad accum step:12785/14336 +step:51140/57344 train_time:30096307ms step_avg:588.51ms +step:51141/57344 train_time:30096324ms step_avg:588.50ms +step:51142/57344 train_time:30096572ms step_avg:588.49ms +step:51143/57344 train_time:30097132ms step_avg:588.49ms +grad accum step:12786/14336 +step:51144/57344 train_time:30098540ms step_avg:588.51ms +step:51145/57344 train_time:30098557ms step_avg:588.49ms +step:51146/57344 train_time:30098780ms step_avg:588.49ms +step:51147/57344 train_time:30099344ms step_avg:588.49ms +grad accum step:12787/14336 +step:51148/57344 train_time:30100702ms step_avg:588.50ms +step:51149/57344 train_time:30100927ms step_avg:588.49ms +step:51150/57344 train_time:30101144ms step_avg:588.49ms +step:51151/57344 train_time:30171273ms step_avg:589.85ms +grad accum step:12788/14336 +step:51152/57344 train_time:30172376ms step_avg:589.86ms +step:51153/57344 train_time:30172390ms step_avg:589.85ms +step:51154/57344 train_time:30172638ms step_avg:589.84ms +step:51155/57344 train_time:30173188ms step_avg:589.84ms +grad accum step:12789/14336 +step:51156/57344 train_time:30174463ms step_avg:589.85ms +step:51157/57344 train_time:30174478ms step_avg:589.84ms +step:51158/57344 train_time:30174724ms step_avg:589.83ms +step:51159/57344 train_time:30175284ms step_avg:589.83ms +grad accum step:12790/14336 +step:51160/57344 train_time:30176621ms step_avg:589.85ms +step:51161/57344 train_time:30176638ms step_avg:589.84ms +step:51162/57344 train_time:30176897ms step_avg:589.83ms +step:51163/57344 train_time:30177485ms step_avg:589.83ms +grad accum step:12791/14336 +step:51164/57344 train_time:30178813ms step_avg:589.84ms +step:51165/57344 train_time:30178825ms step_avg:589.83ms +step:51166/57344 train_time:30179072ms step_avg:589.83ms +step:51167/57344 train_time:30179627ms step_avg:589.83ms +grad accum step:12792/14336 +step:51168/57344 train_time:30180933ms step_avg:589.84ms +step:51169/57344 train_time:30180949ms step_avg:589.83ms +step:51170/57344 train_time:30181194ms step_avg:589.82ms +step:51171/57344 train_time:30181748ms step_avg:589.82ms +grad accum step:12793/14336 +step:51172/57344 train_time:30183154ms step_avg:589.84ms +step:51173/57344 train_time:30183169ms step_avg:589.83ms +step:51174/57344 train_time:30183415ms step_avg:589.82ms +step:51175/57344 train_time:30183970ms step_avg:589.82ms +grad accum step:12794/14336 +step:51176/57344 train_time:30185420ms step_avg:589.84ms +step:51177/57344 train_time:30185435ms step_avg:589.82ms +step:51178/57344 train_time:30185664ms step_avg:589.82ms +step:51179/57344 train_time:30186202ms step_avg:589.82ms +grad accum step:12795/14336 +step:51180/57344 train_time:30187524ms step_avg:589.83ms +step:51181/57344 train_time:30187538ms step_avg:589.82ms +step:51182/57344 train_time:30187787ms step_avg:589.81ms +step:51183/57344 train_time:30188352ms step_avg:589.81ms +grad accum step:12796/14336 +step:51184/57344 train_time:30189646ms step_avg:589.83ms +step:51185/57344 train_time:30189663ms step_avg:589.81ms +step:51186/57344 train_time:30189907ms step_avg:589.81ms +step:51187/57344 train_time:30190461ms step_avg:589.81ms +grad accum step:12797/14336 +step:51188/57344 train_time:30191760ms step_avg:589.82ms +step:51189/57344 train_time:30191772ms step_avg:589.81ms +step:51190/57344 train_time:30192011ms step_avg:589.80ms +step:51191/57344 train_time:30192547ms step_avg:589.80ms +grad accum step:12798/14336 +step:51192/57344 train_time:30193889ms step_avg:589.82ms +step:51193/57344 train_time:30193905ms step_avg:589.81ms +step:51194/57344 train_time:30194152ms step_avg:589.80ms +step:51195/57344 train_time:30194703ms step_avg:589.80ms +grad accum step:12799/14336 +step:51196/57344 train_time:30196027ms step_avg:589.81ms +step:51197/57344 train_time:30196039ms step_avg:589.80ms +step:51198/57344 train_time:30196288ms step_avg:589.79ms +step:51199/57344 train_time:30196862ms step_avg:589.79ms +grad accum step:12800/14336 +step:51200/57344 train_time:30198339ms step_avg:589.81ms +step:51200/57344 val_loss:5.407613 train_time:30198342ms step_avg:589.81ms +step:51201/57344 train_time:30198354ms step_avg:589.80ms +step:51202/57344 train_time:30198577ms step_avg:589.79ms +step:51203/57344 train_time:30199144ms step_avg:589.79ms +grad accum step:12801/14336 +step:51204/57344 train_time:30200509ms step_avg:589.81ms +step:51205/57344 train_time:30200524ms step_avg:589.80ms +step:51206/57344 train_time:30200763ms step_avg:589.79ms +step:51207/57344 train_time:30201304ms step_avg:589.79ms +grad accum step:12802/14336 +step:51208/57344 train_time:30202704ms step_avg:589.80ms +step:51209/57344 train_time:30202736ms step_avg:589.79ms +step:51210/57344 train_time:30202961ms step_avg:589.79ms +step:51211/57344 train_time:30203523ms step_avg:589.79ms +grad accum step:12803/14336 +step:51212/57344 train_time:30204850ms step_avg:589.80ms +step:51213/57344 train_time:30204870ms step_avg:589.79ms +step:51214/57344 train_time:30205119ms step_avg:589.78ms +step:51215/57344 train_time:30205681ms step_avg:589.78ms +grad accum step:12804/14336 +step:51216/57344 train_time:30207059ms step_avg:589.80ms +step:51217/57344 train_time:30207076ms step_avg:589.79ms +step:51218/57344 train_time:30207343ms step_avg:589.78ms +step:51219/57344 train_time:30207955ms step_avg:589.78ms +grad accum step:12805/14336 +step:51220/57344 train_time:30209304ms step_avg:589.80ms +step:51221/57344 train_time:30209321ms step_avg:589.78ms +step:51222/57344 train_time:30209575ms step_avg:589.78ms +step:51223/57344 train_time:30210170ms step_avg:589.78ms +grad accum step:12806/14336 +step:51224/57344 train_time:30211650ms step_avg:589.79ms +step:51225/57344 train_time:30211670ms step_avg:589.78ms +step:51226/57344 train_time:30211888ms step_avg:589.78ms +step:51227/57344 train_time:30212443ms step_avg:589.78ms +grad accum step:12807/14336 +step:51228/57344 train_time:30213817ms step_avg:589.79ms +step:51229/57344 train_time:30213832ms step_avg:589.78ms +step:51230/57344 train_time:30214078ms step_avg:589.77ms +step:51231/57344 train_time:30214635ms step_avg:589.77ms +grad accum step:12808/14336 +step:51232/57344 train_time:30216110ms step_avg:589.79ms +step:51233/57344 train_time:30216128ms step_avg:589.78ms +step:51234/57344 train_time:30216352ms step_avg:589.77ms +step:51235/57344 train_time:30216933ms step_avg:589.77ms +grad accum step:12809/14336 +step:51236/57344 train_time:30218301ms step_avg:589.79ms +step:51237/57344 train_time:30218316ms step_avg:589.78ms +step:51238/57344 train_time:30218579ms step_avg:589.77ms +step:51239/57344 train_time:30219189ms step_avg:589.77ms +grad accum step:12810/14336 +step:51240/57344 train_time:30220583ms step_avg:589.78ms +step:51241/57344 train_time:30220598ms step_avg:589.77ms +step:51242/57344 train_time:30220850ms step_avg:589.77ms +step:51243/57344 train_time:30221420ms step_avg:589.77ms +grad accum step:12811/14336 +step:51244/57344 train_time:30222746ms step_avg:589.78ms +step:51245/57344 train_time:30222760ms step_avg:589.77ms +step:51246/57344 train_time:30223003ms step_avg:589.76ms +step:51247/57344 train_time:30223545ms step_avg:589.76ms +grad accum step:12812/14336 +step:51248/57344 train_time:30224887ms step_avg:589.78ms +step:51249/57344 train_time:30224911ms step_avg:589.77ms +step:51250/57344 train_time:30225136ms step_avg:589.76ms +step:51251/57344 train_time:30225716ms step_avg:589.76ms +grad accum step:12813/14336 +step:51252/57344 train_time:30227132ms step_avg:589.77ms +step:51253/57344 train_time:30227148ms step_avg:589.76ms +step:51254/57344 train_time:30227398ms step_avg:589.76ms +step:51255/57344 train_time:30227962ms step_avg:589.76ms +grad accum step:12814/14336 +step:51256/57344 train_time:30229294ms step_avg:589.77ms +step:51257/57344 train_time:30229311ms step_avg:589.76ms +step:51258/57344 train_time:30229576ms step_avg:589.75ms +step:51259/57344 train_time:30230197ms step_avg:589.75ms +grad accum step:12815/14336 +step:51260/57344 train_time:30231520ms step_avg:589.77ms +step:51261/57344 train_time:30231537ms step_avg:589.76ms +step:51262/57344 train_time:30231784ms step_avg:589.75ms +step:51263/57344 train_time:30232359ms step_avg:589.75ms +grad accum step:12816/14336 +step:51264/57344 train_time:30233829ms step_avg:589.77ms +step:51264/57344 val_loss:5.406591 train_time:30233834ms step_avg:589.77ms +step:51265/57344 train_time:30233846ms step_avg:589.76ms +step:51266/57344 train_time:30234066ms step_avg:589.75ms +step:51267/57344 train_time:30234625ms step_avg:589.75ms +grad accum step:12817/14336 +step:51268/57344 train_time:30235980ms step_avg:589.76ms +step:51269/57344 train_time:30235994ms step_avg:589.75ms +step:51270/57344 train_time:30236228ms step_avg:589.75ms +step:51271/57344 train_time:30236836ms step_avg:589.75ms +grad accum step:12818/14336 +step:51272/57344 train_time:30238264ms step_avg:589.76ms +step:51273/57344 train_time:30238284ms step_avg:589.75ms +step:51274/57344 train_time:30238535ms step_avg:589.74ms +step:51275/57344 train_time:30239118ms step_avg:589.74ms +grad accum step:12819/14336 +step:51276/57344 train_time:30240459ms step_avg:589.76ms +step:51277/57344 train_time:30240476ms step_avg:589.75ms +step:51278/57344 train_time:30240721ms step_avg:589.74ms +step:51279/57344 train_time:30241267ms step_avg:589.74ms +grad accum step:12820/14336 +step:51280/57344 train_time:30242613ms step_avg:589.75ms +step:51281/57344 train_time:30242628ms step_avg:589.74ms +step:51282/57344 train_time:30242866ms step_avg:589.74ms +step:51283/57344 train_time:30243445ms step_avg:589.74ms +grad accum step:12821/14336 +step:51284/57344 train_time:30244791ms step_avg:589.75ms +step:51285/57344 train_time:30244804ms step_avg:589.74ms +step:51286/57344 train_time:30245050ms step_avg:589.73ms +step:51287/57344 train_time:30245601ms step_avg:589.73ms +grad accum step:12822/14336 +step:51288/57344 train_time:30246924ms step_avg:589.75ms +step:51289/57344 train_time:30246939ms step_avg:589.74ms +step:51290/57344 train_time:30247190ms step_avg:589.73ms +step:51291/57344 train_time:30247750ms step_avg:589.73ms +grad accum step:12823/14336 +step:51292/57344 train_time:30249200ms step_avg:589.74ms +step:51293/57344 train_time:30249218ms step_avg:589.73ms +step:51294/57344 train_time:30249434ms step_avg:589.73ms +step:51295/57344 train_time:30249978ms step_avg:589.73ms +grad accum step:12824/14336 +step:51296/57344 train_time:30251301ms step_avg:589.74ms +step:51297/57344 train_time:30251324ms step_avg:589.73ms +step:51298/57344 train_time:30251560ms step_avg:589.72ms +step:51299/57344 train_time:30252118ms step_avg:589.72ms +grad accum step:12825/14336 +step:51300/57344 train_time:30253450ms step_avg:589.74ms +step:51301/57344 train_time:30253467ms step_avg:589.72ms +step:51302/57344 train_time:30253715ms step_avg:589.72ms +step:51303/57344 train_time:30254297ms step_avg:589.72ms +grad accum step:12826/14336 +step:51304/57344 train_time:30255707ms step_avg:589.73ms +step:51305/57344 train_time:30255723ms step_avg:589.72ms +step:51306/57344 train_time:30255976ms step_avg:589.72ms +step:51307/57344 train_time:30256557ms step_avg:589.72ms +grad accum step:12827/14336 +step:51308/57344 train_time:30257917ms step_avg:589.73ms +step:51309/57344 train_time:30257933ms step_avg:589.72ms +step:51310/57344 train_time:30258184ms step_avg:589.71ms +step:51311/57344 train_time:30258764ms step_avg:589.71ms +grad accum step:12828/14336 +step:51312/57344 train_time:30260130ms step_avg:589.73ms +step:51313/57344 train_time:30260153ms step_avg:589.72ms +step:51314/57344 train_time:30260390ms step_avg:589.71ms +step:51315/57344 train_time:30260959ms step_avg:589.71ms +grad accum step:12829/14336 +step:51316/57344 train_time:30262328ms step_avg:589.72ms +step:51317/57344 train_time:30262352ms step_avg:589.71ms +step:51318/57344 train_time:30262577ms step_avg:589.71ms +step:51319/57344 train_time:30263149ms step_avg:589.71ms +grad accum step:12830/14336 +step:51320/57344 train_time:30264532ms step_avg:589.72ms +step:51321/57344 train_time:30264544ms step_avg:589.71ms +step:51322/57344 train_time:30264798ms step_avg:589.70ms +step:51323/57344 train_time:30265381ms step_avg:589.70ms +grad accum step:12831/14336 +step:51324/57344 train_time:30266800ms step_avg:589.72ms +step:51325/57344 train_time:30266814ms step_avg:589.71ms +step:51326/57344 train_time:30267062ms step_avg:589.70ms +step:51327/57344 train_time:30267625ms step_avg:589.70ms +grad accum step:12832/14336 +step:51328/57344 train_time:30269091ms step_avg:589.72ms +step:51328/57344 val_loss:5.404155 train_time:30269107ms step_avg:589.72ms +step:51329/57344 train_time:30269119ms step_avg:589.71ms +step:51330/57344 train_time:30269352ms step_avg:589.70ms +step:51331/57344 train_time:30269935ms step_avg:589.70ms +grad accum step:12833/14336 +step:51332/57344 train_time:30271292ms step_avg:589.72ms +step:51333/57344 train_time:30271308ms step_avg:589.70ms +step:51334/57344 train_time:30271547ms step_avg:589.70ms +step:51335/57344 train_time:30272113ms step_avg:589.70ms +grad accum step:12834/14336 +step:51336/57344 train_time:30273468ms step_avg:589.71ms +step:51337/57344 train_time:30273484ms step_avg:589.70ms +step:51338/57344 train_time:30273729ms step_avg:589.69ms +step:51339/57344 train_time:30274288ms step_avg:589.69ms +grad accum step:12835/14336 +step:51340/57344 train_time:30275734ms step_avg:589.71ms +step:51341/57344 train_time:30275751ms step_avg:589.70ms +step:51342/57344 train_time:30275968ms step_avg:589.69ms +step:51343/57344 train_time:30276525ms step_avg:589.69ms +grad accum step:12836/14336 +step:51344/57344 train_time:30277836ms step_avg:589.71ms +step:51345/57344 train_time:30277862ms step_avg:589.69ms +step:51346/57344 train_time:30278093ms step_avg:589.69ms +step:51347/57344 train_time:30278686ms step_avg:589.69ms +grad accum step:12837/14336 +step:51348/57344 train_time:30280143ms step_avg:589.70ms +step:51349/57344 train_time:30280165ms step_avg:589.69ms +step:51350/57344 train_time:30280390ms step_avg:589.69ms +step:51351/57344 train_time:30280978ms step_avg:589.69ms +grad accum step:12838/14336 +step:51352/57344 train_time:30282353ms step_avg:589.70ms +step:51353/57344 train_time:30282372ms step_avg:589.69ms +step:51354/57344 train_time:30282607ms step_avg:589.68ms +step:51355/57344 train_time:30283161ms step_avg:589.68ms +grad accum step:12839/14336 +step:51356/57344 train_time:30284501ms step_avg:589.70ms +step:51357/57344 train_time:30284517ms step_avg:589.69ms +step:51358/57344 train_time:30284769ms step_avg:589.68ms +step:51359/57344 train_time:30285351ms step_avg:589.68ms +grad accum step:12840/14336 +step:51360/57344 train_time:30286732ms step_avg:589.69ms +step:51361/57344 train_time:30286750ms step_avg:589.68ms +step:51362/57344 train_time:30286990ms step_avg:589.68ms +step:51363/57344 train_time:30287575ms step_avg:589.68ms +grad accum step:12841/14336 +step:51364/57344 train_time:30288971ms step_avg:589.69ms +step:51365/57344 train_time:30288990ms step_avg:589.68ms +step:51366/57344 train_time:30289231ms step_avg:589.67ms +step:51367/57344 train_time:30289783ms step_avg:589.67ms +grad accum step:12842/14336 +step:51368/57344 train_time:30291146ms step_avg:589.69ms +step:51369/57344 train_time:30291162ms step_avg:589.68ms +step:51370/57344 train_time:30291410ms step_avg:589.67ms +step:51371/57344 train_time:30291975ms step_avg:589.67ms +grad accum step:12843/14336 +step:51372/57344 train_time:30293342ms step_avg:589.69ms +step:51373/57344 train_time:30293359ms step_avg:589.67ms +step:51374/57344 train_time:30293606ms step_avg:589.67ms +step:51375/57344 train_time:30294197ms step_avg:589.67ms +grad accum step:12844/14336 +step:51376/57344 train_time:30295646ms step_avg:589.68ms +step:51377/57344 train_time:30295671ms step_avg:589.67ms +step:51378/57344 train_time:30295915ms step_avg:589.67ms +step:51379/57344 train_time:30296513ms step_avg:589.67ms +grad accum step:12845/14336 +step:51380/57344 train_time:30297952ms step_avg:589.68ms +step:51381/57344 train_time:30297971ms step_avg:589.67ms +step:51382/57344 train_time:30298218ms step_avg:589.67ms +step:51383/57344 train_time:30298805ms step_avg:589.67ms +grad accum step:12846/14336 +step:51384/57344 train_time:30300190ms step_avg:589.68ms +step:51385/57344 train_time:30300215ms step_avg:589.67ms +step:51386/57344 train_time:30300451ms step_avg:589.66ms +step:51387/57344 train_time:30301020ms step_avg:589.66ms +grad accum step:12847/14336 +step:51388/57344 train_time:30302447ms step_avg:589.68ms +step:51389/57344 train_time:30302467ms step_avg:589.67ms +step:51390/57344 train_time:30302687ms step_avg:589.66ms +step:51391/57344 train_time:30303247ms step_avg:589.66ms +grad accum step:12848/14336 +step:51392/57344 train_time:30304603ms step_avg:589.68ms +step:51392/57344 val_loss:5.401775 train_time:30304605ms step_avg:589.68ms +step:51393/57344 train_time:30304616ms step_avg:589.66ms +step:51394/57344 train_time:30304835ms step_avg:589.66ms +step:51395/57344 train_time:30305386ms step_avg:589.66ms +grad accum step:12849/14336 +step:51396/57344 train_time:30306718ms step_avg:589.67ms +step:51397/57344 train_time:30306734ms step_avg:589.66ms +step:51398/57344 train_time:30306987ms step_avg:589.65ms +step:51399/57344 train_time:30307555ms step_avg:589.65ms +grad accum step:12850/14336 +step:51400/57344 train_time:30308918ms step_avg:589.67ms +step:51401/57344 train_time:30308936ms step_avg:589.66ms +step:51402/57344 train_time:30309178ms step_avg:589.65ms +step:51403/57344 train_time:30309731ms step_avg:589.65ms +grad accum step:12851/14336 +step:51404/57344 train_time:30311162ms step_avg:589.67ms +step:51405/57344 train_time:30311178ms step_avg:589.65ms +step:51406/57344 train_time:30311423ms step_avg:589.65ms +step:51407/57344 train_time:30311981ms step_avg:589.65ms +grad accum step:12852/14336 +step:51408/57344 train_time:30313349ms step_avg:589.66ms +step:51409/57344 train_time:30313363ms step_avg:589.65ms +step:51410/57344 train_time:30313607ms step_avg:589.64ms +step:51411/57344 train_time:30314157ms step_avg:589.64ms +grad accum step:12853/14336 +step:51412/57344 train_time:30315498ms step_avg:589.66ms +step:51413/57344 train_time:30315517ms step_avg:589.65ms +step:51414/57344 train_time:30315764ms step_avg:589.64ms +step:51415/57344 train_time:30316331ms step_avg:589.64ms +grad accum step:12854/14336 +step:51416/57344 train_time:30317677ms step_avg:589.65ms +step:51417/57344 train_time:30317693ms step_avg:589.64ms +step:51418/57344 train_time:30317940ms step_avg:589.64ms +step:51419/57344 train_time:30318502ms step_avg:589.64ms +grad accum step:12855/14336 +step:51420/57344 train_time:30319858ms step_avg:589.65ms +step:51421/57344 train_time:30319874ms step_avg:589.64ms +step:51422/57344 train_time:30320121ms step_avg:589.63ms +step:51423/57344 train_time:30320672ms step_avg:589.63ms +grad accum step:12856/14336 +step:51424/57344 train_time:30322005ms step_avg:589.65ms +step:51425/57344 train_time:30322025ms step_avg:589.64ms +step:51426/57344 train_time:30322259ms step_avg:589.63ms +step:51427/57344 train_time:30322818ms step_avg:589.63ms +grad accum step:12857/14336 +step:51428/57344 train_time:30324256ms step_avg:589.64ms +step:51429/57344 train_time:30324272ms step_avg:589.63ms +step:51430/57344 train_time:30324521ms step_avg:589.63ms +step:51431/57344 train_time:30325094ms step_avg:589.63ms +grad accum step:12858/14336 +step:51432/57344 train_time:30326442ms step_avg:589.64ms +step:51433/57344 train_time:30326457ms step_avg:589.63ms +step:51434/57344 train_time:30326711ms step_avg:589.62ms +step:51435/57344 train_time:30327300ms step_avg:589.62ms +grad accum step:12859/14336 +step:51436/57344 train_time:30328682ms step_avg:589.64ms +step:51437/57344 train_time:30328696ms step_avg:589.63ms +step:51438/57344 train_time:30328943ms step_avg:589.62ms +step:51439/57344 train_time:30329518ms step_avg:589.62ms +grad accum step:12860/14336 +step:51440/57344 train_time:30330849ms step_avg:589.64ms +step:51441/57344 train_time:30330881ms step_avg:589.62ms +step:51442/57344 train_time:30331111ms step_avg:589.62ms +step:51443/57344 train_time:30331669ms step_avg:589.62ms +grad accum step:12861/14336 +step:51444/57344 train_time:30333042ms step_avg:589.63ms +step:51445/57344 train_time:30333058ms step_avg:589.62ms +step:51446/57344 train_time:30333316ms step_avg:589.61ms +step:51447/57344 train_time:30333909ms step_avg:589.61ms +grad accum step:12862/14336 +step:51448/57344 train_time:30335364ms step_avg:589.63ms +step:51449/57344 train_time:30335382ms step_avg:589.62ms +step:51450/57344 train_time:30335656ms step_avg:589.61ms +step:51451/57344 train_time:30336336ms step_avg:589.62ms +grad accum step:12863/14336 +step:51452/57344 train_time:30337781ms step_avg:589.63ms +step:51453/57344 train_time:30337801ms step_avg:589.62ms +step:51454/57344 train_time:30338039ms step_avg:589.61ms +step:51455/57344 train_time:30338604ms step_avg:589.61ms +grad accum step:12864/14336 +step:51456/57344 train_time:30340035ms step_avg:589.63ms +step:51456/57344 val_loss:5.404428 train_time:30340036ms step_avg:589.63ms +step:51457/57344 train_time:30340047ms step_avg:589.62ms +step:51458/57344 train_time:30340285ms step_avg:589.61ms +step:51459/57344 train_time:30340859ms step_avg:589.61ms +grad accum step:12865/14336 +step:51460/57344 train_time:30342165ms step_avg:589.63ms +step:51461/57344 train_time:30342183ms step_avg:589.62ms +step:51462/57344 train_time:30342432ms step_avg:589.61ms +step:51463/57344 train_time:30343007ms step_avg:589.61ms +grad accum step:12866/14336 +step:51464/57344 train_time:30344408ms step_avg:589.62ms +step:51465/57344 train_time:30344422ms step_avg:589.61ms +step:51466/57344 train_time:30344642ms step_avg:589.61ms +step:51467/57344 train_time:30345188ms step_avg:589.60ms +grad accum step:12867/14336 +step:51468/57344 train_time:30346531ms step_avg:589.62ms +step:51469/57344 train_time:30346548ms step_avg:589.61ms +step:51470/57344 train_time:30346804ms step_avg:589.60ms +step:51471/57344 train_time:30347385ms step_avg:589.60ms +grad accum step:12868/14336 +step:51472/57344 train_time:30348730ms step_avg:589.62ms +step:51473/57344 train_time:30348746ms step_avg:589.61ms +step:51474/57344 train_time:30349007ms step_avg:589.60ms +step:51475/57344 train_time:30349613ms step_avg:589.60ms +grad accum step:12869/14336 +step:51476/57344 train_time:30350966ms step_avg:589.61ms +step:51477/57344 train_time:30350981ms step_avg:589.60ms +step:51478/57344 train_time:30351232ms step_avg:589.60ms +step:51479/57344 train_time:30351805ms step_avg:589.60ms +grad accum step:12870/14336 +step:51480/57344 train_time:30353137ms step_avg:589.61ms +step:51481/57344 train_time:30353153ms step_avg:589.60ms +step:51482/57344 train_time:30353413ms step_avg:589.59ms +step:51483/57344 train_time:30354010ms step_avg:589.59ms +grad accum step:12871/14336 +step:51484/57344 train_time:30355394ms step_avg:589.61ms +step:51485/57344 train_time:30355412ms step_avg:589.60ms +step:51486/57344 train_time:30355648ms step_avg:589.59ms +step:51487/57344 train_time:30356207ms step_avg:589.59ms +grad accum step:12872/14336 +step:51488/57344 train_time:30357595ms step_avg:589.61ms +step:51489/57344 train_time:30357611ms step_avg:589.59ms +step:51490/57344 train_time:30357869ms step_avg:589.59ms +step:51491/57344 train_time:30358452ms step_avg:589.59ms +grad accum step:12873/14336 +step:51492/57344 train_time:30359791ms step_avg:589.60ms +step:51493/57344 train_time:30359806ms step_avg:589.59ms +step:51494/57344 train_time:30360054ms step_avg:589.58ms +step:51495/57344 train_time:30360604ms step_avg:589.58ms +grad accum step:12874/14336 +step:51496/57344 train_time:30361911ms step_avg:589.60ms +step:51497/57344 train_time:30361927ms step_avg:589.59ms +step:51498/57344 train_time:30362174ms step_avg:589.58ms +step:51499/57344 train_time:30362721ms step_avg:589.58ms +grad accum step:12875/14336 +step:51500/57344 train_time:30364058ms step_avg:589.59ms +step:51501/57344 train_time:30364073ms step_avg:589.58ms +step:51502/57344 train_time:30364321ms step_avg:589.58ms +step:51503/57344 train_time:30364872ms step_avg:589.57ms +grad accum step:12876/14336 +step:51504/57344 train_time:30366222ms step_avg:589.59ms +step:51505/57344 train_time:30366237ms step_avg:589.58ms +step:51506/57344 train_time:30366491ms step_avg:589.57ms +step:51507/57344 train_time:30367072ms step_avg:589.57ms +grad accum step:12877/14336 +step:51508/57344 train_time:30368431ms step_avg:589.59ms +step:51509/57344 train_time:30368445ms step_avg:589.58ms +step:51510/57344 train_time:30368692ms step_avg:589.57ms +step:51511/57344 train_time:30369241ms step_avg:589.57ms +grad accum step:12878/14336 +step:51512/57344 train_time:30370559ms step_avg:589.58ms +step:51513/57344 train_time:30370682ms step_avg:589.57ms +step:51514/57344 train_time:30370905ms step_avg:589.57ms +step:51515/57344 train_time:30371468ms step_avg:589.57ms +grad accum step:12879/14336 +step:51516/57344 train_time:30372823ms step_avg:589.58ms +step:51517/57344 train_time:30372838ms step_avg:589.57ms +step:51518/57344 train_time:30373084ms step_avg:589.56ms +step:51519/57344 train_time:30373632ms step_avg:589.56ms +grad accum step:12880/14336 +step:51520/57344 train_time:30375040ms step_avg:589.58ms +step:51520/57344 val_loss:5.397223 train_time:30375043ms step_avg:589.58ms +step:51521/57344 train_time:30375055ms step_avg:589.57ms +step:51522/57344 train_time:30375280ms step_avg:589.56ms +step:51523/57344 train_time:30375835ms step_avg:589.56ms +grad accum step:12881/14336 +step:51524/57344 train_time:30377298ms step_avg:589.58ms +step:51525/57344 train_time:30377314ms step_avg:589.56ms +step:51526/57344 train_time:30377543ms step_avg:589.56ms +step:51527/57344 train_time:30378119ms step_avg:589.56ms +grad accum step:12882/14336 +step:51528/57344 train_time:30379534ms step_avg:589.57ms +step:51529/57344 train_time:30379548ms step_avg:589.56ms +step:51530/57344 train_time:30379771ms step_avg:589.56ms +step:51531/57344 train_time:30380330ms step_avg:589.55ms +grad accum step:12883/14336 +step:51532/57344 train_time:30381709ms step_avg:589.57ms +step:51533/57344 train_time:30381724ms step_avg:589.56ms +step:51534/57344 train_time:30381975ms step_avg:589.55ms +step:51535/57344 train_time:30382539ms step_avg:589.55ms +grad accum step:12884/14336 +step:51536/57344 train_time:30383872ms step_avg:589.57ms +step:51537/57344 train_time:30383888ms step_avg:589.55ms +step:51538/57344 train_time:30384141ms step_avg:589.55ms +step:51539/57344 train_time:30384722ms step_avg:589.55ms +grad accum step:12885/14336 +step:51540/57344 train_time:30386132ms step_avg:589.56ms +step:51541/57344 train_time:30386144ms step_avg:589.55ms +step:51542/57344 train_time:30386392ms step_avg:589.55ms +step:51543/57344 train_time:30386955ms step_avg:589.55ms +grad accum step:12886/14336 +step:51544/57344 train_time:30388338ms step_avg:589.56ms +step:51545/57344 train_time:30388355ms step_avg:589.55ms +step:51546/57344 train_time:30388609ms step_avg:589.54ms +step:51547/57344 train_time:30389194ms step_avg:589.54ms +grad accum step:12887/14336 +step:51548/57344 train_time:30390580ms step_avg:589.56ms +step:51549/57344 train_time:30390600ms step_avg:589.55ms +step:51550/57344 train_time:30390834ms step_avg:589.54ms +step:51551/57344 train_time:30391417ms step_avg:589.54ms +grad accum step:12888/14336 +step:51552/57344 train_time:30392763ms step_avg:589.56ms +step:51553/57344 train_time:30392778ms step_avg:589.54ms +step:51554/57344 train_time:30393040ms step_avg:589.54ms +step:51555/57344 train_time:30393638ms step_avg:589.54ms +grad accum step:12889/14336 +step:51556/57344 train_time:30395020ms step_avg:589.55ms +step:51557/57344 train_time:30395036ms step_avg:589.54ms +step:51558/57344 train_time:30395286ms step_avg:589.54ms +step:51559/57344 train_time:30395854ms step_avg:589.54ms +grad accum step:12890/14336 +step:51560/57344 train_time:30397206ms step_avg:589.55ms +step:51561/57344 train_time:30397222ms step_avg:589.54ms +step:51562/57344 train_time:30397468ms step_avg:589.53ms +step:51563/57344 train_time:30398013ms step_avg:589.53ms +grad accum step:12891/14336 +step:51564/57344 train_time:30399417ms step_avg:589.55ms +step:51565/57344 train_time:30399430ms step_avg:589.54ms +step:51566/57344 train_time:30399678ms step_avg:589.53ms +step:51567/57344 train_time:30400241ms step_avg:589.53ms +grad accum step:12892/14336 +step:51568/57344 train_time:30401595ms step_avg:589.54ms +step:51569/57344 train_time:30401611ms step_avg:589.53ms +step:51570/57344 train_time:30401867ms step_avg:589.53ms +step:51571/57344 train_time:30402442ms step_avg:589.53ms +grad accum step:12893/14336 +step:51572/57344 train_time:30403821ms step_avg:589.54ms +step:51573/57344 train_time:30403840ms step_avg:589.53ms +step:51574/57344 train_time:30404087ms step_avg:589.52ms +step:51575/57344 train_time:30404676ms step_avg:589.52ms +grad accum step:12894/14336 +step:51576/57344 train_time:30406030ms step_avg:589.54ms +step:51577/57344 train_time:30406041ms step_avg:589.53ms +step:51578/57344 train_time:30406291ms step_avg:589.52ms +step:51579/57344 train_time:30406865ms step_avg:589.52ms +grad accum step:12895/14336 +step:51580/57344 train_time:30408251ms step_avg:589.54ms +step:51581/57344 train_time:30408267ms step_avg:589.52ms +step:51582/57344 train_time:30408515ms step_avg:589.52ms +step:51583/57344 train_time:30409082ms step_avg:589.52ms +grad accum step:12896/14336 +step:51584/57344 train_time:30410424ms step_avg:589.53ms +step:51584/57344 val_loss:5.395511 train_time:30410425ms step_avg:589.53ms +step:51585/57344 train_time:30410437ms step_avg:589.52ms +step:51586/57344 train_time:30410669ms step_avg:589.51ms +step:51587/57344 train_time:30411237ms step_avg:589.51ms +grad accum step:12897/14336 +step:51588/57344 train_time:30412562ms step_avg:589.53ms +step:51589/57344 train_time:30412574ms step_avg:589.52ms +step:51590/57344 train_time:30412823ms step_avg:589.51ms +step:51591/57344 train_time:30413382ms step_avg:589.51ms +grad accum step:12898/14336 +step:51592/57344 train_time:30414729ms step_avg:589.52ms +step:51593/57344 train_time:30414746ms step_avg:589.51ms +step:51594/57344 train_time:30414987ms step_avg:589.51ms +step:51595/57344 train_time:30415546ms step_avg:589.51ms +grad accum step:12899/14336 +step:51596/57344 train_time:30416881ms step_avg:589.52ms +step:51597/57344 train_time:30416898ms step_avg:589.51ms +step:51598/57344 train_time:30417164ms step_avg:589.50ms +step:51599/57344 train_time:30417780ms step_avg:589.50ms +grad accum step:12900/14336 +step:51600/57344 train_time:30419198ms step_avg:589.52ms +step:51601/57344 train_time:30419232ms step_avg:589.51ms +step:51602/57344 train_time:30419469ms step_avg:589.50ms +step:51603/57344 train_time:30420059ms step_avg:589.50ms +grad accum step:12901/14336 +step:51604/57344 train_time:30421405ms step_avg:589.52ms +step:51605/57344 train_time:30421417ms step_avg:589.51ms +step:51606/57344 train_time:30421658ms step_avg:589.50ms +step:51607/57344 train_time:30422216ms step_avg:589.50ms +grad accum step:12902/14336 +step:51608/57344 train_time:30423533ms step_avg:589.51ms +step:51609/57344 train_time:30423550ms step_avg:589.50ms +step:51610/57344 train_time:30423800ms step_avg:589.49ms +step:51611/57344 train_time:30424397ms step_avg:589.49ms +grad accum step:12903/14336 +step:51612/57344 train_time:30425815ms step_avg:589.51ms +step:51613/57344 train_time:30425828ms step_avg:589.50ms +step:51614/57344 train_time:30426073ms step_avg:589.49ms +step:51615/57344 train_time:30426637ms step_avg:589.49ms +grad accum step:12904/14336 +step:51616/57344 train_time:30428008ms step_avg:589.51ms +step:51617/57344 train_time:30428024ms step_avg:589.50ms +step:51618/57344 train_time:30428280ms step_avg:589.49ms +step:51619/57344 train_time:30428858ms step_avg:589.49ms +grad accum step:12905/14336 +step:51620/57344 train_time:30430227ms step_avg:589.50ms +step:51621/57344 train_time:30430246ms step_avg:589.49ms +step:51622/57344 train_time:30430485ms step_avg:589.49ms +step:51623/57344 train_time:30431044ms step_avg:589.49ms +grad accum step:12906/14336 +step:51624/57344 train_time:30432368ms step_avg:589.50ms +step:51625/57344 train_time:30432384ms step_avg:589.49ms +step:51626/57344 train_time:30432635ms step_avg:589.48ms +step:51627/57344 train_time:30433209ms step_avg:589.48ms +grad accum step:12907/14336 +step:51628/57344 train_time:30434583ms step_avg:589.50ms +step:51629/57344 train_time:30434603ms step_avg:589.49ms +step:51630/57344 train_time:30434858ms step_avg:589.48ms +step:51631/57344 train_time:30435457ms step_avg:589.48ms +grad accum step:12908/14336 +step:51632/57344 train_time:30436797ms step_avg:589.49ms +step:51633/57344 train_time:30436813ms step_avg:589.48ms +step:51634/57344 train_time:30437068ms step_avg:589.48ms +step:51635/57344 train_time:30437633ms step_avg:589.48ms +grad accum step:12909/14336 +step:51636/57344 train_time:30438945ms step_avg:589.49ms +step:51637/57344 train_time:30438959ms step_avg:589.48ms +step:51638/57344 train_time:30439206ms step_avg:589.47ms +step:51639/57344 train_time:30439757ms step_avg:589.47ms +grad accum step:12910/14336 +step:51640/57344 train_time:30441080ms step_avg:589.49ms +step:51641/57344 train_time:30441096ms step_avg:589.48ms +step:51642/57344 train_time:30441350ms step_avg:589.47ms +step:51643/57344 train_time:30441920ms step_avg:589.47ms +grad accum step:12911/14336 +step:51644/57344 train_time:30443257ms step_avg:589.48ms +step:51645/57344 train_time:30443272ms step_avg:589.47ms +step:51646/57344 train_time:30443536ms step_avg:589.47ms +step:51647/57344 train_time:30444136ms step_avg:589.47ms +grad accum step:12912/14336 +step:51648/57344 train_time:30445520ms step_avg:589.48ms +step:51648/57344 val_loss:5.393268 train_time:30445528ms step_avg:589.48ms +step:51649/57344 train_time:30445540ms step_avg:589.47ms +step:51650/57344 train_time:30445767ms step_avg:589.46ms +step:51651/57344 train_time:30446344ms step_avg:589.46ms +grad accum step:12913/14336 +step:51652/57344 train_time:30447743ms step_avg:589.48ms +step:51653/57344 train_time:30447785ms step_avg:589.47ms +step:51654/57344 train_time:30448015ms step_avg:589.46ms +step:51655/57344 train_time:30448593ms step_avg:589.46ms +grad accum step:12914/14336 +step:51656/57344 train_time:30449950ms step_avg:589.48ms +step:51657/57344 train_time:30449975ms step_avg:589.46ms +step:51658/57344 train_time:30450209ms step_avg:589.46ms +step:51659/57344 train_time:30450773ms step_avg:589.46ms +grad accum step:12915/14336 +step:51660/57344 train_time:30452175ms step_avg:589.47ms +step:51661/57344 train_time:30452199ms step_avg:589.46ms +step:51662/57344 train_time:30452445ms step_avg:589.46ms +step:51663/57344 train_time:30453070ms step_avg:589.46ms +grad accum step:12916/14336 +step:51664/57344 train_time:30454485ms step_avg:589.47ms +step:51665/57344 train_time:30454543ms step_avg:589.46ms +step:51666/57344 train_time:30454768ms step_avg:589.45ms +step:51667/57344 train_time:30455336ms step_avg:589.45ms +grad accum step:12917/14336 +step:51668/57344 train_time:30456699ms step_avg:589.47ms +step:51669/57344 train_time:30456723ms step_avg:589.46ms +step:51670/57344 train_time:30456967ms step_avg:589.45ms +step:51671/57344 train_time:30457550ms step_avg:589.45ms +grad accum step:12918/14336 +step:51672/57344 train_time:30458899ms step_avg:589.47ms +step:51673/57344 train_time:30458916ms step_avg:589.46ms +step:51674/57344 train_time:30459166ms step_avg:589.45ms +step:51675/57344 train_time:30459742ms step_avg:589.45ms +grad accum step:12919/14336 +step:51676/57344 train_time:30461068ms step_avg:589.46ms +step:51677/57344 train_time:30461083ms step_avg:589.45ms +step:51678/57344 train_time:30461327ms step_avg:589.44ms +step:51679/57344 train_time:30461888ms step_avg:589.44ms +grad accum step:12920/14336 +step:51680/57344 train_time:30463254ms step_avg:589.46ms +step:51681/57344 train_time:30463275ms step_avg:589.45ms +step:51682/57344 train_time:30463516ms step_avg:589.44ms +step:51683/57344 train_time:30464098ms step_avg:589.44ms +grad accum step:12921/14336 +step:51684/57344 train_time:30465419ms step_avg:589.46ms +step:51685/57344 train_time:30465440ms step_avg:589.44ms +step:51686/57344 train_time:30465681ms step_avg:589.44ms +step:51687/57344 train_time:30466243ms step_avg:589.44ms +grad accum step:12922/14336 +step:51688/57344 train_time:30467607ms step_avg:589.45ms +step:51689/57344 train_time:30467628ms step_avg:589.44ms +step:51690/57344 train_time:30467868ms step_avg:589.43ms +step:51691/57344 train_time:30468438ms step_avg:589.43ms +grad accum step:12923/14336 +step:51692/57344 train_time:30469783ms step_avg:589.45ms +step:51693/57344 train_time:30469798ms step_avg:589.44ms +step:51694/57344 train_time:30470061ms step_avg:589.43ms +step:51695/57344 train_time:30470646ms step_avg:589.43ms +grad accum step:12924/14336 +step:51696/57344 train_time:30472031ms step_avg:589.45ms +step:51697/57344 train_time:30472053ms step_avg:589.44ms +step:51698/57344 train_time:30472276ms step_avg:589.43ms +step:51699/57344 train_time:30472831ms step_avg:589.43ms +grad accum step:12925/14336 +step:51700/57344 train_time:30474186ms step_avg:589.44ms +step:51701/57344 train_time:30474209ms step_avg:589.43ms +step:51702/57344 train_time:30474460ms step_avg:589.43ms +step:51703/57344 train_time:30475055ms step_avg:589.43ms +grad accum step:12926/14336 +step:51704/57344 train_time:30476453ms step_avg:589.44ms +step:51705/57344 train_time:30476470ms step_avg:589.43ms +step:51706/57344 train_time:30476697ms step_avg:589.42ms +step:51707/57344 train_time:30477272ms step_avg:589.42ms +grad accum step:12927/14336 +step:51708/57344 train_time:30478683ms step_avg:589.44ms +step:51709/57344 train_time:30478697ms step_avg:589.43ms +step:51710/57344 train_time:30478939ms step_avg:589.42ms +step:51711/57344 train_time:30479493ms step_avg:589.42ms +grad accum step:12928/14336 +step:51712/57344 train_time:30480969ms step_avg:589.44ms +step:51712/57344 val_loss:5.390558 train_time:30480976ms step_avg:589.44ms +step:51713/57344 train_time:30481051ms step_avg:589.43ms +step:51714/57344 train_time:30481215ms step_avg:589.42ms +step:51715/57344 train_time:30481783ms step_avg:589.42ms +grad accum step:12929/14336 +step:51716/57344 train_time:30483135ms step_avg:589.43ms +step:51717/57344 train_time:30483156ms step_avg:589.42ms +step:51718/57344 train_time:30483385ms step_avg:589.42ms +step:51719/57344 train_time:30483948ms step_avg:589.41ms +grad accum step:12930/14336 +step:51720/57344 train_time:30485290ms step_avg:589.43ms +step:51721/57344 train_time:30485307ms step_avg:589.42ms +step:51722/57344 train_time:30485548ms step_avg:589.41ms +step:51723/57344 train_time:30486094ms step_avg:589.41ms +grad accum step:12931/14336 +step:51724/57344 train_time:30487422ms step_avg:589.43ms +step:51725/57344 train_time:30487443ms step_avg:589.41ms +step:51726/57344 train_time:30487681ms step_avg:589.41ms +step:51727/57344 train_time:30488241ms step_avg:589.41ms +grad accum step:12932/14336 +step:51728/57344 train_time:30489580ms step_avg:589.42ms +step:51729/57344 train_time:30489635ms step_avg:589.41ms +step:51730/57344 train_time:30489858ms step_avg:589.40ms +step:51731/57344 train_time:30490432ms step_avg:589.40ms +grad accum step:12933/14336 +step:51732/57344 train_time:30491835ms step_avg:589.42ms +step:51733/57344 train_time:30491852ms step_avg:589.41ms +step:51734/57344 train_time:30492104ms step_avg:589.40ms +step:51735/57344 train_time:30492688ms step_avg:589.40ms +grad accum step:12934/14336 +step:51736/57344 train_time:30494083ms step_avg:589.42ms +step:51737/57344 train_time:30494111ms step_avg:589.41ms +step:51738/57344 train_time:30494338ms step_avg:589.40ms +step:51739/57344 train_time:30494908ms step_avg:589.40ms +grad accum step:12935/14336 +step:51740/57344 train_time:30496276ms step_avg:589.41ms +step:51741/57344 train_time:30496315ms step_avg:589.40ms +step:51742/57344 train_time:30496539ms step_avg:589.40ms +step:51743/57344 train_time:30497112ms step_avg:589.40ms +grad accum step:12936/14336 +step:51744/57344 train_time:30498525ms step_avg:589.41ms +step:51745/57344 train_time:30498540ms step_avg:589.40ms +step:51746/57344 train_time:30498796ms step_avg:589.39ms +step:51747/57344 train_time:30499386ms step_avg:589.39ms +grad accum step:12937/14336 +step:51748/57344 train_time:30500780ms step_avg:589.41ms +step:51749/57344 train_time:30500794ms step_avg:589.40ms +step:51750/57344 train_time:30501050ms step_avg:589.39ms +step:51751/57344 train_time:30501629ms step_avg:589.39ms +grad accum step:12938/14336 +step:51752/57344 train_time:30502987ms step_avg:589.41ms +step:51753/57344 train_time:30503004ms step_avg:589.40ms +step:51754/57344 train_time:30503259ms step_avg:589.39ms +step:51755/57344 train_time:30503830ms step_avg:589.39ms +grad accum step:12939/14336 +step:51756/57344 train_time:30505211ms step_avg:589.40ms +step:51757/57344 train_time:30505228ms step_avg:589.39ms +step:51758/57344 train_time:30505466ms step_avg:589.39ms +step:51759/57344 train_time:30506056ms step_avg:589.39ms +grad accum step:12940/14336 +step:51760/57344 train_time:30507391ms step_avg:589.40ms +step:51761/57344 train_time:30507408ms step_avg:589.39ms +step:51762/57344 train_time:30507663ms step_avg:589.38ms +step:51763/57344 train_time:30508239ms step_avg:589.38ms +grad accum step:12941/14336 +step:51764/57344 train_time:30509616ms step_avg:589.40ms +step:51765/57344 train_time:30509658ms step_avg:589.39ms +step:51766/57344 train_time:30509887ms step_avg:589.38ms +step:51767/57344 train_time:30510464ms step_avg:589.38ms +grad accum step:12942/14336 +step:51768/57344 train_time:30511806ms step_avg:589.40ms +step:51769/57344 train_time:30511820ms step_avg:589.38ms +step:51770/57344 train_time:30512084ms step_avg:589.38ms +step:51771/57344 train_time:30512676ms step_avg:589.38ms +grad accum step:12943/14336 +step:51772/57344 train_time:30514011ms step_avg:589.39ms +step:51773/57344 train_time:30514031ms step_avg:589.38ms +step:51774/57344 train_time:30514265ms step_avg:589.37ms +step:51775/57344 train_time:30514822ms step_avg:589.37ms +grad accum step:12944/14336 +step:51776/57344 train_time:30516241ms step_avg:589.39ms +step:51776/57344 val_loss:5.389056 train_time:30516242ms step_avg:589.39ms +step:51777/57344 train_time:30516254ms step_avg:589.38ms +step:51778/57344 train_time:30516479ms step_avg:589.37ms +step:51779/57344 train_time:30517039ms step_avg:589.37ms +grad accum step:12945/14336 +step:51780/57344 train_time:30518435ms step_avg:589.39ms +step:51781/57344 train_time:30518459ms step_avg:589.38ms +step:51782/57344 train_time:30518689ms step_avg:589.37ms +step:51783/57344 train_time:30519266ms step_avg:589.37ms +grad accum step:12946/14336 +step:51784/57344 train_time:30520618ms step_avg:589.38ms +step:51785/57344 train_time:30520633ms step_avg:589.37ms +step:51786/57344 train_time:30520883ms step_avg:589.37ms +step:51787/57344 train_time:30521442ms step_avg:589.36ms +grad accum step:12947/14336 +step:51788/57344 train_time:30522841ms step_avg:589.38ms +step:51789/57344 train_time:30522860ms step_avg:589.37ms +step:51790/57344 train_time:30523106ms step_avg:589.36ms +step:51791/57344 train_time:30523675ms step_avg:589.36ms +grad accum step:12948/14336 +step:51792/57344 train_time:30525094ms step_avg:589.38ms +step:51793/57344 train_time:30525111ms step_avg:589.37ms +step:51794/57344 train_time:30525356ms step_avg:589.36ms +step:51795/57344 train_time:30525940ms step_avg:589.36ms +grad accum step:12949/14336 +step:51796/57344 train_time:30527351ms step_avg:589.38ms +step:51797/57344 train_time:30527376ms step_avg:589.37ms +step:51798/57344 train_time:30527591ms step_avg:589.36ms +step:51799/57344 train_time:30528126ms step_avg:589.36ms +grad accum step:12950/14336 +step:51800/57344 train_time:30529463ms step_avg:589.37ms +step:51801/57344 train_time:30529480ms step_avg:589.36ms +step:51802/57344 train_time:30529727ms step_avg:589.35ms +step:51803/57344 train_time:30530291ms step_avg:589.35ms +grad accum step:12951/14336 +step:51804/57344 train_time:30531659ms step_avg:589.37ms +step:51805/57344 train_time:30531675ms step_avg:589.36ms +step:51806/57344 train_time:30531928ms step_avg:589.35ms +step:51807/57344 train_time:30532517ms step_avg:589.35ms +grad accum step:12952/14336 +step:51808/57344 train_time:30533885ms step_avg:589.37ms +step:51809/57344 train_time:30533900ms step_avg:589.36ms +step:51810/57344 train_time:30534154ms step_avg:589.35ms +step:51811/57344 train_time:30534737ms step_avg:589.35ms +grad accum step:12953/14336 +step:51812/57344 train_time:30536185ms step_avg:589.37ms +step:51813/57344 train_time:30536207ms step_avg:589.35ms +step:51814/57344 train_time:30536432ms step_avg:589.35ms +step:51815/57344 train_time:30536995ms step_avg:589.35ms +grad accum step:12954/14336 +step:51816/57344 train_time:30538368ms step_avg:589.36ms +step:51817/57344 train_time:30538384ms step_avg:589.35ms +step:51818/57344 train_time:30538658ms step_avg:589.34ms +step:51819/57344 train_time:30539271ms step_avg:589.35ms +grad accum step:12955/14336 +step:51820/57344 train_time:30540582ms step_avg:589.36ms +step:51821/57344 train_time:30540601ms step_avg:589.35ms +step:51822/57344 train_time:30540853ms step_avg:589.34ms +step:51823/57344 train_time:30541442ms step_avg:589.34ms +grad accum step:12956/14336 +step:51824/57344 train_time:30542844ms step_avg:589.36ms +step:51825/57344 train_time:30542877ms step_avg:589.35ms +step:51826/57344 train_time:30543098ms step_avg:589.34ms +step:51827/57344 train_time:30543658ms step_avg:589.34ms +grad accum step:12957/14336 +step:51828/57344 train_time:30545061ms step_avg:589.35ms +step:51829/57344 train_time:30545078ms step_avg:589.34ms +step:51830/57344 train_time:30545320ms step_avg:589.34ms +step:51831/57344 train_time:30545891ms step_avg:589.34ms +grad accum step:12958/14336 +step:51832/57344 train_time:30547343ms step_avg:589.35ms +step:51833/57344 train_time:30547358ms step_avg:589.34ms +step:51834/57344 train_time:30547580ms step_avg:589.33ms +step:51835/57344 train_time:30548141ms step_avg:589.33ms +grad accum step:12959/14336 +step:51836/57344 train_time:30549605ms step_avg:589.35ms +step:51837/57344 train_time:30549656ms step_avg:589.34ms +step:51838/57344 train_time:30549871ms step_avg:589.33ms +step:51839/57344 train_time:30550418ms step_avg:589.33ms +grad accum step:12960/14336 +step:51840/57344 train_time:30551828ms step_avg:589.35ms +step:51840/57344 val_loss:5.388100 train_time:30551838ms step_avg:589.35ms +step:51841/57344 train_time:30551849ms step_avg:589.34ms +step:51842/57344 train_time:30552077ms step_avg:589.33ms +step:51843/57344 train_time:30552649ms step_avg:589.33ms +grad accum step:12961/14336 +step:51844/57344 train_time:30554011ms step_avg:589.35ms +step:51845/57344 train_time:30554035ms step_avg:589.33ms +step:51846/57344 train_time:30554277ms step_avg:589.33ms +step:51847/57344 train_time:30554846ms step_avg:589.33ms +grad accum step:12962/14336 +step:51848/57344 train_time:30556188ms step_avg:589.34ms +step:51849/57344 train_time:30556210ms step_avg:589.33ms +step:51850/57344 train_time:30556445ms step_avg:589.32ms +step:51851/57344 train_time:30557009ms step_avg:589.32ms +grad accum step:12963/14336 +step:51852/57344 train_time:30558343ms step_avg:589.34ms +step:51853/57344 train_time:30558360ms step_avg:589.33ms +step:51854/57344 train_time:30558616ms step_avg:589.32ms +step:51855/57344 train_time:30559194ms step_avg:589.32ms +grad accum step:12964/14336 +step:51856/57344 train_time:30560516ms step_avg:589.33ms +step:51857/57344 train_time:30560533ms step_avg:589.32ms +step:51858/57344 train_time:30560777ms step_avg:589.32ms +step:51859/57344 train_time:30561338ms step_avg:589.32ms +grad accum step:12965/14336 +step:51860/57344 train_time:30562798ms step_avg:589.33ms +step:51861/57344 train_time:30562878ms step_avg:589.32ms +step:51862/57344 train_time:30563103ms step_avg:589.32ms +step:51863/57344 train_time:30563679ms step_avg:589.32ms +grad accum step:12966/14336 +step:51864/57344 train_time:30565077ms step_avg:589.33ms +step:51865/57344 train_time:30565093ms step_avg:589.32ms +step:51866/57344 train_time:30565346ms step_avg:589.31ms +step:51867/57344 train_time:30565906ms step_avg:589.31ms +grad accum step:12967/14336 +step:51868/57344 train_time:30567251ms step_avg:589.33ms +step:51869/57344 train_time:30567275ms step_avg:589.32ms +step:51870/57344 train_time:30567513ms step_avg:589.31ms +step:51871/57344 train_time:30568111ms step_avg:589.31ms +grad accum step:12968/14336 +step:51872/57344 train_time:30569641ms step_avg:589.33ms +step:51873/57344 train_time:30569663ms step_avg:589.32ms +step:51874/57344 train_time:30569879ms step_avg:589.31ms +step:51875/57344 train_time:30570433ms step_avg:589.31ms +grad accum step:12969/14336 +step:51876/57344 train_time:30571806ms step_avg:589.32ms +step:51877/57344 train_time:30571889ms step_avg:589.31ms +step:51878/57344 train_time:30572120ms step_avg:589.31ms +step:51879/57344 train_time:30572726ms step_avg:589.31ms +grad accum step:12970/14336 +step:51880/57344 train_time:30574109ms step_avg:589.32ms +step:51881/57344 train_time:30574124ms step_avg:589.31ms +step:51882/57344 train_time:30574347ms step_avg:589.31ms +step:51883/57344 train_time:30574917ms step_avg:589.31ms +grad accum step:12971/14336 +step:51884/57344 train_time:30576336ms step_avg:589.32ms +step:51885/57344 train_time:30576357ms step_avg:589.31ms +step:51886/57344 train_time:30576598ms step_avg:589.30ms +step:51887/57344 train_time:30577168ms step_avg:589.30ms +grad accum step:12972/14336 +step:51888/57344 train_time:30578831ms step_avg:589.32ms +step:51889/57344 train_time:30578854ms step_avg:589.31ms +step:51890/57344 train_time:30579089ms step_avg:589.31ms +step:51891/57344 train_time:30579695ms step_avg:589.31ms +grad accum step:12973/14336 +step:51892/57344 train_time:30581109ms step_avg:589.32ms +step:51893/57344 train_time:30581122ms step_avg:589.31ms +step:51894/57344 train_time:30581344ms step_avg:589.30ms +step:51895/57344 train_time:30581907ms step_avg:589.30ms +grad accum step:12974/14336 +step:51896/57344 train_time:30583250ms step_avg:589.32ms +step:51897/57344 train_time:30583268ms step_avg:589.31ms +step:51898/57344 train_time:30583512ms step_avg:589.30ms +step:51899/57344 train_time:30584071ms step_avg:589.30ms +grad accum step:12975/14336 +step:51900/57344 train_time:30585390ms step_avg:589.31ms +step:51901/57344 train_time:30585406ms step_avg:589.30ms +step:51902/57344 train_time:30585665ms step_avg:589.30ms +step:51903/57344 train_time:30586244ms step_avg:589.30ms +grad accum step:12976/14336 +step:51904/57344 train_time:30587610ms step_avg:589.31ms +step:51904/57344 val_loss:5.383886 train_time:30587621ms step_avg:589.31ms +step:51905/57344 train_time:30587633ms step_avg:589.30ms +step:51906/57344 train_time:30587859ms step_avg:589.29ms +step:51907/57344 train_time:30588431ms step_avg:589.29ms +grad accum step:12977/14336 +step:51908/57344 train_time:30589765ms step_avg:589.31ms +step:51909/57344 train_time:30589779ms step_avg:589.30ms +step:51910/57344 train_time:30590037ms step_avg:589.29ms +step:51911/57344 train_time:30590620ms step_avg:589.29ms +grad accum step:12978/14336 +step:51912/57344 train_time:30591957ms step_avg:589.30ms +step:51913/57344 train_time:30591971ms step_avg:589.29ms +step:51914/57344 train_time:30592223ms step_avg:589.29ms +step:51915/57344 train_time:30592808ms step_avg:589.29ms +grad accum step:12979/14336 +step:51916/57344 train_time:30594205ms step_avg:589.30ms +step:51917/57344 train_time:30594220ms step_avg:589.29ms +step:51918/57344 train_time:30594469ms step_avg:589.28ms +step:51919/57344 train_time:30595026ms step_avg:589.28ms +grad accum step:12980/14336 +step:51920/57344 train_time:30596366ms step_avg:589.30ms +step:51921/57344 train_time:30596382ms step_avg:589.29ms +step:51922/57344 train_time:30596635ms step_avg:589.28ms +step:51923/57344 train_time:30597213ms step_avg:589.28ms +grad accum step:12981/14336 +step:51924/57344 train_time:30598646ms step_avg:589.30ms +step:51925/57344 train_time:30598659ms step_avg:589.29ms +step:51926/57344 train_time:30598893ms step_avg:589.28ms +step:51927/57344 train_time:30599453ms step_avg:589.28ms +grad accum step:12982/14336 +step:51928/57344 train_time:30600798ms step_avg:589.29ms +step:51929/57344 train_time:30600812ms step_avg:589.28ms +step:51930/57344 train_time:30601058ms step_avg:589.28ms +step:51931/57344 train_time:30601614ms step_avg:589.27ms +grad accum step:12983/14336 +step:51932/57344 train_time:30602956ms step_avg:589.29ms +step:51933/57344 train_time:30602973ms step_avg:589.28ms +step:51934/57344 train_time:30603232ms step_avg:589.27ms +step:51935/57344 train_time:30603816ms step_avg:589.27ms +grad accum step:12984/14336 +step:51936/57344 train_time:30605158ms step_avg:589.29ms +step:51937/57344 train_time:30605174ms step_avg:589.27ms +step:51938/57344 train_time:30605423ms step_avg:589.27ms +step:51939/57344 train_time:30605985ms step_avg:589.27ms +grad accum step:12985/14336 +step:51940/57344 train_time:30607346ms step_avg:589.28ms +step:51941/57344 train_time:30607369ms step_avg:589.27ms +step:51942/57344 train_time:30607611ms step_avg:589.27ms +step:51943/57344 train_time:30608172ms step_avg:589.26ms +grad accum step:12986/14336 +step:51944/57344 train_time:30609562ms step_avg:589.28ms +step:51945/57344 train_time:30609580ms step_avg:589.27ms +step:51946/57344 train_time:30609820ms step_avg:589.26ms +step:51947/57344 train_time:30610376ms step_avg:589.26ms +grad accum step:12987/14336 +step:51948/57344 train_time:30611756ms step_avg:589.28ms +step:51949/57344 train_time:30611772ms step_avg:589.27ms +step:51950/57344 train_time:30612025ms step_avg:589.26ms +step:51951/57344 train_time:30612596ms step_avg:589.26ms +grad accum step:12988/14336 +step:51952/57344 train_time:30613943ms step_avg:589.27ms +step:51953/57344 train_time:30613963ms step_avg:589.26ms +step:51954/57344 train_time:30614240ms step_avg:589.26ms +step:51955/57344 train_time:30614891ms step_avg:589.26ms +grad accum step:12989/14336 +step:51956/57344 train_time:30616415ms step_avg:589.28ms +step:51957/57344 train_time:30616427ms step_avg:589.26ms +step:51958/57344 train_time:30616659ms step_avg:589.26ms +step:51959/57344 train_time:30617254ms step_avg:589.26ms +grad accum step:12990/14336 +step:51960/57344 train_time:30618825ms step_avg:589.28ms +step:51961/57344 train_time:30618844ms step_avg:589.27ms +step:51962/57344 train_time:30619071ms step_avg:589.26ms +step:51963/57344 train_time:30619643ms step_avg:589.26ms +grad accum step:12991/14336 +step:51964/57344 train_time:30621033ms step_avg:589.27ms +step:51965/57344 train_time:30621050ms step_avg:589.26ms +step:51966/57344 train_time:30621275ms step_avg:589.26ms +step:51967/57344 train_time:30621844ms step_avg:589.26ms +grad accum step:12992/14336 +step:51968/57344 train_time:30623327ms step_avg:589.27ms +step:51968/57344 val_loss:5.381837 train_time:30623334ms step_avg:589.27ms +step:51969/57344 train_time:30623375ms step_avg:589.26ms +step:51970/57344 train_time:30623572ms step_avg:589.25ms +step:51971/57344 train_time:30624156ms step_avg:589.25ms +grad accum step:12993/14336 +step:51972/57344 train_time:30625768ms step_avg:589.27ms +step:51973/57344 train_time:30625786ms step_avg:589.26ms +step:51974/57344 train_time:30626040ms step_avg:589.26ms +step:51975/57344 train_time:30626697ms step_avg:589.26ms +grad accum step:12994/14336 +step:51976/57344 train_time:30628198ms step_avg:589.28ms +step:51977/57344 train_time:30628215ms step_avg:589.26ms +step:51978/57344 train_time:30628470ms step_avg:589.26ms +step:51979/57344 train_time:30629061ms step_avg:589.26ms +grad accum step:12995/14336 +step:51980/57344 train_time:30630487ms step_avg:589.27ms +step:51981/57344 train_time:30630505ms step_avg:589.26ms +step:51982/57344 train_time:30630766ms step_avg:589.26ms +step:51983/57344 train_time:30631400ms step_avg:589.26ms +grad accum step:12996/14336 +step:51984/57344 train_time:30632766ms step_avg:589.27ms +step:51985/57344 train_time:30632782ms step_avg:589.26ms +step:51986/57344 train_time:30633021ms step_avg:589.26ms +step:51987/57344 train_time:30633574ms step_avg:589.25ms +grad accum step:12997/14336 +step:51988/57344 train_time:30634939ms step_avg:589.27ms +step:51989/57344 train_time:30634956ms step_avg:589.26ms +step:51990/57344 train_time:30635185ms step_avg:589.25ms +step:51991/57344 train_time:30635745ms step_avg:589.25ms +grad accum step:12998/14336 +step:51992/57344 train_time:30637167ms step_avg:589.27ms +step:51993/57344 train_time:30637184ms step_avg:589.26ms +step:51994/57344 train_time:30637417ms step_avg:589.25ms +step:51995/57344 train_time:30637985ms step_avg:589.25ms +grad accum step:12999/14336 +step:51996/57344 train_time:30639351ms step_avg:589.26ms +step:51997/57344 train_time:30639386ms step_avg:589.25ms +step:51998/57344 train_time:30639616ms step_avg:589.25ms +step:51999/57344 train_time:30640189ms step_avg:589.25ms +grad accum step:13000/14336 +step:52000/57344 train_time:30641520ms step_avg:589.26ms +step:52001/57344 train_time:30641542ms step_avg:589.25ms +step:52002/57344 train_time:30641772ms step_avg:589.24ms +step:52003/57344 train_time:30642307ms step_avg:589.24ms +grad accum step:13001/14336 +step:52004/57344 train_time:30643723ms step_avg:589.26ms +step:52005/57344 train_time:30643755ms step_avg:589.25ms +step:52006/57344 train_time:30643996ms step_avg:589.24ms +step:52007/57344 train_time:30644586ms step_avg:589.24ms +grad accum step:13002/14336 +step:52008/57344 train_time:30645935ms step_avg:589.25ms +step:52009/57344 train_time:30645968ms step_avg:589.24ms +step:52010/57344 train_time:30646199ms step_avg:589.24ms +step:52011/57344 train_time:30646769ms step_avg:589.24ms +grad accum step:13003/14336 +step:52012/57344 train_time:30648142ms step_avg:589.25ms +step:52013/57344 train_time:30648159ms step_avg:589.24ms +step:52014/57344 train_time:30648405ms step_avg:589.23ms +step:52015/57344 train_time:30648975ms step_avg:589.23ms +grad accum step:13004/14336 +step:52016/57344 train_time:30650329ms step_avg:589.25ms +step:52017/57344 train_time:30650346ms step_avg:589.24ms +step:52018/57344 train_time:30650588ms step_avg:589.23ms +step:52019/57344 train_time:30651142ms step_avg:589.23ms +grad accum step:13005/14336 +step:52020/57344 train_time:30652451ms step_avg:589.24ms +step:52021/57344 train_time:30652476ms step_avg:589.23ms +step:52022/57344 train_time:30652711ms step_avg:589.23ms +step:52023/57344 train_time:30653296ms step_avg:589.23ms +grad accum step:13006/14336 +step:52024/57344 train_time:30654709ms step_avg:589.24ms +step:52025/57344 train_time:30654733ms step_avg:589.23ms +step:52026/57344 train_time:30654971ms step_avg:589.22ms +step:52027/57344 train_time:30655526ms step_avg:589.22ms +grad accum step:13007/14336 +step:52028/57344 train_time:30656936ms step_avg:589.24ms +step:52029/57344 train_time:30656951ms step_avg:589.23ms +step:52030/57344 train_time:30657213ms step_avg:589.22ms +step:52031/57344 train_time:30657803ms step_avg:589.22ms +grad accum step:13008/14336 +step:52032/57344 train_time:30659133ms step_avg:589.24ms +step:52032/57344 val_loss:5.380775 train_time:30659141ms step_avg:589.24ms +step:52033/57344 train_time:30659153ms step_avg:589.23ms +step:52034/57344 train_time:30659389ms step_avg:589.22ms +step:52035/57344 train_time:30659972ms step_avg:589.22ms +grad accum step:13009/14336 +step:52036/57344 train_time:30661327ms step_avg:589.23ms +step:52037/57344 train_time:30661344ms step_avg:589.22ms +step:52038/57344 train_time:30661591ms step_avg:589.22ms +step:52039/57344 train_time:30662140ms step_avg:589.21ms +grad accum step:13010/14336 +step:52040/57344 train_time:30663495ms step_avg:589.23ms +step:52041/57344 train_time:30663512ms step_avg:589.22ms +step:52042/57344 train_time:30663759ms step_avg:589.21ms +step:52043/57344 train_time:30664305ms step_avg:589.21ms +grad accum step:13011/14336 +step:52044/57344 train_time:30665663ms step_avg:589.23ms +step:52045/57344 train_time:30665679ms step_avg:589.21ms +step:52046/57344 train_time:30665934ms step_avg:589.21ms +step:52047/57344 train_time:30666525ms step_avg:589.21ms +grad accum step:13012/14336 +step:52048/57344 train_time:30667913ms step_avg:589.22ms +step:52049/57344 train_time:30667930ms step_avg:589.21ms +step:52050/57344 train_time:30668182ms step_avg:589.21ms +step:52051/57344 train_time:30668768ms step_avg:589.21ms +grad accum step:13013/14336 +step:52052/57344 train_time:30670253ms step_avg:589.22ms +step:52053/57344 train_time:30670268ms step_avg:589.21ms +step:52054/57344 train_time:30670521ms step_avg:589.21ms +step:52055/57344 train_time:30671103ms step_avg:589.21ms +grad accum step:13014/14336 +step:52056/57344 train_time:30672461ms step_avg:589.22ms +step:52057/57344 train_time:30672477ms step_avg:589.21ms +step:52058/57344 train_time:30672722ms step_avg:589.20ms +step:52059/57344 train_time:30673266ms step_avg:589.20ms +grad accum step:13015/14336 +step:52060/57344 train_time:30674576ms step_avg:589.22ms +step:52061/57344 train_time:30674591ms step_avg:589.20ms +step:52062/57344 train_time:30674841ms step_avg:589.20ms +step:52063/57344 train_time:30675418ms step_avg:589.20ms +grad accum step:13016/14336 +step:52064/57344 train_time:30676823ms step_avg:589.21ms +step:52065/57344 train_time:30676838ms step_avg:589.20ms +step:52066/57344 train_time:30677092ms step_avg:589.20ms +step:52067/57344 train_time:30677675ms step_avg:589.20ms +grad accum step:13017/14336 +step:52068/57344 train_time:30679003ms step_avg:589.21ms +step:52069/57344 train_time:30679020ms step_avg:589.20ms +step:52070/57344 train_time:30679275ms step_avg:589.19ms +step:52071/57344 train_time:30679856ms step_avg:589.19ms +grad accum step:13018/14336 +step:52072/57344 train_time:30681198ms step_avg:589.21ms +step:52073/57344 train_time:30681210ms step_avg:589.20ms +step:52074/57344 train_time:30681454ms step_avg:589.19ms +step:52075/57344 train_time:30682010ms step_avg:589.19ms +grad accum step:13019/14336 +step:52076/57344 train_time:30683331ms step_avg:589.20ms +step:52077/57344 train_time:30683347ms step_avg:589.19ms +step:52078/57344 train_time:30683593ms step_avg:589.19ms +step:52079/57344 train_time:30684156ms step_avg:589.18ms +grad accum step:13020/14336 +step:52080/57344 train_time:30685545ms step_avg:589.20ms +step:52081/57344 train_time:30685558ms step_avg:589.19ms +step:52082/57344 train_time:30685804ms step_avg:589.18ms +step:52083/57344 train_time:30686358ms step_avg:589.18ms +grad accum step:13021/14336 +step:52084/57344 train_time:30687684ms step_avg:589.20ms +step:52085/57344 train_time:30687700ms step_avg:589.18ms +step:52086/57344 train_time:30687974ms step_avg:589.18ms +step:52087/57344 train_time:30688593ms step_avg:589.18ms +grad accum step:13022/14336 +step:52088/57344 train_time:30689966ms step_avg:589.19ms +step:52089/57344 train_time:30689983ms step_avg:589.18ms +step:52090/57344 train_time:30690234ms step_avg:589.18ms +step:52091/57344 train_time:30690800ms step_avg:589.18ms +grad accum step:13023/14336 +step:52092/57344 train_time:30692146ms step_avg:589.19ms +step:52093/57344 train_time:30692174ms step_avg:589.18ms +step:52094/57344 train_time:30692416ms step_avg:589.17ms +step:52095/57344 train_time:30693008ms step_avg:589.17ms +grad accum step:13024/14336 +step:52096/57344 train_time:30694367ms step_avg:589.19ms +step:52096/57344 val_loss:5.378386 train_time:30694368ms step_avg:589.19ms +step:52097/57344 train_time:30694380ms step_avg:589.18ms +step:52098/57344 train_time:30694606ms step_avg:589.17ms +step:52099/57344 train_time:30695164ms step_avg:589.17ms +grad accum step:13025/14336 +step:52100/57344 train_time:30696523ms step_avg:589.18ms +step:52101/57344 train_time:30696538ms step_avg:589.17ms +step:52102/57344 train_time:30696783ms step_avg:589.17ms +step:52103/57344 train_time:30697335ms step_avg:589.17ms +grad accum step:13026/14336 +step:52104/57344 train_time:30698761ms step_avg:589.18ms +step:52105/57344 train_time:30698776ms step_avg:589.17ms +step:52106/57344 train_time:30699029ms step_avg:589.16ms +step:52107/57344 train_time:30699597ms step_avg:589.16ms +grad accum step:13027/14336 +step:52108/57344 train_time:30700962ms step_avg:589.18ms +step:52109/57344 train_time:30700979ms step_avg:589.17ms +step:52110/57344 train_time:30701250ms step_avg:589.16ms +step:52111/57344 train_time:30701861ms step_avg:589.16ms +grad accum step:13028/14336 +step:52112/57344 train_time:30703240ms step_avg:589.18ms +step:52113/57344 train_time:30703257ms step_avg:589.17ms +step:52114/57344 train_time:30703507ms step_avg:589.16ms +step:52115/57344 train_time:30704079ms step_avg:589.16ms +grad accum step:13029/14336 +step:52116/57344 train_time:30705475ms step_avg:589.18ms +step:52117/57344 train_time:30705491ms step_avg:589.16ms +step:52118/57344 train_time:30705746ms step_avg:589.16ms +step:52119/57344 train_time:30706324ms step_avg:589.16ms +grad accum step:13030/14336 +step:52120/57344 train_time:30707676ms step_avg:589.17ms +step:52121/57344 train_time:30707692ms step_avg:589.16ms +step:52122/57344 train_time:30707945ms step_avg:589.16ms +step:52123/57344 train_time:30708508ms step_avg:589.15ms +grad accum step:13031/14336 +step:52124/57344 train_time:30709819ms step_avg:589.17ms +step:52125/57344 train_time:30709835ms step_avg:589.16ms +step:52126/57344 train_time:30710076ms step_avg:589.15ms +step:52127/57344 train_time:30710625ms step_avg:589.15ms +grad accum step:13032/14336 +step:52128/57344 train_time:30712010ms step_avg:589.17ms +step:52129/57344 train_time:30712023ms step_avg:589.15ms +step:52130/57344 train_time:30712271ms step_avg:589.15ms +step:52131/57344 train_time:30712827ms step_avg:589.15ms +grad accum step:13033/14336 +step:52132/57344 train_time:30714152ms step_avg:589.16ms +step:52133/57344 train_time:30714167ms step_avg:589.15ms +step:52134/57344 train_time:30714414ms step_avg:589.14ms +step:52135/57344 train_time:30714968ms step_avg:589.14ms +grad accum step:13034/14336 +step:52136/57344 train_time:30716321ms step_avg:589.16ms +step:52137/57344 train_time:30716336ms step_avg:589.15ms +step:52138/57344 train_time:30716588ms step_avg:589.14ms +step:52139/57344 train_time:30717162ms step_avg:589.14ms +grad accum step:13035/14336 +step:52140/57344 train_time:30718536ms step_avg:589.15ms +step:52141/57344 train_time:30718558ms step_avg:589.14ms +step:52142/57344 train_time:30718791ms step_avg:589.14ms +step:52143/57344 train_time:30719378ms step_avg:589.14ms +grad accum step:13036/14336 +step:52144/57344 train_time:30720727ms step_avg:589.15ms +step:52145/57344 train_time:30720743ms step_avg:589.14ms +step:52146/57344 train_time:30721000ms step_avg:589.13ms +step:52147/57344 train_time:30721579ms step_avg:589.13ms +grad accum step:13037/14336 +step:52148/57344 train_time:30722926ms step_avg:589.15ms +step:52149/57344 train_time:30722943ms step_avg:589.14ms +step:52150/57344 train_time:30723203ms step_avg:589.13ms +step:52151/57344 train_time:30723795ms step_avg:589.13ms +grad accum step:13038/14336 +step:52152/57344 train_time:30725220ms step_avg:589.15ms +step:52153/57344 train_time:30725236ms step_avg:589.14ms +step:52154/57344 train_time:30725476ms step_avg:589.13ms +step:52155/57344 train_time:30726027ms step_avg:589.13ms +grad accum step:13039/14336 +step:52156/57344 train_time:30727405ms step_avg:589.14ms +step:52157/57344 train_time:30727421ms step_avg:589.13ms +step:52158/57344 train_time:30727670ms step_avg:589.13ms +step:52159/57344 train_time:30728248ms step_avg:589.13ms +grad accum step:13040/14336 +step:52160/57344 train_time:30729667ms step_avg:589.14ms +step:52160/57344 val_loss:5.376794 train_time:30729668ms step_avg:589.14ms +step:52161/57344 train_time:30730194ms step_avg:589.14ms +step:52162/57344 train_time:30730362ms step_avg:589.13ms +step:52163/57344 train_time:30730702ms step_avg:589.13ms +grad accum step:13041/14336 +step:52164/57344 train_time:30732412ms step_avg:589.15ms +step:52165/57344 train_time:30732430ms step_avg:589.14ms +step:52166/57344 train_time:30732658ms step_avg:589.13ms +step:52167/57344 train_time:30733243ms step_avg:589.13ms +grad accum step:13042/14336 +step:52168/57344 train_time:30734618ms step_avg:589.15ms +step:52169/57344 train_time:30734634ms step_avg:589.14ms +step:52170/57344 train_time:30734882ms step_avg:589.13ms +step:52171/57344 train_time:30735448ms step_avg:589.13ms +grad accum step:13043/14336 +step:52172/57344 train_time:30736771ms step_avg:589.14ms +step:52173/57344 train_time:30736788ms step_avg:589.13ms +step:52174/57344 train_time:30737028ms step_avg:589.13ms +step:52175/57344 train_time:30737597ms step_avg:589.13ms +grad accum step:13044/14336 +step:52176/57344 train_time:30738939ms step_avg:589.14ms +step:52177/57344 train_time:30738961ms step_avg:589.13ms +step:52178/57344 train_time:30739196ms step_avg:589.12ms +step:52179/57344 train_time:30739755ms step_avg:589.12ms +grad accum step:13045/14336 +step:52180/57344 train_time:30741066ms step_avg:589.14ms +step:52181/57344 train_time:30741087ms step_avg:589.12ms +step:52182/57344 train_time:30741313ms step_avg:589.12ms +step:52183/57344 train_time:30741855ms step_avg:589.12ms +grad accum step:13046/14336 +step:52184/57344 train_time:30743248ms step_avg:589.13ms +step:52185/57344 train_time:30743262ms step_avg:589.12ms +step:52186/57344 train_time:30743498ms step_avg:589.11ms +step:52187/57344 train_time:30744058ms step_avg:589.11ms +grad accum step:13047/14336 +step:52188/57344 train_time:30745413ms step_avg:589.13ms +step:52189/57344 train_time:30745532ms step_avg:589.12ms +step:52190/57344 train_time:30745750ms step_avg:589.11ms +step:52191/57344 train_time:30746296ms step_avg:589.11ms +grad accum step:13048/14336 +step:52192/57344 train_time:30747649ms step_avg:589.13ms +step:52193/57344 train_time:30747664ms step_avg:589.11ms +step:52194/57344 train_time:30747926ms step_avg:589.11ms +step:52195/57344 train_time:30748531ms step_avg:589.11ms +grad accum step:13049/14336 +step:52196/57344 train_time:30749919ms step_avg:589.12ms +step:52197/57344 train_time:30749935ms step_avg:589.11ms +step:52198/57344 train_time:30750180ms step_avg:589.11ms +step:52199/57344 train_time:30750731ms step_avg:589.11ms +grad accum step:13050/14336 +step:52200/57344 train_time:30752100ms step_avg:589.12ms +step:52201/57344 train_time:30752121ms step_avg:589.11ms +step:52202/57344 train_time:30752358ms step_avg:589.10ms +step:52203/57344 train_time:30752917ms step_avg:589.10ms +grad accum step:13051/14336 +step:52204/57344 train_time:30754322ms step_avg:589.12ms +step:52205/57344 train_time:30754339ms step_avg:589.11ms +step:52206/57344 train_time:30754559ms step_avg:589.10ms +step:52207/57344 train_time:30755117ms step_avg:589.10ms +grad accum step:13052/14336 +step:52208/57344 train_time:30756443ms step_avg:589.11ms +step:52209/57344 train_time:30756460ms step_avg:589.10ms +step:52210/57344 train_time:30756705ms step_avg:589.10ms +step:52211/57344 train_time:30757273ms step_avg:589.10ms +grad accum step:13053/14336 +step:52212/57344 train_time:30758833ms step_avg:589.11ms +step:52213/57344 train_time:30758854ms step_avg:589.10ms +step:52214/57344 train_time:30759080ms step_avg:589.10ms +step:52215/57344 train_time:30759664ms step_avg:589.10ms +grad accum step:13054/14336 +step:52216/57344 train_time:30761040ms step_avg:589.11ms +step:52217/57344 train_time:30761059ms step_avg:589.10ms +step:52218/57344 train_time:30761286ms step_avg:589.09ms +step:52219/57344 train_time:30761821ms step_avg:589.09ms +grad accum step:13055/14336 +step:52220/57344 train_time:30763126ms step_avg:589.11ms +step:52221/57344 train_time:30763141ms step_avg:589.10ms +step:52222/57344 train_time:30763395ms step_avg:589.09ms +step:52223/57344 train_time:30763981ms step_avg:589.09ms +grad accum step:13056/14336 +step:52224/57344 train_time:30765365ms step_avg:589.10ms +step:52224/57344 val_loss:5.375344 train_time:30765369ms step_avg:589.10ms +step:52225/57344 train_time:30765381ms step_avg:589.09ms +step:52226/57344 train_time:30765618ms step_avg:589.09ms +step:52227/57344 train_time:30766223ms step_avg:589.09ms +grad accum step:13057/14336 +step:52228/57344 train_time:30767608ms step_avg:589.10ms +step:52229/57344 train_time:30767621ms step_avg:589.09ms +step:52230/57344 train_time:30767864ms step_avg:589.08ms +step:52231/57344 train_time:30768425ms step_avg:589.08ms +grad accum step:13058/14336 +step:52232/57344 train_time:30769800ms step_avg:589.10ms +step:52233/57344 train_time:30769814ms step_avg:589.09ms +step:52234/57344 train_time:30770073ms step_avg:589.08ms +step:52235/57344 train_time:30770669ms step_avg:589.08ms +grad accum step:13059/14336 +step:52236/57344 train_time:30772076ms step_avg:589.10ms +step:52237/57344 train_time:30772092ms step_avg:589.09ms +step:52238/57344 train_time:30772340ms step_avg:589.08ms +step:52239/57344 train_time:30772896ms step_avg:589.08ms +grad accum step:13060/14336 +step:52240/57344 train_time:30774220ms step_avg:589.09ms +step:52241/57344 train_time:30774237ms step_avg:589.08ms +step:52242/57344 train_time:30774519ms step_avg:589.08ms +step:52243/57344 train_time:30775172ms step_avg:589.08ms +grad accum step:13061/14336 +step:52244/57344 train_time:30776568ms step_avg:589.09ms +step:52245/57344 train_time:30776584ms step_avg:589.08ms +step:52246/57344 train_time:30776836ms step_avg:589.08ms +step:52247/57344 train_time:30777396ms step_avg:589.07ms +grad accum step:13062/14336 +step:52248/57344 train_time:30778754ms step_avg:589.09ms +step:52249/57344 train_time:30778771ms step_avg:589.08ms +step:52250/57344 train_time:30779022ms step_avg:589.07ms +step:52251/57344 train_time:30779583ms step_avg:589.07ms +grad accum step:13063/14336 +step:52252/57344 train_time:30781057ms step_avg:589.09ms +step:52253/57344 train_time:30781072ms step_avg:589.08ms +step:52254/57344 train_time:30781327ms step_avg:589.07ms +step:52255/57344 train_time:30781903ms step_avg:589.07ms +grad accum step:13064/14336 +step:52256/57344 train_time:30783279ms step_avg:589.09ms +step:52257/57344 train_time:30783292ms step_avg:589.07ms +step:52258/57344 train_time:30783535ms step_avg:589.07ms +step:52259/57344 train_time:30784082ms step_avg:589.07ms +grad accum step:13065/14336 +step:52260/57344 train_time:30785502ms step_avg:589.08ms +step:52261/57344 train_time:30785519ms step_avg:589.07ms +step:52262/57344 train_time:30785770ms step_avg:589.07ms +step:52263/57344 train_time:30786343ms step_avg:589.07ms +grad accum step:13066/14336 +step:52264/57344 train_time:30787710ms step_avg:589.08ms +step:52265/57344 train_time:30787726ms step_avg:589.07ms +step:52266/57344 train_time:30787976ms step_avg:589.06ms +step:52267/57344 train_time:30788534ms step_avg:589.06ms +grad accum step:13067/14336 +step:52268/57344 train_time:30789901ms step_avg:589.08ms +step:52269/57344 train_time:30789917ms step_avg:589.07ms +step:52270/57344 train_time:30790164ms step_avg:589.06ms +step:52271/57344 train_time:30790722ms step_avg:589.06ms +grad accum step:13068/14336 +step:52272/57344 train_time:30792104ms step_avg:589.07ms +step:52273/57344 train_time:30792118ms step_avg:589.06ms +step:52274/57344 train_time:30792368ms step_avg:589.06ms +step:52275/57344 train_time:30792945ms step_avg:589.06ms +grad accum step:13069/14336 +step:52276/57344 train_time:30794455ms step_avg:589.07ms +step:52277/57344 train_time:30794469ms step_avg:589.06ms +step:52278/57344 train_time:30794727ms step_avg:589.06ms +step:52279/57344 train_time:30795333ms step_avg:589.06ms +grad accum step:13070/14336 +step:52280/57344 train_time:30796731ms step_avg:589.07ms +step:52281/57344 train_time:30796745ms step_avg:589.06ms +step:52282/57344 train_time:30796993ms step_avg:589.06ms +step:52283/57344 train_time:30797546ms step_avg:589.05ms +grad accum step:13071/14336 +step:52284/57344 train_time:30798917ms step_avg:589.07ms +step:52285/57344 train_time:30798936ms step_avg:589.06ms +step:52286/57344 train_time:30799184ms step_avg:589.05ms +step:52287/57344 train_time:30799775ms step_avg:589.05ms +grad accum step:13072/14336 +step:52288/57344 train_time:30801164ms step_avg:589.07ms +step:52288/57344 val_loss:5.375767 train_time:30801165ms step_avg:589.07ms +step:52289/57344 train_time:30801177ms step_avg:589.06ms +step:52290/57344 train_time:30801395ms step_avg:589.05ms +step:52291/57344 train_time:30801937ms step_avg:589.05ms +grad accum step:13073/14336 +step:52292/57344 train_time:30803316ms step_avg:589.06ms +step:52293/57344 train_time:30803333ms step_avg:589.05ms +step:52294/57344 train_time:30803578ms step_avg:589.05ms +step:52295/57344 train_time:30804155ms step_avg:589.05ms +grad accum step:13074/14336 +step:52296/57344 train_time:30805632ms step_avg:589.06ms +step:52297/57344 train_time:30805647ms step_avg:589.05ms +step:52298/57344 train_time:30805866ms step_avg:589.04ms +step:52299/57344 train_time:30806415ms step_avg:589.04ms +grad accum step:13075/14336 +step:52300/57344 train_time:30807814ms step_avg:589.06ms +step:52301/57344 train_time:30807830ms step_avg:589.05ms +step:52302/57344 train_time:30808073ms step_avg:589.04ms +step:52303/57344 train_time:30808632ms step_avg:589.04ms +grad accum step:13076/14336 +step:52304/57344 train_time:30810036ms step_avg:589.06ms +step:52305/57344 train_time:30810055ms step_avg:589.05ms +step:52306/57344 train_time:30810279ms step_avg:589.04ms +step:52307/57344 train_time:30810853ms step_avg:589.04ms +grad accum step:13077/14336 +step:52308/57344 train_time:30812243ms step_avg:589.05ms +step:52309/57344 train_time:30812258ms step_avg:589.04ms +step:52310/57344 train_time:30812519ms step_avg:589.04ms +step:52311/57344 train_time:30813119ms step_avg:589.04ms +grad accum step:13078/14336 +step:52312/57344 train_time:30814506ms step_avg:589.05ms +step:52313/57344 train_time:30814543ms step_avg:589.04ms +step:52314/57344 train_time:30814768ms step_avg:589.03ms +step:52315/57344 train_time:30815325ms step_avg:589.03ms +grad accum step:13079/14336 +step:52316/57344 train_time:30816712ms step_avg:589.05ms +step:52317/57344 train_time:30816760ms step_avg:589.04ms +step:52318/57344 train_time:30816991ms step_avg:589.03ms +step:52319/57344 train_time:30817571ms step_avg:589.03ms +grad accum step:13080/14336 +step:52320/57344 train_time:30830572ms step_avg:589.27ms +step:52321/57344 train_time:30830588ms step_avg:589.26ms +step:52322/57344 train_time:30830874ms step_avg:589.25ms +step:52323/57344 train_time:30831437ms step_avg:589.25ms +grad accum step:13081/14336 +step:52324/57344 train_time:30832782ms step_avg:589.27ms +step:52325/57344 train_time:30832797ms step_avg:589.26ms +step:52326/57344 train_time:30833014ms step_avg:589.25ms +step:52327/57344 train_time:30833568ms step_avg:589.25ms +grad accum step:13082/14336 +step:52328/57344 train_time:30834893ms step_avg:589.26ms +step:52329/57344 train_time:30834912ms step_avg:589.25ms +step:52330/57344 train_time:30835147ms step_avg:589.24ms +step:52331/57344 train_time:30835692ms step_avg:589.24ms +grad accum step:13083/14336 +step:52332/57344 train_time:30837043ms step_avg:589.26ms +step:52333/57344 train_time:30837055ms step_avg:589.25ms +step:52334/57344 train_time:30837309ms step_avg:589.24ms +step:52335/57344 train_time:30837899ms step_avg:589.24ms +grad accum step:13084/14336 +step:52336/57344 train_time:30839306ms step_avg:589.26ms +step:52337/57344 train_time:30839322ms step_avg:589.25ms +step:52338/57344 train_time:30839573ms step_avg:589.24ms +step:52339/57344 train_time:30840143ms step_avg:589.24ms +grad accum step:13085/14336 +step:52340/57344 train_time:30841553ms step_avg:589.25ms +step:52341/57344 train_time:30841573ms step_avg:589.24ms +step:52342/57344 train_time:30841808ms step_avg:589.24ms +step:52343/57344 train_time:30842372ms step_avg:589.24ms +grad accum step:13086/14336 +step:52344/57344 train_time:30843742ms step_avg:589.25ms +step:52345/57344 train_time:30843758ms step_avg:589.24ms +step:52346/57344 train_time:30844009ms step_avg:589.23ms +step:52347/57344 train_time:30844574ms step_avg:589.23ms +grad accum step:13087/14336 +step:52348/57344 train_time:30845892ms step_avg:589.25ms +step:52349/57344 train_time:30845907ms step_avg:589.24ms +step:52350/57344 train_time:30846153ms step_avg:589.23ms +step:52351/57344 train_time:30846701ms step_avg:589.23ms +grad accum step:13088/14336 +step:52352/57344 train_time:30848042ms step_avg:589.24ms +step:52352/57344 val_loss:5.371130 train_time:30848043ms step_avg:589.24ms +step:52353/57344 train_time:30848055ms step_avg:589.23ms +step:52354/57344 train_time:30848283ms step_avg:589.22ms +step:52355/57344 train_time:30848860ms step_avg:589.22ms +grad accum step:13089/14336 +step:52356/57344 train_time:30850283ms step_avg:589.24ms +step:52357/57344 train_time:30850296ms step_avg:589.23ms +step:52358/57344 train_time:30850534ms step_avg:589.22ms +step:52359/57344 train_time:30851102ms step_avg:589.22ms +grad accum step:13090/14336 +step:52360/57344 train_time:30852465ms step_avg:589.24ms +step:52361/57344 train_time:30852482ms step_avg:589.23ms +step:52362/57344 train_time:30852750ms step_avg:589.22ms +step:52363/57344 train_time:30853359ms step_avg:589.22ms +grad accum step:13091/14336 +step:52364/57344 train_time:30854690ms step_avg:589.23ms +step:52365/57344 train_time:30854715ms step_avg:589.22ms +step:52366/57344 train_time:30854949ms step_avg:589.22ms +step:52367/57344 train_time:30855525ms step_avg:589.22ms +grad accum step:13092/14336 +step:52368/57344 train_time:30856908ms step_avg:589.23ms +step:52369/57344 train_time:30856924ms step_avg:589.22ms +step:52370/57344 train_time:30857169ms step_avg:589.21ms +step:52371/57344 train_time:30857716ms step_avg:589.21ms +grad accum step:13093/14336 +step:52372/57344 train_time:30859178ms step_avg:589.23ms +step:52373/57344 train_time:30859191ms step_avg:589.22ms +step:52374/57344 train_time:30859434ms step_avg:589.21ms +step:52375/57344 train_time:30860010ms step_avg:589.21ms +grad accum step:13094/14336 +step:52376/57344 train_time:30861428ms step_avg:589.23ms +step:52377/57344 train_time:30861440ms step_avg:589.22ms +step:52378/57344 train_time:30861689ms step_avg:589.21ms +step:52379/57344 train_time:30862279ms step_avg:589.21ms +grad accum step:13095/14336 +step:52380/57344 train_time:30863644ms step_avg:589.23ms +step:52381/57344 train_time:30863660ms step_avg:589.21ms +step:52382/57344 train_time:30863918ms step_avg:589.21ms +step:52383/57344 train_time:30864500ms step_avg:589.21ms +grad accum step:13096/14336 +step:52384/57344 train_time:30865803ms step_avg:589.22ms +step:52385/57344 train_time:30865819ms step_avg:589.21ms +step:52386/57344 train_time:30866068ms step_avg:589.20ms +step:52387/57344 train_time:30866627ms step_avg:589.20ms +grad accum step:13097/14336 +step:52388/57344 train_time:30867985ms step_avg:589.22ms +step:52389/57344 train_time:30867996ms step_avg:589.21ms +step:52390/57344 train_time:30868245ms step_avg:589.20ms +step:52391/57344 train_time:30868824ms step_avg:589.20ms +grad accum step:13098/14336 +step:52392/57344 train_time:30870183ms step_avg:589.22ms +step:52393/57344 train_time:30870197ms step_avg:589.20ms +step:52394/57344 train_time:30870450ms step_avg:589.20ms +step:52395/57344 train_time:30871025ms step_avg:589.20ms +grad accum step:13099/14336 +step:52396/57344 train_time:30872404ms step_avg:589.21ms +step:52397/57344 train_time:30872421ms step_avg:589.20ms +step:52398/57344 train_time:30872680ms step_avg:589.20ms +step:52399/57344 train_time:30873267ms step_avg:589.20ms +grad accum step:13100/14336 +step:52400/57344 train_time:30874719ms step_avg:589.21ms +step:52401/57344 train_time:30874734ms step_avg:589.20ms +step:52402/57344 train_time:30874985ms step_avg:589.19ms +step:52403/57344 train_time:30875572ms step_avg:589.19ms +grad accum step:13101/14336 +step:52404/57344 train_time:30876964ms step_avg:589.21ms +step:52405/57344 train_time:30876981ms step_avg:589.20ms +step:52406/57344 train_time:30877228ms step_avg:589.19ms +step:52407/57344 train_time:30877779ms step_avg:589.19ms +grad accum step:13102/14336 +step:52408/57344 train_time:30879092ms step_avg:589.21ms +step:52409/57344 train_time:30879108ms step_avg:589.19ms +step:52410/57344 train_time:30879357ms step_avg:589.19ms +step:52411/57344 train_time:30879912ms step_avg:589.19ms +grad accum step:13103/14336 +step:52412/57344 train_time:30881308ms step_avg:589.20ms +step:52413/57344 train_time:30881324ms step_avg:589.19ms +step:52414/57344 train_time:30881578ms step_avg:589.19ms +step:52415/57344 train_time:30882161ms step_avg:589.19ms +grad accum step:13104/14336 +step:52416/57344 train_time:30901537ms step_avg:589.54ms +step:52416/57344 val_loss:5.369224 train_time:30901541ms step_avg:589.54ms +step:52417/57344 train_time:30901553ms step_avg:589.53ms +step:52418/57344 train_time:30901774ms step_avg:589.53ms +step:52419/57344 train_time:30902319ms step_avg:589.53ms +grad accum step:13105/14336 +step:52420/57344 train_time:30903630ms step_avg:589.54ms +step:52421/57344 train_time:30903645ms step_avg:589.53ms +step:52422/57344 train_time:30903894ms step_avg:589.52ms +step:52423/57344 train_time:30904456ms step_avg:589.52ms +grad accum step:13106/14336 +step:52424/57344 train_time:30905783ms step_avg:589.54ms +step:52425/57344 train_time:30905801ms step_avg:589.52ms +step:52426/57344 train_time:30906050ms step_avg:589.52ms +step:52427/57344 train_time:30906623ms step_avg:589.52ms +grad accum step:13107/14336 +step:52428/57344 train_time:30907989ms step_avg:589.53ms +step:52429/57344 train_time:30908006ms step_avg:589.52ms +step:52430/57344 train_time:30908260ms step_avg:589.51ms +step:52431/57344 train_time:30908833ms step_avg:589.51ms +grad accum step:13108/14336 +step:52432/57344 train_time:30910253ms step_avg:589.53ms +step:52433/57344 train_time:30910268ms step_avg:589.52ms +step:52434/57344 train_time:30910527ms step_avg:589.51ms +step:52435/57344 train_time:30911107ms step_avg:589.51ms +grad accum step:13109/14336 +step:52436/57344 train_time:30912462ms step_avg:589.53ms +step:52437/57344 train_time:30912480ms step_avg:589.52ms +step:52438/57344 train_time:30912752ms step_avg:589.51ms +step:52439/57344 train_time:30913360ms step_avg:589.51ms +grad accum step:13110/14336 +step:52440/57344 train_time:30914718ms step_avg:589.53ms +step:52441/57344 train_time:30914730ms step_avg:589.51ms +step:52442/57344 train_time:30914974ms step_avg:589.51ms +step:52443/57344 train_time:30915531ms step_avg:589.51ms +grad accum step:13111/14336 +step:52444/57344 train_time:30916998ms step_avg:589.52ms +step:52445/57344 train_time:30917012ms step_avg:589.51ms +step:52446/57344 train_time:30917267ms step_avg:589.51ms +step:52447/57344 train_time:30917856ms step_avg:589.51ms +grad accum step:13112/14336 +step:52448/57344 train_time:30919240ms step_avg:589.52ms +step:52449/57344 train_time:30919256ms step_avg:589.51ms +step:52450/57344 train_time:30919500ms step_avg:589.50ms +step:52451/57344 train_time:30920049ms step_avg:589.50ms +grad accum step:13113/14336 +step:52452/57344 train_time:30921416ms step_avg:589.52ms +step:52453/57344 train_time:30921428ms step_avg:589.51ms +step:52454/57344 train_time:30921683ms step_avg:589.50ms +step:52455/57344 train_time:30922281ms step_avg:589.50ms +grad accum step:13114/14336 +step:52456/57344 train_time:30923818ms step_avg:589.52ms +step:52457/57344 train_time:30923835ms step_avg:589.51ms +step:52458/57344 train_time:30924089ms step_avg:589.50ms +step:52459/57344 train_time:30924679ms step_avg:589.50ms +grad accum step:13115/14336 +step:52460/57344 train_time:30926019ms step_avg:589.52ms +step:52461/57344 train_time:30926036ms step_avg:589.51ms +step:52462/57344 train_time:30926285ms step_avg:589.50ms +step:52463/57344 train_time:30926841ms step_avg:589.50ms +grad accum step:13116/14336 +step:52464/57344 train_time:30928186ms step_avg:589.51ms +step:52465/57344 train_time:30928203ms step_avg:589.50ms +step:52466/57344 train_time:30928444ms step_avg:589.49ms +step:52467/57344 train_time:30929014ms step_avg:589.49ms +grad accum step:13117/14336 +step:52468/57344 train_time:30930369ms step_avg:589.51ms +step:52469/57344 train_time:30930383ms step_avg:589.50ms +step:52470/57344 train_time:30930653ms step_avg:589.49ms +step:52471/57344 train_time:30931288ms step_avg:589.49ms +grad accum step:13118/14336 +step:52472/57344 train_time:30932819ms step_avg:589.51ms +step:52473/57344 train_time:30932836ms step_avg:589.50ms +step:52474/57344 train_time:30933101ms step_avg:589.49ms +step:52475/57344 train_time:30933700ms step_avg:589.49ms +grad accum step:13119/14336 +step:52476/57344 train_time:30935034ms step_avg:589.51ms +step:52477/57344 train_time:30935051ms step_avg:589.50ms +step:52478/57344 train_time:30935297ms step_avg:589.49ms +step:52479/57344 train_time:30935849ms step_avg:589.49ms +grad accum step:13120/14336 +step:52480/57344 train_time:31037157ms step_avg:591.41ms +step:52480/57344 val_loss:5.370637 train_time:31037158ms step_avg:591.41ms +step:52481/57344 train_time:31037170ms step_avg:591.40ms +step:52482/57344 train_time:31037390ms step_avg:591.39ms +step:52483/57344 train_time:31037933ms step_avg:591.39ms +grad accum step:13121/14336 +step:52484/57344 train_time:31039354ms step_avg:591.41ms +step:52485/57344 train_time:31039371ms step_avg:591.40ms +step:52486/57344 train_time:31039600ms step_avg:591.39ms +step:52487/57344 train_time:31040193ms step_avg:591.39ms +grad accum step:13122/14336 +step:52488/57344 train_time:31041549ms step_avg:591.40ms +step:52489/57344 train_time:31041563ms step_avg:591.39ms +step:52490/57344 train_time:31041812ms step_avg:591.39ms +step:52491/57344 train_time:31042383ms step_avg:591.38ms +grad accum step:13123/14336 +step:52492/57344 train_time:31043737ms step_avg:591.40ms +step:52493/57344 train_time:31043753ms step_avg:591.39ms +step:52494/57344 train_time:31044001ms step_avg:591.38ms +step:52495/57344 train_time:31044565ms step_avg:591.38ms +grad accum step:13124/14336 +step:52496/57344 train_time:31046026ms step_avg:591.40ms +step:52497/57344 train_time:31046039ms step_avg:591.39ms +step:52498/57344 train_time:31046261ms step_avg:591.38ms +step:52499/57344 train_time:31046830ms step_avg:591.38ms +grad accum step:13125/14336 +step:52500/57344 train_time:31048178ms step_avg:591.39ms +step:52501/57344 train_time:31048195ms step_avg:591.38ms +step:52502/57344 train_time:31048424ms step_avg:591.38ms +step:52503/57344 train_time:31048981ms step_avg:591.38ms +grad accum step:13126/14336 +step:52504/57344 train_time:31050377ms step_avg:591.39ms +step:52505/57344 train_time:31050390ms step_avg:591.38ms +step:52506/57344 train_time:31050635ms step_avg:591.37ms +step:52507/57344 train_time:31051202ms step_avg:591.37ms +grad accum step:13127/14336 +step:52508/57344 train_time:31052556ms step_avg:591.39ms +step:52509/57344 train_time:31052571ms step_avg:591.38ms +step:52510/57344 train_time:31052842ms step_avg:591.37ms +step:52511/57344 train_time:31053479ms step_avg:591.37ms +grad accum step:13128/14336 +step:52512/57344 train_time:31054857ms step_avg:591.39ms +step:52513/57344 train_time:31054871ms step_avg:591.37ms +step:52514/57344 train_time:31055122ms step_avg:591.37ms +step:52515/57344 train_time:31055688ms step_avg:591.37ms +grad accum step:13129/14336 +step:52516/57344 train_time:31057022ms step_avg:591.38ms +step:52517/57344 train_time:31057037ms step_avg:591.37ms +step:52518/57344 train_time:31057285ms step_avg:591.36ms +step:52519/57344 train_time:31057840ms step_avg:591.36ms +grad accum step:13130/14336 +step:52520/57344 train_time:31059179ms step_avg:591.38ms +step:52521/57344 train_time:31059196ms step_avg:591.37ms +step:52522/57344 train_time:31059447ms step_avg:591.36ms +step:52523/57344 train_time:31060009ms step_avg:591.36ms +grad accum step:13131/14336 +step:52524/57344 train_time:31061338ms step_avg:591.37ms +step:52525/57344 train_time:31061355ms step_avg:591.36ms +step:52526/57344 train_time:31061602ms step_avg:591.36ms +step:52527/57344 train_time:31062156ms step_avg:591.36ms +grad accum step:13132/14336 +step:52528/57344 train_time:31063543ms step_avg:591.37ms +step:52529/57344 train_time:31063555ms step_avg:591.36ms +step:52530/57344 train_time:31063805ms step_avg:591.35ms +step:52531/57344 train_time:31064371ms step_avg:591.35ms +grad accum step:13133/14336 +step:52532/57344 train_time:31065695ms step_avg:591.37ms +step:52533/57344 train_time:31065709ms step_avg:591.36ms +step:52534/57344 train_time:31065958ms step_avg:591.35ms +step:52535/57344 train_time:31066525ms step_avg:591.35ms +grad accum step:13134/14336 +step:52536/57344 train_time:31067891ms step_avg:591.36ms +step:52537/57344 train_time:31067905ms step_avg:591.35ms +step:52538/57344 train_time:31068152ms step_avg:591.35ms +step:52539/57344 train_time:31068719ms step_avg:591.35ms +grad accum step:13135/14336 +step:52540/57344 train_time:31070054ms step_avg:591.36ms +step:52541/57344 train_time:31070070ms step_avg:591.35ms +step:52542/57344 train_time:31070318ms step_avg:591.34ms +step:52543/57344 train_time:31070890ms step_avg:591.34ms +grad accum step:13136/14336 +step:52544/57344 train_time:31090091ms step_avg:591.70ms +step:52544/57344 val_loss:5.366584 train_time:31090092ms step_avg:591.70ms +step:52545/57344 train_time:31090104ms step_avg:591.69ms +step:52546/57344 train_time:31090399ms step_avg:591.68ms +step:52547/57344 train_time:31090952ms step_avg:591.68ms +grad accum step:13137/14336 +step:52548/57344 train_time:31092314ms step_avg:591.69ms +step:52549/57344 train_time:31092331ms step_avg:591.68ms +step:52550/57344 train_time:31092572ms step_avg:591.68ms +step:52551/57344 train_time:31093122ms step_avg:591.68ms +grad accum step:13138/14336 +step:52552/57344 train_time:31094511ms step_avg:591.69ms +step:52553/57344 train_time:31094527ms step_avg:591.68ms +step:52554/57344 train_time:31094778ms step_avg:591.67ms +step:52555/57344 train_time:31095337ms step_avg:591.67ms +grad accum step:13139/14336 +step:52556/57344 train_time:31096680ms step_avg:591.69ms +step:52557/57344 train_time:31096693ms step_avg:591.68ms +step:52558/57344 train_time:31096938ms step_avg:591.67ms +step:52559/57344 train_time:31097525ms step_avg:591.67ms +grad accum step:13140/14336 +step:52560/57344 train_time:31098940ms step_avg:591.68ms +step:52561/57344 train_time:31098956ms step_avg:591.67ms +step:52562/57344 train_time:31099175ms step_avg:591.67ms +step:52563/57344 train_time:31099725ms step_avg:591.67ms +grad accum step:13141/14336 +step:52564/57344 train_time:31101151ms step_avg:591.68ms +step:52565/57344 train_time:31101166ms step_avg:591.67ms +step:52566/57344 train_time:31101417ms step_avg:591.66ms +step:52567/57344 train_time:31101974ms step_avg:591.66ms +grad accum step:13142/14336 +step:52568/57344 train_time:31103319ms step_avg:591.68ms +step:52569/57344 train_time:31103332ms step_avg:591.67ms +step:52570/57344 train_time:31103579ms step_avg:591.66ms +step:52571/57344 train_time:31104137ms step_avg:591.66ms +grad accum step:13143/14336 +step:52572/57344 train_time:31105490ms step_avg:591.67ms +step:52573/57344 train_time:31105506ms step_avg:591.66ms +step:52574/57344 train_time:31105767ms step_avg:591.66ms +step:52575/57344 train_time:31106359ms step_avg:591.66ms +grad accum step:13144/14336 +step:52576/57344 train_time:31107679ms step_avg:591.67ms +step:52577/57344 train_time:31107696ms step_avg:591.66ms +step:52578/57344 train_time:31107947ms step_avg:591.65ms +step:52579/57344 train_time:31108514ms step_avg:591.65ms +grad accum step:13145/14336 +step:52580/57344 train_time:31109926ms step_avg:591.67ms +step:52581/57344 train_time:31109976ms step_avg:591.66ms +step:52582/57344 train_time:31110199ms step_avg:591.65ms +step:52583/57344 train_time:31110765ms step_avg:591.65ms +grad accum step:13146/14336 +step:52584/57344 train_time:31112130ms step_avg:591.67ms +step:52585/57344 train_time:31112147ms step_avg:591.65ms +step:52586/57344 train_time:31112394ms step_avg:591.65ms +step:52587/57344 train_time:31112946ms step_avg:591.65ms +grad accum step:13147/14336 +step:52588/57344 train_time:31114275ms step_avg:591.66ms +step:52589/57344 train_time:31114291ms step_avg:591.65ms +step:52590/57344 train_time:31114538ms step_avg:591.64ms +step:52591/57344 train_time:31115095ms step_avg:591.64ms +grad accum step:13148/14336 +step:52592/57344 train_time:31116411ms step_avg:591.66ms +step:52593/57344 train_time:31116428ms step_avg:591.65ms +step:52594/57344 train_time:31116674ms step_avg:591.64ms +step:52595/57344 train_time:31117230ms step_avg:591.64ms +grad accum step:13149/14336 +step:52596/57344 train_time:31118607ms step_avg:591.65ms +step:52597/57344 train_time:31118623ms step_avg:591.64ms +step:52598/57344 train_time:31118887ms step_avg:591.64ms +step:52599/57344 train_time:31119503ms step_avg:591.64ms +grad accum step:13150/14336 +step:52600/57344 train_time:31120860ms step_avg:591.65ms +step:52601/57344 train_time:31120884ms step_avg:591.64ms +step:52602/57344 train_time:31121141ms step_avg:591.63ms +step:52603/57344 train_time:31121738ms step_avg:591.63ms +grad accum step:13151/14336 +step:52604/57344 train_time:31146449ms step_avg:592.09ms +step:52605/57344 train_time:31147527ms step_avg:592.10ms +step:52606/57344 train_time:31147786ms step_avg:592.10ms +step:52607/57344 train_time:31148361ms step_avg:592.10ms +grad accum step:13152/14336 +step:52608/57344 train_time:31149735ms step_avg:592.11ms +step:52608/57344 val_loss:5.364900 train_time:31149748ms step_avg:592.11ms +step:52609/57344 train_time:31149760ms step_avg:592.10ms +step:52610/57344 train_time:31149984ms step_avg:592.09ms +step:52611/57344 train_time:31150541ms step_avg:592.09ms +grad accum step:13153/14336 +step:52612/57344 train_time:31152006ms step_avg:592.11ms +step:52613/57344 train_time:31152022ms step_avg:592.10ms +step:52614/57344 train_time:31152243ms step_avg:592.09ms +step:52615/57344 train_time:31152801ms step_avg:592.09ms +grad accum step:13154/14336 +step:52616/57344 train_time:31154122ms step_avg:592.10ms +step:52617/57344 train_time:31154137ms step_avg:592.09ms +step:52618/57344 train_time:31154380ms step_avg:592.09ms +step:52619/57344 train_time:31154938ms step_avg:592.09ms +grad accum step:13155/14336 +step:52620/57344 train_time:31156267ms step_avg:592.10ms +step:52621/57344 train_time:31156283ms step_avg:592.09ms +step:52622/57344 train_time:31156532ms step_avg:592.08ms +step:52623/57344 train_time:31157115ms step_avg:592.08ms +grad accum step:13156/14336 +step:52624/57344 train_time:31158541ms step_avg:592.10ms +step:52625/57344 train_time:31158563ms step_avg:592.09ms +step:52626/57344 train_time:31158811ms step_avg:592.08ms +step:52627/57344 train_time:31159405ms step_avg:592.08ms +grad accum step:13157/14336 +step:52628/57344 train_time:31160788ms step_avg:592.10ms +step:52629/57344 train_time:31160812ms step_avg:592.08ms +step:52630/57344 train_time:31161042ms step_avg:592.08ms +step:52631/57344 train_time:31161613ms step_avg:592.08ms +grad accum step:13158/14336 +step:52632/57344 train_time:31163051ms step_avg:592.09ms +step:52633/57344 train_time:31163068ms step_avg:592.08ms +step:52634/57344 train_time:31163315ms step_avg:592.08ms +step:52635/57344 train_time:31163874ms step_avg:592.08ms +grad accum step:13159/14336 +step:52636/57344 train_time:31165180ms step_avg:592.09ms +step:52637/57344 train_time:31165195ms step_avg:592.08ms +step:52638/57344 train_time:31165442ms step_avg:592.07ms +step:52639/57344 train_time:31165995ms step_avg:592.07ms +grad accum step:13160/14336 +step:52640/57344 train_time:31167431ms step_avg:592.09ms +step:52641/57344 train_time:31167447ms step_avg:592.08ms +step:52642/57344 train_time:31167679ms step_avg:592.07ms +step:52643/57344 train_time:31168216ms step_avg:592.07ms +grad accum step:13161/14336 +step:52644/57344 train_time:31169530ms step_avg:592.08ms +step:52645/57344 train_time:31169554ms step_avg:592.07ms +step:52646/57344 train_time:31169780ms step_avg:592.06ms +step:52647/57344 train_time:31170341ms step_avg:592.06ms +grad accum step:13162/14336 +step:52648/57344 train_time:31171773ms step_avg:592.08ms +step:52649/57344 train_time:31171789ms step_avg:592.07ms +step:52650/57344 train_time:31172038ms step_avg:592.06ms +step:52651/57344 train_time:31172594ms step_avg:592.06ms +grad accum step:13163/14336 +step:52652/57344 train_time:31173956ms step_avg:592.08ms +step:52653/57344 train_time:31173981ms step_avg:592.06ms +step:52654/57344 train_time:31174220ms step_avg:592.06ms +step:52655/57344 train_time:31174796ms step_avg:592.06ms +grad accum step:13164/14336 +step:52656/57344 train_time:31176210ms step_avg:592.07ms +step:52657/57344 train_time:31176227ms step_avg:592.06ms +step:52658/57344 train_time:31176445ms step_avg:592.06ms +step:52659/57344 train_time:31176996ms step_avg:592.05ms +grad accum step:13165/14336 +step:52660/57344 train_time:31178399ms step_avg:592.07ms +step:52661/57344 train_time:31178417ms step_avg:592.06ms +step:52662/57344 train_time:31178659ms step_avg:592.05ms +step:52663/57344 train_time:31179224ms step_avg:592.05ms +grad accum step:13166/14336 +step:52664/57344 train_time:31180605ms step_avg:592.07ms +step:52665/57344 train_time:31180621ms step_avg:592.06ms +step:52666/57344 train_time:31180869ms step_avg:592.05ms +step:52667/57344 train_time:31181435ms step_avg:592.05ms +grad accum step:13167/14336 +step:52668/57344 train_time:31182882ms step_avg:592.07ms +step:52669/57344 train_time:31182907ms step_avg:592.05ms +step:52670/57344 train_time:31183132ms step_avg:592.05ms +step:52671/57344 train_time:31183695ms step_avg:592.05ms +grad accum step:13168/14336 +step:52672/57344 train_time:31185031ms step_avg:592.06ms +step:52672/57344 val_loss:5.362838 train_time:31185040ms step_avg:592.06ms +step:52673/57344 train_time:31185052ms step_avg:592.05ms +step:52674/57344 train_time:31185287ms step_avg:592.04ms +step:52675/57344 train_time:31185870ms step_avg:592.04ms +grad accum step:13169/14336 +step:52676/57344 train_time:31187410ms step_avg:592.06ms +step:52677/57344 train_time:31187511ms step_avg:592.05ms +step:52678/57344 train_time:31187731ms step_avg:592.04ms +step:52679/57344 train_time:31188295ms step_avg:592.04ms +grad accum step:13170/14336 +step:52680/57344 train_time:31189737ms step_avg:592.06ms +step:52681/57344 train_time:31189754ms step_avg:592.05ms +step:52682/57344 train_time:31189983ms step_avg:592.04ms +step:52683/57344 train_time:31190546ms step_avg:592.04ms +grad accum step:13171/14336 +step:52684/57344 train_time:31191901ms step_avg:592.06ms +step:52685/57344 train_time:31191916ms step_avg:592.05ms +step:52686/57344 train_time:31192166ms step_avg:592.04ms +step:52687/57344 train_time:31192748ms step_avg:592.04ms +grad accum step:13172/14336 +step:52688/57344 train_time:31194318ms step_avg:592.06ms +step:52689/57344 train_time:31194340ms step_avg:592.05ms +step:52690/57344 train_time:31194561ms step_avg:592.04ms +step:52691/57344 train_time:31195125ms step_avg:592.04ms +grad accum step:13173/14336 +step:52692/57344 train_time:31196720ms step_avg:592.06ms +step:52693/57344 train_time:31196738ms step_avg:592.05ms +step:52694/57344 train_time:31196973ms step_avg:592.04ms +step:52695/57344 train_time:31197581ms step_avg:592.04ms +grad accum step:13174/14336 +step:52696/57344 train_time:31198922ms step_avg:592.05ms +step:52697/57344 train_time:31198939ms step_avg:592.04ms +step:52698/57344 train_time:31199192ms step_avg:592.04ms +step:52699/57344 train_time:31199768ms step_avg:592.04ms +grad accum step:13175/14336 +step:52700/57344 train_time:31201135ms step_avg:592.05ms +step:52701/57344 train_time:31201161ms step_avg:592.04ms +step:52702/57344 train_time:31201387ms step_avg:592.03ms +step:52703/57344 train_time:31201954ms step_avg:592.03ms +grad accum step:13176/14336 +step:52704/57344 train_time:31203383ms step_avg:592.05ms +step:52705/57344 train_time:31203401ms step_avg:592.04ms +step:52706/57344 train_time:31203638ms step_avg:592.03ms +step:52707/57344 train_time:31204183ms step_avg:592.03ms +grad accum step:13177/14336 +step:52708/57344 train_time:31205504ms step_avg:592.04ms +step:52709/57344 train_time:31205520ms step_avg:592.03ms +step:52710/57344 train_time:31205769ms step_avg:592.03ms +step:52711/57344 train_time:31206348ms step_avg:592.03ms +grad accum step:13178/14336 +step:52712/57344 train_time:31207736ms step_avg:592.04ms +step:52713/57344 train_time:31207751ms step_avg:592.03ms +step:52714/57344 train_time:31207999ms step_avg:592.02ms +step:52715/57344 train_time:31208560ms step_avg:592.02ms +grad accum step:13179/14336 +step:52716/57344 train_time:31209951ms step_avg:592.04ms +step:52717/57344 train_time:31209970ms step_avg:592.03ms +step:52718/57344 train_time:31210210ms step_avg:592.02ms +step:52719/57344 train_time:31210770ms step_avg:592.02ms +grad accum step:13180/14336 +step:52720/57344 train_time:31212143ms step_avg:592.04ms +step:52721/57344 train_time:31212158ms step_avg:592.03ms +step:52722/57344 train_time:31212406ms step_avg:592.02ms +step:52723/57344 train_time:31212971ms step_avg:592.02ms +grad accum step:13181/14336 +step:52724/57344 train_time:31214381ms step_avg:592.03ms +step:52725/57344 train_time:31214402ms step_avg:592.02ms +step:52726/57344 train_time:31214626ms step_avg:592.02ms +step:52727/57344 train_time:31215182ms step_avg:592.02ms +grad accum step:13182/14336 +step:52728/57344 train_time:31216728ms step_avg:592.03ms +step:52729/57344 train_time:31216877ms step_avg:592.02ms +step:52730/57344 train_time:31217101ms step_avg:592.02ms +step:52731/57344 train_time:31217679ms step_avg:592.02ms +grad accum step:13183/14336 +step:52732/57344 train_time:31219089ms step_avg:592.03ms +step:52733/57344 train_time:31219114ms step_avg:592.02ms +step:52734/57344 train_time:31219341ms step_avg:592.02ms +step:52735/57344 train_time:31219908ms step_avg:592.01ms +grad accum step:13184/14336 +step:52736/57344 train_time:31221321ms step_avg:592.03ms +step:52736/57344 val_loss:5.362185 train_time:31221324ms step_avg:592.03ms +step:52737/57344 train_time:31221336ms step_avg:592.02ms +step:52738/57344 train_time:31221571ms step_avg:592.01ms +step:52739/57344 train_time:31222172ms step_avg:592.01ms +grad accum step:13185/14336 +step:52740/57344 train_time:31223628ms step_avg:592.03ms +step:52741/57344 train_time:31223646ms step_avg:592.02ms +step:52742/57344 train_time:31223917ms step_avg:592.01ms +step:52743/57344 train_time:31224545ms step_avg:592.01ms +grad accum step:13186/14336 +step:52744/57344 train_time:31225869ms step_avg:592.03ms +step:52745/57344 train_time:31225912ms step_avg:592.02ms +step:52746/57344 train_time:31226138ms step_avg:592.01ms +step:52747/57344 train_time:31226705ms step_avg:592.01ms +grad accum step:13187/14336 +step:52748/57344 train_time:31228064ms step_avg:592.02ms +step:52749/57344 train_time:31228080ms step_avg:592.01ms +step:52750/57344 train_time:31228341ms step_avg:592.01ms +step:52751/57344 train_time:31228950ms step_avg:592.01ms +grad accum step:13188/14336 +step:52752/57344 train_time:31230333ms step_avg:592.02ms +step:52753/57344 train_time:31230348ms step_avg:592.01ms +step:52754/57344 train_time:31230604ms step_avg:592.00ms +step:52755/57344 train_time:31231187ms step_avg:592.00ms +grad accum step:13189/14336 +step:52756/57344 train_time:31232643ms step_avg:592.02ms +step:52757/57344 train_time:31232659ms step_avg:592.01ms +step:52758/57344 train_time:31232915ms step_avg:592.00ms +step:52759/57344 train_time:31233504ms step_avg:592.00ms +grad accum step:13190/14336 +step:52760/57344 train_time:31234933ms step_avg:592.02ms +step:52761/57344 train_time:31234950ms step_avg:592.01ms +step:52762/57344 train_time:31235210ms step_avg:592.00ms +step:52763/57344 train_time:31235794ms step_avg:592.00ms +grad accum step:13191/14336 +step:52764/57344 train_time:31237116ms step_avg:592.02ms +step:52765/57344 train_time:31237144ms step_avg:592.01ms +step:52766/57344 train_time:31237372ms step_avg:592.00ms +step:52767/57344 train_time:31237957ms step_avg:592.00ms +grad accum step:13192/14336 +step:52768/57344 train_time:31239528ms step_avg:592.02ms +step:52769/57344 train_time:31239543ms step_avg:592.01ms +step:52770/57344 train_time:31239767ms step_avg:592.00ms +step:52771/57344 train_time:31240337ms step_avg:592.00ms +grad accum step:13193/14336 +step:52772/57344 train_time:31241658ms step_avg:592.01ms +step:52773/57344 train_time:31241697ms step_avg:592.00ms +step:52774/57344 train_time:31241920ms step_avg:591.99ms +step:52775/57344 train_time:31242491ms step_avg:591.99ms +grad accum step:13194/14336 +step:52776/57344 train_time:31243900ms step_avg:592.01ms +step:52777/57344 train_time:31243919ms step_avg:592.00ms +step:52778/57344 train_time:31244153ms step_avg:591.99ms +step:52779/57344 train_time:31244733ms step_avg:591.99ms +grad accum step:13195/14336 +step:52780/57344 train_time:31246154ms step_avg:592.01ms +step:52781/57344 train_time:31246216ms step_avg:592.00ms +step:52782/57344 train_time:31246439ms step_avg:591.99ms +step:52783/57344 train_time:31247005ms step_avg:591.99ms +grad accum step:13196/14336 +step:52784/57344 train_time:31248338ms step_avg:592.00ms +step:52785/57344 train_time:31248357ms step_avg:591.99ms +step:52786/57344 train_time:31248606ms step_avg:591.99ms +step:52787/57344 train_time:31249190ms step_avg:591.99ms +grad accum step:13197/14336 +step:52788/57344 train_time:31250532ms step_avg:592.00ms +step:52789/57344 train_time:31250549ms step_avg:591.99ms +step:52790/57344 train_time:31250794ms step_avg:591.98ms +step:52791/57344 train_time:31251358ms step_avg:591.98ms +grad accum step:13198/14336 +step:52792/57344 train_time:31252767ms step_avg:592.00ms +step:52793/57344 train_time:31252783ms step_avg:591.99ms +step:52794/57344 train_time:31253029ms step_avg:591.98ms +step:52795/57344 train_time:31253598ms step_avg:591.98ms +grad accum step:13199/14336 +step:52796/57344 train_time:31255261ms step_avg:592.00ms +step:52797/57344 train_time:31255279ms step_avg:591.99ms +step:52798/57344 train_time:31255507ms step_avg:591.98ms +step:52799/57344 train_time:31256096ms step_avg:591.98ms +grad accum step:13200/14336 +step:52800/57344 train_time:31257506ms step_avg:592.00ms +step:52800/57344 val_loss:5.360997 train_time:31257516ms step_avg:592.00ms +step:52801/57344 train_time:31257528ms step_avg:591.99ms +step:52802/57344 train_time:31257756ms step_avg:591.98ms +step:52803/57344 train_time:31258330ms step_avg:591.98ms +grad accum step:13201/14336 +step:52804/57344 train_time:31259719ms step_avg:592.00ms +step:52805/57344 train_time:31259741ms step_avg:591.98ms +step:52806/57344 train_time:31259981ms step_avg:591.98ms +step:52807/57344 train_time:31260559ms step_avg:591.98ms +grad accum step:13202/14336 +step:52808/57344 train_time:31261959ms step_avg:591.99ms +step:52809/57344 train_time:31261978ms step_avg:591.98ms +step:52810/57344 train_time:31262219ms step_avg:591.98ms +step:52811/57344 train_time:31262760ms step_avg:591.97ms +grad accum step:13203/14336 +step:52812/57344 train_time:31264166ms step_avg:591.99ms +step:52813/57344 train_time:31264193ms step_avg:591.98ms +step:52814/57344 train_time:31264428ms step_avg:591.97ms +step:52815/57344 train_time:31265026ms step_avg:591.97ms +grad accum step:13204/14336 +step:52816/57344 train_time:31266373ms step_avg:591.99ms +step:52817/57344 train_time:31266388ms step_avg:591.98ms +step:52818/57344 train_time:31266638ms step_avg:591.97ms +step:52819/57344 train_time:31267228ms step_avg:591.97ms +grad accum step:13205/14336 +step:52820/57344 train_time:31268643ms step_avg:591.98ms +step:52821/57344 train_time:31268664ms step_avg:591.97ms +step:52822/57344 train_time:31268908ms step_avg:591.97ms +step:52823/57344 train_time:31269482ms step_avg:591.97ms +grad accum step:13206/14336 +step:52824/57344 train_time:31270853ms step_avg:591.98ms +step:52825/57344 train_time:31270869ms step_avg:591.97ms +step:52826/57344 train_time:31271131ms step_avg:591.96ms +step:52827/57344 train_time:31271727ms step_avg:591.96ms +grad accum step:13207/14336 +step:52828/57344 train_time:31273154ms step_avg:591.98ms +step:52829/57344 train_time:31273171ms step_avg:591.97ms +step:52830/57344 train_time:31273416ms step_avg:591.96ms +step:52831/57344 train_time:31273987ms step_avg:591.96ms +grad accum step:13208/14336 +step:52832/57344 train_time:31275342ms step_avg:591.98ms +step:52833/57344 train_time:31275359ms step_avg:591.97ms +step:52834/57344 train_time:31275604ms step_avg:591.96ms +step:52835/57344 train_time:31276169ms step_avg:591.96ms +grad accum step:13209/14336 +step:52836/57344 train_time:31277537ms step_avg:591.97ms +step:52837/57344 train_time:31277551ms step_avg:591.96ms +step:52838/57344 train_time:31277794ms step_avg:591.96ms +step:52839/57344 train_time:31278340ms step_avg:591.96ms +grad accum step:13210/14336 +step:52840/57344 train_time:31279743ms step_avg:591.97ms +step:52841/57344 train_time:31279759ms step_avg:591.96ms +step:52842/57344 train_time:31280017ms step_avg:591.95ms +step:52843/57344 train_time:31280605ms step_avg:591.95ms +grad accum step:13211/14336 +step:52844/57344 train_time:31281944ms step_avg:591.97ms +step:52845/57344 train_time:31281959ms step_avg:591.96ms +step:52846/57344 train_time:31282209ms step_avg:591.95ms +step:52847/57344 train_time:31282771ms step_avg:591.95ms +grad accum step:13212/14336 +step:52848/57344 train_time:31284189ms step_avg:591.97ms +step:52849/57344 train_time:31284215ms step_avg:591.95ms +step:52850/57344 train_time:31284443ms step_avg:591.95ms +step:52851/57344 train_time:31285003ms step_avg:591.95ms +grad accum step:13213/14336 +step:52852/57344 train_time:31286355ms step_avg:591.96ms +step:52853/57344 train_time:31286390ms step_avg:591.95ms +step:52854/57344 train_time:31286642ms step_avg:591.94ms +step:52855/57344 train_time:31287274ms step_avg:591.95ms +grad accum step:13214/14336 +step:52856/57344 train_time:31288756ms step_avg:591.96ms +step:52857/57344 train_time:31288775ms step_avg:591.95ms +step:52858/57344 train_time:31289003ms step_avg:591.94ms +step:52859/57344 train_time:31289574ms step_avg:591.94ms +grad accum step:13215/14336 +step:52860/57344 train_time:31290934ms step_avg:591.96ms +step:52861/57344 train_time:31290956ms step_avg:591.95ms +step:52862/57344 train_time:31291202ms step_avg:591.94ms +step:52863/57344 train_time:31291788ms step_avg:591.94ms +grad accum step:13216/14336 +step:52864/57344 train_time:31293184ms step_avg:591.96ms +step:52864/57344 val_loss:5.359124 train_time:31293190ms step_avg:591.96ms +step:52865/57344 train_time:31293202ms step_avg:591.95ms +step:52866/57344 train_time:31293424ms step_avg:591.94ms +step:52867/57344 train_time:31293997ms step_avg:591.94ms +grad accum step:13217/14336 +step:52868/57344 train_time:31295481ms step_avg:591.96ms +step:52869/57344 train_time:31295498ms step_avg:591.94ms +step:52870/57344 train_time:31295745ms step_avg:591.94ms +step:52871/57344 train_time:31296294ms step_avg:591.94ms +grad accum step:13218/14336 +step:52872/57344 train_time:31297660ms step_avg:591.95ms +step:52873/57344 train_time:31297681ms step_avg:591.94ms +step:52874/57344 train_time:31297946ms step_avg:591.93ms +step:52875/57344 train_time:31298587ms step_avg:591.94ms +grad accum step:13219/14336 +step:52876/57344 train_time:31299959ms step_avg:591.95ms +step:52877/57344 train_time:31299981ms step_avg:591.94ms +step:52878/57344 train_time:31300219ms step_avg:591.93ms +step:52879/57344 train_time:31300782ms step_avg:591.93ms +grad accum step:13220/14336 +step:52880/57344 train_time:31302212ms step_avg:591.95ms +step:52881/57344 train_time:31302235ms step_avg:591.94ms +step:52882/57344 train_time:31302480ms step_avg:591.93ms +step:52883/57344 train_time:31303072ms step_avg:591.93ms +grad accum step:13221/14336 +step:52884/57344 train_time:31304401ms step_avg:591.94ms +step:52885/57344 train_time:31304420ms step_avg:591.93ms +step:52886/57344 train_time:31304667ms step_avg:591.93ms +step:52887/57344 train_time:31305250ms step_avg:591.93ms +grad accum step:13222/14336 +step:52888/57344 train_time:31306627ms step_avg:591.94ms +step:52889/57344 train_time:31306644ms step_avg:591.93ms +step:52890/57344 train_time:31306883ms step_avg:591.92ms +step:52891/57344 train_time:31307444ms step_avg:591.92ms +grad accum step:13223/14336 +step:52892/57344 train_time:31308848ms step_avg:591.94ms +step:52893/57344 train_time:31308862ms step_avg:591.93ms +step:52894/57344 train_time:31309153ms step_avg:591.92ms +step:52895/57344 train_time:31309848ms step_avg:591.92ms +grad accum step:13224/14336 +step:52896/57344 train_time:31311299ms step_avg:591.94ms +step:52897/57344 train_time:31311323ms step_avg:591.93ms +step:52898/57344 train_time:31311561ms step_avg:591.92ms +step:52899/57344 train_time:31312141ms step_avg:591.92ms +grad accum step:13225/14336 +step:52900/57344 train_time:31313599ms step_avg:591.94ms +step:52901/57344 train_time:31313615ms step_avg:591.93ms +step:52902/57344 train_time:31313907ms step_avg:591.92ms +step:52903/57344 train_time:31314583ms step_avg:591.92ms +grad accum step:13226/14336 +step:52904/57344 train_time:31315930ms step_avg:591.94ms +step:52905/57344 train_time:31315947ms step_avg:591.93ms +step:52906/57344 train_time:31316199ms step_avg:591.92ms +step:52907/57344 train_time:31316785ms step_avg:591.92ms +grad accum step:13227/14336 +step:52908/57344 train_time:31318212ms step_avg:591.94ms +step:52909/57344 train_time:31318227ms step_avg:591.93ms +step:52910/57344 train_time:31318481ms step_avg:591.92ms +step:52911/57344 train_time:31319063ms step_avg:591.92ms +grad accum step:13228/14336 +step:52912/57344 train_time:31320551ms step_avg:591.94ms +step:52913/57344 train_time:31320567ms step_avg:591.93ms +step:52914/57344 train_time:31320812ms step_avg:591.92ms +step:52915/57344 train_time:31321381ms step_avg:591.92ms +grad accum step:13229/14336 +step:52916/57344 train_time:31322731ms step_avg:591.93ms +step:52917/57344 train_time:31322748ms step_avg:591.92ms +step:52918/57344 train_time:31322995ms step_avg:591.92ms +step:52919/57344 train_time:31323566ms step_avg:591.92ms +grad accum step:13230/14336 +step:52920/57344 train_time:31324959ms step_avg:591.93ms +step:52921/57344 train_time:31324974ms step_avg:591.92ms +step:52922/57344 train_time:31325225ms step_avg:591.91ms +step:52923/57344 train_time:31325791ms step_avg:591.91ms +grad accum step:13231/14336 +step:52924/57344 train_time:31327178ms step_avg:591.93ms +step:52925/57344 train_time:31327192ms step_avg:591.92ms +step:52926/57344 train_time:31327436ms step_avg:591.91ms +step:52927/57344 train_time:31327994ms step_avg:591.91ms +grad accum step:13232/14336 +step:52928/57344 train_time:31329369ms step_avg:591.92ms +step:52928/57344 val_loss:5.358187 train_time:31329372ms step_avg:591.92ms +step:52929/57344 train_time:31329384ms step_avg:591.91ms +step:52930/57344 train_time:31329607ms step_avg:591.91ms +step:52931/57344 train_time:31330156ms step_avg:591.91ms +grad accum step:13233/14336 +step:52932/57344 train_time:31331491ms step_avg:591.92ms +step:52933/57344 train_time:31331508ms step_avg:591.91ms +step:52934/57344 train_time:31331753ms step_avg:591.90ms +step:52935/57344 train_time:31332309ms step_avg:591.90ms +grad accum step:13234/14336 +step:52936/57344 train_time:31333659ms step_avg:591.92ms +step:52937/57344 train_time:31333673ms step_avg:591.90ms +step:52938/57344 train_time:31333930ms step_avg:591.90ms +step:52939/57344 train_time:31334520ms step_avg:591.90ms +grad accum step:13235/14336 +step:52940/57344 train_time:31335890ms step_avg:591.91ms +step:52941/57344 train_time:31335905ms step_avg:591.90ms +step:52942/57344 train_time:31336153ms step_avg:591.90ms +step:52943/57344 train_time:31336710ms step_avg:591.90ms +grad accum step:13236/14336 +step:52944/57344 train_time:31338106ms step_avg:591.91ms +step:52945/57344 train_time:31338123ms step_avg:591.90ms +step:52946/57344 train_time:31338382ms step_avg:591.89ms +step:52947/57344 train_time:31338967ms step_avg:591.89ms +grad accum step:13237/14336 +step:52948/57344 train_time:31340307ms step_avg:591.91ms +step:52949/57344 train_time:31340324ms step_avg:591.90ms +step:52950/57344 train_time:31340597ms step_avg:591.89ms +step:52951/57344 train_time:31341219ms step_avg:591.89ms +grad accum step:13238/14336 +step:52952/57344 train_time:31342630ms step_avg:591.91ms +step:52953/57344 train_time:31342661ms step_avg:591.90ms +step:52954/57344 train_time:31342892ms step_avg:591.89ms +step:52955/57344 train_time:31343470ms step_avg:591.89ms +grad accum step:13239/14336 +step:52956/57344 train_time:31344836ms step_avg:591.90ms +step:52957/57344 train_time:31344853ms step_avg:591.89ms +step:52958/57344 train_time:31345106ms step_avg:591.89ms +step:52959/57344 train_time:31345686ms step_avg:591.89ms +grad accum step:13240/14336 +step:52960/57344 train_time:31347044ms step_avg:591.90ms +step:52961/57344 train_time:31347060ms step_avg:591.89ms +step:52962/57344 train_time:31347309ms step_avg:591.88ms +step:52963/57344 train_time:31347892ms step_avg:591.88ms +grad accum step:13241/14336 +step:52964/57344 train_time:31349399ms step_avg:591.90ms +step:52965/57344 train_time:31349413ms step_avg:591.89ms +step:52966/57344 train_time:31349659ms step_avg:591.88ms +step:52967/57344 train_time:31350216ms step_avg:591.88ms +grad accum step:13242/14336 +step:52968/57344 train_time:31351551ms step_avg:591.90ms +step:52969/57344 train_time:31351568ms step_avg:591.89ms +step:52970/57344 train_time:31351819ms step_avg:591.88ms +step:52971/57344 train_time:31352406ms step_avg:591.88ms +grad accum step:13243/14336 +step:52972/57344 train_time:31353803ms step_avg:591.89ms +step:52973/57344 train_time:31353819ms step_avg:591.88ms +step:52974/57344 train_time:31354057ms step_avg:591.88ms +step:52975/57344 train_time:31354624ms step_avg:591.88ms +grad accum step:13244/14336 +step:52976/57344 train_time:31356057ms step_avg:591.89ms +step:52977/57344 train_time:31356073ms step_avg:591.88ms +step:52978/57344 train_time:31356327ms step_avg:591.87ms +step:52979/57344 train_time:31356904ms step_avg:591.87ms +grad accum step:13245/14336 +step:52980/57344 train_time:31358257ms step_avg:591.89ms +step:52981/57344 train_time:31358274ms step_avg:591.88ms +step:52982/57344 train_time:31358526ms step_avg:591.87ms +step:52983/57344 train_time:31359100ms step_avg:591.87ms +grad accum step:13246/14336 +step:52984/57344 train_time:31360478ms step_avg:591.89ms +step:52985/57344 train_time:31360494ms step_avg:591.87ms +step:52986/57344 train_time:31360747ms step_avg:591.87ms +step:52987/57344 train_time:31361334ms step_avg:591.87ms +grad accum step:13247/14336 +step:52988/57344 train_time:31362740ms step_avg:591.88ms +step:52989/57344 train_time:31362757ms step_avg:591.87ms +step:52990/57344 train_time:31363015ms step_avg:591.87ms +step:52991/57344 train_time:31363590ms step_avg:591.87ms +grad accum step:13248/14336 +step:52992/57344 train_time:31364952ms step_avg:591.88ms +step:52992/57344 val_loss:5.356282 train_time:31364959ms step_avg:591.88ms +step:52993/57344 train_time:31364971ms step_avg:591.87ms +step:52994/57344 train_time:31365212ms step_avg:591.86ms +step:52995/57344 train_time:31365829ms step_avg:591.86ms +grad accum step:13249/14336 +step:52996/57344 train_time:31367206ms step_avg:591.88ms +step:52997/57344 train_time:31367230ms step_avg:591.87ms +step:52998/57344 train_time:31367460ms step_avg:591.86ms +step:52999/57344 train_time:31368034ms step_avg:591.86ms +grad accum step:13250/14336 +step:53000/57344 train_time:31369435ms step_avg:591.88ms +step:53001/57344 train_time:31369453ms step_avg:591.87ms +step:53002/57344 train_time:31369680ms step_avg:591.86ms +step:53003/57344 train_time:31370231ms step_avg:591.86ms +grad accum step:13251/14336 +step:53004/57344 train_time:31371605ms step_avg:591.87ms +step:53005/57344 train_time:31371621ms step_avg:591.86ms +step:53006/57344 train_time:31371874ms step_avg:591.86ms +step:53007/57344 train_time:31372461ms step_avg:591.86ms +grad accum step:13252/14336 +step:53008/57344 train_time:31373824ms step_avg:591.87ms +step:53009/57344 train_time:31373840ms step_avg:591.86ms +step:53010/57344 train_time:31374092ms step_avg:591.85ms +step:53011/57344 train_time:31374657ms step_avg:591.85ms +grad accum step:13253/14336 +step:53012/57344 train_time:31376027ms step_avg:591.87ms +step:53013/57344 train_time:31376042ms step_avg:591.86ms +step:53014/57344 train_time:31376287ms step_avg:591.85ms +step:53015/57344 train_time:31376855ms step_avg:591.85ms +grad accum step:13254/14336 +step:53016/57344 train_time:31378211ms step_avg:591.86ms +step:53017/57344 train_time:31378233ms step_avg:591.85ms +step:53018/57344 train_time:31378460ms step_avg:591.85ms +step:53019/57344 train_time:31379004ms step_avg:591.84ms +grad accum step:13255/14336 +step:53020/57344 train_time:31380343ms step_avg:591.86ms +step:53021/57344 train_time:31380394ms step_avg:591.85ms +step:53022/57344 train_time:31380625ms step_avg:591.84ms +step:53023/57344 train_time:31381209ms step_avg:591.84ms +grad accum step:13256/14336 +step:53024/57344 train_time:31382661ms step_avg:591.86ms +step:53025/57344 train_time:31382676ms step_avg:591.85ms +step:53026/57344 train_time:31382903ms step_avg:591.84ms +step:53027/57344 train_time:31383476ms step_avg:591.84ms +grad accum step:13257/14336 +step:53028/57344 train_time:31384870ms step_avg:591.85ms +step:53029/57344 train_time:31384886ms step_avg:591.84ms +step:53030/57344 train_time:31385147ms step_avg:591.84ms +step:53031/57344 train_time:31385740ms step_avg:591.84ms +grad accum step:13258/14336 +step:53032/57344 train_time:31387110ms step_avg:591.85ms +step:53033/57344 train_time:31387156ms step_avg:591.84ms +step:53034/57344 train_time:31387382ms step_avg:591.84ms +step:53035/57344 train_time:31387955ms step_avg:591.83ms +grad accum step:13259/14336 +step:53036/57344 train_time:31389300ms step_avg:591.85ms +step:53037/57344 train_time:31389322ms step_avg:591.84ms +step:53038/57344 train_time:31389566ms step_avg:591.83ms +step:53039/57344 train_time:31390128ms step_avg:591.83ms +grad accum step:13260/14336 +step:53040/57344 train_time:31391496ms step_avg:591.85ms +step:53041/57344 train_time:31391512ms step_avg:591.83ms +step:53042/57344 train_time:31391770ms step_avg:591.83ms +step:53043/57344 train_time:31392370ms step_avg:591.83ms +grad accum step:13261/14336 +step:53044/57344 train_time:31393739ms step_avg:591.84ms +step:53045/57344 train_time:31393755ms step_avg:591.83ms +step:53046/57344 train_time:31393995ms step_avg:591.83ms +step:53047/57344 train_time:31394550ms step_avg:591.83ms +grad accum step:13262/14336 +step:53048/57344 train_time:31396053ms step_avg:591.84ms +step:53049/57344 train_time:31396066ms step_avg:591.83ms +step:53050/57344 train_time:31396287ms step_avg:591.82ms +step:53051/57344 train_time:31396891ms step_avg:591.82ms +grad accum step:13263/14336 +step:53052/57344 train_time:31398441ms step_avg:591.84ms +step:53053/57344 train_time:31398461ms step_avg:591.83ms +step:53054/57344 train_time:31398712ms step_avg:591.83ms +step:53055/57344 train_time:31399310ms step_avg:591.83ms +grad accum step:13264/14336 +step:53056/57344 train_time:31400633ms step_avg:591.84ms +step:53056/57344 val_loss:5.355115 train_time:31400641ms step_avg:591.84ms +step:53057/57344 train_time:31400653ms step_avg:591.83ms +step:53058/57344 train_time:31400886ms step_avg:591.82ms +step:53059/57344 train_time:31401472ms step_avg:591.82ms +grad accum step:13265/14336 +step:53060/57344 train_time:31402847ms step_avg:591.84ms +step:53061/57344 train_time:31402860ms step_avg:591.83ms +step:53062/57344 train_time:31403115ms step_avg:591.82ms +step:53063/57344 train_time:31403695ms step_avg:591.82ms +grad accum step:13266/14336 +step:53064/57344 train_time:31405050ms step_avg:591.83ms +step:53065/57344 train_time:31405064ms step_avg:591.82ms +step:53066/57344 train_time:31405322ms step_avg:591.82ms +step:53067/57344 train_time:31405921ms step_avg:591.82ms +grad accum step:13267/14336 +step:53068/57344 train_time:31407441ms step_avg:591.83ms +step:53069/57344 train_time:31407458ms step_avg:591.82ms +step:53070/57344 train_time:31407693ms step_avg:591.82ms +step:53071/57344 train_time:31408290ms step_avg:591.82ms +grad accum step:13268/14336 +step:53072/57344 train_time:31409663ms step_avg:591.83ms +step:53073/57344 train_time:31409799ms step_avg:591.82ms +step:53074/57344 train_time:31410024ms step_avg:591.82ms +step:53075/57344 train_time:31410590ms step_avg:591.82ms +grad accum step:13269/14336 +step:53076/57344 train_time:31412011ms step_avg:591.83ms +step:53077/57344 train_time:31412212ms step_avg:591.82ms +step:53078/57344 train_time:31412425ms step_avg:591.82ms +step:53079/57344 train_time:31412970ms step_avg:591.82ms +grad accum step:13270/14336 +step:53080/57344 train_time:31414397ms step_avg:591.83ms +step:53081/57344 train_time:31414418ms step_avg:591.82ms +step:53082/57344 train_time:31414651ms step_avg:591.81ms +step:53083/57344 train_time:31415237ms step_avg:591.81ms +grad accum step:13271/14336 +step:53084/57344 train_time:31416562ms step_avg:591.83ms +step:53085/57344 train_time:31416578ms step_avg:591.82ms +step:53086/57344 train_time:31416828ms step_avg:591.81ms +step:53087/57344 train_time:31417393ms step_avg:591.81ms +grad accum step:13272/14336 +step:53088/57344 train_time:31418742ms step_avg:591.82ms +step:53089/57344 train_time:31418767ms step_avg:591.81ms +step:53090/57344 train_time:31419000ms step_avg:591.81ms +step:53091/57344 train_time:31419558ms step_avg:591.81ms +grad accum step:13273/14336 +step:53092/57344 train_time:31420927ms step_avg:591.82ms +step:53093/57344 train_time:31420944ms step_avg:591.81ms +step:53094/57344 train_time:31421189ms step_avg:591.80ms +step:53095/57344 train_time:31421772ms step_avg:591.80ms +grad accum step:13274/14336 +step:53096/57344 train_time:31423194ms step_avg:591.82ms +step:53097/57344 train_time:31423208ms step_avg:591.81ms +step:53098/57344 train_time:31423460ms step_avg:591.80ms +step:53099/57344 train_time:31424030ms step_avg:591.80ms +grad accum step:13275/14336 +step:53100/57344 train_time:31425377ms step_avg:591.82ms +step:53101/57344 train_time:31425394ms step_avg:591.80ms +step:53102/57344 train_time:31425636ms step_avg:591.80ms +step:53103/57344 train_time:31426183ms step_avg:591.80ms +grad accum step:13276/14336 +step:53104/57344 train_time:31427558ms step_avg:591.81ms +step:53105/57344 train_time:31427573ms step_avg:591.80ms +step:53106/57344 train_time:31427825ms step_avg:591.79ms +step:53107/57344 train_time:31428394ms step_avg:591.79ms +grad accum step:13277/14336 +step:53108/57344 train_time:31429768ms step_avg:591.81ms +step:53109/57344 train_time:31429787ms step_avg:591.80ms +step:53110/57344 train_time:31430029ms step_avg:591.79ms +step:53111/57344 train_time:31430592ms step_avg:591.79ms +grad accum step:13278/14336 +step:53112/57344 train_time:31431920ms step_avg:591.80ms +step:53113/57344 train_time:31431935ms step_avg:591.79ms +step:53114/57344 train_time:31432187ms step_avg:591.79ms +step:53115/57344 train_time:31432754ms step_avg:591.79ms +grad accum step:13279/14336 +step:53116/57344 train_time:31434234ms step_avg:591.80ms +step:53117/57344 train_time:31434252ms step_avg:591.79ms +step:53118/57344 train_time:31434476ms step_avg:591.79ms +step:53119/57344 train_time:31435042ms step_avg:591.79ms +grad accum step:13280/14336 +step:53120/57344 train_time:31436437ms step_avg:591.80ms +step:53120/57344 val_loss:5.352453 train_time:31436443ms step_avg:591.80ms +step:53121/57344 train_time:31436455ms step_avg:591.79ms +step:53122/57344 train_time:31436676ms step_avg:591.78ms +step:53123/57344 train_time:31437233ms step_avg:591.78ms +grad accum step:13281/14336 +step:53124/57344 train_time:31438591ms step_avg:591.80ms +step:53125/57344 train_time:31438605ms step_avg:591.79ms +step:53126/57344 train_time:31438854ms step_avg:591.78ms +step:53127/57344 train_time:31439426ms step_avg:591.78ms +grad accum step:13282/14336 +step:53128/57344 train_time:31440820ms step_avg:591.79ms +step:53129/57344 train_time:31440842ms step_avg:591.78ms +step:53130/57344 train_time:31441072ms step_avg:591.78ms +step:53131/57344 train_time:31441644ms step_avg:591.78ms +grad accum step:13283/14336 +step:53132/57344 train_time:31443032ms step_avg:591.79ms +step:53133/57344 train_time:31443055ms step_avg:591.78ms +step:53134/57344 train_time:31443301ms step_avg:591.77ms +step:53135/57344 train_time:31443885ms step_avg:591.77ms +grad accum step:13284/14336 +step:53136/57344 train_time:31445196ms step_avg:591.79ms +step:53137/57344 train_time:31445213ms step_avg:591.78ms +step:53138/57344 train_time:31445467ms step_avg:591.77ms +step:53139/57344 train_time:31446053ms step_avg:591.77ms +grad accum step:13285/14336 +step:53140/57344 train_time:31447475ms step_avg:591.79ms +step:53141/57344 train_time:31447492ms step_avg:591.77ms +step:53142/57344 train_time:31447741ms step_avg:591.77ms +step:53143/57344 train_time:31448303ms step_avg:591.77ms +grad accum step:13286/14336 +step:53144/57344 train_time:31449658ms step_avg:591.78ms +step:53145/57344 train_time:31449677ms step_avg:591.77ms +step:53146/57344 train_time:31449915ms step_avg:591.76ms +step:53147/57344 train_time:31450475ms step_avg:591.76ms +grad accum step:13287/14336 +step:53148/57344 train_time:31451936ms step_avg:591.78ms +step:53149/57344 train_time:31451960ms step_avg:591.77ms +step:53150/57344 train_time:31452185ms step_avg:591.76ms +step:53151/57344 train_time:31452749ms step_avg:591.76ms +grad accum step:13288/14336 +step:53152/57344 train_time:31454071ms step_avg:591.78ms +step:53153/57344 train_time:31454089ms step_avg:591.77ms +step:53154/57344 train_time:31454325ms step_avg:591.76ms +step:53155/57344 train_time:31454891ms step_avg:591.76ms +grad accum step:13289/14336 +step:53156/57344 train_time:31456280ms step_avg:591.77ms +step:53157/57344 train_time:31456296ms step_avg:591.76ms +step:53158/57344 train_time:31456544ms step_avg:591.76ms +step:53159/57344 train_time:31457114ms step_avg:591.76ms +grad accum step:13290/14336 +step:53160/57344 train_time:31458527ms step_avg:591.77ms +step:53161/57344 train_time:31458545ms step_avg:591.76ms +step:53162/57344 train_time:31458770ms step_avg:591.75ms +step:53163/57344 train_time:31459342ms step_avg:591.75ms +grad accum step:13291/14336 +step:53164/57344 train_time:31460734ms step_avg:591.77ms +step:53165/57344 train_time:31460750ms step_avg:591.76ms +step:53166/57344 train_time:31461000ms step_avg:591.75ms +step:53167/57344 train_time:31461566ms step_avg:591.75ms +grad accum step:13292/14336 +step:53168/57344 train_time:31462947ms step_avg:591.76ms +step:53169/57344 train_time:31462965ms step_avg:591.75ms +step:53170/57344 train_time:31463217ms step_avg:591.75ms +step:53171/57344 train_time:31463812ms step_avg:591.75ms +grad accum step:13293/14336 +step:53172/57344 train_time:31465208ms step_avg:591.76ms +step:53173/57344 train_time:31465224ms step_avg:591.75ms +step:53174/57344 train_time:31465479ms step_avg:591.75ms +step:53175/57344 train_time:31466064ms step_avg:591.75ms +grad accum step:13294/14336 +step:53176/57344 train_time:31467436ms step_avg:591.76ms +step:53177/57344 train_time:31467451ms step_avg:591.75ms +step:53178/57344 train_time:31467727ms step_avg:591.74ms +step:53179/57344 train_time:31468346ms step_avg:591.74ms +grad accum step:13295/14336 +step:53180/57344 train_time:31469778ms step_avg:591.76ms +step:53181/57344 train_time:31469797ms step_avg:591.75ms +step:53182/57344 train_time:31470035ms step_avg:591.74ms +step:53183/57344 train_time:31470596ms step_avg:591.74ms +grad accum step:13296/14336 +step:53184/57344 train_time:31471959ms step_avg:591.76ms +step:53184/57344 val_loss:5.351868 train_time:31471964ms step_avg:591.76ms +step:53185/57344 train_time:31471976ms step_avg:591.75ms +step:53186/57344 train_time:31472201ms step_avg:591.74ms +step:53187/57344 train_time:31472764ms step_avg:591.74ms +grad accum step:13297/14336 +step:53188/57344 train_time:31474194ms step_avg:591.75ms +step:53189/57344 train_time:31474210ms step_avg:591.74ms +step:53190/57344 train_time:31474473ms step_avg:591.74ms +step:53191/57344 train_time:31475076ms step_avg:591.74ms +grad accum step:13298/14336 +step:53192/57344 train_time:31476473ms step_avg:591.75ms +step:53193/57344 train_time:31476493ms step_avg:591.74ms +step:53194/57344 train_time:31476738ms step_avg:591.73ms +step:53195/57344 train_time:31477298ms step_avg:591.73ms +grad accum step:13299/14336 +step:53196/57344 train_time:31478697ms step_avg:591.75ms +step:53197/57344 train_time:31478714ms step_avg:591.74ms +step:53198/57344 train_time:31478971ms step_avg:591.73ms +step:53199/57344 train_time:31479556ms step_avg:591.73ms +grad accum step:13300/14336 +step:53200/57344 train_time:31480969ms step_avg:591.75ms +step:53201/57344 train_time:31480987ms step_avg:591.74ms +step:53202/57344 train_time:31481210ms step_avg:591.73ms +step:53203/57344 train_time:31481771ms step_avg:591.73ms +grad accum step:13301/14336 +step:53204/57344 train_time:31483306ms step_avg:591.75ms +step:53205/57344 train_time:31483317ms step_avg:591.74ms +step:53206/57344 train_time:31483567ms step_avg:591.73ms +step:53207/57344 train_time:31484145ms step_avg:591.73ms +grad accum step:13302/14336 +step:53208/57344 train_time:31485498ms step_avg:591.74ms +step:53209/57344 train_time:31485512ms step_avg:591.73ms +step:53210/57344 train_time:31485760ms step_avg:591.73ms +step:53211/57344 train_time:31486315ms step_avg:591.73ms +grad accum step:13303/14336 +step:53212/57344 train_time:31487714ms step_avg:591.74ms +step:53213/57344 train_time:31487762ms step_avg:591.73ms +step:53214/57344 train_time:31488010ms step_avg:591.72ms +step:53215/57344 train_time:31488634ms step_avg:591.72ms +grad accum step:13304/14336 +step:53216/57344 train_time:31490039ms step_avg:591.74ms +step:53217/57344 train_time:31490054ms step_avg:591.73ms +step:53218/57344 train_time:31490314ms step_avg:591.72ms +step:53219/57344 train_time:31490948ms step_avg:591.72ms +grad accum step:13305/14336 +step:53220/57344 train_time:31492358ms step_avg:591.74ms +step:53221/57344 train_time:31492374ms step_avg:591.73ms +step:53222/57344 train_time:31492636ms step_avg:591.72ms +step:53223/57344 train_time:31493232ms step_avg:591.72ms +grad accum step:13306/14336 +step:53224/57344 train_time:31494548ms step_avg:591.74ms +step:53225/57344 train_time:31494565ms step_avg:591.73ms +step:53226/57344 train_time:31494813ms step_avg:591.72ms +step:53227/57344 train_time:31495357ms step_avg:591.72ms +grad accum step:13307/14336 +step:53228/57344 train_time:31496742ms step_avg:591.73ms +step:53229/57344 train_time:31496765ms step_avg:591.72ms +step:53230/57344 train_time:31496994ms step_avg:591.72ms +step:53231/57344 train_time:31497567ms step_avg:591.71ms +grad accum step:13308/14336 +step:53232/57344 train_time:31498916ms step_avg:591.73ms +step:53233/57344 train_time:31498933ms step_avg:591.72ms +step:53234/57344 train_time:31499180ms step_avg:591.71ms +step:53235/57344 train_time:31499740ms step_avg:591.71ms +grad accum step:13309/14336 +step:53236/57344 train_time:31501130ms step_avg:591.73ms +step:53237/57344 train_time:31501147ms step_avg:591.72ms +step:53238/57344 train_time:31501407ms step_avg:591.71ms +step:53239/57344 train_time:31501999ms step_avg:591.71ms +grad accum step:13310/14336 +step:53240/57344 train_time:31503404ms step_avg:591.72ms +step:53241/57344 train_time:31503446ms step_avg:591.71ms +step:53242/57344 train_time:31503665ms step_avg:591.71ms +step:53243/57344 train_time:31504221ms step_avg:591.71ms +grad accum step:13311/14336 +step:53244/57344 train_time:31505674ms step_avg:591.72ms +step:53245/57344 train_time:31505697ms step_avg:591.71ms +step:53246/57344 train_time:31505930ms step_avg:591.71ms +step:53247/57344 train_time:31506522ms step_avg:591.71ms +grad accum step:13312/14336 +step:53248/57344 train_time:31507954ms step_avg:591.72ms +step:53248/57344 val_loss:5.350027 train_time:31507955ms step_avg:591.72ms +step:53249/57344 train_time:31507967ms step_avg:591.71ms +step:53250/57344 train_time:31508192ms step_avg:591.70ms +step:53251/57344 train_time:31508741ms step_avg:591.70ms +grad accum step:13313/14336 +step:53252/57344 train_time:31510135ms step_avg:591.72ms +step:53253/57344 train_time:31510147ms step_avg:591.71ms +step:53254/57344 train_time:31510388ms step_avg:591.70ms +step:53255/57344 train_time:31510952ms step_avg:591.70ms +grad accum step:13314/14336 +step:53256/57344 train_time:31512341ms step_avg:591.71ms +step:53257/57344 train_time:31512357ms step_avg:591.70ms +step:53258/57344 train_time:31512614ms step_avg:591.70ms +step:53259/57344 train_time:31513204ms step_avg:591.70ms +grad accum step:13315/14336 +step:53260/57344 train_time:31514647ms step_avg:591.71ms +step:53261/57344 train_time:31514664ms step_avg:591.70ms +step:53262/57344 train_time:31514921ms step_avg:591.70ms +step:53263/57344 train_time:31515525ms step_avg:591.70ms +grad accum step:13316/14336 +step:53264/57344 train_time:31516966ms step_avg:591.71ms +step:53265/57344 train_time:31516982ms step_avg:591.70ms +step:53266/57344 train_time:31517248ms step_avg:591.70ms +step:53267/57344 train_time:31517857ms step_avg:591.70ms +grad accum step:13317/14336 +step:53268/57344 train_time:31519326ms step_avg:591.71ms +step:53269/57344 train_time:31519343ms step_avg:591.70ms +step:53270/57344 train_time:31519608ms step_avg:591.70ms +step:53271/57344 train_time:31520212ms step_avg:591.70ms +grad accum step:13318/14336 +step:53272/57344 train_time:31521617ms step_avg:591.71ms +step:53273/57344 train_time:31521634ms step_avg:591.70ms +step:53274/57344 train_time:31521880ms step_avg:591.69ms +step:53275/57344 train_time:31522430ms step_avg:591.69ms +grad accum step:13319/14336 +step:53276/57344 train_time:31523786ms step_avg:591.71ms +step:53277/57344 train_time:31523803ms step_avg:591.70ms +step:53278/57344 train_time:31524058ms step_avg:591.69ms +step:53279/57344 train_time:31524659ms step_avg:591.69ms +grad accum step:13320/14336 +step:53280/57344 train_time:31526205ms step_avg:591.71ms +step:53281/57344 train_time:31526221ms step_avg:591.70ms +step:53282/57344 train_time:31526492ms step_avg:591.69ms +step:53283/57344 train_time:31527135ms step_avg:591.69ms +grad accum step:13321/14336 +step:53284/57344 train_time:31528542ms step_avg:591.71ms +step:53285/57344 train_time:31528559ms step_avg:591.70ms +step:53286/57344 train_time:31528813ms step_avg:591.69ms +step:53287/57344 train_time:31529397ms step_avg:591.69ms +grad accum step:13322/14336 +step:53288/57344 train_time:31530803ms step_avg:591.71ms +step:53289/57344 train_time:31530818ms step_avg:591.69ms +step:53290/57344 train_time:31531069ms step_avg:591.69ms +step:53291/57344 train_time:31531631ms step_avg:591.69ms +grad accum step:13323/14336 +step:53292/57344 train_time:31532989ms step_avg:591.70ms +step:53293/57344 train_time:31533006ms step_avg:591.69ms +step:53294/57344 train_time:31533271ms step_avg:591.69ms +step:53295/57344 train_time:31533880ms step_avg:591.69ms +grad accum step:13324/14336 +step:53296/57344 train_time:31535235ms step_avg:591.70ms +step:53297/57344 train_time:31535252ms step_avg:591.69ms +step:53298/57344 train_time:31535505ms step_avg:591.68ms +step:53299/57344 train_time:31536079ms step_avg:591.68ms +grad accum step:13325/14336 +step:53300/57344 train_time:31537486ms step_avg:591.70ms +step:53301/57344 train_time:31537500ms step_avg:591.69ms +step:53302/57344 train_time:31537747ms step_avg:591.68ms +step:53303/57344 train_time:31538314ms step_avg:591.68ms +grad accum step:13326/14336 +step:53304/57344 train_time:31539748ms step_avg:591.70ms +step:53305/57344 train_time:31539764ms step_avg:591.68ms +step:53306/57344 train_time:31540025ms step_avg:591.68ms +step:53307/57344 train_time:31540636ms step_avg:591.68ms +grad accum step:13327/14336 +step:53308/57344 train_time:31542173ms step_avg:591.70ms +step:53309/57344 train_time:31542189ms step_avg:591.69ms +step:53310/57344 train_time:31542448ms step_avg:591.68ms +step:53311/57344 train_time:31543032ms step_avg:591.68ms +grad accum step:13328/14336 +step:53312/57344 train_time:31544469ms step_avg:591.70ms +step:53312/57344 val_loss:5.349732 train_time:31544471ms step_avg:591.70ms +step:53313/57344 train_time:31544483ms step_avg:591.68ms +step:53314/57344 train_time:31544711ms step_avg:591.68ms +step:53315/57344 train_time:31545281ms step_avg:591.68ms +grad accum step:13329/14336 +step:53316/57344 train_time:31546643ms step_avg:591.69ms +step:53317/57344 train_time:31546661ms step_avg:591.68ms +step:53318/57344 train_time:31546911ms step_avg:591.67ms +step:53319/57344 train_time:31547489ms step_avg:591.67ms +grad accum step:13330/14336 +step:53320/57344 train_time:31548880ms step_avg:591.69ms +step:53321/57344 train_time:31548895ms step_avg:591.68ms +step:53322/57344 train_time:31549146ms step_avg:591.67ms +step:53323/57344 train_time:31549719ms step_avg:591.67ms +grad accum step:13331/14336 +step:53324/57344 train_time:31551165ms step_avg:591.69ms +step:53325/57344 train_time:31551177ms step_avg:591.68ms +step:53326/57344 train_time:31551425ms step_avg:591.67ms +step:53327/57344 train_time:31552004ms step_avg:591.67ms +grad accum step:13332/14336 +step:53328/57344 train_time:31553339ms step_avg:591.68ms +step:53329/57344 train_time:31553356ms step_avg:591.67ms +step:53330/57344 train_time:31553603ms step_avg:591.67ms +step:53331/57344 train_time:31554156ms step_avg:591.67ms +grad accum step:13333/14336 +step:53332/57344 train_time:31555522ms step_avg:591.68ms +step:53333/57344 train_time:31555551ms step_avg:591.67ms +step:53334/57344 train_time:31555782ms step_avg:591.66ms +step:53335/57344 train_time:31556362ms step_avg:591.66ms +grad accum step:13334/14336 +step:53336/57344 train_time:31557791ms step_avg:591.68ms +step:53337/57344 train_time:31557808ms step_avg:591.67ms +step:53338/57344 train_time:31558055ms step_avg:591.66ms +step:53339/57344 train_time:31558615ms step_avg:591.66ms +grad accum step:13335/14336 +step:53340/57344 train_time:31560019ms step_avg:591.68ms +step:53341/57344 train_time:31560036ms step_avg:591.67ms +step:53342/57344 train_time:31560295ms step_avg:591.66ms +step:53343/57344 train_time:31560904ms step_avg:591.66ms +grad accum step:13336/14336 +step:53344/57344 train_time:31562277ms step_avg:591.67ms +step:53345/57344 train_time:31562293ms step_avg:591.66ms +step:53346/57344 train_time:31562547ms step_avg:591.66ms +step:53347/57344 train_time:31563118ms step_avg:591.66ms +grad accum step:13337/14336 +step:53348/57344 train_time:31564425ms step_avg:591.67ms +step:53349/57344 train_time:31564440ms step_avg:591.66ms +step:53350/57344 train_time:31564691ms step_avg:591.65ms +step:53351/57344 train_time:31565256ms step_avg:591.65ms +grad accum step:13338/14336 +step:53352/57344 train_time:31566636ms step_avg:591.67ms +step:53353/57344 train_time:31566653ms step_avg:591.66ms +step:53354/57344 train_time:31566901ms step_avg:591.65ms +step:53355/57344 train_time:31567463ms step_avg:591.65ms +grad accum step:13339/14336 +step:53356/57344 train_time:31568788ms step_avg:591.66ms +step:53357/57344 train_time:31568805ms step_avg:591.65ms +step:53358/57344 train_time:31569055ms step_avg:591.65ms +step:53359/57344 train_time:31569632ms step_avg:591.65ms +grad accum step:13340/14336 +step:53360/57344 train_time:31571063ms step_avg:591.66ms +step:53361/57344 train_time:31571080ms step_avg:591.65ms +step:53362/57344 train_time:31571331ms step_avg:591.64ms +step:53363/57344 train_time:31571898ms step_avg:591.64ms +grad accum step:13341/14336 +step:53364/57344 train_time:31573225ms step_avg:591.66ms +step:53365/57344 train_time:31573240ms step_avg:591.65ms +step:53366/57344 train_time:31573502ms step_avg:591.64ms +step:53367/57344 train_time:31574093ms step_avg:591.64ms +grad accum step:13342/14336 +step:53368/57344 train_time:31575504ms step_avg:591.66ms +step:53369/57344 train_time:31575521ms step_avg:591.65ms +step:53370/57344 train_time:31575777ms step_avg:591.64ms +step:53371/57344 train_time:31576362ms step_avg:591.64ms +grad accum step:13343/14336 +step:53372/57344 train_time:31577734ms step_avg:591.65ms +step:53373/57344 train_time:31577750ms step_avg:591.64ms +step:53374/57344 train_time:31578001ms step_avg:591.64ms +step:53375/57344 train_time:31578568ms step_avg:591.64ms +grad accum step:13344/14336 +step:53376/57344 train_time:31579969ms step_avg:591.65ms +step:53376/57344 val_loss:5.346778 train_time:31579970ms step_avg:591.65ms +step:53377/57344 train_time:31579981ms step_avg:591.64ms +step:53378/57344 train_time:31580226ms step_avg:591.63ms +step:53379/57344 train_time:31580840ms step_avg:591.63ms +grad accum step:13345/14336 +step:53380/57344 train_time:31582187ms step_avg:591.65ms +step:53381/57344 train_time:31582203ms step_avg:591.64ms +step:53382/57344 train_time:31582462ms step_avg:591.63ms +step:53383/57344 train_time:31583050ms step_avg:591.63ms +grad accum step:13346/14336 +step:53384/57344 train_time:31584411ms step_avg:591.65ms +step:53385/57344 train_time:31584437ms step_avg:591.64ms +step:53386/57344 train_time:31584678ms step_avg:591.63ms +step:53387/57344 train_time:31585247ms step_avg:591.63ms +grad accum step:13347/14336 +step:53388/57344 train_time:31586666ms step_avg:591.64ms +step:53389/57344 train_time:31586682ms step_avg:591.63ms +step:53390/57344 train_time:31586939ms step_avg:591.63ms +step:53391/57344 train_time:31587522ms step_avg:591.63ms +grad accum step:13348/14336 +step:53392/57344 train_time:31588924ms step_avg:591.64ms +step:53393/57344 train_time:31588940ms step_avg:591.63ms +step:53394/57344 train_time:31589194ms step_avg:591.62ms +step:53395/57344 train_time:31589760ms step_avg:591.62ms +grad accum step:13349/14336 +step:53396/57344 train_time:31591104ms step_avg:591.64ms +step:53397/57344 train_time:31591129ms step_avg:591.63ms +step:53398/57344 train_time:31591370ms step_avg:591.62ms +step:53399/57344 train_time:31591932ms step_avg:591.62ms +grad accum step:13350/14336 +step:53400/57344 train_time:31593276ms step_avg:591.63ms +step:53401/57344 train_time:31593293ms step_avg:591.62ms +step:53402/57344 train_time:31593548ms step_avg:591.62ms +step:53403/57344 train_time:31594141ms step_avg:591.62ms +grad accum step:13351/14336 +step:53404/57344 train_time:31595524ms step_avg:591.63ms +step:53405/57344 train_time:31595541ms step_avg:591.62ms +step:53406/57344 train_time:31595798ms step_avg:591.62ms +step:53407/57344 train_time:31596384ms step_avg:591.62ms +grad accum step:13352/14336 +step:53408/57344 train_time:31597768ms step_avg:591.63ms +step:53409/57344 train_time:31597784ms step_avg:591.62ms +step:53410/57344 train_time:31598034ms step_avg:591.61ms +step:53411/57344 train_time:31598600ms step_avg:591.61ms +grad accum step:13353/14336 +step:53412/57344 train_time:31599942ms step_avg:591.63ms +step:53413/57344 train_time:31599958ms step_avg:591.62ms +step:53414/57344 train_time:31600213ms step_avg:591.61ms +step:53415/57344 train_time:31600790ms step_avg:591.61ms +grad accum step:13354/14336 +step:53416/57344 train_time:31602307ms step_avg:591.63ms +step:53417/57344 train_time:31602319ms step_avg:591.62ms +step:53418/57344 train_time:31602574ms step_avg:591.61ms +step:53419/57344 train_time:31603169ms step_avg:591.61ms +grad accum step:13355/14336 +step:53420/57344 train_time:31604522ms step_avg:591.62ms +step:53421/57344 train_time:31604539ms step_avg:591.61ms +step:53422/57344 train_time:31604810ms step_avg:591.61ms +step:53423/57344 train_time:31605421ms step_avg:591.61ms +grad accum step:13356/14336 +step:53424/57344 train_time:31606771ms step_avg:591.62ms +step:53425/57344 train_time:31606786ms step_avg:591.61ms +step:53426/57344 train_time:31607041ms step_avg:591.60ms +step:53427/57344 train_time:31607614ms step_avg:591.60ms +grad accum step:13357/14336 +step:53428/57344 train_time:31609035ms step_avg:591.62ms +step:53429/57344 train_time:31609048ms step_avg:591.61ms +step:53430/57344 train_time:31609285ms step_avg:591.60ms +step:53431/57344 train_time:31609875ms step_avg:591.60ms +grad accum step:13358/14336 +step:53432/57344 train_time:31611267ms step_avg:591.62ms +step:53433/57344 train_time:31611284ms step_avg:591.61ms +step:53434/57344 train_time:31611567ms step_avg:591.60ms +step:53435/57344 train_time:31612210ms step_avg:591.60ms +grad accum step:13359/14336 +step:53436/57344 train_time:31613578ms step_avg:591.62ms +step:53437/57344 train_time:31613594ms step_avg:591.60ms +step:53438/57344 train_time:31613853ms step_avg:591.60ms +step:53439/57344 train_time:31614451ms step_avg:591.60ms +grad accum step:13360/14336 +step:53440/57344 train_time:31615829ms step_avg:591.61ms +step:53440/57344 val_loss:5.346203 train_time:31615830ms step_avg:591.61ms +step:53441/57344 train_time:31615842ms step_avg:591.60ms +step:53442/57344 train_time:31616066ms step_avg:591.60ms +step:53443/57344 train_time:31616632ms step_avg:591.60ms +grad accum step:13361/14336 +step:53444/57344 train_time:31617941ms step_avg:591.61ms +step:53445/57344 train_time:31617963ms step_avg:591.60ms +step:53446/57344 train_time:31618194ms step_avg:591.59ms +step:53447/57344 train_time:31618776ms step_avg:591.59ms +grad accum step:13362/14336 +step:53448/57344 train_time:31620298ms step_avg:591.61ms +step:53449/57344 train_time:31620313ms step_avg:591.60ms +step:53450/57344 train_time:31620541ms step_avg:591.59ms +step:53451/57344 train_time:31621117ms step_avg:591.59ms +grad accum step:13363/14336 +step:53452/57344 train_time:31622527ms step_avg:591.61ms +step:53453/57344 train_time:31622547ms step_avg:591.60ms +step:53454/57344 train_time:31622777ms step_avg:591.59ms +step:53455/57344 train_time:31623357ms step_avg:591.59ms +grad accum step:13364/14336 +step:53456/57344 train_time:31624969ms step_avg:591.61ms +step:53457/57344 train_time:31624989ms step_avg:591.60ms +step:53458/57344 train_time:31625211ms step_avg:591.59ms +step:53459/57344 train_time:31625778ms step_avg:591.59ms +grad accum step:13365/14336 +step:53460/57344 train_time:31627145ms step_avg:591.60ms +step:53461/57344 train_time:31627167ms step_avg:591.59ms +step:53462/57344 train_time:31627417ms step_avg:591.59ms +step:53463/57344 train_time:31628026ms step_avg:591.59ms +grad accum step:13366/14336 +step:53464/57344 train_time:31629466ms step_avg:591.60ms +step:53465/57344 train_time:31629483ms step_avg:591.59ms +step:53466/57344 train_time:31629714ms step_avg:591.59ms +step:53467/57344 train_time:31630268ms step_avg:591.58ms +grad accum step:13367/14336 +step:53468/57344 train_time:31631650ms step_avg:591.60ms +step:53469/57344 train_time:31631666ms step_avg:591.59ms +step:53470/57344 train_time:31631921ms step_avg:591.58ms +step:53471/57344 train_time:31632502ms step_avg:591.58ms +grad accum step:13368/14336 +step:53472/57344 train_time:31634004ms step_avg:591.60ms +step:53473/57344 train_time:31634023ms step_avg:591.59ms +step:53474/57344 train_time:31634249ms step_avg:591.58ms +step:53475/57344 train_time:31634820ms step_avg:591.58ms +grad accum step:13369/14336 +step:53476/57344 train_time:31636178ms step_avg:591.60ms +step:53477/57344 train_time:31636193ms step_avg:591.59ms +step:53478/57344 train_time:31636452ms step_avg:591.58ms +step:53479/57344 train_time:31637026ms step_avg:591.58ms +grad accum step:13370/14336 +step:53480/57344 train_time:31638360ms step_avg:591.59ms +step:53481/57344 train_time:31638381ms step_avg:591.58ms +step:53482/57344 train_time:31638617ms step_avg:591.58ms +step:53483/57344 train_time:31639181ms step_avg:591.57ms +grad accum step:13371/14336 +step:53484/57344 train_time:31640477ms step_avg:591.59ms +step:53485/57344 train_time:31640492ms step_avg:591.58ms +step:53486/57344 train_time:31640745ms step_avg:591.57ms +step:53487/57344 train_time:31641312ms step_avg:591.57ms +grad accum step:13372/14336 +step:53488/57344 train_time:31642771ms step_avg:591.59ms +step:53489/57344 train_time:31642789ms step_avg:591.58ms +step:53490/57344 train_time:31643037ms step_avg:591.57ms +step:53491/57344 train_time:31643667ms step_avg:591.57ms +grad accum step:13373/14336 +step:53492/57344 train_time:31645071ms step_avg:591.59ms +step:53493/57344 train_time:31645088ms step_avg:591.57ms +step:53494/57344 train_time:31645326ms step_avg:591.57ms +step:53495/57344 train_time:31645917ms step_avg:591.57ms +grad accum step:13374/14336 +step:53496/57344 train_time:31647284ms step_avg:591.58ms +step:53497/57344 train_time:31647301ms step_avg:591.57ms +step:53498/57344 train_time:31647535ms step_avg:591.56ms +step:53499/57344 train_time:31648134ms step_avg:591.56ms +grad accum step:13375/14336 +step:53500/57344 train_time:31649610ms step_avg:591.58ms +step:53501/57344 train_time:31649644ms step_avg:591.57ms +step:53502/57344 train_time:31649871ms step_avg:591.56ms +step:53503/57344 train_time:31650427ms step_avg:591.56ms +grad accum step:13376/14336 +step:53504/57344 train_time:31651914ms step_avg:591.58ms +step:53504/57344 val_loss:5.344517 train_time:31651919ms step_avg:591.58ms +step:53505/57344 train_time:31651931ms step_avg:591.57ms +step:53506/57344 train_time:31652162ms step_avg:591.56ms +step:53507/57344 train_time:31652761ms step_avg:591.56ms +grad accum step:13377/14336 +step:53508/57344 train_time:31654170ms step_avg:591.58ms +step:53509/57344 train_time:31654193ms step_avg:591.57ms +step:53510/57344 train_time:31654442ms step_avg:591.56ms +step:53511/57344 train_time:31655060ms step_avg:591.56ms +grad accum step:13378/14336 +step:53512/57344 train_time:31656430ms step_avg:591.58ms +step:53513/57344 train_time:31656449ms step_avg:591.57ms +step:53514/57344 train_time:31656697ms step_avg:591.56ms +step:53515/57344 train_time:31657279ms step_avg:591.56ms +grad accum step:13379/14336 +step:53516/57344 train_time:31658692ms step_avg:591.57ms +step:53517/57344 train_time:31658709ms step_avg:591.56ms +step:53518/57344 train_time:31658958ms step_avg:591.56ms +step:53519/57344 train_time:31659540ms step_avg:591.56ms +grad accum step:13380/14336 +step:53520/57344 train_time:31660915ms step_avg:591.57ms +step:53521/57344 train_time:31660928ms step_avg:591.56ms +step:53522/57344 train_time:31661156ms step_avg:591.55ms +step:53523/57344 train_time:31661709ms step_avg:591.55ms +grad accum step:13381/14336 +step:53524/57344 train_time:31663050ms step_avg:591.57ms +step:53525/57344 train_time:31663069ms step_avg:591.56ms +step:53526/57344 train_time:31663312ms step_avg:591.55ms +step:53527/57344 train_time:31663889ms step_avg:591.55ms +grad accum step:13382/14336 +step:53528/57344 train_time:31665471ms step_avg:591.57ms +step:53529/57344 train_time:31665488ms step_avg:591.56ms +step:53530/57344 train_time:31665753ms step_avg:591.55ms +step:53531/57344 train_time:31666365ms step_avg:591.55ms +grad accum step:13383/14336 +step:53532/57344 train_time:31667745ms step_avg:591.57ms +step:53533/57344 train_time:31667761ms step_avg:591.56ms +step:53534/57344 train_time:31668029ms step_avg:591.55ms +step:53535/57344 train_time:31668638ms step_avg:591.55ms +grad accum step:13384/14336 +step:53536/57344 train_time:31670045ms step_avg:591.57ms +step:53537/57344 train_time:31670060ms step_avg:591.55ms +step:53538/57344 train_time:31670305ms step_avg:591.55ms +step:53539/57344 train_time:31670859ms step_avg:591.55ms +grad accum step:13385/14336 +step:53540/57344 train_time:31672503ms step_avg:591.57ms +step:53541/57344 train_time:31672788ms step_avg:591.56ms +step:53542/57344 train_time:31673041ms step_avg:591.56ms +step:53543/57344 train_time:31673572ms step_avg:591.55ms +grad accum step:13386/14336 +step:53544/57344 train_time:31675099ms step_avg:591.57ms +step:53545/57344 train_time:31675131ms step_avg:591.56ms +step:53546/57344 train_time:31675353ms step_avg:591.55ms +step:53547/57344 train_time:31675934ms step_avg:591.55ms +grad accum step:13387/14336 +step:53548/57344 train_time:31677363ms step_avg:591.57ms +step:53549/57344 train_time:31677384ms step_avg:591.56ms +step:53550/57344 train_time:31677607ms step_avg:591.55ms +step:53551/57344 train_time:31678195ms step_avg:591.55ms +grad accum step:13388/14336 +step:53552/57344 train_time:31679641ms step_avg:591.57ms +step:53553/57344 train_time:31679656ms step_avg:591.56ms +step:53554/57344 train_time:31679917ms step_avg:591.55ms +step:53555/57344 train_time:31680511ms step_avg:591.55ms +grad accum step:13389/14336 +step:53556/57344 train_time:31682008ms step_avg:591.57ms +step:53557/57344 train_time:31682023ms step_avg:591.56ms +step:53558/57344 train_time:31682272ms step_avg:591.55ms +step:53559/57344 train_time:31682862ms step_avg:591.55ms +grad accum step:13390/14336 +step:53560/57344 train_time:31684261ms step_avg:591.57ms +step:53561/57344 train_time:31684276ms step_avg:591.55ms +step:53562/57344 train_time:31684531ms step_avg:591.55ms +step:53563/57344 train_time:31685103ms step_avg:591.55ms +grad accum step:13391/14336 +step:53564/57344 train_time:31686452ms step_avg:591.56ms +step:53565/57344 train_time:31686467ms step_avg:591.55ms +step:53566/57344 train_time:31686721ms step_avg:591.55ms +step:53567/57344 train_time:31687306ms step_avg:591.55ms +grad accum step:13392/14336 +step:53568/57344 train_time:31688733ms step_avg:591.56ms +step:53568/57344 val_loss:5.345055 train_time:31688739ms step_avg:591.56ms +step:53569/57344 train_time:31688751ms step_avg:591.55ms +step:53570/57344 train_time:31688975ms step_avg:591.54ms +step:53571/57344 train_time:31689541ms step_avg:591.54ms +grad accum step:13393/14336 +step:53572/57344 train_time:31690886ms step_avg:591.56ms +step:53573/57344 train_time:31690907ms step_avg:591.55ms +step:53574/57344 train_time:31691155ms step_avg:591.54ms +step:53575/57344 train_time:31691752ms step_avg:591.54ms +grad accum step:13394/14336 +step:53576/57344 train_time:31693195ms step_avg:591.56ms +step:53577/57344 train_time:31693212ms step_avg:591.55ms +step:53578/57344 train_time:31693463ms step_avg:591.54ms +step:53579/57344 train_time:31694045ms step_avg:591.54ms +grad accum step:13395/14336 +step:53580/57344 train_time:31695362ms step_avg:591.55ms +step:53581/57344 train_time:31695378ms step_avg:591.54ms +step:53582/57344 train_time:31695626ms step_avg:591.53ms +step:53583/57344 train_time:31696193ms step_avg:591.53ms +grad accum step:13396/14336 +step:53584/57344 train_time:31697589ms step_avg:591.55ms +step:53585/57344 train_time:31697603ms step_avg:591.54ms +step:53586/57344 train_time:31697861ms step_avg:591.53ms +step:53587/57344 train_time:31698444ms step_avg:591.53ms +grad accum step:13397/14336 +step:53588/57344 train_time:31699859ms step_avg:591.55ms +step:53589/57344 train_time:31699875ms step_avg:591.54ms +step:53590/57344 train_time:31700133ms step_avg:591.53ms +step:53591/57344 train_time:31700728ms step_avg:591.53ms +grad accum step:13398/14336 +step:53592/57344 train_time:31702100ms step_avg:591.55ms +step:53593/57344 train_time:31702119ms step_avg:591.53ms +step:53594/57344 train_time:31702358ms step_avg:591.53ms +step:53595/57344 train_time:31702914ms step_avg:591.53ms +grad accum step:13399/14336 +step:53596/57344 train_time:31704546ms step_avg:591.55ms +step:53597/57344 train_time:31704564ms step_avg:591.54ms +step:53598/57344 train_time:31704782ms step_avg:591.53ms +step:53599/57344 train_time:31705334ms step_avg:591.53ms +grad accum step:13400/14336 +step:53600/57344 train_time:31706700ms step_avg:591.54ms +step:53601/57344 train_time:31706718ms step_avg:591.53ms +step:53602/57344 train_time:31706970ms step_avg:591.53ms +step:53603/57344 train_time:31707572ms step_avg:591.53ms +grad accum step:13401/14336 +step:53604/57344 train_time:31708941ms step_avg:591.54ms +step:53605/57344 train_time:31708960ms step_avg:591.53ms +step:53606/57344 train_time:31709200ms step_avg:591.52ms +step:53607/57344 train_time:31709769ms step_avg:591.52ms +grad accum step:13402/14336 +step:53608/57344 train_time:31711163ms step_avg:591.54ms +step:53609/57344 train_time:31711177ms step_avg:591.53ms +step:53610/57344 train_time:31711400ms step_avg:591.52ms +step:53611/57344 train_time:31711952ms step_avg:591.52ms +grad accum step:13403/14336 +step:53612/57344 train_time:31713354ms step_avg:591.53ms +step:53613/57344 train_time:31713371ms step_avg:591.52ms +step:53614/57344 train_time:31713618ms step_avg:591.52ms +step:53615/57344 train_time:31714193ms step_avg:591.52ms +grad accum step:13404/14336 +step:53616/57344 train_time:31715527ms step_avg:591.53ms +step:53617/57344 train_time:31715551ms step_avg:591.52ms +step:53618/57344 train_time:31715779ms step_avg:591.51ms +step:53619/57344 train_time:31716342ms step_avg:591.51ms +grad accum step:13405/14336 +step:53620/57344 train_time:31717738ms step_avg:591.53ms +step:53621/57344 train_time:31717754ms step_avg:591.52ms +step:53622/57344 train_time:31718005ms step_avg:591.51ms +step:53623/57344 train_time:31718591ms step_avg:591.51ms +grad accum step:13406/14336 +step:53624/57344 train_time:31719975ms step_avg:591.53ms +step:53625/57344 train_time:31719992ms step_avg:591.51ms +step:53626/57344 train_time:31720248ms step_avg:591.51ms +step:53627/57344 train_time:31720845ms step_avg:591.51ms +grad accum step:13407/14336 +step:53628/57344 train_time:31722212ms step_avg:591.52ms +step:53629/57344 train_time:31722229ms step_avg:591.51ms +step:53630/57344 train_time:31722475ms step_avg:591.51ms +step:53631/57344 train_time:31723043ms step_avg:591.51ms +grad accum step:13408/14336 +step:53632/57344 train_time:31724599ms step_avg:591.52ms +step:53632/57344 val_loss:5.341630 train_time:31724651ms step_avg:591.52ms +step:53633/57344 train_time:31724673ms step_avg:591.51ms +step:53634/57344 train_time:31724891ms step_avg:591.51ms +step:53635/57344 train_time:31725469ms step_avg:591.51ms +grad accum step:13409/14336 +step:53636/57344 train_time:31726945ms step_avg:591.52ms +step:53637/57344 train_time:31726963ms step_avg:591.51ms +step:53638/57344 train_time:31727209ms step_avg:591.51ms +step:53639/57344 train_time:31727803ms step_avg:591.51ms +grad accum step:13410/14336 +step:53640/57344 train_time:31729268ms step_avg:591.52ms +step:53641/57344 train_time:31729285ms step_avg:591.51ms +step:53642/57344 train_time:31729530ms step_avg:591.51ms +step:53643/57344 train_time:31730081ms step_avg:591.50ms +grad accum step:13411/14336 +step:53644/57344 train_time:31731424ms step_avg:591.52ms +step:53645/57344 train_time:31731439ms step_avg:591.51ms +step:53646/57344 train_time:31731684ms step_avg:591.50ms +step:53647/57344 train_time:31732265ms step_avg:591.50ms +grad accum step:13412/14336 +step:53648/57344 train_time:31733794ms step_avg:591.52ms +step:53649/57344 train_time:31733808ms step_avg:591.51ms +step:53650/57344 train_time:31734089ms step_avg:591.50ms +step:53651/57344 train_time:31734745ms step_avg:591.50ms +grad accum step:13413/14336 +step:53652/57344 train_time:31736174ms step_avg:591.52ms +step:53653/57344 train_time:31736193ms step_avg:591.51ms +step:53654/57344 train_time:31736430ms step_avg:591.50ms +step:53655/57344 train_time:31737001ms step_avg:591.50ms +grad accum step:13414/14336 +step:53656/57344 train_time:31738435ms step_avg:591.52ms +step:53657/57344 train_time:31738453ms step_avg:591.51ms +step:53658/57344 train_time:31738686ms step_avg:591.50ms +step:53659/57344 train_time:31739245ms step_avg:591.50ms +grad accum step:13415/14336 +step:53660/57344 train_time:31740590ms step_avg:591.51ms +step:53661/57344 train_time:31740614ms step_avg:591.50ms +step:53662/57344 train_time:31740847ms step_avg:591.50ms +step:53663/57344 train_time:31741416ms step_avg:591.50ms +grad accum step:13416/14336 +step:53664/57344 train_time:31742787ms step_avg:591.51ms +step:53665/57344 train_time:31742800ms step_avg:591.50ms +step:53666/57344 train_time:31743055ms step_avg:591.49ms +step:53667/57344 train_time:31743634ms step_avg:591.49ms +grad accum step:13417/14336 +step:53668/57344 train_time:31745002ms step_avg:591.51ms +step:53669/57344 train_time:31745035ms step_avg:591.50ms +step:53670/57344 train_time:31745260ms step_avg:591.49ms +step:53671/57344 train_time:31745841ms step_avg:591.49ms +grad accum step:13418/14336 +step:53672/57344 train_time:31747263ms step_avg:591.51ms +step:53673/57344 train_time:31747279ms step_avg:591.49ms +step:53674/57344 train_time:31747508ms step_avg:591.49ms +step:53675/57344 train_time:31748085ms step_avg:591.49ms +grad accum step:13419/14336 +step:53676/57344 train_time:31749483ms step_avg:591.50ms +step:53677/57344 train_time:31749499ms step_avg:591.49ms +step:53678/57344 train_time:31749757ms step_avg:591.49ms +step:53679/57344 train_time:31750349ms step_avg:591.49ms +grad accum step:13420/14336 +step:53680/57344 train_time:31751674ms step_avg:591.50ms +step:53681/57344 train_time:31751689ms step_avg:591.49ms +step:53682/57344 train_time:31751941ms step_avg:591.48ms +step:53683/57344 train_time:31752512ms step_avg:591.48ms +grad accum step:13421/14336 +step:53684/57344 train_time:31753976ms step_avg:591.50ms +step:53685/57344 train_time:31753998ms step_avg:591.49ms +step:53686/57344 train_time:31754225ms step_avg:591.48ms +step:53687/57344 train_time:31754807ms step_avg:591.48ms +grad accum step:13422/14336 +step:53688/57344 train_time:31756252ms step_avg:591.50ms +step:53689/57344 train_time:31756267ms step_avg:591.49ms +step:53690/57344 train_time:31756516ms step_avg:591.48ms +step:53691/57344 train_time:31757087ms step_avg:591.48ms +grad accum step:13423/14336 +step:53692/57344 train_time:31764275ms step_avg:591.60ms +step:53693/57344 train_time:31764288ms step_avg:591.59ms +step:53694/57344 train_time:31764598ms step_avg:591.59ms +step:53695/57344 train_time:31765178ms step_avg:591.59ms +grad accum step:13424/14336 +step:53696/57344 train_time:31766525ms step_avg:591.60ms +step:53696/57344 val_loss:5.341227 train_time:31766526ms step_avg:591.60ms +step:53697/57344 train_time:31766538ms step_avg:591.59ms +step:53698/57344 train_time:31766810ms step_avg:591.58ms +step:53699/57344 train_time:31767401ms step_avg:591.58ms +grad accum step:13425/14336 +step:53700/57344 train_time:31768778ms step_avg:591.60ms +step:53701/57344 train_time:31768794ms step_avg:591.59ms +step:53702/57344 train_time:31769044ms step_avg:591.58ms +step:53703/57344 train_time:31769603ms step_avg:591.58ms +grad accum step:13426/14336 +step:53704/57344 train_time:31770913ms step_avg:591.59ms +step:53705/57344 train_time:31770925ms step_avg:591.58ms +step:53706/57344 train_time:31771169ms step_avg:591.58ms +step:53707/57344 train_time:31771730ms step_avg:591.58ms +grad accum step:13427/14336 +step:53708/57344 train_time:31773083ms step_avg:591.59ms +step:53709/57344 train_time:31773094ms step_avg:591.58ms +step:53710/57344 train_time:31773341ms step_avg:591.57ms +step:53711/57344 train_time:31773916ms step_avg:591.57ms +grad accum step:13428/14336 +step:53712/57344 train_time:31775270ms step_avg:591.59ms +step:53713/57344 train_time:31775281ms step_avg:591.58ms +step:53714/57344 train_time:31775526ms step_avg:591.57ms +step:53715/57344 train_time:31776095ms step_avg:591.57ms +grad accum step:13429/14336 +step:53716/57344 train_time:31777440ms step_avg:591.58ms +step:53717/57344 train_time:31777454ms step_avg:591.57ms +step:53718/57344 train_time:31777714ms step_avg:591.57ms +step:53719/57344 train_time:31778301ms step_avg:591.57ms +grad accum step:13430/14336 +step:53720/57344 train_time:31801608ms step_avg:591.99ms +step:53721/57344 train_time:31821510ms step_avg:592.35ms +step:53722/57344 train_time:31821781ms step_avg:592.34ms +step:53723/57344 train_time:31822325ms step_avg:592.34ms +grad accum step:13431/14336 +step:53724/57344 train_time:31823657ms step_avg:592.35ms +step:53725/57344 train_time:31823669ms step_avg:592.34ms +step:53726/57344 train_time:31823903ms step_avg:592.34ms +step:53727/57344 train_time:31824468ms step_avg:592.34ms +grad accum step:13432/14336 +step:53728/57344 train_time:31825857ms step_avg:592.35ms +step:53729/57344 train_time:31825870ms step_avg:592.34ms +step:53730/57344 train_time:31826116ms step_avg:592.33ms +step:53731/57344 train_time:31826678ms step_avg:592.33ms +grad accum step:13433/14336 +step:53732/57344 train_time:31828119ms step_avg:592.35ms +step:53733/57344 train_time:31828147ms step_avg:592.34ms +step:53734/57344 train_time:31828379ms step_avg:592.33ms +step:53735/57344 train_time:31828953ms step_avg:592.33ms +grad accum step:13434/14336 +step:53736/57344 train_time:31830315ms step_avg:592.35ms +step:53737/57344 train_time:31830328ms step_avg:592.34ms +step:53738/57344 train_time:31830581ms step_avg:592.33ms +step:53739/57344 train_time:31831151ms step_avg:592.33ms +grad accum step:13435/14336 +step:53740/57344 train_time:31832537ms step_avg:592.34ms +step:53741/57344 train_time:31832553ms step_avg:592.33ms +step:53742/57344 train_time:31832806ms step_avg:592.33ms +step:53743/57344 train_time:31833382ms step_avg:592.33ms +grad accum step:13436/14336 +step:53744/57344 train_time:31834726ms step_avg:592.34ms +step:53745/57344 train_time:31834743ms step_avg:592.33ms +step:53746/57344 train_time:31834994ms step_avg:592.32ms +step:53747/57344 train_time:31966236ms step_avg:594.75ms +grad accum step:13437/14336 +step:53748/57344 train_time:31967480ms step_avg:594.77ms +step:53749/57344 train_time:31967582ms step_avg:594.76ms +step:53750/57344 train_time:31967754ms step_avg:594.75ms +step:53751/57344 train_time:31968324ms step_avg:594.75ms +grad accum step:13438/14336 +step:53752/57344 train_time:31969646ms step_avg:594.76ms +step:53753/57344 train_time:31969662ms step_avg:594.75ms +step:53754/57344 train_time:31969908ms step_avg:594.74ms +step:53755/57344 train_time:31970462ms step_avg:594.74ms +grad accum step:13439/14336 +step:53756/57344 train_time:31971823ms step_avg:594.76ms +step:53757/57344 train_time:31971836ms step_avg:594.75ms +step:53758/57344 train_time:31972068ms step_avg:594.74ms +step:53759/57344 train_time:31972620ms step_avg:594.74ms +grad accum step:13440/14336 +step:53760/57344 train_time:31973945ms step_avg:594.75ms +step:53760/57344 val_loss:5.339957 train_time:31973948ms step_avg:594.75ms +step:53761/57344 train_time:31973959ms step_avg:594.74ms +step:53762/57344 train_time:31974272ms step_avg:594.74ms +step:53763/57344 train_time:31974828ms step_avg:594.74ms +grad accum step:13441/14336 +step:53764/57344 train_time:31976169ms step_avg:594.75ms +step:53765/57344 train_time:31976185ms step_avg:594.74ms +step:53766/57344 train_time:31976430ms step_avg:594.73ms +step:53767/57344 train_time:31976993ms step_avg:594.73ms +grad accum step:13442/14336 +step:53768/57344 train_time:31978342ms step_avg:594.75ms +step:53769/57344 train_time:31978358ms step_avg:594.74ms +step:53770/57344 train_time:31978604ms step_avg:594.73ms +step:53771/57344 train_time:31979161ms step_avg:594.73ms +grad accum step:13443/14336 +step:53772/57344 train_time:31980495ms step_avg:594.74ms +step:53773/57344 train_time:31980511ms step_avg:594.73ms +step:53774/57344 train_time:31980752ms step_avg:594.73ms +step:53775/57344 train_time:31981294ms step_avg:594.72ms +grad accum step:13444/14336 +step:53776/57344 train_time:31982620ms step_avg:594.74ms +step:53777/57344 train_time:31982634ms step_avg:594.73ms +step:53778/57344 train_time:31982894ms step_avg:594.72ms +step:53779/57344 train_time:31983490ms step_avg:594.72ms +grad accum step:13445/14336 +step:53780/57344 train_time:31984821ms step_avg:594.73ms +step:53781/57344 train_time:31984837ms step_avg:594.72ms +step:53782/57344 train_time:31985090ms step_avg:594.72ms +step:53783/57344 train_time:31985665ms step_avg:594.72ms +grad accum step:13446/14336 +step:53784/57344 train_time:31987021ms step_avg:594.73ms +step:53785/57344 train_time:31987060ms step_avg:594.72ms +step:53786/57344 train_time:31987292ms step_avg:594.71ms +step:53787/57344 train_time:31987872ms step_avg:594.71ms +grad accum step:13447/14336 +step:53788/57344 train_time:31989237ms step_avg:594.73ms +step:53789/57344 train_time:31989253ms step_avg:594.72ms +step:53790/57344 train_time:31989499ms step_avg:594.71ms +step:53791/57344 train_time:31990052ms step_avg:594.71ms +grad accum step:13448/14336 +step:53792/57344 train_time:31991386ms step_avg:594.72ms +step:53793/57344 train_time:31991407ms step_avg:594.71ms +step:53794/57344 train_time:31991638ms step_avg:594.71ms +step:53795/57344 train_time:31992191ms step_avg:594.71ms +grad accum step:13449/14336 +step:53796/57344 train_time:31993536ms step_avg:594.72ms +step:53797/57344 train_time:31993553ms step_avg:594.71ms +step:53798/57344 train_time:31993812ms step_avg:594.70ms +step:53799/57344 train_time:31994401ms step_avg:594.70ms +grad accum step:13450/14336 +step:53800/57344 train_time:31995788ms step_avg:594.72ms +step:53801/57344 train_time:31995826ms step_avg:594.71ms +step:53802/57344 train_time:31996057ms step_avg:594.70ms +step:53803/57344 train_time:31996683ms step_avg:594.70ms +grad accum step:13451/14336 +step:53804/57344 train_time:32056416ms step_avg:595.80ms +step:53805/57344 train_time:32056432ms step_avg:595.79ms +step:53806/57344 train_time:32056677ms step_avg:595.78ms +step:53807/57344 train_time:32057227ms step_avg:595.78ms +grad accum step:13452/14336 +step:53808/57344 train_time:32058593ms step_avg:595.80ms +step:53809/57344 train_time:32058609ms step_avg:595.79ms +step:53810/57344 train_time:32058856ms step_avg:595.78ms +step:53811/57344 train_time:32059402ms step_avg:595.78ms +grad accum step:13453/14336 +step:53812/57344 train_time:32060759ms step_avg:595.79ms +step:53813/57344 train_time:32060773ms step_avg:595.78ms +step:53814/57344 train_time:32061007ms step_avg:595.77ms +step:53815/57344 train_time:32061548ms step_avg:595.77ms +grad accum step:13454/14336 +step:53816/57344 train_time:32062848ms step_avg:595.79ms +step:53817/57344 train_time:32062871ms step_avg:595.78ms +step:53818/57344 train_time:32063107ms step_avg:595.77ms +step:53819/57344 train_time:32063664ms step_avg:595.77ms +grad accum step:13455/14336 +step:53820/57344 train_time:32065038ms step_avg:595.78ms +step:53821/57344 train_time:32065050ms step_avg:595.77ms +step:53822/57344 train_time:32065288ms step_avg:595.77ms +step:53823/57344 train_time:32065838ms step_avg:595.76ms +grad accum step:13456/14336 +step:53824/57344 train_time:32067152ms step_avg:595.78ms +step:53824/57344 val_loss:5.338264 train_time:32067153ms step_avg:595.78ms +step:53825/57344 train_time:32067165ms step_avg:595.77ms +step:53826/57344 train_time:32067389ms step_avg:595.76ms +step:53827/57344 train_time:32067953ms step_avg:595.76ms +grad accum step:13457/14336 +step:53828/57344 train_time:32069322ms step_avg:595.77ms +step:53829/57344 train_time:32069341ms step_avg:595.76ms +step:53830/57344 train_time:32069577ms step_avg:595.76ms +step:53831/57344 train_time:32070125ms step_avg:595.76ms +grad accum step:13458/14336 +step:53832/57344 train_time:32071468ms step_avg:595.77ms +step:53833/57344 train_time:32071493ms step_avg:595.76ms +step:53834/57344 train_time:32071734ms step_avg:595.75ms +step:53835/57344 train_time:32072341ms step_avg:595.75ms +grad accum step:13459/14336 +step:53836/57344 train_time:32073683ms step_avg:595.77ms +step:53837/57344 train_time:32073699ms step_avg:595.76ms +step:53838/57344 train_time:32073960ms step_avg:595.75ms +step:53839/57344 train_time:32074567ms step_avg:595.75ms +grad accum step:13460/14336 +step:53840/57344 train_time:32076002ms step_avg:595.77ms +step:53841/57344 train_time:32076035ms step_avg:595.75ms +step:53842/57344 train_time:32076258ms step_avg:595.75ms +step:53843/57344 train_time:32076821ms step_avg:595.75ms +grad accum step:13461/14336 +step:53844/57344 train_time:32078170ms step_avg:595.76ms +step:53845/57344 train_time:32078187ms step_avg:595.75ms +step:53846/57344 train_time:32078429ms step_avg:595.74ms +step:53847/57344 train_time:32078982ms step_avg:595.74ms +grad accum step:13462/14336 +step:53848/57344 train_time:32080377ms step_avg:595.76ms +step:53849/57344 train_time:32080391ms step_avg:595.75ms +step:53850/57344 train_time:32080633ms step_avg:595.74ms +step:53851/57344 train_time:32081184ms step_avg:595.74ms +grad accum step:13463/14336 +step:53852/57344 train_time:32082616ms step_avg:595.76ms +step:53853/57344 train_time:32082628ms step_avg:595.74ms +step:53854/57344 train_time:32082909ms step_avg:595.74ms +step:53855/57344 train_time:32083577ms step_avg:595.74ms +grad accum step:13464/14336 +step:53856/57344 train_time:32084993ms step_avg:595.76ms +step:53857/57344 train_time:32085010ms step_avg:595.74ms +step:53858/57344 train_time:32085262ms step_avg:595.74ms +step:53859/57344 train_time:32085838ms step_avg:595.74ms +grad accum step:13465/14336 +step:53860/57344 train_time:32087177ms step_avg:595.75ms +step:53861/57344 train_time:32087193ms step_avg:595.74ms +step:53862/57344 train_time:32087448ms step_avg:595.73ms +step:53863/57344 train_time:32088042ms step_avg:595.73ms +grad accum step:13466/14336 +step:53864/57344 train_time:32089381ms step_avg:595.75ms +step:53865/57344 train_time:32089396ms step_avg:595.74ms +step:53866/57344 train_time:32089647ms step_avg:595.73ms +step:53867/57344 train_time:32090209ms step_avg:595.73ms +grad accum step:13467/14336 +step:53868/57344 train_time:32091591ms step_avg:595.74ms +step:53869/57344 train_time:32091613ms step_avg:595.73ms +step:53870/57344 train_time:32091837ms step_avg:595.73ms +step:53871/57344 train_time:32092411ms step_avg:595.73ms +grad accum step:13468/14336 +step:53872/57344 train_time:32093823ms step_avg:595.74ms +step:53873/57344 train_time:32093840ms step_avg:595.73ms +step:53874/57344 train_time:32094088ms step_avg:595.72ms +step:53875/57344 train_time:32094659ms step_avg:595.72ms +grad accum step:13469/14336 +step:53876/57344 train_time:32096036ms step_avg:595.74ms +step:53877/57344 train_time:32096057ms step_avg:595.73ms +step:53878/57344 train_time:32096293ms step_avg:595.72ms +step:53879/57344 train_time:32096847ms step_avg:595.72ms +grad accum step:13470/14336 +step:53880/57344 train_time:32098228ms step_avg:595.74ms +step:53881/57344 train_time:32098246ms step_avg:595.72ms +step:53882/57344 train_time:32098477ms step_avg:595.72ms +step:53883/57344 train_time:32099051ms step_avg:595.72ms +grad accum step:13471/14336 +step:53884/57344 train_time:32100402ms step_avg:595.73ms +step:53885/57344 train_time:32100417ms step_avg:595.72ms +step:53886/57344 train_time:32100669ms step_avg:595.71ms +step:53887/57344 train_time:32101251ms step_avg:595.71ms +grad accum step:13472/14336 +step:53888/57344 train_time:32102666ms step_avg:595.73ms +step:53888/57344 val_loss:5.337718 train_time:32102673ms step_avg:595.73ms +step:53889/57344 train_time:32102685ms step_avg:595.72ms +step:53890/57344 train_time:32102925ms step_avg:595.71ms +step:53891/57344 train_time:32103528ms step_avg:595.71ms +grad accum step:13473/14336 +step:53892/57344 train_time:32104853ms step_avg:595.73ms +step:53893/57344 train_time:32104870ms step_avg:595.72ms +step:53894/57344 train_time:32105122ms step_avg:595.71ms +step:53895/57344 train_time:32105681ms step_avg:595.71ms +grad accum step:13474/14336 +step:53896/57344 train_time:32107043ms step_avg:595.72ms +step:53897/57344 train_time:32107059ms step_avg:595.71ms +step:53898/57344 train_time:32107312ms step_avg:595.71ms +step:53899/57344 train_time:32107888ms step_avg:595.70ms +grad accum step:13475/14336 +step:53900/57344 train_time:32109287ms step_avg:595.72ms +step:53901/57344 train_time:32109304ms step_avg:595.71ms +step:53902/57344 train_time:32109559ms step_avg:595.70ms +step:53903/57344 train_time:32110126ms step_avg:595.70ms +grad accum step:13476/14336 +step:53904/57344 train_time:32111483ms step_avg:595.72ms +step:53905/57344 train_time:32111499ms step_avg:595.71ms +step:53906/57344 train_time:32111766ms step_avg:595.70ms +step:53907/57344 train_time:32112367ms step_avg:595.70ms +grad accum step:13477/14336 +step:53908/57344 train_time:32113788ms step_avg:595.71ms +step:53909/57344 train_time:32113804ms step_avg:595.70ms +step:53910/57344 train_time:32114054ms step_avg:595.70ms +step:53911/57344 train_time:32114622ms step_avg:595.70ms +grad accum step:13478/14336 +step:53912/57344 train_time:32115960ms step_avg:595.71ms +step:53913/57344 train_time:32115975ms step_avg:595.70ms +step:53914/57344 train_time:32116227ms step_avg:595.69ms +step:53915/57344 train_time:32116794ms step_avg:595.69ms +grad accum step:13479/14336 +step:53916/57344 train_time:32118119ms step_avg:595.71ms +step:53917/57344 train_time:32118135ms step_avg:595.70ms +step:53918/57344 train_time:32118387ms step_avg:595.69ms +step:53919/57344 train_time:32118958ms step_avg:595.69ms +grad accum step:13480/14336 +step:53920/57344 train_time:32120303ms step_avg:595.70ms +step:53921/57344 train_time:32120320ms step_avg:595.69ms +step:53922/57344 train_time:32120571ms step_avg:595.69ms +step:53923/57344 train_time:32121137ms step_avg:595.69ms +grad accum step:13481/14336 +step:53924/57344 train_time:32122613ms step_avg:595.70ms +step:53925/57344 train_time:32122630ms step_avg:595.69ms +step:53926/57344 train_time:32122883ms step_avg:595.68ms +step:53927/57344 train_time:32123459ms step_avg:595.68ms +grad accum step:13482/14336 +step:53928/57344 train_time:32124813ms step_avg:595.70ms +step:53929/57344 train_time:32124843ms step_avg:595.69ms +step:53930/57344 train_time:32125091ms step_avg:595.68ms +step:53931/57344 train_time:32125688ms step_avg:595.68ms +grad accum step:13483/14336 +step:53932/57344 train_time:32127054ms step_avg:595.70ms +step:53933/57344 train_time:32127083ms step_avg:595.69ms +step:53934/57344 train_time:32127317ms step_avg:595.68ms +step:53935/57344 train_time:32127880ms step_avg:595.68ms +grad accum step:13484/14336 +step:53936/57344 train_time:32129215ms step_avg:595.69ms +step:53937/57344 train_time:32129232ms step_avg:595.68ms +step:53938/57344 train_time:32129484ms step_avg:595.67ms +step:53939/57344 train_time:32130060ms step_avg:595.67ms +grad accum step:13485/14336 +step:53940/57344 train_time:32131422ms step_avg:595.69ms +step:53941/57344 train_time:32131439ms step_avg:595.68ms +step:53942/57344 train_time:32131690ms step_avg:595.67ms +step:53943/57344 train_time:32132249ms step_avg:595.67ms +grad accum step:13486/14336 +step:53944/57344 train_time:32133715ms step_avg:595.69ms +step:53945/57344 train_time:32133730ms step_avg:595.68ms +step:53946/57344 train_time:32133976ms step_avg:595.67ms +step:53947/57344 train_time:32134535ms step_avg:595.67ms +grad accum step:13487/14336 +step:53948/57344 train_time:32135849ms step_avg:595.68ms +step:53949/57344 train_time:32135866ms step_avg:595.67ms +step:53950/57344 train_time:32136118ms step_avg:595.66ms +step:53951/57344 train_time:32136684ms step_avg:595.66ms +grad accum step:13488/14336 +step:53952/57344 train_time:32138027ms step_avg:595.68ms +step:53952/57344 val_loss:5.336123 train_time:32138028ms step_avg:595.68ms +step:53953/57344 train_time:32138531ms step_avg:595.68ms +step:53954/57344 train_time:32138627ms step_avg:595.67ms +step:53955/57344 train_time:32139150ms step_avg:595.67ms +grad accum step:13489/14336 +step:53956/57344 train_time:32140800ms step_avg:595.69ms +step:53957/57344 train_time:32140827ms step_avg:595.67ms +step:53958/57344 train_time:32141053ms step_avg:595.67ms +step:53959/57344 train_time:32141640ms step_avg:595.67ms +grad accum step:13490/14336 +step:53960/57344 train_time:32142974ms step_avg:595.68ms +step:53961/57344 train_time:32142997ms step_avg:595.67ms +step:53962/57344 train_time:32143258ms step_avg:595.66ms +step:53963/57344 train_time:32143886ms step_avg:595.67ms +grad accum step:13491/14336 +step:53964/57344 train_time:32145210ms step_avg:595.68ms +step:53965/57344 train_time:32145223ms step_avg:595.67ms +step:53966/57344 train_time:32145453ms step_avg:595.66ms +step:53967/57344 train_time:32146027ms step_avg:595.66ms +grad accum step:13492/14336 +step:53968/57344 train_time:32147390ms step_avg:595.68ms +step:53969/57344 train_time:32147418ms step_avg:595.66ms +step:53970/57344 train_time:32147639ms step_avg:595.66ms +step:53971/57344 train_time:32148205ms step_avg:595.66ms +grad accum step:13493/14336 +step:53972/57344 train_time:32149588ms step_avg:595.67ms +step:53973/57344 train_time:32149610ms step_avg:595.66ms +step:53974/57344 train_time:32149840ms step_avg:595.65ms +step:53975/57344 train_time:32150397ms step_avg:595.65ms +grad accum step:13494/14336 +step:53976/57344 train_time:32151746ms step_avg:595.67ms +step:53977/57344 train_time:32151761ms step_avg:595.66ms +step:53978/57344 train_time:32152003ms step_avg:595.65ms +step:53979/57344 train_time:32152562ms step_avg:595.65ms +grad accum step:13495/14336 +step:53980/57344 train_time:32153941ms step_avg:595.66ms +step:53981/57344 train_time:32153956ms step_avg:595.65ms +step:53982/57344 train_time:32154207ms step_avg:595.65ms +step:53983/57344 train_time:32154778ms step_avg:595.65ms +grad accum step:13496/14336 +step:53984/57344 train_time:32156202ms step_avg:595.66ms +step:53985/57344 train_time:32156218ms step_avg:595.65ms +step:53986/57344 train_time:32156441ms step_avg:595.64ms +step:53987/57344 train_time:32157006ms step_avg:595.64ms +grad accum step:13497/14336 +step:53988/57344 train_time:32158371ms step_avg:595.66ms +step:53989/57344 train_time:32158392ms step_avg:595.65ms +step:53990/57344 train_time:32158627ms step_avg:595.64ms +step:53991/57344 train_time:32159206ms step_avg:595.64ms +grad accum step:13498/14336 +step:53992/57344 train_time:32160727ms step_avg:595.66ms +step:53993/57344 train_time:32160822ms step_avg:595.65ms +step:53994/57344 train_time:32161059ms step_avg:595.64ms +step:53995/57344 train_time:32161630ms step_avg:595.64ms +grad accum step:13499/14336 +step:53996/57344 train_time:32163013ms step_avg:595.66ms +step:53997/57344 train_time:32163027ms step_avg:595.64ms +step:53998/57344 train_time:32163276ms step_avg:595.64ms +step:53999/57344 train_time:32163845ms step_avg:595.64ms +grad accum step:13500/14336 +step:54000/57344 train_time:32165178ms step_avg:595.65ms +step:54001/57344 train_time:32165193ms step_avg:595.64ms +step:54002/57344 train_time:32165439ms step_avg:595.63ms +step:54003/57344 train_time:32166004ms step_avg:595.63ms +grad accum step:13501/14336 +step:54004/57344 train_time:32167394ms step_avg:595.65ms +step:54005/57344 train_time:32167410ms step_avg:595.64ms +step:54006/57344 train_time:32167648ms step_avg:595.63ms +step:54007/57344 train_time:32168209ms step_avg:595.63ms +grad accum step:13502/14336 +step:54008/57344 train_time:32169696ms step_avg:595.65ms +step:54009/57344 train_time:32169714ms step_avg:595.64ms +step:54010/57344 train_time:32169944ms step_avg:595.63ms +step:54011/57344 train_time:32170506ms step_avg:595.63ms +grad accum step:13503/14336 +step:54012/57344 train_time:32171875ms step_avg:595.64ms +step:54013/57344 train_time:32171887ms step_avg:595.63ms +step:54014/57344 train_time:32172148ms step_avg:595.63ms +step:54015/57344 train_time:32172764ms step_avg:595.63ms +grad accum step:13504/14336 +step:54016/57344 train_time:32174167ms step_avg:595.64ms +step:54016/57344 val_loss:5.334979 train_time:32174172ms step_avg:595.64ms +step:54017/57344 train_time:32174184ms step_avg:595.63ms +step:54018/57344 train_time:32174408ms step_avg:595.62ms +step:54019/57344 train_time:32174974ms step_avg:595.62ms +grad accum step:13505/14336 +step:54020/57344 train_time:32176325ms step_avg:595.64ms +step:54021/57344 train_time:32176343ms step_avg:595.63ms +step:54022/57344 train_time:32176577ms step_avg:595.62ms +step:54023/57344 train_time:32177129ms step_avg:595.62ms +grad accum step:13506/14336 +step:54024/57344 train_time:32178463ms step_avg:595.63ms +step:54025/57344 train_time:32178481ms step_avg:595.62ms +step:54026/57344 train_time:32178726ms step_avg:595.62ms +step:54027/57344 train_time:32179298ms step_avg:595.62ms +grad accum step:13507/14336 +step:54028/57344 train_time:32180842ms step_avg:595.63ms +step:54029/57344 train_time:32180858ms step_avg:595.62ms +step:54030/57344 train_time:32181076ms step_avg:595.61ms +step:54031/57344 train_time:32181651ms step_avg:595.61ms +grad accum step:13508/14336 +step:54032/57344 train_time:32183242ms step_avg:595.63ms +step:54033/57344 train_time:32183257ms step_avg:595.62ms +step:54034/57344 train_time:32183484ms step_avg:595.62ms +step:54035/57344 train_time:32184054ms step_avg:595.61ms +grad accum step:13509/14336 +step:54036/57344 train_time:32185398ms step_avg:595.63ms +step:54037/57344 train_time:32185420ms step_avg:595.62ms +step:54038/57344 train_time:32185648ms step_avg:595.61ms +step:54039/57344 train_time:32186229ms step_avg:595.61ms +grad accum step:13510/14336 +step:54040/57344 train_time:32187591ms step_avg:595.63ms +step:54041/57344 train_time:32187615ms step_avg:595.61ms +step:54042/57344 train_time:32187854ms step_avg:595.61ms +step:54043/57344 train_time:32188417ms step_avg:595.61ms +grad accum step:13511/14336 +step:54044/57344 train_time:32189797ms step_avg:595.62ms +step:54045/57344 train_time:32189810ms step_avg:595.61ms +step:54046/57344 train_time:32190058ms step_avg:595.60ms +step:54047/57344 train_time:32190620ms step_avg:595.60ms +grad accum step:13512/14336 +step:54048/57344 train_time:32191980ms step_avg:595.62ms +step:54049/57344 train_time:32191994ms step_avg:595.61ms +step:54050/57344 train_time:32192236ms step_avg:595.60ms +step:54051/57344 train_time:32192804ms step_avg:595.60ms +grad accum step:13513/14336 +step:54052/57344 train_time:32194203ms step_avg:595.62ms +step:54053/57344 train_time:32194227ms step_avg:595.60ms +step:54054/57344 train_time:32194455ms step_avg:595.60ms +step:54055/57344 train_time:32195025ms step_avg:595.60ms +grad accum step:13514/14336 +step:54056/57344 train_time:32196365ms step_avg:595.61ms +step:54057/57344 train_time:32196381ms step_avg:595.60ms +step:54058/57344 train_time:32196637ms step_avg:595.59ms +step:54059/57344 train_time:32197213ms step_avg:595.59ms +grad accum step:13515/14336 +step:54060/57344 train_time:32198612ms step_avg:595.61ms +step:54061/57344 train_time:32198627ms step_avg:595.60ms +step:54062/57344 train_time:32198869ms step_avg:595.59ms +step:54063/57344 train_time:32199433ms step_avg:595.59ms +grad accum step:13516/14336 +step:54064/57344 train_time:32200853ms step_avg:595.61ms +step:54065/57344 train_time:32200868ms step_avg:595.60ms +step:54066/57344 train_time:32201111ms step_avg:595.59ms +step:54067/57344 train_time:32201699ms step_avg:595.59ms +grad accum step:13517/14336 +step:54068/57344 train_time:32203131ms step_avg:595.60ms +step:54069/57344 train_time:32203155ms step_avg:595.59ms +step:54070/57344 train_time:32203387ms step_avg:595.59ms +step:54071/57344 train_time:32203960ms step_avg:595.59ms +grad accum step:13518/14336 +step:54072/57344 train_time:32205287ms step_avg:595.60ms +step:54073/57344 train_time:32205305ms step_avg:595.59ms +step:54074/57344 train_time:32205557ms step_avg:595.58ms +step:54075/57344 train_time:32206165ms step_avg:595.58ms +grad accum step:13519/14336 +step:54076/57344 train_time:32207570ms step_avg:595.60ms +step:54077/57344 train_time:32207593ms step_avg:595.59ms +step:54078/57344 train_time:32207832ms step_avg:595.58ms +step:54079/57344 train_time:32208408ms step_avg:595.58ms +grad accum step:13520/14336 +step:54080/57344 train_time:32209812ms step_avg:595.60ms +step:54080/57344 val_loss:5.334038 train_time:32209815ms step_avg:595.60ms +step:54081/57344 train_time:32209827ms step_avg:595.58ms +step:54082/57344 train_time:32210065ms step_avg:595.58ms +step:54083/57344 train_time:32210660ms step_avg:595.58ms +grad accum step:13521/14336 +step:54084/57344 train_time:32211993ms step_avg:595.59ms +step:54085/57344 train_time:32212031ms step_avg:595.58ms +step:54086/57344 train_time:32212255ms step_avg:595.57ms +step:54087/57344 train_time:32212809ms step_avg:595.57ms +grad accum step:13522/14336 +step:54088/57344 train_time:32214134ms step_avg:595.59ms +step:54089/57344 train_time:32214147ms step_avg:595.58ms +step:54090/57344 train_time:32214399ms step_avg:595.57ms +step:54091/57344 train_time:32214970ms step_avg:595.57ms +grad accum step:13523/14336 +step:54092/57344 train_time:32216358ms step_avg:595.58ms +step:54093/57344 train_time:32216375ms step_avg:595.57ms +step:54094/57344 train_time:32216612ms step_avg:595.57ms +step:54095/57344 train_time:32217179ms step_avg:595.57ms +grad accum step:13524/14336 +step:54096/57344 train_time:32218563ms step_avg:595.58ms +step:54097/57344 train_time:32218578ms step_avg:595.57ms +step:54098/57344 train_time:32218843ms step_avg:595.56ms +step:54099/57344 train_time:32219451ms step_avg:595.56ms +grad accum step:13525/14336 +step:54100/57344 train_time:32220864ms step_avg:595.58ms +step:54101/57344 train_time:32220876ms step_avg:595.57ms +step:54102/57344 train_time:32221130ms step_avg:595.56ms +step:54103/57344 train_time:32221712ms step_avg:595.56ms +grad accum step:13526/14336 +step:54104/57344 train_time:32223039ms step_avg:595.58ms +step:54105/57344 train_time:32223057ms step_avg:595.57ms +step:54106/57344 train_time:32223315ms step_avg:595.56ms +step:54107/57344 train_time:32223920ms step_avg:595.56ms +grad accum step:13527/14336 +step:54108/57344 train_time:32225311ms step_avg:595.57ms +step:54109/57344 train_time:32225326ms step_avg:595.56ms +step:54110/57344 train_time:32225580ms step_avg:595.56ms +step:54111/57344 train_time:32226162ms step_avg:595.56ms +grad accum step:13528/14336 +step:54112/57344 train_time:32227598ms step_avg:595.57ms +step:54113/57344 train_time:32227617ms step_avg:595.56ms +step:54114/57344 train_time:32227842ms step_avg:595.55ms +step:54115/57344 train_time:32228407ms step_avg:595.55ms +grad accum step:13529/14336 +step:54116/57344 train_time:32229775ms step_avg:595.57ms +step:54117/57344 train_time:32229790ms step_avg:595.56ms +step:54118/57344 train_time:32230043ms step_avg:595.55ms +step:54119/57344 train_time:32230613ms step_avg:595.55ms +grad accum step:13530/14336 +step:54120/57344 train_time:32231964ms step_avg:595.56ms +step:54121/57344 train_time:32231981ms step_avg:595.55ms +step:54122/57344 train_time:32232228ms step_avg:595.55ms +step:54123/57344 train_time:32232796ms step_avg:595.55ms +grad accum step:13531/14336 +step:54124/57344 train_time:32234221ms step_avg:595.56ms +step:54125/57344 train_time:32234241ms step_avg:595.55ms +step:54126/57344 train_time:32234472ms step_avg:595.55ms +step:54127/57344 train_time:32235019ms step_avg:595.54ms +grad accum step:13532/14336 +step:54128/57344 train_time:32236355ms step_avg:595.56ms +step:54129/57344 train_time:32236369ms step_avg:595.55ms +step:54130/57344 train_time:32236627ms step_avg:595.54ms +step:54131/57344 train_time:32237212ms step_avg:595.54ms +grad accum step:13533/14336 +step:54132/57344 train_time:32238685ms step_avg:595.56ms +step:54133/57344 train_time:32238707ms step_avg:595.55ms +step:54134/57344 train_time:32238940ms step_avg:595.54ms +step:54135/57344 train_time:32239518ms step_avg:595.54ms +grad accum step:13534/14336 +step:54136/57344 train_time:32240866ms step_avg:595.55ms +step:54137/57344 train_time:32240881ms step_avg:595.54ms +step:54138/57344 train_time:32241126ms step_avg:595.54ms +step:54139/57344 train_time:32241691ms step_avg:595.54ms +grad accum step:13535/14336 +step:54140/57344 train_time:32243442ms step_avg:595.56ms +step:54141/57344 train_time:32243458ms step_avg:595.55ms +step:54142/57344 train_time:32243677ms step_avg:595.54ms +step:54143/57344 train_time:32244251ms step_avg:595.54ms +grad accum step:13536/14336 +step:54144/57344 train_time:32245649ms step_avg:595.55ms +step:54144/57344 val_loss:5.332578 train_time:32245658ms step_avg:595.55ms +step:54145/57344 train_time:32245670ms step_avg:595.54ms +step:54146/57344 train_time:32245910ms step_avg:595.54ms +step:54147/57344 train_time:32246508ms step_avg:595.54ms +grad accum step:13537/14336 +step:54148/57344 train_time:32247912ms step_avg:595.55ms +step:54149/57344 train_time:32247929ms step_avg:595.54ms +step:54150/57344 train_time:32248165ms step_avg:595.53ms +step:54151/57344 train_time:32248715ms step_avg:595.53ms +grad accum step:13538/14336 +step:54152/57344 train_time:32250188ms step_avg:595.55ms +step:54153/57344 train_time:32250203ms step_avg:595.54ms +step:54154/57344 train_time:32250464ms step_avg:595.53ms +step:54155/57344 train_time:32251055ms step_avg:595.53ms +grad accum step:13539/14336 +step:54156/57344 train_time:32252374ms step_avg:595.55ms +step:54157/57344 train_time:32252392ms step_avg:595.54ms +step:54158/57344 train_time:32252633ms step_avg:595.53ms +step:54159/57344 train_time:32253216ms step_avg:595.53ms +grad accum step:13540/14336 +step:54160/57344 train_time:32254661ms step_avg:595.54ms +step:54161/57344 train_time:32254676ms step_avg:595.53ms +step:54162/57344 train_time:32254935ms step_avg:595.53ms +step:54163/57344 train_time:32255527ms step_avg:595.53ms +grad accum step:13541/14336 +step:54164/57344 train_time:32256979ms step_avg:595.54ms +step:54165/57344 train_time:32256994ms step_avg:595.53ms +step:54166/57344 train_time:32257244ms step_avg:595.53ms +step:54167/57344 train_time:32257850ms step_avg:595.53ms +grad accum step:13542/14336 +step:54168/57344 train_time:32259273ms step_avg:595.54ms +step:54169/57344 train_time:32259289ms step_avg:595.53ms +step:54170/57344 train_time:32259556ms step_avg:595.52ms +step:54171/57344 train_time:32260160ms step_avg:595.52ms +grad accum step:13543/14336 +step:54172/57344 train_time:32261590ms step_avg:595.54ms +step:54173/57344 train_time:32261609ms step_avg:595.53ms +step:54174/57344 train_time:32261850ms step_avg:595.52ms +step:54175/57344 train_time:32262460ms step_avg:595.52ms +grad accum step:13544/14336 +step:54176/57344 train_time:32263801ms step_avg:595.54ms +step:54177/57344 train_time:32263874ms step_avg:595.53ms +step:54178/57344 train_time:32264095ms step_avg:595.52ms +step:54179/57344 train_time:32264657ms step_avg:595.52ms +grad accum step:13545/14336 +step:54180/57344 train_time:32266042ms step_avg:595.53ms +step:54181/57344 train_time:32266057ms step_avg:595.52ms +step:54182/57344 train_time:32266312ms step_avg:595.52ms +step:54183/57344 train_time:32266881ms step_avg:595.52ms +grad accum step:13546/14336 +step:54184/57344 train_time:32268331ms step_avg:595.53ms +step:54185/57344 train_time:32268347ms step_avg:595.52ms +step:54186/57344 train_time:32268616ms step_avg:595.52ms +step:54187/57344 train_time:32269232ms step_avg:595.52ms +grad accum step:13547/14336 +step:54188/57344 train_time:32270692ms step_avg:595.53ms +step:54189/57344 train_time:32270706ms step_avg:595.52ms +step:54190/57344 train_time:32270955ms step_avg:595.51ms +step:54191/57344 train_time:32271519ms step_avg:595.51ms +grad accum step:13548/14336 +step:54192/57344 train_time:32272833ms step_avg:595.53ms +step:54193/57344 train_time:32272850ms step_avg:595.52ms +step:54194/57344 train_time:32273103ms step_avg:595.51ms +step:54195/57344 train_time:32273667ms step_avg:595.51ms +grad accum step:13549/14336 +step:54196/57344 train_time:32275046ms step_avg:595.52ms +step:54197/57344 train_time:32275062ms step_avg:595.51ms +step:54198/57344 train_time:32275307ms step_avg:595.51ms +step:54199/57344 train_time:32275875ms step_avg:595.51ms +grad accum step:13550/14336 +step:54200/57344 train_time:32277291ms step_avg:595.52ms +step:54201/57344 train_time:32277307ms step_avg:595.51ms +step:54202/57344 train_time:32277562ms step_avg:595.51ms +step:54203/57344 train_time:32278139ms step_avg:595.50ms +grad accum step:13551/14336 +step:54204/57344 train_time:32279495ms step_avg:595.52ms +step:54205/57344 train_time:32279512ms step_avg:595.51ms +step:54206/57344 train_time:32279788ms step_avg:595.50ms +step:54207/57344 train_time:32280433ms step_avg:595.50ms +grad accum step:13552/14336 +step:54208/57344 train_time:32281836ms step_avg:595.52ms +step:54208/57344 val_loss:5.331984 train_time:32281839ms step_avg:595.52ms +step:54209/57344 train_time:32281851ms step_avg:595.51ms +step:54210/57344 train_time:32282073ms step_avg:595.50ms +step:54211/57344 train_time:32282640ms step_avg:595.50ms +grad accum step:13553/14336 +step:54212/57344 train_time:32284050ms step_avg:595.51ms +step:54213/57344 train_time:32284065ms step_avg:595.50ms +step:54214/57344 train_time:32284324ms step_avg:595.50ms +step:54215/57344 train_time:32284909ms step_avg:595.50ms +grad accum step:13554/14336 +step:54216/57344 train_time:32286277ms step_avg:595.51ms +step:54217/57344 train_time:32286289ms step_avg:595.50ms +step:54218/57344 train_time:32286540ms step_avg:595.49ms +step:54219/57344 train_time:32287120ms step_avg:595.49ms +grad accum step:13555/14336 +step:54220/57344 train_time:32288550ms step_avg:595.51ms +step:54221/57344 train_time:32288566ms step_avg:595.50ms +step:54222/57344 train_time:32288828ms step_avg:595.49ms +step:54223/57344 train_time:32289410ms step_avg:595.49ms +grad accum step:13556/14336 +step:54224/57344 train_time:32290730ms step_avg:595.51ms +step:54225/57344 train_time:32290747ms step_avg:595.50ms +step:54226/57344 train_time:32290995ms step_avg:595.49ms +step:54227/57344 train_time:32291551ms step_avg:595.49ms +grad accum step:13557/14336 +step:54228/57344 train_time:32292980ms step_avg:595.50ms +step:54229/57344 train_time:32292996ms step_avg:595.49ms +step:54230/57344 train_time:32293262ms step_avg:595.49ms +step:54231/57344 train_time:32293865ms step_avg:595.49ms +grad accum step:13558/14336 +step:54232/57344 train_time:32295199ms step_avg:595.50ms +step:54233/57344 train_time:32295215ms step_avg:595.49ms +step:54234/57344 train_time:32295465ms step_avg:595.48ms +step:54235/57344 train_time:32296037ms step_avg:595.48ms +grad accum step:13559/14336 +step:54236/57344 train_time:32297391ms step_avg:595.50ms +step:54237/57344 train_time:32297408ms step_avg:595.49ms +step:54238/57344 train_time:32297657ms step_avg:595.48ms +step:54239/57344 train_time:32298228ms step_avg:595.48ms +grad accum step:13560/14336 +step:54240/57344 train_time:32299628ms step_avg:595.49ms +step:54241/57344 train_time:32299643ms step_avg:595.48ms +step:54242/57344 train_time:32299900ms step_avg:595.48ms +step:54243/57344 train_time:32300485ms step_avg:595.48ms +grad accum step:13561/14336 +step:54244/57344 train_time:32301859ms step_avg:595.49ms +step:54245/57344 train_time:32301876ms step_avg:595.48ms +step:54246/57344 train_time:32302124ms step_avg:595.47ms +step:54247/57344 train_time:32302684ms step_avg:595.47ms +grad accum step:13562/14336 +step:54248/57344 train_time:32304037ms step_avg:595.49ms +step:54249/57344 train_time:32304054ms step_avg:595.48ms +step:54250/57344 train_time:32304303ms step_avg:595.47ms +step:54251/57344 train_time:32304855ms step_avg:595.47ms +grad accum step:13563/14336 +step:54252/57344 train_time:32306315ms step_avg:595.49ms +step:54253/57344 train_time:32306332ms step_avg:595.48ms +step:54254/57344 train_time:32306598ms step_avg:595.47ms +step:54255/57344 train_time:32307234ms step_avg:595.47ms +grad accum step:13564/14336 +step:54256/57344 train_time:32308679ms step_avg:595.49ms +step:54257/57344 train_time:32308695ms step_avg:595.48ms +step:54258/57344 train_time:32308943ms step_avg:595.47ms +step:54259/57344 train_time:32309514ms step_avg:595.47ms +grad accum step:13565/14336 +step:54260/57344 train_time:32310916ms step_avg:595.48ms +step:54261/57344 train_time:32310933ms step_avg:595.47ms +step:54262/57344 train_time:32311174ms step_avg:595.47ms +step:54263/57344 train_time:32311727ms step_avg:595.47ms +grad accum step:13566/14336 +step:54264/57344 train_time:32313313ms step_avg:595.48ms +step:54265/57344 train_time:32313335ms step_avg:595.47ms +step:54266/57344 train_time:32313560ms step_avg:595.47ms +step:54267/57344 train_time:32314142ms step_avg:595.47ms +grad accum step:13567/14336 +step:54268/57344 train_time:32315538ms step_avg:595.48ms +step:54269/57344 train_time:32315553ms step_avg:595.47ms +step:54270/57344 train_time:32315802ms step_avg:595.46ms +step:54271/57344 train_time:32316374ms step_avg:595.46ms +grad accum step:13568/14336 +step:54272/57344 train_time:32317858ms step_avg:595.48ms +step:54272/57344 val_loss:5.330531 train_time:32317859ms step_avg:595.48ms +step:54273/57344 train_time:32317871ms step_avg:595.47ms +step:54274/57344 train_time:32318148ms step_avg:595.46ms +step:54275/57344 train_time:32318843ms step_avg:595.46ms +grad accum step:13569/14336 +step:54276/57344 train_time:32320250ms step_avg:595.48ms +step:54277/57344 train_time:32320264ms step_avg:595.47ms +step:54278/57344 train_time:32320520ms step_avg:595.46ms +step:54279/57344 train_time:32321116ms step_avg:595.46ms +grad accum step:13570/14336 +step:54280/57344 train_time:32322555ms step_avg:595.48ms +step:54281/57344 train_time:32322581ms step_avg:595.47ms +step:54282/57344 train_time:32322813ms step_avg:595.46ms +step:54283/57344 train_time:32323379ms step_avg:595.46ms +grad accum step:13571/14336 +step:54284/57344 train_time:32324733ms step_avg:595.47ms +step:54285/57344 train_time:32324757ms step_avg:595.46ms +step:54286/57344 train_time:32324992ms step_avg:595.46ms +step:54287/57344 train_time:32325580ms step_avg:595.46ms +grad accum step:13572/14336 +step:54288/57344 train_time:32327204ms step_avg:595.48ms +step:54289/57344 train_time:32327221ms step_avg:595.47ms +step:54290/57344 train_time:32327508ms step_avg:595.46ms +step:54291/57344 train_time:32328228ms step_avg:595.46ms +grad accum step:13573/14336 +step:54292/57344 train_time:32329675ms step_avg:595.48ms +step:54293/57344 train_time:32329691ms step_avg:595.47ms +step:54294/57344 train_time:32329940ms step_avg:595.46ms +step:54295/57344 train_time:32330507ms step_avg:595.46ms +grad accum step:13574/14336 +step:54296/57344 train_time:32331924ms step_avg:595.48ms +step:54297/57344 train_time:32331941ms step_avg:595.46ms +step:54298/57344 train_time:32332196ms step_avg:595.46ms +step:54299/57344 train_time:32332784ms step_avg:595.46ms +grad accum step:13575/14336 +step:54300/57344 train_time:32334216ms step_avg:595.47ms +step:54301/57344 train_time:32334243ms step_avg:595.46ms +step:54302/57344 train_time:32334472ms step_avg:595.46ms +step:54303/57344 train_time:32335041ms step_avg:595.46ms +grad accum step:13576/14336 +step:54304/57344 train_time:32336435ms step_avg:595.47ms +step:54305/57344 train_time:32336479ms step_avg:595.46ms +step:54306/57344 train_time:32336703ms step_avg:595.45ms +step:54307/57344 train_time:32337275ms step_avg:595.45ms +grad accum step:13577/14336 +step:54308/57344 train_time:32338678ms step_avg:595.47ms +step:54309/57344 train_time:32338693ms step_avg:595.46ms +step:54310/57344 train_time:32338961ms step_avg:595.45ms +step:54311/57344 train_time:32339576ms step_avg:595.45ms +grad accum step:13578/14336 +step:54312/57344 train_time:32341012ms step_avg:595.47ms +step:54313/57344 train_time:32341029ms step_avg:595.46ms +step:54314/57344 train_time:32341274ms step_avg:595.45ms +step:54315/57344 train_time:32341853ms step_avg:595.45ms +grad accum step:13579/14336 +step:54316/57344 train_time:32343335ms step_avg:595.47ms +step:54317/57344 train_time:32343351ms step_avg:595.46ms +step:54318/57344 train_time:32343610ms step_avg:595.45ms +step:54319/57344 train_time:32344218ms step_avg:595.45ms +grad accum step:13580/14336 +step:54320/57344 train_time:32345609ms step_avg:595.46ms +step:54321/57344 train_time:32345655ms step_avg:595.45ms +step:54322/57344 train_time:32345884ms step_avg:595.45ms +step:54323/57344 train_time:32346485ms step_avg:595.45ms +grad accum step:13581/14336 +step:54324/57344 train_time:32347997ms step_avg:595.46ms +step:54325/57344 train_time:32348013ms step_avg:595.45ms +step:54326/57344 train_time:32348269ms step_avg:595.45ms +step:54327/57344 train_time:32348864ms step_avg:595.45ms +grad accum step:13582/14336 +step:54328/57344 train_time:32350209ms step_avg:595.46ms +step:54329/57344 train_time:32350225ms step_avg:595.45ms +step:54330/57344 train_time:32350475ms step_avg:595.44ms +step:54331/57344 train_time:32351052ms step_avg:595.44ms +grad accum step:13583/14336 +step:54332/57344 train_time:32352505ms step_avg:595.46ms +step:54333/57344 train_time:32352521ms step_avg:595.45ms +step:54334/57344 train_time:32352778ms step_avg:595.44ms +step:54335/57344 train_time:32353352ms step_avg:595.44ms +grad accum step:13584/14336 +step:54336/57344 train_time:32354695ms step_avg:595.46ms +step:54336/57344 val_loss:5.331734 train_time:32354697ms step_avg:595.46ms +step:54337/57344 train_time:32354709ms step_avg:595.45ms +step:54338/57344 train_time:32354926ms step_avg:595.44ms +step:54339/57344 train_time:32355485ms step_avg:595.44ms +grad accum step:13585/14336 +step:54340/57344 train_time:32356836ms step_avg:595.45ms +step:54341/57344 train_time:32356854ms step_avg:595.44ms +step:54342/57344 train_time:32357101ms step_avg:595.43ms +step:54343/57344 train_time:32357695ms step_avg:595.43ms +grad accum step:13586/14336 +step:54344/57344 train_time:32359160ms step_avg:595.45ms +step:54345/57344 train_time:32359177ms step_avg:595.44ms +step:54346/57344 train_time:32359430ms step_avg:595.43ms +step:54347/57344 train_time:32360026ms step_avg:595.43ms +grad accum step:13587/14336 +step:54348/57344 train_time:32361382ms step_avg:595.45ms +step:54349/57344 train_time:32361452ms step_avg:595.44ms +step:54350/57344 train_time:32361672ms step_avg:595.43ms +step:54351/57344 train_time:32362233ms step_avg:595.43ms +grad accum step:13588/14336 +step:54352/57344 train_time:32363618ms step_avg:595.44ms +step:54353/57344 train_time:32363633ms step_avg:595.43ms +step:54354/57344 train_time:32363894ms step_avg:595.43ms +step:54355/57344 train_time:32364481ms step_avg:595.43ms +grad accum step:13589/14336 +step:54356/57344 train_time:32365908ms step_avg:595.44ms +step:54357/57344 train_time:32365929ms step_avg:595.43ms +step:54358/57344 train_time:32366162ms step_avg:595.43ms +step:54359/57344 train_time:32366753ms step_avg:595.43ms +grad accum step:13590/14336 +step:54360/57344 train_time:32368154ms step_avg:595.44ms +step:54361/57344 train_time:32368169ms step_avg:595.43ms +step:54362/57344 train_time:32368418ms step_avg:595.42ms +step:54363/57344 train_time:32368986ms step_avg:595.42ms +grad accum step:13591/14336 +step:54364/57344 train_time:32370432ms step_avg:595.44ms +step:54365/57344 train_time:32370448ms step_avg:595.43ms +step:54366/57344 train_time:32370704ms step_avg:595.42ms +step:54367/57344 train_time:32371313ms step_avg:595.42ms +grad accum step:13592/14336 +step:54368/57344 train_time:32372753ms step_avg:595.44ms +step:54369/57344 train_time:32372784ms step_avg:595.43ms +step:54370/57344 train_time:32373012ms step_avg:595.42ms +step:54371/57344 train_time:32373584ms step_avg:595.42ms +grad accum step:13593/14336 +step:54372/57344 train_time:32375068ms step_avg:595.44ms +step:54373/57344 train_time:32375088ms step_avg:595.43ms +step:54374/57344 train_time:32375324ms step_avg:595.42ms +step:54375/57344 train_time:32375898ms step_avg:595.42ms +grad accum step:13594/14336 +step:54376/57344 train_time:32377350ms step_avg:595.43ms +step:54377/57344 train_time:32377365ms step_avg:595.42ms +step:54378/57344 train_time:32377606ms step_avg:595.42ms +step:54379/57344 train_time:32378173ms step_avg:595.42ms +grad accum step:13595/14336 +step:54380/57344 train_time:32379553ms step_avg:595.43ms +step:54381/57344 train_time:32379569ms step_avg:595.42ms +step:54382/57344 train_time:32379824ms step_avg:595.41ms +step:54383/57344 train_time:32380406ms step_avg:595.41ms +grad accum step:13596/14336 +step:54384/57344 train_time:32381772ms step_avg:595.43ms +step:54385/57344 train_time:32381789ms step_avg:595.42ms +step:54386/57344 train_time:32382039ms step_avg:595.41ms +step:54387/57344 train_time:32382626ms step_avg:595.41ms +grad accum step:13597/14336 +step:54388/57344 train_time:32384039ms step_avg:595.43ms +step:54389/57344 train_time:32384053ms step_avg:595.42ms +step:54390/57344 train_time:32384310ms step_avg:595.41ms +step:54391/57344 train_time:32384889ms step_avg:595.41ms +grad accum step:13598/14336 +step:54392/57344 train_time:32386277ms step_avg:595.42ms +step:54393/57344 train_time:32386294ms step_avg:595.41ms +step:54394/57344 train_time:32386542ms step_avg:595.41ms +step:54395/57344 train_time:32387093ms step_avg:595.41ms +grad accum step:13599/14336 +step:54396/57344 train_time:32388492ms step_avg:595.42ms +step:54397/57344 train_time:32388519ms step_avg:595.41ms +step:54398/57344 train_time:32388745ms step_avg:595.40ms +step:54399/57344 train_time:32389317ms step_avg:595.40ms +grad accum step:13600/14336 +step:54400/57344 train_time:32390671ms step_avg:595.42ms +step:54400/57344 val_loss:5.329116 train_time:32390675ms step_avg:595.42ms +step:54401/57344 train_time:32390686ms step_avg:595.41ms +step:54402/57344 train_time:32390917ms step_avg:595.40ms +step:54403/57344 train_time:32391493ms step_avg:595.40ms +grad accum step:13601/14336 +step:54404/57344 train_time:32392908ms step_avg:595.41ms +step:54405/57344 train_time:32392923ms step_avg:595.40ms +step:54406/57344 train_time:32393186ms step_avg:595.40ms +step:54407/57344 train_time:32393784ms step_avg:595.40ms +grad accum step:13602/14336 +step:54408/57344 train_time:32395186ms step_avg:595.41ms +step:54409/57344 train_time:32395204ms step_avg:595.40ms +step:54410/57344 train_time:32395450ms step_avg:595.40ms +step:54411/57344 train_time:32396018ms step_avg:595.39ms +grad accum step:13603/14336 +step:54412/57344 train_time:32397413ms step_avg:595.41ms +step:54413/57344 train_time:32397446ms step_avg:595.40ms +step:54414/57344 train_time:32397679ms step_avg:595.39ms +step:54415/57344 train_time:32398265ms step_avg:595.39ms +grad accum step:13604/14336 +step:54416/57344 train_time:32399641ms step_avg:595.41ms +step:54417/57344 train_time:32399663ms step_avg:595.40ms +step:54418/57344 train_time:32399898ms step_avg:595.39ms +step:54419/57344 train_time:32400475ms step_avg:595.39ms +grad accum step:13605/14336 +step:54420/57344 train_time:32401931ms step_avg:595.40ms +step:54421/57344 train_time:32401947ms step_avg:595.39ms +step:54422/57344 train_time:32402203ms step_avg:595.39ms +step:54423/57344 train_time:32402789ms step_avg:595.39ms +grad accum step:13606/14336 +step:54424/57344 train_time:32404154ms step_avg:595.40ms +step:54425/57344 train_time:32404170ms step_avg:595.39ms +step:54426/57344 train_time:32404420ms step_avg:595.38ms +step:54427/57344 train_time:32404984ms step_avg:595.38ms +grad accum step:13607/14336 +step:54428/57344 train_time:32406483ms step_avg:595.40ms +step:54429/57344 train_time:32406503ms step_avg:595.39ms +step:54430/57344 train_time:32406748ms step_avg:595.38ms +step:54431/57344 train_time:32407392ms step_avg:595.38ms +grad accum step:13608/14336 +step:54432/57344 train_time:32408825ms step_avg:595.40ms +step:54433/57344 train_time:32408840ms step_avg:595.39ms +step:54434/57344 train_time:32409080ms step_avg:595.38ms +step:54435/57344 train_time:32409697ms step_avg:595.38ms +grad accum step:13609/14336 +step:54436/57344 train_time:32411140ms step_avg:595.40ms +step:54437/57344 train_time:32411160ms step_avg:595.39ms +step:54438/57344 train_time:32411398ms step_avg:595.38ms +step:54439/57344 train_time:32411964ms step_avg:595.38ms +grad accum step:13610/14336 +step:54440/57344 train_time:32413365ms step_avg:595.40ms +step:54441/57344 train_time:32413379ms step_avg:595.39ms +step:54442/57344 train_time:32413636ms step_avg:595.38ms +step:54443/57344 train_time:32414213ms step_avg:595.38ms +grad accum step:13611/14336 +step:54444/57344 train_time:32415554ms step_avg:595.39ms +step:54445/57344 train_time:32415569ms step_avg:595.38ms +step:54446/57344 train_time:32415813ms step_avg:595.38ms +step:54447/57344 train_time:32416367ms step_avg:595.37ms +grad accum step:13612/14336 +step:54448/57344 train_time:32417754ms step_avg:595.39ms +step:54449/57344 train_time:32417772ms step_avg:595.38ms +step:54450/57344 train_time:32418021ms step_avg:595.37ms +step:54451/57344 train_time:32418604ms step_avg:595.37ms +grad accum step:13613/14336 +step:54452/57344 train_time:32419974ms step_avg:595.39ms +step:54453/57344 train_time:32420007ms step_avg:595.38ms +step:54454/57344 train_time:32420227ms step_avg:595.37ms +step:54455/57344 train_time:32420807ms step_avg:595.37ms +grad accum step:13614/14336 +step:54456/57344 train_time:32422305ms step_avg:595.39ms +step:54457/57344 train_time:32422324ms step_avg:595.37ms +step:54458/57344 train_time:32422567ms step_avg:595.37ms +step:54459/57344 train_time:32423156ms step_avg:595.37ms +grad accum step:13615/14336 +step:54460/57344 train_time:32424578ms step_avg:595.38ms +step:54461/57344 train_time:32424593ms step_avg:595.37ms +step:54462/57344 train_time:32424841ms step_avg:595.37ms +step:54463/57344 train_time:32425414ms step_avg:595.37ms +grad accum step:13616/14336 +step:54464/57344 train_time:32426779ms step_avg:595.38ms +step:54464/57344 val_loss:5.327961 train_time:32426781ms step_avg:595.38ms +step:54465/57344 train_time:32426793ms step_avg:595.37ms +step:54466/57344 train_time:32427022ms step_avg:595.36ms +step:54467/57344 train_time:32427604ms step_avg:595.36ms +grad accum step:13617/14336 +step:54468/57344 train_time:32429003ms step_avg:595.38ms +step:54469/57344 train_time:32429038ms step_avg:595.37ms +step:54470/57344 train_time:32429281ms step_avg:595.36ms +step:54471/57344 train_time:32429908ms step_avg:595.36ms +grad accum step:13618/14336 +step:54472/57344 train_time:32431425ms step_avg:595.38ms +step:54473/57344 train_time:32431447ms step_avg:595.37ms +step:54474/57344 train_time:32431672ms step_avg:595.36ms +step:54475/57344 train_time:32432237ms step_avg:595.36ms +grad accum step:13619/14336 +step:54476/57344 train_time:32433623ms step_avg:595.37ms +step:54477/57344 train_time:32433638ms step_avg:595.36ms +step:54478/57344 train_time:32433886ms step_avg:595.36ms +step:54479/57344 train_time:32434439ms step_avg:595.36ms +grad accum step:13620/14336 +step:54480/57344 train_time:32435776ms step_avg:595.37ms +step:54481/57344 train_time:32435823ms step_avg:595.36ms +step:54482/57344 train_time:32436045ms step_avg:595.35ms +step:54483/57344 train_time:32436609ms step_avg:595.35ms +grad accum step:13621/14336 +step:54484/57344 train_time:32437987ms step_avg:595.37ms +step:54485/57344 train_time:32438004ms step_avg:595.36ms +step:54486/57344 train_time:32438256ms step_avg:595.35ms +step:54487/57344 train_time:32438842ms step_avg:595.35ms +grad accum step:13622/14336 +step:54488/57344 train_time:32440287ms step_avg:595.37ms +step:54489/57344 train_time:32440305ms step_avg:595.36ms +step:54490/57344 train_time:32440549ms step_avg:595.35ms +step:54491/57344 train_time:32441136ms step_avg:595.35ms +grad accum step:13623/14336 +step:54492/57344 train_time:32442575ms step_avg:595.36ms +step:54493/57344 train_time:32442590ms step_avg:595.35ms +step:54494/57344 train_time:32442835ms step_avg:595.35ms +step:54495/57344 train_time:32443412ms step_avg:595.35ms +grad accum step:13624/14336 +step:54496/57344 train_time:32444887ms step_avg:595.36ms +step:54497/57344 train_time:32444902ms step_avg:595.35ms +step:54498/57344 train_time:32445152ms step_avg:595.35ms +step:54499/57344 train_time:32445726ms step_avg:595.35ms +grad accum step:13625/14336 +step:54500/57344 train_time:32447088ms step_avg:595.36ms +step:54501/57344 train_time:32447101ms step_avg:595.35ms +step:54502/57344 train_time:32447354ms step_avg:595.34ms +step:54503/57344 train_time:32447961ms step_avg:595.34ms +grad accum step:13626/14336 +step:54504/57344 train_time:32449437ms step_avg:595.36ms +step:54505/57344 train_time:32449452ms step_avg:595.35ms +step:54506/57344 train_time:32449738ms step_avg:595.34ms +step:54507/57344 train_time:32450388ms step_avg:595.34ms +grad accum step:13627/14336 +step:54508/57344 train_time:32451842ms step_avg:595.36ms +step:54509/57344 train_time:32451858ms step_avg:595.35ms +step:54510/57344 train_time:32452119ms step_avg:595.34ms +step:54511/57344 train_time:32452719ms step_avg:595.34ms +grad accum step:13628/14336 +step:54512/57344 train_time:32454111ms step_avg:595.36ms +step:54513/57344 train_time:32454128ms step_avg:595.35ms +step:54514/57344 train_time:32454385ms step_avg:595.34ms +step:54515/57344 train_time:32454964ms step_avg:595.34ms +grad accum step:13629/14336 +step:54516/57344 train_time:32456273ms step_avg:595.35ms +step:54517/57344 train_time:32456290ms step_avg:595.34ms +step:54518/57344 train_time:32456544ms step_avg:595.34ms +step:54519/57344 train_time:32457112ms step_avg:595.34ms +grad accum step:13630/14336 +step:54520/57344 train_time:32458427ms step_avg:595.35ms +step:54521/57344 train_time:32458444ms step_avg:595.34ms +step:54522/57344 train_time:32458703ms step_avg:595.33ms +step:54523/57344 train_time:32459297ms step_avg:595.33ms +grad accum step:13631/14336 +step:54524/57344 train_time:32460653ms step_avg:595.35ms +step:54525/57344 train_time:32460670ms step_avg:595.34ms +step:54526/57344 train_time:32460920ms step_avg:595.33ms +step:54527/57344 train_time:32461480ms step_avg:595.33ms +grad accum step:13632/14336 +step:54528/57344 train_time:32463090ms step_avg:595.35ms +step:54528/57344 val_loss:5.327446 train_time:32463097ms step_avg:595.35ms +step:54529/57344 train_time:32463109ms step_avg:595.34ms +step:54530/57344 train_time:32463370ms step_avg:595.33ms +step:54531/57344 train_time:32464025ms step_avg:595.33ms +grad accum step:13633/14336 +step:54532/57344 train_time:32465337ms step_avg:595.34ms +step:54533/57344 train_time:32465354ms step_avg:595.33ms +step:54534/57344 train_time:32465602ms step_avg:595.33ms +step:54535/57344 train_time:32466154ms step_avg:595.33ms +grad accum step:13634/14336 +step:54536/57344 train_time:32467555ms step_avg:595.34ms +step:54537/57344 train_time:32467579ms step_avg:595.33ms +step:54538/57344 train_time:32467814ms step_avg:595.32ms +step:54539/57344 train_time:32468370ms step_avg:595.32ms +grad accum step:13635/14336 +step:54540/57344 train_time:32469802ms step_avg:595.34ms +step:54541/57344 train_time:32469828ms step_avg:595.33ms +step:54542/57344 train_time:32470066ms step_avg:595.32ms +step:54543/57344 train_time:32470690ms step_avg:595.32ms +grad accum step:13636/14336 +step:54544/57344 train_time:32472068ms step_avg:595.34ms +step:54545/57344 train_time:32472091ms step_avg:595.33ms +step:54546/57344 train_time:32472319ms step_avg:595.32ms +step:54547/57344 train_time:32472912ms step_avg:595.32ms +grad accum step:13637/14336 +step:54548/57344 train_time:32474619ms step_avg:595.34ms +step:54549/57344 train_time:32474638ms step_avg:595.33ms +step:54550/57344 train_time:32474860ms step_avg:595.32ms +step:54551/57344 train_time:32475428ms step_avg:595.32ms +grad accum step:13638/14336 +step:54552/57344 train_time:32476760ms step_avg:595.34ms +step:54553/57344 train_time:32476779ms step_avg:595.33ms +step:54554/57344 train_time:32477040ms step_avg:595.32ms +step:54555/57344 train_time:32477647ms step_avg:595.32ms +grad accum step:13639/14336 +step:54556/57344 train_time:32479018ms step_avg:595.33ms +step:54557/57344 train_time:32479046ms step_avg:595.32ms +step:54558/57344 train_time:32479273ms step_avg:595.32ms +step:54559/57344 train_time:32479856ms step_avg:595.32ms +grad accum step:13640/14336 +step:54560/57344 train_time:32481323ms step_avg:595.33ms +step:54561/57344 train_time:32481340ms step_avg:595.32ms +step:54562/57344 train_time:32481593ms step_avg:595.32ms +step:54563/57344 train_time:32482199ms step_avg:595.32ms +grad accum step:13641/14336 +step:54564/57344 train_time:32483524ms step_avg:595.33ms +step:54565/57344 train_time:32483539ms step_avg:595.32ms +step:54566/57344 train_time:32483816ms step_avg:595.31ms +step:54567/57344 train_time:32484460ms step_avg:595.31ms +grad accum step:13642/14336 +step:54568/57344 train_time:32485943ms step_avg:595.33ms +step:54569/57344 train_time:32485959ms step_avg:595.32ms +step:54570/57344 train_time:32486195ms step_avg:595.31ms +step:54571/57344 train_time:32486835ms step_avg:595.31ms +grad accum step:13643/14336 +step:54572/57344 train_time:32488370ms step_avg:595.33ms +step:54573/57344 train_time:32488384ms step_avg:595.32ms +step:54574/57344 train_time:32488629ms step_avg:595.31ms +step:54575/57344 train_time:32489226ms step_avg:595.31ms +grad accum step:13644/14336 +step:54576/57344 train_time:32490658ms step_avg:595.33ms +step:54577/57344 train_time:32490702ms step_avg:595.32ms +step:54578/57344 train_time:32490931ms step_avg:595.31ms +step:54579/57344 train_time:32491506ms step_avg:595.31ms +grad accum step:13645/14336 +step:54580/57344 train_time:32492881ms step_avg:595.33ms +step:54581/57344 train_time:32492905ms step_avg:595.32ms +step:54582/57344 train_time:32493138ms step_avg:595.31ms +step:54583/57344 train_time:32493698ms step_avg:595.31ms +grad accum step:13646/14336 +step:54584/57344 train_time:32495081ms step_avg:595.32ms +step:54585/57344 train_time:32495097ms step_avg:595.31ms +step:54586/57344 train_time:32495360ms step_avg:595.31ms +step:54587/57344 train_time:32495962ms step_avg:595.31ms +grad accum step:13647/14336 +step:54588/57344 train_time:32497364ms step_avg:595.32ms +step:54589/57344 train_time:32497380ms step_avg:595.31ms +step:54590/57344 train_time:32497622ms step_avg:595.30ms +step:54591/57344 train_time:32498195ms step_avg:595.30ms +grad accum step:13648/14336 +step:54592/57344 train_time:32499559ms step_avg:595.32ms +step:54592/57344 val_loss:5.326375 train_time:32499562ms step_avg:595.32ms +step:54593/57344 train_time:32499574ms step_avg:595.31ms +step:54594/57344 train_time:32499817ms step_avg:595.30ms +step:54595/57344 train_time:32500449ms step_avg:595.30ms +grad accum step:13649/14336 +step:54596/57344 train_time:32501987ms step_avg:595.32ms +step:54597/57344 train_time:32502004ms step_avg:595.31ms +step:54598/57344 train_time:32502239ms step_avg:595.30ms +step:54599/57344 train_time:32502831ms step_avg:595.30ms +grad accum step:13650/14336 +step:54600/57344 train_time:32504237ms step_avg:595.32ms +step:54601/57344 train_time:32504252ms step_avg:595.31ms +step:54602/57344 train_time:32504501ms step_avg:595.30ms +step:54603/57344 train_time:32505070ms step_avg:595.30ms +grad accum step:13651/14336 +step:54604/57344 train_time:32506434ms step_avg:595.31ms +step:54605/57344 train_time:32506449ms step_avg:595.30ms +step:54606/57344 train_time:32506695ms step_avg:595.30ms +step:54607/57344 train_time:32507258ms step_avg:595.29ms +grad accum step:13652/14336 +step:54608/57344 train_time:32508623ms step_avg:595.31ms +step:54609/57344 train_time:32508645ms step_avg:595.30ms +step:54610/57344 train_time:32508891ms step_avg:595.29ms +step:54611/57344 train_time:32509481ms step_avg:595.29ms +grad accum step:13653/14336 +step:54612/57344 train_time:32510865ms step_avg:595.31ms +step:54613/57344 train_time:32510881ms step_avg:595.30ms +step:54614/57344 train_time:32511130ms step_avg:595.29ms +step:54615/57344 train_time:32511687ms step_avg:595.29ms +grad accum step:13654/14336 +step:54616/57344 train_time:32513082ms step_avg:595.30ms +step:54617/57344 train_time:32513097ms step_avg:595.29ms +step:54618/57344 train_time:32513332ms step_avg:595.29ms +step:54619/57344 train_time:32513913ms step_avg:595.29ms +grad accum step:13655/14336 +step:54620/57344 train_time:32515298ms step_avg:595.30ms +step:54621/57344 train_time:32515321ms step_avg:595.29ms +step:54622/57344 train_time:32515565ms step_avg:595.28ms +step:54623/57344 train_time:32516149ms step_avg:595.28ms +grad accum step:13656/14336 +step:54624/57344 train_time:32517574ms step_avg:595.30ms +step:54625/57344 train_time:32517588ms step_avg:595.29ms +step:54626/57344 train_time:32517844ms step_avg:595.28ms +step:54627/57344 train_time:32518436ms step_avg:595.28ms +grad accum step:13657/14336 +step:54628/57344 train_time:32519845ms step_avg:595.30ms +step:54629/57344 train_time:32519872ms step_avg:595.29ms +step:54630/57344 train_time:32520103ms step_avg:595.28ms +step:54631/57344 train_time:32520691ms step_avg:595.28ms +grad accum step:13658/14336 +step:54632/57344 train_time:32522208ms step_avg:595.30ms +step:54633/57344 train_time:32522225ms step_avg:595.29ms +step:54634/57344 train_time:32522470ms step_avg:595.28ms +step:54635/57344 train_time:32523037ms step_avg:595.28ms +grad accum step:13659/14336 +step:54636/57344 train_time:32524449ms step_avg:595.29ms +step:54637/57344 train_time:32524465ms step_avg:595.28ms +step:54638/57344 train_time:32524719ms step_avg:595.28ms +step:54639/57344 train_time:32525286ms step_avg:595.28ms +grad accum step:13660/14336 +step:54640/57344 train_time:32526609ms step_avg:595.29ms +step:54641/57344 train_time:32526627ms step_avg:595.28ms +step:54642/57344 train_time:32526884ms step_avg:595.27ms +step:54643/57344 train_time:32527484ms step_avg:595.27ms +grad accum step:13661/14336 +step:54644/57344 train_time:32528909ms step_avg:595.29ms +step:54645/57344 train_time:32528922ms step_avg:595.28ms +step:54646/57344 train_time:32529173ms step_avg:595.27ms +step:54647/57344 train_time:32529753ms step_avg:595.27ms +grad accum step:13662/14336 +step:54648/57344 train_time:32531125ms step_avg:595.28ms +step:54649/57344 train_time:32531143ms step_avg:595.27ms +step:54650/57344 train_time:32531384ms step_avg:595.27ms +step:54651/57344 train_time:32531957ms step_avg:595.27ms +grad accum step:13663/14336 +step:54652/57344 train_time:32533357ms step_avg:595.28ms +step:54653/57344 train_time:32533373ms step_avg:595.27ms +step:54654/57344 train_time:32533630ms step_avg:595.27ms +step:54655/57344 train_time:32534208ms step_avg:595.26ms +grad accum step:13664/14336 +step:54656/57344 train_time:32535575ms step_avg:595.28ms +step:54656/57344 val_loss:5.324928 train_time:32535581ms step_avg:595.28ms +step:54657/57344 train_time:32535593ms step_avg:595.27ms +step:54658/57344 train_time:32535830ms step_avg:595.26ms +step:54659/57344 train_time:32536429ms step_avg:595.26ms +grad accum step:13665/14336 +step:54660/57344 train_time:32537808ms step_avg:595.28ms +step:54661/57344 train_time:32537825ms step_avg:595.27ms +step:54662/57344 train_time:32538077ms step_avg:595.26ms +step:54663/57344 train_time:32538653ms step_avg:595.26ms +grad accum step:13666/14336 +step:54664/57344 train_time:32540053ms step_avg:595.27ms +step:54665/57344 train_time:32540068ms step_avg:595.26ms +step:54666/57344 train_time:32540317ms step_avg:595.26ms +step:54667/57344 train_time:32540891ms step_avg:595.26ms +grad accum step:13667/14336 +step:54668/57344 train_time:32542308ms step_avg:595.27ms +step:54669/57344 train_time:32542325ms step_avg:595.26ms +step:54670/57344 train_time:32542588ms step_avg:595.25ms +step:54671/57344 train_time:32543197ms step_avg:595.26ms +grad accum step:13668/14336 +step:54672/57344 train_time:32544605ms step_avg:595.27ms +step:54673/57344 train_time:32544622ms step_avg:595.26ms +step:54674/57344 train_time:32544875ms step_avg:595.25ms +step:54675/57344 train_time:32545459ms step_avg:595.25ms +grad accum step:13669/14336 +step:54676/57344 train_time:32546822ms step_avg:595.27ms +step:54677/57344 train_time:32546839ms step_avg:595.26ms +step:54678/57344 train_time:32547122ms step_avg:595.25ms +step:54679/57344 train_time:32547779ms step_avg:595.25ms +grad accum step:13670/14336 +step:54680/57344 train_time:32549228ms step_avg:595.27ms +step:54681/57344 train_time:32549240ms step_avg:595.26ms +step:54682/57344 train_time:32549471ms step_avg:595.25ms +step:54683/57344 train_time:32550059ms step_avg:595.25ms +grad accum step:13671/14336 +step:54684/57344 train_time:32551468ms step_avg:595.26ms +step:54685/57344 train_time:32551486ms step_avg:595.25ms +step:54686/57344 train_time:32551733ms step_avg:595.25ms +step:54687/57344 train_time:32552307ms step_avg:595.25ms +grad accum step:13672/14336 +step:54688/57344 train_time:32553701ms step_avg:595.26ms +step:54689/57344 train_time:32553715ms step_avg:595.25ms +step:54690/57344 train_time:32553966ms step_avg:595.25ms +step:54691/57344 train_time:32554540ms step_avg:595.24ms +grad accum step:13673/14336 +step:54692/57344 train_time:32555909ms step_avg:595.26ms +step:54693/57344 train_time:32555926ms step_avg:595.25ms +step:54694/57344 train_time:32556173ms step_avg:595.24ms +step:54695/57344 train_time:32556729ms step_avg:595.24ms +grad accum step:13674/14336 +step:54696/57344 train_time:32558144ms step_avg:595.26ms +step:54697/57344 train_time:32558159ms step_avg:595.25ms +step:54698/57344 train_time:32558408ms step_avg:595.24ms +step:54699/57344 train_time:32558982ms step_avg:595.24ms +grad accum step:13675/14336 +step:54700/57344 train_time:32560402ms step_avg:595.25ms +step:54701/57344 train_time:32560413ms step_avg:595.24ms +step:54702/57344 train_time:32560639ms step_avg:595.24ms +step:54703/57344 train_time:32561189ms step_avg:595.24ms +grad accum step:13676/14336 +step:54704/57344 train_time:32562768ms step_avg:595.25ms +step:54705/57344 train_time:32562784ms step_avg:595.24ms +step:54706/57344 train_time:32563062ms step_avg:595.24ms +step:54707/57344 train_time:32563700ms step_avg:595.24ms +grad accum step:13677/14336 +step:54708/57344 train_time:32565101ms step_avg:595.25ms +step:54709/57344 train_time:32565120ms step_avg:595.24ms +step:54710/57344 train_time:32565371ms step_avg:595.24ms +step:54711/57344 train_time:32565941ms step_avg:595.24ms +grad accum step:13678/14336 +step:54712/57344 train_time:32567273ms step_avg:595.25ms +step:54713/57344 train_time:32567290ms step_avg:595.24ms +step:54714/57344 train_time:32567541ms step_avg:595.23ms +step:54715/57344 train_time:32568103ms step_avg:595.23ms +grad accum step:13679/14336 +step:54716/57344 train_time:32569477ms step_avg:595.25ms +step:54717/57344 train_time:32569494ms step_avg:595.24ms +step:54718/57344 train_time:32569747ms step_avg:595.23ms +step:54719/57344 train_time:32570315ms step_avg:595.23ms +grad accum step:13680/14336 +step:54720/57344 train_time:32571740ms step_avg:595.24ms +step:54720/57344 val_loss:5.323860 train_time:32571755ms step_avg:595.24ms +step:54721/57344 train_time:32571768ms step_avg:595.23ms +step:54722/57344 train_time:32571996ms step_avg:595.23ms +step:54723/57344 train_time:32572565ms step_avg:595.23ms +grad accum step:13681/14336 +step:54724/57344 train_time:32573905ms step_avg:595.24ms +step:54725/57344 train_time:32573921ms step_avg:595.23ms +step:54726/57344 train_time:32574165ms step_avg:595.22ms +step:54727/57344 train_time:32574731ms step_avg:595.22ms +grad accum step:13682/14336 +step:54728/57344 train_time:32576222ms step_avg:595.24ms +step:54729/57344 train_time:32576238ms step_avg:595.23ms +step:54730/57344 train_time:32576494ms step_avg:595.22ms +step:54731/57344 train_time:32577090ms step_avg:595.22ms +grad accum step:13683/14336 +step:54732/57344 train_time:32578575ms step_avg:595.24ms +step:54733/57344 train_time:32578592ms step_avg:595.23ms +step:54734/57344 train_time:32578845ms step_avg:595.22ms +step:54735/57344 train_time:32579434ms step_avg:595.22ms +grad accum step:13684/14336 +step:54736/57344 train_time:32580806ms step_avg:595.24ms +step:54737/57344 train_time:32580823ms step_avg:595.22ms +step:54738/57344 train_time:32581073ms step_avg:595.22ms +step:54739/57344 train_time:32581634ms step_avg:595.22ms +grad accum step:13685/14336 +step:54740/57344 train_time:32583158ms step_avg:595.23ms +step:54741/57344 train_time:32583182ms step_avg:595.22ms +step:54742/57344 train_time:32583408ms step_avg:595.22ms +step:54743/57344 train_time:32583989ms step_avg:595.22ms +grad accum step:13686/14336 +step:54744/57344 train_time:32585338ms step_avg:595.23ms +step:54745/57344 train_time:32585349ms step_avg:595.22ms +step:54746/57344 train_time:32585600ms step_avg:595.21ms +step:54747/57344 train_time:32586185ms step_avg:595.21ms +grad accum step:13687/14336 +step:54748/57344 train_time:32587558ms step_avg:595.23ms +step:54749/57344 train_time:32587593ms step_avg:595.22ms +step:54750/57344 train_time:32587830ms step_avg:595.21ms +step:54751/57344 train_time:32588407ms step_avg:595.21ms +grad accum step:13688/14336 +step:54752/57344 train_time:32589799ms step_avg:595.23ms +step:54753/57344 train_time:32589822ms step_avg:595.22ms +step:54754/57344 train_time:32590065ms step_avg:595.21ms +step:54755/57344 train_time:32590631ms step_avg:595.21ms +grad accum step:13689/14336 +step:54756/57344 train_time:32592056ms step_avg:595.22ms +step:54757/57344 train_time:32592067ms step_avg:595.21ms +step:54758/57344 train_time:32592303ms step_avg:595.21ms +step:54759/57344 train_time:32592878ms step_avg:595.21ms +grad accum step:13690/14336 +step:54760/57344 train_time:32594237ms step_avg:595.22ms +step:54761/57344 train_time:32594252ms step_avg:595.21ms +step:54762/57344 train_time:32594509ms step_avg:595.20ms +step:54763/57344 train_time:32595091ms step_avg:595.20ms +grad accum step:13691/14336 +step:54764/57344 train_time:32596472ms step_avg:595.22ms +step:54765/57344 train_time:32596485ms step_avg:595.21ms +step:54766/57344 train_time:32596710ms step_avg:595.20ms +step:54767/57344 train_time:32597273ms step_avg:595.20ms +grad accum step:13692/14336 +step:54768/57344 train_time:32598659ms step_avg:595.21ms +step:54769/57344 train_time:32598681ms step_avg:595.20ms +step:54770/57344 train_time:32598909ms step_avg:595.20ms +step:54771/57344 train_time:32599486ms step_avg:595.20ms +grad accum step:13693/14336 +step:54772/57344 train_time:32600868ms step_avg:595.21ms +step:54773/57344 train_time:32600881ms step_avg:595.20ms +step:54774/57344 train_time:32601146ms step_avg:595.19ms +step:54775/57344 train_time:32601776ms step_avg:595.19ms +grad accum step:13694/14336 +step:54776/57344 train_time:32603193ms step_avg:595.21ms +step:54777/57344 train_time:32603209ms step_avg:595.20ms +step:54778/57344 train_time:32603457ms step_avg:595.19ms +step:54779/57344 train_time:32604004ms step_avg:595.19ms +grad accum step:13695/14336 +step:54780/57344 train_time:32695695ms step_avg:596.85ms +step:54781/57344 train_time:32695729ms step_avg:596.84ms +step:54782/57344 train_time:32695937ms step_avg:596.84ms +step:54783/57344 train_time:32696498ms step_avg:596.84ms +grad accum step:13696/14336 +step:54784/57344 train_time:32697840ms step_avg:596.85ms +step:54784/57344 val_loss:5.322510 train_time:32697841ms step_avg:596.85ms +step:54785/57344 train_time:32697853ms step_avg:596.84ms +step:54786/57344 train_time:32698138ms step_avg:596.83ms +step:54787/57344 train_time:32698722ms step_avg:596.83ms +grad accum step:13697/14336 +step:54788/57344 train_time:32700199ms step_avg:596.85ms +step:54789/57344 train_time:32700211ms step_avg:596.84ms +step:54790/57344 train_time:32700450ms step_avg:596.83ms +step:54791/57344 train_time:32701020ms step_avg:596.83ms +grad accum step:13698/14336 +step:54792/57344 train_time:32702410ms step_avg:596.85ms +step:54793/57344 train_time:32702426ms step_avg:596.84ms +step:54794/57344 train_time:32702671ms step_avg:596.83ms +step:54795/57344 train_time:32703221ms step_avg:596.83ms +grad accum step:13699/14336 +step:54796/57344 train_time:32704565ms step_avg:596.84ms +step:54797/57344 train_time:32704582ms step_avg:596.83ms +step:54798/57344 train_time:32704807ms step_avg:596.82ms +step:54799/57344 train_time:32705370ms step_avg:596.82ms +grad accum step:13700/14336 +step:54800/57344 train_time:32706769ms step_avg:596.84ms +step:54801/57344 train_time:32706799ms step_avg:596.83ms +step:54802/57344 train_time:32707021ms step_avg:596.82ms +step:54803/57344 train_time:32707591ms step_avg:596.82ms +grad accum step:13701/14336 +step:54804/57344 train_time:32709094ms step_avg:596.84ms +step:54805/57344 train_time:32709111ms step_avg:596.83ms +step:54806/57344 train_time:32709362ms step_avg:596.82ms +step:54807/57344 train_time:32709943ms step_avg:596.82ms +grad accum step:13702/14336 +step:54808/57344 train_time:32711313ms step_avg:596.83ms +step:54809/57344 train_time:32711329ms step_avg:596.82ms +step:54810/57344 train_time:32711574ms step_avg:596.82ms +step:54811/57344 train_time:32712118ms step_avg:596.82ms +grad accum step:13703/14336 +step:54812/57344 train_time:32713439ms step_avg:596.83ms +step:54813/57344 train_time:32713456ms step_avg:596.82ms +step:54814/57344 train_time:32713707ms step_avg:596.81ms +step:54815/57344 train_time:32714292ms step_avg:596.81ms +grad accum step:13704/14336 +step:54816/57344 train_time:32715678ms step_avg:596.83ms +step:54817/57344 train_time:32715695ms step_avg:596.82ms +step:54818/57344 train_time:32715938ms step_avg:596.81ms +step:54819/57344 train_time:32716477ms step_avg:596.81ms +grad accum step:13705/14336 +step:54820/57344 train_time:32717838ms step_avg:596.82ms +step:54821/57344 train_time:32717849ms step_avg:596.81ms +step:54822/57344 train_time:32718099ms step_avg:596.81ms +step:54823/57344 train_time:32718668ms step_avg:596.81ms +grad accum step:13706/14336 +step:54824/57344 train_time:32720045ms step_avg:596.82ms +step:54825/57344 train_time:32720062ms step_avg:596.81ms +step:54826/57344 train_time:32720313ms step_avg:596.80ms +step:54827/57344 train_time:32720885ms step_avg:596.80ms +grad accum step:13707/14336 +step:54828/57344 train_time:32722290ms step_avg:596.82ms +step:54829/57344 train_time:32722328ms step_avg:596.81ms +step:54830/57344 train_time:32722547ms step_avg:596.80ms +step:54831/57344 train_time:32723109ms step_avg:596.80ms +grad accum step:13708/14336 +step:54832/57344 train_time:32724473ms step_avg:596.81ms +step:54833/57344 train_time:32724490ms step_avg:596.80ms +step:54834/57344 train_time:32724735ms step_avg:596.80ms +step:54835/57344 train_time:32725291ms step_avg:596.80ms +grad accum step:13709/14336 +step:54836/57344 train_time:32726638ms step_avg:596.81ms +step:54837/57344 train_time:32726656ms step_avg:596.80ms +step:54838/57344 train_time:32726905ms step_avg:596.79ms +step:54839/57344 train_time:32727488ms step_avg:596.79ms +grad accum step:13710/14336 +step:54840/57344 train_time:32782270ms step_avg:597.78ms +step:54841/57344 train_time:32782282ms step_avg:597.77ms +step:54842/57344 train_time:32782555ms step_avg:597.76ms +step:54843/57344 train_time:32783129ms step_avg:597.76ms +grad accum step:13711/14336 +step:54844/57344 train_time:32784441ms step_avg:597.78ms +step:54845/57344 train_time:32784458ms step_avg:597.77ms +step:54846/57344 train_time:32784708ms step_avg:597.76ms +step:54847/57344 train_time:32785272ms step_avg:597.76ms +grad accum step:13712/14336 +step:54848/57344 train_time:32786643ms step_avg:597.77ms +step:54848/57344 val_loss:5.322264 train_time:32786644ms step_avg:597.77ms +step:54849/57344 train_time:32786656ms step_avg:597.76ms +step:54850/57344 train_time:32786874ms step_avg:597.76ms +step:54851/57344 train_time:32787409ms step_avg:597.75ms +grad accum step:13713/14336 +step:54852/57344 train_time:32788790ms step_avg:597.77ms +step:54853/57344 train_time:32788805ms step_avg:597.76ms +step:54854/57344 train_time:32789052ms step_avg:597.75ms +step:54855/57344 train_time:32789598ms step_avg:597.75ms +grad accum step:13714/14336 +step:54856/57344 train_time:32790943ms step_avg:597.76ms +step:54857/57344 train_time:32790958ms step_avg:597.75ms +step:54858/57344 train_time:32791215ms step_avg:597.75ms +step:54859/57344 train_time:32791810ms step_avg:597.75ms +grad accum step:13715/14336 +step:54860/57344 train_time:32793531ms step_avg:597.77ms +step:54861/57344 train_time:32793550ms step_avg:597.76ms +step:54862/57344 train_time:32793820ms step_avg:597.75ms +step:54863/57344 train_time:32794452ms step_avg:597.75ms +grad accum step:13716/14336 +step:54864/57344 train_time:32795804ms step_avg:597.77ms +step:54865/57344 train_time:32795817ms step_avg:597.75ms +step:54866/57344 train_time:32796079ms step_avg:597.75ms +step:54867/57344 train_time:32796724ms step_avg:597.75ms +grad accum step:13717/14336 +step:54868/57344 train_time:32798094ms step_avg:597.76ms +step:54869/57344 train_time:32798113ms step_avg:597.75ms +step:54870/57344 train_time:32798350ms step_avg:597.75ms +step:54871/57344 train_time:32798905ms step_avg:597.75ms +grad accum step:13718/14336 +step:54872/57344 train_time:32800375ms step_avg:597.76ms +step:54873/57344 train_time:32800396ms step_avg:597.75ms +step:54874/57344 train_time:32800635ms step_avg:597.74ms +step:54875/57344 train_time:32801195ms step_avg:597.74ms +grad accum step:13719/14336 +step:54876/57344 train_time:32802649ms step_avg:597.76ms +step:54877/57344 train_time:32802671ms step_avg:597.75ms +step:54878/57344 train_time:32802900ms step_avg:597.74ms +step:54879/57344 train_time:32803483ms step_avg:597.74ms +grad accum step:13720/14336 +step:54880/57344 train_time:32804889ms step_avg:597.76ms +step:54881/57344 train_time:32804902ms step_avg:597.75ms +step:54882/57344 train_time:32805156ms step_avg:597.74ms +step:54883/57344 train_time:32805722ms step_avg:597.74ms +grad accum step:13721/14336 +step:54884/57344 train_time:32807172ms step_avg:597.75ms +step:54885/57344 train_time:32807191ms step_avg:597.74ms +step:54886/57344 train_time:32807443ms step_avg:597.74ms +step:54887/57344 train_time:32808071ms step_avg:597.74ms +grad accum step:13722/14336 +step:54888/57344 train_time:32809565ms step_avg:597.75ms +step:54889/57344 train_time:32809578ms step_avg:597.74ms +step:54890/57344 train_time:32809799ms step_avg:597.74ms +step:54891/57344 train_time:32810354ms step_avg:597.74ms +grad accum step:13723/14336 +step:54892/57344 train_time:32811707ms step_avg:597.75ms +step:54893/57344 train_time:32811721ms step_avg:597.74ms +step:54894/57344 train_time:32811975ms step_avg:597.73ms +step:54895/57344 train_time:32812607ms step_avg:597.73ms +grad accum step:13724/14336 +step:54896/57344 train_time:32814089ms step_avg:597.75ms +step:54897/57344 train_time:32814105ms step_avg:597.74ms +step:54898/57344 train_time:32814353ms step_avg:597.73ms +step:54899/57344 train_time:32814923ms step_avg:597.73ms +grad accum step:13725/14336 +step:54900/57344 train_time:32816461ms step_avg:597.75ms +step:54901/57344 train_time:32816483ms step_avg:597.74ms +step:54902/57344 train_time:32816706ms step_avg:597.73ms +step:54903/57344 train_time:32817267ms step_avg:597.73ms +grad accum step:13726/14336 +step:54904/57344 train_time:32818634ms step_avg:597.75ms +step:54905/57344 train_time:32818650ms step_avg:597.74ms +step:54906/57344 train_time:32818885ms step_avg:597.73ms +step:54907/57344 train_time:32819470ms step_avg:597.73ms +grad accum step:13727/14336 +step:54908/57344 train_time:32820901ms step_avg:597.74ms +step:54909/57344 train_time:32820919ms step_avg:597.73ms +step:54910/57344 train_time:32821168ms step_avg:597.73ms +step:54911/57344 train_time:32821739ms step_avg:597.73ms +grad accum step:13728/14336 +step:54912/57344 train_time:32823055ms step_avg:597.74ms +step:54912/57344 val_loss:5.321797 train_time:32823066ms step_avg:597.74ms +step:54913/57344 train_time:32823078ms step_avg:597.73ms +step:54914/57344 train_time:32823335ms step_avg:597.72ms +step:54915/57344 train_time:32823981ms step_avg:597.72ms +grad accum step:13729/14336 +step:54916/57344 train_time:32825380ms step_avg:597.74ms +step:54917/57344 train_time:32825398ms step_avg:597.73ms +step:54918/57344 train_time:32825658ms step_avg:597.72ms +step:54919/57344 train_time:32826265ms step_avg:597.72ms +grad accum step:13730/14336 +step:54920/57344 train_time:32827632ms step_avg:597.74ms +step:54921/57344 train_time:32827652ms step_avg:597.72ms +step:54922/57344 train_time:32827889ms step_avg:597.72ms +step:54923/57344 train_time:32828454ms step_avg:597.72ms +grad accum step:13731/14336 +step:54924/57344 train_time:32829855ms step_avg:597.73ms +step:54925/57344 train_time:32829871ms step_avg:597.72ms +step:54926/57344 train_time:32830121ms step_avg:597.72ms +step:54927/57344 train_time:32830697ms step_avg:597.72ms +grad accum step:13732/14336 +step:54928/57344 train_time:32832067ms step_avg:597.73ms +step:54929/57344 train_time:32832165ms step_avg:597.72ms +step:54930/57344 train_time:32832401ms step_avg:597.71ms +step:54931/57344 train_time:32833030ms step_avg:597.71ms +grad accum step:13733/14336 +step:54932/57344 train_time:32834443ms step_avg:597.73ms +step:54933/57344 train_time:32834463ms step_avg:597.72ms +step:54934/57344 train_time:32834712ms step_avg:597.71ms +step:54935/57344 train_time:32835310ms step_avg:597.71ms +grad accum step:13734/14336 +step:54936/57344 train_time:32836794ms step_avg:597.73ms +step:54937/57344 train_time:32836812ms step_avg:597.72ms +step:54938/57344 train_time:32837043ms step_avg:597.71ms +step:54939/57344 train_time:32837598ms step_avg:597.71ms +grad accum step:13735/14336 +step:54940/57344 train_time:32838892ms step_avg:597.72ms +step:54941/57344 train_time:32838908ms step_avg:597.71ms +step:54942/57344 train_time:32839161ms step_avg:597.71ms +step:54943/57344 train_time:32839730ms step_avg:597.71ms +grad accum step:13736/14336 +step:54944/57344 train_time:32841107ms step_avg:597.72ms +step:54945/57344 train_time:32841126ms step_avg:597.71ms +step:54946/57344 train_time:32841366ms step_avg:597.70ms +step:54947/57344 train_time:32841937ms step_avg:597.70ms +grad accum step:13737/14336 +step:54948/57344 train_time:32843306ms step_avg:597.72ms +step:54949/57344 train_time:32843323ms step_avg:597.71ms +step:54950/57344 train_time:32843577ms step_avg:597.70ms +step:54951/57344 train_time:32844154ms step_avg:597.70ms +grad accum step:13738/14336 +step:54952/57344 train_time:32845480ms step_avg:597.71ms +step:54953/57344 train_time:32845496ms step_avg:597.70ms +step:54954/57344 train_time:32845860ms step_avg:597.70ms +step:54955/57344 train_time:32846300ms step_avg:597.69ms +grad accum step:13739/14336 +step:54956/57344 train_time:32847833ms step_avg:597.71ms +step:54957/57344 train_time:32847857ms step_avg:597.70ms +step:54958/57344 train_time:32848082ms step_avg:597.69ms +step:54959/57344 train_time:32848651ms step_avg:597.69ms +grad accum step:13740/14336 +step:54960/57344 train_time:32850034ms step_avg:597.71ms +step:54961/57344 train_time:32850050ms step_avg:597.70ms +step:54962/57344 train_time:32850289ms step_avg:597.69ms +step:54963/57344 train_time:32850854ms step_avg:597.69ms +grad accum step:13741/14336 +step:54964/57344 train_time:32852204ms step_avg:597.70ms +step:54965/57344 train_time:32852225ms step_avg:597.69ms +step:54966/57344 train_time:32852456ms step_avg:597.69ms +step:54967/57344 train_time:32853018ms step_avg:597.69ms +grad accum step:13742/14336 +step:54968/57344 train_time:32854367ms step_avg:597.70ms +step:54969/57344 train_time:32854385ms step_avg:597.69ms +step:54970/57344 train_time:32854632ms step_avg:597.68ms +step:54971/57344 train_time:32855221ms step_avg:597.68ms +grad accum step:13743/14336 +step:54972/57344 train_time:32856683ms step_avg:597.70ms +step:54973/57344 train_time:32856703ms step_avg:597.69ms +step:54974/57344 train_time:32856948ms step_avg:597.68ms +step:54975/57344 train_time:32857534ms step_avg:597.68ms +grad accum step:13744/14336 +step:54976/57344 train_time:32858915ms step_avg:597.70ms +step:54976/57344 val_loss:5.319905 train_time:32858918ms step_avg:597.70ms +step:54977/57344 train_time:32858930ms step_avg:597.69ms +step:54978/57344 train_time:32859164ms step_avg:597.68ms +step:54979/57344 train_time:32859751ms step_avg:597.68ms +grad accum step:13745/14336 +step:54980/57344 train_time:32861136ms step_avg:597.69ms +step:54981/57344 train_time:32861155ms step_avg:597.68ms +step:54982/57344 train_time:32861402ms step_avg:597.68ms +step:54983/57344 train_time:32861976ms step_avg:597.68ms +grad accum step:13746/14336 +step:54984/57344 train_time:32863374ms step_avg:597.69ms +step:54985/57344 train_time:32863391ms step_avg:597.68ms +step:54986/57344 train_time:32863644ms step_avg:597.67ms +step:54987/57344 train_time:32864221ms step_avg:597.67ms +grad accum step:13747/14336 +step:54988/57344 train_time:32865613ms step_avg:597.69ms +step:54989/57344 train_time:32865645ms step_avg:597.68ms +step:54990/57344 train_time:32865880ms step_avg:597.67ms +step:54991/57344 train_time:32866470ms step_avg:597.67ms +grad accum step:13748/14336 +step:54992/57344 train_time:32867881ms step_avg:597.68ms +step:54993/57344 train_time:32867895ms step_avg:597.67ms +step:54994/57344 train_time:32868145ms step_avg:597.67ms +step:54995/57344 train_time:32868720ms step_avg:597.67ms +grad accum step:13749/14336 +step:54996/57344 train_time:32870244ms step_avg:597.68ms +step:54997/57344 train_time:32870273ms step_avg:597.67ms +step:54998/57344 train_time:32870497ms step_avg:597.67ms +step:54999/57344 train_time:32871070ms step_avg:597.67ms +grad accum step:13750/14336 +step:55000/57344 train_time:32872432ms step_avg:597.68ms +step:55001/57344 train_time:32872450ms step_avg:597.67ms +step:55002/57344 train_time:32872688ms step_avg:597.66ms +step:55003/57344 train_time:32873236ms step_avg:597.66ms +grad accum step:13751/14336 +step:55004/57344 train_time:32874623ms step_avg:597.68ms +step:55005/57344 train_time:32874639ms step_avg:597.67ms +step:55006/57344 train_time:32874886ms step_avg:597.66ms +step:55007/57344 train_time:32875448ms step_avg:597.66ms +grad accum step:13752/14336 +step:55008/57344 train_time:32876792ms step_avg:597.67ms +step:55009/57344 train_time:32876811ms step_avg:597.66ms +step:55010/57344 train_time:32877032ms step_avg:597.66ms +step:55011/57344 train_time:32877591ms step_avg:597.65ms +grad accum step:13753/14336 +step:55012/57344 train_time:32878958ms step_avg:597.67ms +step:55013/57344 train_time:32878974ms step_avg:597.66ms +step:55014/57344 train_time:32879222ms step_avg:597.65ms +step:55015/57344 train_time:32879786ms step_avg:597.65ms +grad accum step:13754/14336 +step:55016/57344 train_time:32881451ms step_avg:597.67ms +step:55017/57344 train_time:32881469ms step_avg:597.66ms +step:55018/57344 train_time:32881713ms step_avg:597.65ms +step:55019/57344 train_time:32882309ms step_avg:597.65ms +grad accum step:13755/14336 +step:55020/57344 train_time:32883835ms step_avg:597.67ms +step:55021/57344 train_time:32883851ms step_avg:597.66ms +step:55022/57344 train_time:32884066ms step_avg:597.65ms +step:55023/57344 train_time:32884643ms step_avg:597.65ms +grad accum step:13756/14336 +step:55024/57344 train_time:32886056ms step_avg:597.67ms +step:55025/57344 train_time:32886080ms step_avg:597.66ms +step:55026/57344 train_time:32886318ms step_avg:597.65ms +step:55027/57344 train_time:32886932ms step_avg:597.65ms +grad accum step:13757/14336 +step:55028/57344 train_time:32888342ms step_avg:597.67ms +step:55029/57344 train_time:32888363ms step_avg:597.66ms +step:55030/57344 train_time:32888603ms step_avg:597.65ms +step:55031/57344 train_time:32889175ms step_avg:597.65ms +grad accum step:13758/14336 +step:55032/57344 train_time:32890688ms step_avg:597.66ms +step:55033/57344 train_time:32890711ms step_avg:597.65ms +step:55034/57344 train_time:32890943ms step_avg:597.65ms +step:55035/57344 train_time:32891502ms step_avg:597.65ms +grad accum step:13759/14336 +step:55036/57344 train_time:32892836ms step_avg:597.66ms +step:55037/57344 train_time:32892853ms step_avg:597.65ms +step:55038/57344 train_time:32893095ms step_avg:597.64ms +step:55039/57344 train_time:32893707ms step_avg:597.64ms +grad accum step:13760/14336 +step:55040/57344 train_time:32895093ms step_avg:597.66ms +step:55040/57344 val_loss:5.319589 train_time:32895098ms step_avg:597.66ms +step:55041/57344 train_time:32895110ms step_avg:597.65ms +step:55042/57344 train_time:32895349ms step_avg:597.64ms +step:55043/57344 train_time:32895961ms step_avg:597.64ms +grad accum step:13761/14336 +step:55044/57344 train_time:32897389ms step_avg:597.66ms +step:55045/57344 train_time:32897404ms step_avg:597.65ms +step:55046/57344 train_time:32897659ms step_avg:597.64ms +step:55047/57344 train_time:32898267ms step_avg:597.64ms +grad accum step:13762/14336 +step:55048/57344 train_time:32899739ms step_avg:597.66ms +step:55049/57344 train_time:32899756ms step_avg:597.64ms +step:55050/57344 train_time:32900000ms step_avg:597.64ms +step:55051/57344 train_time:32900572ms step_avg:597.64ms +grad accum step:13763/14336 +step:55052/57344 train_time:32901973ms step_avg:597.65ms +step:55053/57344 train_time:32901988ms step_avg:597.64ms +step:55054/57344 train_time:32902247ms step_avg:597.64ms +step:55055/57344 train_time:32902827ms step_avg:597.64ms +grad accum step:13764/14336 +step:55056/57344 train_time:32904153ms step_avg:597.65ms +step:55057/57344 train_time:32904188ms step_avg:597.64ms +step:55058/57344 train_time:32904417ms step_avg:597.63ms +step:55059/57344 train_time:32905025ms step_avg:597.63ms +grad accum step:13765/14336 +step:55060/57344 train_time:32906555ms step_avg:597.65ms +step:55061/57344 train_time:32906577ms step_avg:597.64ms +step:55062/57344 train_time:32906816ms step_avg:597.63ms +step:55063/57344 train_time:32907387ms step_avg:597.63ms +grad accum step:13766/14336 +step:55064/57344 train_time:32908749ms step_avg:597.65ms +step:55065/57344 train_time:32908774ms step_avg:597.64ms +step:55066/57344 train_time:32909010ms step_avg:597.63ms +step:55067/57344 train_time:32909604ms step_avg:597.63ms +grad accum step:13767/14336 +step:55068/57344 train_time:32911183ms step_avg:597.65ms +step:55069/57344 train_time:32911202ms step_avg:597.64ms +step:55070/57344 train_time:32911424ms step_avg:597.63ms +step:55071/57344 train_time:32911979ms step_avg:597.63ms +grad accum step:13768/14336 +step:55072/57344 train_time:32913381ms step_avg:597.64ms +step:55073/57344 train_time:32913415ms step_avg:597.63ms +step:55074/57344 train_time:32913642ms step_avg:597.63ms +step:55075/57344 train_time:32914210ms step_avg:597.63ms +grad accum step:13769/14336 +step:55076/57344 train_time:32915716ms step_avg:597.64ms +step:55077/57344 train_time:32915792ms step_avg:597.63ms +step:55078/57344 train_time:32916009ms step_avg:597.63ms +step:55079/57344 train_time:32916573ms step_avg:597.62ms +grad accum step:13770/14336 +step:55080/57344 train_time:32917951ms step_avg:597.64ms +step:55081/57344 train_time:32917972ms step_avg:597.63ms +step:55082/57344 train_time:32918216ms step_avg:597.62ms +step:55083/57344 train_time:32918783ms step_avg:597.62ms +grad accum step:13771/14336 +step:55084/57344 train_time:32920230ms step_avg:597.64ms +step:55085/57344 train_time:32920245ms step_avg:597.63ms +step:55086/57344 train_time:32920486ms step_avg:597.62ms +step:55087/57344 train_time:32921044ms step_avg:597.62ms +grad accum step:13772/14336 +step:55088/57344 train_time:32922406ms step_avg:597.63ms +step:55089/57344 train_time:32922421ms step_avg:597.62ms +step:55090/57344 train_time:32922681ms step_avg:597.62ms +step:55091/57344 train_time:32923275ms step_avg:597.62ms +grad accum step:13773/14336 +step:55092/57344 train_time:32924625ms step_avg:597.63ms +step:55093/57344 train_time:32924640ms step_avg:597.62ms +step:55094/57344 train_time:32924894ms step_avg:597.61ms +step:55095/57344 train_time:32925482ms step_avg:597.61ms +grad accum step:13774/14336 +step:55096/57344 train_time:32927021ms step_avg:597.63ms +step:55097/57344 train_time:32927035ms step_avg:597.62ms +step:55098/57344 train_time:32927285ms step_avg:597.61ms +step:55099/57344 train_time:32927851ms step_avg:597.61ms +grad accum step:13775/14336 +step:55100/57344 train_time:32929222ms step_avg:597.63ms +step:55101/57344 train_time:32929238ms step_avg:597.62ms +step:55102/57344 train_time:32929489ms step_avg:597.61ms +step:55103/57344 train_time:32930099ms step_avg:597.61ms +grad accum step:13776/14336 +step:55104/57344 train_time:32931565ms step_avg:597.63ms +step:55104/57344 val_loss:5.318570 train_time:32931575ms step_avg:597.63ms +step:55105/57344 train_time:32931587ms step_avg:597.62ms +step:55106/57344 train_time:32931817ms step_avg:597.61ms +step:55107/57344 train_time:32932390ms step_avg:597.61ms +grad accum step:13777/14336 +step:55108/57344 train_time:32933801ms step_avg:597.62ms +step:55109/57344 train_time:32933905ms step_avg:597.61ms +step:55110/57344 train_time:32934130ms step_avg:597.61ms +step:55111/57344 train_time:32934720ms step_avg:597.61ms +grad accum step:13778/14336 +step:55112/57344 train_time:32936291ms step_avg:597.62ms +step:55113/57344 train_time:32936311ms step_avg:597.61ms +step:55114/57344 train_time:32936535ms step_avg:597.61ms +step:55115/57344 train_time:32937113ms step_avg:597.61ms +grad accum step:13779/14336 +step:55116/57344 train_time:32938510ms step_avg:597.62ms +step:55117/57344 train_time:32938529ms step_avg:597.61ms +step:55118/57344 train_time:32938787ms step_avg:597.60ms +step:55119/57344 train_time:32939383ms step_avg:597.60ms +grad accum step:13780/14336 +step:55120/57344 train_time:32940753ms step_avg:597.62ms +step:55121/57344 train_time:32940778ms step_avg:597.61ms +step:55122/57344 train_time:32941020ms step_avg:597.60ms +step:55123/57344 train_time:32941604ms step_avg:597.60ms +grad accum step:13781/14336 +step:55124/57344 train_time:32943024ms step_avg:597.62ms +step:55125/57344 train_time:32943043ms step_avg:597.61ms +step:55126/57344 train_time:32943279ms step_avg:597.60ms +step:55127/57344 train_time:32943873ms step_avg:597.60ms +grad accum step:13782/14336 +step:55128/57344 train_time:32945284ms step_avg:597.61ms +step:55129/57344 train_time:32945298ms step_avg:597.60ms +step:55130/57344 train_time:32945521ms step_avg:597.60ms +step:55131/57344 train_time:32946083ms step_avg:597.60ms +grad accum step:13783/14336 +step:55132/57344 train_time:32947480ms step_avg:597.61ms +step:55133/57344 train_time:32947501ms step_avg:597.60ms +step:55134/57344 train_time:32947731ms step_avg:597.59ms +step:55135/57344 train_time:32948287ms step_avg:597.59ms +grad accum step:13784/14336 +step:55136/57344 train_time:32949657ms step_avg:597.61ms +step:55137/57344 train_time:32949673ms step_avg:597.60ms +step:55138/57344 train_time:32949938ms step_avg:597.59ms +step:55139/57344 train_time:32950564ms step_avg:597.59ms +grad accum step:13785/14336 +step:55140/57344 train_time:32952013ms step_avg:597.61ms +step:55141/57344 train_time:32952035ms step_avg:597.60ms +step:55142/57344 train_time:32952272ms step_avg:597.59ms +step:55143/57344 train_time:32952823ms step_avg:597.59ms +grad accum step:13786/14336 +step:55144/57344 train_time:32954213ms step_avg:597.60ms +step:55145/57344 train_time:32954230ms step_avg:597.59ms +step:55146/57344 train_time:32954451ms step_avg:597.59ms +step:55147/57344 train_time:32955022ms step_avg:597.59ms +grad accum step:13787/14336 +step:55148/57344 train_time:32956455ms step_avg:597.60ms +step:55149/57344 train_time:32956474ms step_avg:597.59ms +step:55150/57344 train_time:32956695ms step_avg:597.58ms +step:55151/57344 train_time:32957233ms step_avg:597.58ms +grad accum step:13788/14336 +step:55152/57344 train_time:32958588ms step_avg:597.60ms +step:55153/57344 train_time:32958621ms step_avg:597.59ms +step:55154/57344 train_time:32958851ms step_avg:597.58ms +step:55155/57344 train_time:32959447ms step_avg:597.58ms +grad accum step:13789/14336 +step:55156/57344 train_time:32960865ms step_avg:597.59ms +step:55157/57344 train_time:32960885ms step_avg:597.58ms +step:55158/57344 train_time:32961119ms step_avg:597.58ms +step:55159/57344 train_time:32961691ms step_avg:597.58ms +grad accum step:13790/14336 +step:55160/57344 train_time:32963094ms step_avg:597.59ms +step:55161/57344 train_time:32963110ms step_avg:597.58ms +step:55162/57344 train_time:32963368ms step_avg:597.57ms +step:55163/57344 train_time:32963956ms step_avg:597.57ms +grad accum step:13791/14336 +step:55164/57344 train_time:32965355ms step_avg:597.59ms +step:55165/57344 train_time:32965388ms step_avg:597.58ms +step:55166/57344 train_time:32965615ms step_avg:597.57ms +step:55167/57344 train_time:32966196ms step_avg:597.57ms +grad accum step:13792/14336 +step:55168/57344 train_time:32967578ms step_avg:597.59ms +step:55168/57344 val_loss:5.317809 train_time:32967580ms step_avg:597.59ms +step:55169/57344 train_time:32967592ms step_avg:597.57ms +step:55170/57344 train_time:32967822ms step_avg:597.57ms +step:55171/57344 train_time:32968401ms step_avg:597.57ms +grad accum step:13793/14336 +step:55172/57344 train_time:32969812ms step_avg:597.58ms +step:55173/57344 train_time:32969830ms step_avg:597.57ms +step:55174/57344 train_time:32970073ms step_avg:597.57ms +step:55175/57344 train_time:32970646ms step_avg:597.56ms +grad accum step:13794/14336 +step:55176/57344 train_time:32972002ms step_avg:597.58ms +step:55177/57344 train_time:32972018ms step_avg:597.57ms +step:55178/57344 train_time:32972245ms step_avg:597.56ms +step:55179/57344 train_time:32972810ms step_avg:597.56ms +grad accum step:13795/14336 +step:55180/57344 train_time:32974155ms step_avg:597.57ms +step:55181/57344 train_time:32974172ms step_avg:597.56ms +step:55182/57344 train_time:32974438ms step_avg:597.56ms +step:55183/57344 train_time:32975060ms step_avg:597.56ms +grad accum step:13796/14336 +step:55184/57344 train_time:32976485ms step_avg:597.57ms +step:55185/57344 train_time:32976507ms step_avg:597.56ms +step:55186/57344 train_time:32976758ms step_avg:597.56ms +step:55187/57344 train_time:32977364ms step_avg:597.56ms +grad accum step:13797/14336 +step:55188/57344 train_time:32978672ms step_avg:597.57ms +step:55189/57344 train_time:32978687ms step_avg:597.56ms +step:55190/57344 train_time:32978944ms step_avg:597.55ms +step:55191/57344 train_time:32979525ms step_avg:597.55ms +grad accum step:13798/14336 +step:55192/57344 train_time:32980864ms step_avg:597.57ms +step:55193/57344 train_time:32980886ms step_avg:597.56ms +step:55194/57344 train_time:32981133ms step_avg:597.55ms +step:55195/57344 train_time:32981721ms step_avg:597.55ms +grad accum step:13799/14336 +step:55196/57344 train_time:32983147ms step_avg:597.56ms +step:55197/57344 train_time:32983168ms step_avg:597.55ms +step:55198/57344 train_time:32983422ms step_avg:597.55ms +step:55199/57344 train_time:32984026ms step_avg:597.55ms +grad accum step:13800/14336 +step:55200/57344 train_time:32985511ms step_avg:597.56ms +step:55201/57344 train_time:32985538ms step_avg:597.55ms +step:55202/57344 train_time:32985772ms step_avg:597.55ms +step:55203/57344 train_time:32986389ms step_avg:597.55ms +grad accum step:13801/14336 +step:55204/57344 train_time:32987834ms step_avg:597.56ms +step:55205/57344 train_time:32987846ms step_avg:597.55ms +step:55206/57344 train_time:32988093ms step_avg:597.55ms +step:55207/57344 train_time:32988678ms step_avg:597.55ms +grad accum step:13802/14336 +step:55208/57344 train_time:32990090ms step_avg:597.56ms +step:55209/57344 train_time:32990148ms step_avg:597.55ms +step:55210/57344 train_time:32990368ms step_avg:597.54ms +step:55211/57344 train_time:32990931ms step_avg:597.54ms +grad accum step:13803/14336 +step:55212/57344 train_time:32992286ms step_avg:597.56ms +step:55213/57344 train_time:32992302ms step_avg:597.55ms +step:55214/57344 train_time:32992556ms step_avg:597.54ms +step:55215/57344 train_time:32993154ms step_avg:597.54ms +grad accum step:13804/14336 +step:55216/57344 train_time:32994586ms step_avg:597.55ms +step:55217/57344 train_time:32994608ms step_avg:597.54ms +step:55218/57344 train_time:32994844ms step_avg:597.54ms +step:55219/57344 train_time:32995420ms step_avg:597.54ms +grad accum step:13805/14336 +step:55220/57344 train_time:32996770ms step_avg:597.55ms +step:55221/57344 train_time:32996792ms step_avg:597.54ms +step:55222/57344 train_time:32997038ms step_avg:597.53ms +step:55223/57344 train_time:32997602ms step_avg:597.53ms +grad accum step:13806/14336 +step:55224/57344 train_time:32998939ms step_avg:597.55ms +step:55225/57344 train_time:32998958ms step_avg:597.54ms +step:55226/57344 train_time:32999181ms step_avg:597.53ms +step:55227/57344 train_time:32999736ms step_avg:597.53ms +grad accum step:13807/14336 +step:55228/57344 train_time:33001092ms step_avg:597.54ms +step:55229/57344 train_time:33001116ms step_avg:597.53ms +step:55230/57344 train_time:33001359ms step_avg:597.53ms +step:55231/57344 train_time:33001944ms step_avg:597.53ms +grad accum step:13808/14336 +step:55232/57344 train_time:33003325ms step_avg:597.54ms +step:55232/57344 val_loss:5.317148 train_time:33003329ms step_avg:597.54ms +step:55233/57344 train_time:33003341ms step_avg:597.53ms +step:55234/57344 train_time:33003575ms step_avg:597.52ms +step:55235/57344 train_time:33004191ms step_avg:597.52ms +grad accum step:13809/14336 +step:55236/57344 train_time:33005655ms step_avg:597.54ms +step:55237/57344 train_time:33005673ms step_avg:597.53ms +step:55238/57344 train_time:33005924ms step_avg:597.52ms +step:55239/57344 train_time:33006523ms step_avg:597.52ms +grad accum step:13810/14336 +step:55240/57344 train_time:33007966ms step_avg:597.54ms +step:55241/57344 train_time:33007982ms step_avg:597.53ms +step:55242/57344 train_time:33008200ms step_avg:597.52ms +step:55243/57344 train_time:33008756ms step_avg:597.52ms +grad accum step:13811/14336 +step:55244/57344 train_time:33010111ms step_avg:597.53ms +step:55245/57344 train_time:33010133ms step_avg:597.52ms +step:55246/57344 train_time:33010384ms step_avg:597.52ms +step:55247/57344 train_time:33010980ms step_avg:597.52ms +grad accum step:13812/14336 +step:55248/57344 train_time:33012457ms step_avg:597.53ms +step:55249/57344 train_time:33012473ms step_avg:597.52ms +step:55250/57344 train_time:33012727ms step_avg:597.52ms +step:55251/57344 train_time:33013312ms step_avg:597.52ms +grad accum step:13813/14336 +step:55252/57344 train_time:33014651ms step_avg:597.53ms +step:55253/57344 train_time:33014667ms step_avg:597.52ms +step:55254/57344 train_time:33014935ms step_avg:597.51ms +step:55255/57344 train_time:33015547ms step_avg:597.51ms +grad accum step:13814/14336 +step:55256/57344 train_time:33016919ms step_avg:597.53ms +step:55257/57344 train_time:33016938ms step_avg:597.52ms +step:55258/57344 train_time:33017187ms step_avg:597.51ms +step:55259/57344 train_time:33017770ms step_avg:597.51ms +grad accum step:13815/14336 +step:55260/57344 train_time:33019150ms step_avg:597.52ms +step:55261/57344 train_time:33019171ms step_avg:597.51ms +step:55262/57344 train_time:33019413ms step_avg:597.51ms +step:55263/57344 train_time:33019975ms step_avg:597.51ms +grad accum step:13816/14336 +step:55264/57344 train_time:33021318ms step_avg:597.52ms +step:55265/57344 train_time:33021334ms step_avg:597.51ms +step:55266/57344 train_time:33021581ms step_avg:597.50ms +step:55267/57344 train_time:33022154ms step_avg:597.50ms +grad accum step:13817/14336 +step:55268/57344 train_time:33023646ms step_avg:597.52ms +step:55269/57344 train_time:33023668ms step_avg:597.51ms +step:55270/57344 train_time:33023885ms step_avg:597.50ms +step:55271/57344 train_time:33024458ms step_avg:597.50ms +grad accum step:13818/14336 +step:55272/57344 train_time:33025868ms step_avg:597.52ms +step:55273/57344 train_time:33025880ms step_avg:597.50ms +step:55274/57344 train_time:33026107ms step_avg:597.50ms +step:55275/57344 train_time:33026671ms step_avg:597.50ms +grad accum step:13819/14336 +step:55276/57344 train_time:33028050ms step_avg:597.51ms +step:55277/57344 train_time:33028067ms step_avg:597.50ms +step:55278/57344 train_time:33028288ms step_avg:597.49ms +step:55279/57344 train_time:33028839ms step_avg:597.49ms +grad accum step:13820/14336 +step:55280/57344 train_time:33030192ms step_avg:597.51ms +step:55281/57344 train_time:33030206ms step_avg:597.50ms +step:55282/57344 train_time:33030456ms step_avg:597.49ms +step:55283/57344 train_time:33031019ms step_avg:597.49ms +grad accum step:13821/14336 +step:55284/57344 train_time:33032348ms step_avg:597.50ms +step:55285/57344 train_time:33032362ms step_avg:597.49ms +step:55286/57344 train_time:33032612ms step_avg:597.49ms +step:55287/57344 train_time:33033202ms step_avg:597.49ms +grad accum step:13822/14336 +step:55288/57344 train_time:33034688ms step_avg:597.50ms +step:55289/57344 train_time:33034923ms step_avg:597.50ms +step:55290/57344 train_time:33035140ms step_avg:597.49ms +step:55291/57344 train_time:33035702ms step_avg:597.49ms +grad accum step:13823/14336 +step:55292/57344 train_time:33037087ms step_avg:597.50ms +step:55293/57344 train_time:33037105ms step_avg:597.49ms +step:55294/57344 train_time:33037349ms step_avg:597.49ms +step:55295/57344 train_time:33037958ms step_avg:597.49ms +grad accum step:13824/14336 +step:55296/57344 train_time:33039311ms step_avg:597.50ms +step:55296/57344 val_loss:5.316221 train_time:33039314ms step_avg:597.50ms +step:55297/57344 train_time:33039326ms step_avg:597.49ms +step:55298/57344 train_time:33039549ms step_avg:597.48ms +step:55299/57344 train_time:33040102ms step_avg:597.48ms +grad accum step:13825/14336 +step:55300/57344 train_time:33041493ms step_avg:597.50ms +step:55301/57344 train_time:33041514ms step_avg:597.48ms +step:55302/57344 train_time:33041745ms step_avg:597.48ms +step:55303/57344 train_time:33042326ms step_avg:597.48ms +grad accum step:13826/14336 +step:55304/57344 train_time:33043857ms step_avg:597.49ms +step:55305/57344 train_time:33043873ms step_avg:597.48ms +step:55306/57344 train_time:33044094ms step_avg:597.48ms +step:55307/57344 train_time:33044667ms step_avg:597.48ms +grad accum step:13827/14336 +step:55308/57344 train_time:33046028ms step_avg:597.49ms +step:55309/57344 train_time:33046049ms step_avg:597.48ms +step:55310/57344 train_time:33046303ms step_avg:597.47ms +step:55311/57344 train_time:33046914ms step_avg:597.47ms +grad accum step:13828/14336 +step:55312/57344 train_time:33048442ms step_avg:597.49ms +step:55313/57344 train_time:33048468ms step_avg:597.48ms +step:55314/57344 train_time:33048694ms step_avg:597.47ms +step:55315/57344 train_time:33049277ms step_avg:597.47ms +grad accum step:13829/14336 +step:55316/57344 train_time:33050902ms step_avg:597.49ms +step:55317/57344 train_time:33050921ms step_avg:597.48ms +step:55318/57344 train_time:33051180ms step_avg:597.48ms +step:55319/57344 train_time:33051858ms step_avg:597.48ms +grad accum step:13830/14336 +step:55320/57344 train_time:33053240ms step_avg:597.49ms +step:55321/57344 train_time:33053255ms step_avg:597.48ms +step:55322/57344 train_time:33053498ms step_avg:597.47ms +step:55323/57344 train_time:33054040ms step_avg:597.47ms +grad accum step:13831/14336 +step:55324/57344 train_time:33055343ms step_avg:597.49ms +step:55325/57344 train_time:33055360ms step_avg:597.48ms +step:55326/57344 train_time:33055610ms step_avg:597.47ms +step:55327/57344 train_time:33056196ms step_avg:597.47ms +grad accum step:13832/14336 +step:55328/57344 train_time:33057557ms step_avg:597.48ms +step:55329/57344 train_time:33057573ms step_avg:597.47ms +step:55330/57344 train_time:33057836ms step_avg:597.47ms +step:55331/57344 train_time:33058436ms step_avg:597.47ms +grad accum step:13833/14336 +step:55332/57344 train_time:33059816ms step_avg:597.48ms +step:55333/57344 train_time:33059840ms step_avg:597.47ms +step:55334/57344 train_time:33060076ms step_avg:597.46ms +step:55335/57344 train_time:33060668ms step_avg:597.46ms +grad accum step:13834/14336 +step:55336/57344 train_time:33062040ms step_avg:597.48ms +step:55337/57344 train_time:33062054ms step_avg:597.47ms +step:55338/57344 train_time:33062310ms step_avg:597.46ms +step:55339/57344 train_time:33062886ms step_avg:597.46ms +grad accum step:13835/14336 +step:55340/57344 train_time:33064199ms step_avg:597.47ms +step:55341/57344 train_time:33064213ms step_avg:597.46ms +step:55342/57344 train_time:33064463ms step_avg:597.46ms +step:55343/57344 train_time:33065020ms step_avg:597.46ms +grad accum step:13836/14336 +step:55344/57344 train_time:33066369ms step_avg:597.47ms +step:55345/57344 train_time:33066384ms step_avg:597.46ms +step:55346/57344 train_time:33066644ms step_avg:597.45ms +step:55347/57344 train_time:33067252ms step_avg:597.45ms +grad accum step:13837/14336 +step:55348/57344 train_time:33068705ms step_avg:597.47ms +step:55349/57344 train_time:33068722ms step_avg:597.46ms +step:55350/57344 train_time:33068974ms step_avg:597.45ms +step:55351/57344 train_time:33069565ms step_avg:597.45ms +grad accum step:13838/14336 +step:55352/57344 train_time:33070958ms step_avg:597.47ms +step:55353/57344 train_time:33070973ms step_avg:597.46ms +step:55354/57344 train_time:33071228ms step_avg:597.45ms +step:55355/57344 train_time:33071817ms step_avg:597.45ms +grad accum step:13839/14336 +step:55356/57344 train_time:33073175ms step_avg:597.46ms +step:55357/57344 train_time:33073191ms step_avg:597.45ms +step:55358/57344 train_time:33073441ms step_avg:597.45ms +step:55359/57344 train_time:33074010ms step_avg:597.45ms +grad accum step:13840/14336 +step:55360/57344 train_time:33075403ms step_avg:597.46ms +step:55360/57344 val_loss:5.315194 train_time:33075408ms step_avg:597.46ms +step:55361/57344 train_time:33075420ms step_avg:597.45ms +step:55362/57344 train_time:33075649ms step_avg:597.44ms +step:55363/57344 train_time:33076220ms step_avg:597.44ms +grad accum step:13841/14336 +step:55364/57344 train_time:33077568ms step_avg:597.46ms +step:55365/57344 train_time:33077592ms step_avg:597.45ms +step:55366/57344 train_time:33077826ms step_avg:597.44ms +step:55367/57344 train_time:33078404ms step_avg:597.44ms +grad accum step:13842/14336 +step:55368/57344 train_time:33079748ms step_avg:597.45ms +step:55369/57344 train_time:33079766ms step_avg:597.44ms +step:55370/57344 train_time:33080011ms step_avg:597.44ms +step:55371/57344 train_time:33080575ms step_avg:597.44ms +grad accum step:13843/14336 +step:55372/57344 train_time:33081888ms step_avg:597.45ms +step:55373/57344 train_time:33081903ms step_avg:597.44ms +step:55374/57344 train_time:33082175ms step_avg:597.43ms +step:55375/57344 train_time:33082800ms step_avg:597.43ms +grad accum step:13844/14336 +step:55376/57344 train_time:33084219ms step_avg:597.45ms +step:55377/57344 train_time:33084236ms step_avg:597.44ms +step:55378/57344 train_time:33084477ms step_avg:597.43ms +step:55379/57344 train_time:33085026ms step_avg:597.43ms +grad accum step:13845/14336 +step:55380/57344 train_time:33086425ms step_avg:597.44ms +step:55381/57344 train_time:33086443ms step_avg:597.43ms +step:55382/57344 train_time:33086702ms step_avg:597.43ms +step:55383/57344 train_time:33087320ms step_avg:597.43ms +grad accum step:13846/14336 +step:55384/57344 train_time:33088726ms step_avg:597.44ms +step:55385/57344 train_time:33088743ms step_avg:597.43ms +step:55386/57344 train_time:33088989ms step_avg:597.43ms +step:55387/57344 train_time:33089546ms step_avg:597.42ms +grad accum step:13847/14336 +step:55388/57344 train_time:33090911ms step_avg:597.44ms +step:55389/57344 train_time:33090945ms step_avg:597.43ms +step:55390/57344 train_time:33091170ms step_avg:597.42ms +step:55391/57344 train_time:33091740ms step_avg:597.42ms +grad accum step:13848/14336 +step:55392/57344 train_time:33093155ms step_avg:597.44ms +step:55393/57344 train_time:33093170ms step_avg:597.43ms +step:55394/57344 train_time:33093421ms step_avg:597.42ms +step:55395/57344 train_time:33094003ms step_avg:597.42ms +grad accum step:13849/14336 +step:55396/57344 train_time:33095575ms step_avg:597.44ms +step:55397/57344 train_time:33095591ms step_avg:597.43ms +step:55398/57344 train_time:33095852ms step_avg:597.42ms +step:55399/57344 train_time:33096485ms step_avg:597.42ms +grad accum step:13850/14336 +step:55400/57344 train_time:33097912ms step_avg:597.44ms +step:55401/57344 train_time:33098082ms step_avg:597.43ms +step:55402/57344 train_time:33098312ms step_avg:597.42ms +step:55403/57344 train_time:33098917ms step_avg:597.42ms +grad accum step:13851/14336 +step:55404/57344 train_time:33100372ms step_avg:597.44ms +step:55405/57344 train_time:33100385ms step_avg:597.43ms +step:55406/57344 train_time:33100630ms step_avg:597.42ms +step:55407/57344 train_time:33101249ms step_avg:597.42ms +grad accum step:13852/14336 +step:55408/57344 train_time:33102633ms step_avg:597.43ms +step:55409/57344 train_time:33102662ms step_avg:597.42ms +step:55410/57344 train_time:33102887ms step_avg:597.42ms +step:55411/57344 train_time:33103446ms step_avg:597.42ms +grad accum step:13853/14336 +step:55412/57344 train_time:33104809ms step_avg:597.43ms +step:55413/57344 train_time:33104825ms step_avg:597.42ms +step:55414/57344 train_time:33105075ms step_avg:597.41ms +step:55415/57344 train_time:33105636ms step_avg:597.41ms +grad accum step:13854/14336 +step:55416/57344 train_time:33106988ms step_avg:597.43ms +step:55417/57344 train_time:33107007ms step_avg:597.42ms +step:55418/57344 train_time:33107239ms step_avg:597.41ms +step:55419/57344 train_time:33107824ms step_avg:597.41ms +grad accum step:13855/14336 +step:55420/57344 train_time:33109218ms step_avg:597.42ms +step:55421/57344 train_time:33109233ms step_avg:597.41ms +step:55422/57344 train_time:33109455ms step_avg:597.41ms +step:55423/57344 train_time:33110007ms step_avg:597.41ms +grad accum step:13856/14336 +step:55424/57344 train_time:33111361ms step_avg:597.42ms +step:55424/57344 val_loss:5.314674 train_time:33111367ms step_avg:597.42ms +step:55425/57344 train_time:33111379ms step_avg:597.41ms +step:55426/57344 train_time:33111625ms step_avg:597.40ms +step:55427/57344 train_time:33112239ms step_avg:597.40ms +grad accum step:13857/14336 +step:55428/57344 train_time:33113668ms step_avg:597.42ms +step:55429/57344 train_time:33113689ms step_avg:597.41ms +step:55430/57344 train_time:33113916ms step_avg:597.40ms +step:55431/57344 train_time:33114479ms step_avg:597.40ms +grad accum step:13858/14336 +step:55432/57344 train_time:33115875ms step_avg:597.41ms +step:55433/57344 train_time:33115890ms step_avg:597.40ms +step:55434/57344 train_time:33116141ms step_avg:597.40ms +step:55435/57344 train_time:33116751ms step_avg:597.40ms +grad accum step:13859/14336 +step:55436/57344 train_time:33118392ms step_avg:597.42ms +step:55437/57344 train_time:33118417ms step_avg:597.41ms +step:55438/57344 train_time:33118648ms step_avg:597.40ms +step:55439/57344 train_time:33119224ms step_avg:597.40ms +grad accum step:13860/14336 +step:55440/57344 train_time:33120546ms step_avg:597.41ms +step:55441/57344 train_time:33120559ms step_avg:597.40ms +step:55442/57344 train_time:33120811ms step_avg:597.40ms +step:55443/57344 train_time:33121437ms step_avg:597.40ms +grad accum step:13861/14336 +step:55444/57344 train_time:33122976ms step_avg:597.41ms +step:55445/57344 train_time:33122992ms step_avg:597.40ms +step:55446/57344 train_time:33123237ms step_avg:597.40ms +step:55447/57344 train_time:33123810ms step_avg:597.40ms +grad accum step:13862/14336 +step:55448/57344 train_time:33125220ms step_avg:597.41ms +step:55449/57344 train_time:33125242ms step_avg:597.40ms +step:55450/57344 train_time:33125485ms step_avg:597.39ms +step:55451/57344 train_time:33126063ms step_avg:597.39ms +grad accum step:13863/14336 +step:55452/57344 train_time:33127522ms step_avg:597.41ms +step:55453/57344 train_time:33127547ms step_avg:597.40ms +step:55454/57344 train_time:33127778ms step_avg:597.39ms +step:55455/57344 train_time:33128365ms step_avg:597.39ms +grad accum step:13864/14336 +step:55456/57344 train_time:33129802ms step_avg:597.41ms +step:55457/57344 train_time:33129820ms step_avg:597.40ms +step:55458/57344 train_time:33130059ms step_avg:597.39ms +step:55459/57344 train_time:33130612ms step_avg:597.39ms +grad accum step:13865/14336 +step:55460/57344 train_time:33131952ms step_avg:597.40ms +step:55461/57344 train_time:33131966ms step_avg:597.39ms +step:55462/57344 train_time:33132211ms step_avg:597.39ms +step:55463/57344 train_time:33132776ms step_avg:597.39ms +grad accum step:13866/14336 +step:55464/57344 train_time:33134205ms step_avg:597.40ms +step:55465/57344 train_time:33134231ms step_avg:597.39ms +step:55466/57344 train_time:33134462ms step_avg:597.38ms +step:55467/57344 train_time:33135043ms step_avg:597.38ms +grad accum step:13867/14336 +step:55468/57344 train_time:33136433ms step_avg:597.40ms +step:55469/57344 train_time:33136449ms step_avg:597.39ms +step:55470/57344 train_time:33136689ms step_avg:597.38ms +step:55471/57344 train_time:33137253ms step_avg:597.38ms +grad accum step:13868/14336 +step:55472/57344 train_time:33138714ms step_avg:597.40ms +step:55473/57344 train_time:33138729ms step_avg:597.38ms +step:55474/57344 train_time:33138947ms step_avg:597.38ms +step:55475/57344 train_time:33139490ms step_avg:597.38ms +grad accum step:13869/14336 +step:55476/57344 train_time:33140852ms step_avg:597.39ms +step:55477/57344 train_time:33140867ms step_avg:597.38ms +step:55478/57344 train_time:33141120ms step_avg:597.37ms +step:55479/57344 train_time:33141691ms step_avg:597.37ms +grad accum step:13870/14336 +step:55480/57344 train_time:33143021ms step_avg:597.39ms +step:55481/57344 train_time:33143038ms step_avg:597.38ms +step:55482/57344 train_time:33143277ms step_avg:597.37ms +step:55483/57344 train_time:33143828ms step_avg:597.37ms +grad accum step:13871/14336 +step:55484/57344 train_time:33145200ms step_avg:597.38ms +step:55485/57344 train_time:33145215ms step_avg:597.37ms +step:55486/57344 train_time:33145461ms step_avg:597.37ms +step:55487/57344 train_time:33146025ms step_avg:597.37ms +grad accum step:13872/14336 +step:55488/57344 train_time:33147442ms step_avg:597.38ms +step:55488/57344 val_loss:5.314028 train_time:33147445ms step_avg:597.38ms +step:55489/57344 train_time:33147457ms step_avg:597.37ms +step:55490/57344 train_time:33147695ms step_avg:597.36ms +step:55491/57344 train_time:33148306ms step_avg:597.36ms +grad accum step:13873/14336 +step:55492/57344 train_time:33149748ms step_avg:597.38ms +step:55493/57344 train_time:33149768ms step_avg:597.37ms +step:55494/57344 train_time:33149999ms step_avg:597.36ms +step:55495/57344 train_time:33150579ms step_avg:597.36ms +grad accum step:13874/14336 +step:55496/57344 train_time:33151901ms step_avg:597.37ms +step:55497/57344 train_time:33151920ms step_avg:597.36ms +step:55498/57344 train_time:33152169ms step_avg:597.36ms +step:55499/57344 train_time:33152778ms step_avg:597.36ms +grad accum step:13875/14336 +step:55500/57344 train_time:33154264ms step_avg:597.37ms +step:55501/57344 train_time:33154282ms step_avg:597.36ms +step:55502/57344 train_time:33154519ms step_avg:597.36ms +step:55503/57344 train_time:33155114ms step_avg:597.36ms +grad accum step:13876/14336 +step:55504/57344 train_time:33156528ms step_avg:597.37ms +step:55505/57344 train_time:33156546ms step_avg:597.36ms +step:55506/57344 train_time:33156797ms step_avg:597.36ms +step:55507/57344 train_time:33157367ms step_avg:597.35ms +grad accum step:13877/14336 +step:55508/57344 train_time:33158728ms step_avg:597.37ms +step:55509/57344 train_time:33158745ms step_avg:597.36ms +step:55510/57344 train_time:33159012ms step_avg:597.35ms +step:55511/57344 train_time:33159617ms step_avg:597.35ms +grad accum step:13878/14336 +step:55512/57344 train_time:33161013ms step_avg:597.37ms +step:55513/57344 train_time:33161040ms step_avg:597.36ms +step:55514/57344 train_time:33161275ms step_avg:597.35ms +step:55515/57344 train_time:33161841ms step_avg:597.35ms +grad accum step:13879/14336 +step:55516/57344 train_time:33163238ms step_avg:597.36ms +step:55517/57344 train_time:33163254ms step_avg:597.35ms +step:55518/57344 train_time:33163496ms step_avg:597.35ms +step:55519/57344 train_time:33164049ms step_avg:597.35ms +grad accum step:13880/14336 +step:55520/57344 train_time:33165511ms step_avg:597.36ms +step:55521/57344 train_time:33165529ms step_avg:597.35ms +step:55522/57344 train_time:33165767ms step_avg:597.34ms +step:55523/57344 train_time:33166384ms step_avg:597.34ms +grad accum step:13881/14336 +step:55524/57344 train_time:33167850ms step_avg:597.36ms +step:55525/57344 train_time:33167873ms step_avg:597.35ms +step:55526/57344 train_time:33168106ms step_avg:597.34ms +step:55527/57344 train_time:33168679ms step_avg:597.34ms +grad accum step:13882/14336 +step:55528/57344 train_time:33170069ms step_avg:597.36ms +step:55529/57344 train_time:33170090ms step_avg:597.35ms +step:55530/57344 train_time:33170324ms step_avg:597.34ms +step:55531/57344 train_time:33170892ms step_avg:597.34ms +grad accum step:13883/14336 +step:55532/57344 train_time:33172288ms step_avg:597.35ms +step:55533/57344 train_time:33172357ms step_avg:597.34ms +step:55534/57344 train_time:33172588ms step_avg:597.34ms +step:55535/57344 train_time:33173200ms step_avg:597.34ms +grad accum step:13884/14336 +step:55536/57344 train_time:33174564ms step_avg:597.35ms +step:55537/57344 train_time:33174586ms step_avg:597.34ms +step:55538/57344 train_time:33174832ms step_avg:597.34ms +step:55539/57344 train_time:33175413ms step_avg:597.34ms +grad accum step:13885/14336 +step:55540/57344 train_time:33176852ms step_avg:597.35ms +step:55541/57344 train_time:33176869ms step_avg:597.34ms +step:55542/57344 train_time:33177107ms step_avg:597.33ms +step:55543/57344 train_time:33177667ms step_avg:597.33ms +grad accum step:13886/14336 +step:55544/57344 train_time:33179095ms step_avg:597.35ms +step:55545/57344 train_time:33179125ms step_avg:597.34ms +step:55546/57344 train_time:33179371ms step_avg:597.33ms +step:55547/57344 train_time:33180011ms step_avg:597.33ms +grad accum step:13887/14336 +step:55548/57344 train_time:33181489ms step_avg:597.35ms +step:55549/57344 train_time:33181512ms step_avg:597.34ms +step:55550/57344 train_time:33181755ms step_avg:597.33ms +step:55551/57344 train_time:33182345ms step_avg:597.33ms +grad accum step:13888/14336 +step:55552/57344 train_time:33183785ms step_avg:597.35ms +step:55552/57344 val_loss:5.313580 train_time:33183793ms step_avg:597.35ms +step:55553/57344 train_time:33183805ms step_avg:597.34ms +step:55554/57344 train_time:33184043ms step_avg:597.33ms +step:55555/57344 train_time:33184657ms step_avg:597.33ms +grad accum step:13889/14336 +step:55556/57344 train_time:33186089ms step_avg:597.34ms +step:55557/57344 train_time:33186106ms step_avg:597.33ms +step:55558/57344 train_time:33186360ms step_avg:597.33ms +step:55559/57344 train_time:33186932ms step_avg:597.33ms +grad accum step:13890/14336 +step:55560/57344 train_time:33188317ms step_avg:597.34ms +step:55561/57344 train_time:33188333ms step_avg:597.33ms +step:55562/57344 train_time:33188588ms step_avg:597.33ms +step:55563/57344 train_time:33189168ms step_avg:597.32ms +grad accum step:13891/14336 +step:55564/57344 train_time:33190551ms step_avg:597.34ms +step:55565/57344 train_time:33190567ms step_avg:597.33ms +step:55566/57344 train_time:33190814ms step_avg:597.32ms +step:55567/57344 train_time:33191387ms step_avg:597.32ms +grad accum step:13892/14336 +step:55568/57344 train_time:33192792ms step_avg:597.34ms +step:55569/57344 train_time:33192808ms step_avg:597.33ms +step:55570/57344 train_time:33193056ms step_avg:597.32ms +step:55571/57344 train_time:33193614ms step_avg:597.32ms +grad accum step:13893/14336 +step:55572/57344 train_time:33195057ms step_avg:597.33ms +step:55573/57344 train_time:33195075ms step_avg:597.32ms +step:55574/57344 train_time:33195325ms step_avg:597.32ms +step:55575/57344 train_time:33195911ms step_avg:597.32ms +grad accum step:13894/14336 +step:55576/57344 train_time:33197324ms step_avg:597.33ms +step:55577/57344 train_time:33197338ms step_avg:597.32ms +step:55578/57344 train_time:33197567ms step_avg:597.31ms +step:55579/57344 train_time:33198130ms step_avg:597.31ms +grad accum step:13895/14336 +step:55580/57344 train_time:33199484ms step_avg:597.33ms +step:55581/57344 train_time:33199501ms step_avg:597.32ms +step:55582/57344 train_time:33199748ms step_avg:597.31ms +step:55583/57344 train_time:33200301ms step_avg:597.31ms +grad accum step:13896/14336 +step:55584/57344 train_time:33201712ms step_avg:597.32ms +step:55585/57344 train_time:33201728ms step_avg:597.31ms +step:55586/57344 train_time:33201978ms step_avg:597.31ms +step:55587/57344 train_time:33202550ms step_avg:597.31ms +grad accum step:13897/14336 +step:55588/57344 train_time:33203942ms step_avg:597.32ms +step:55589/57344 train_time:33203958ms step_avg:597.31ms +step:55590/57344 train_time:33204213ms step_avg:597.31ms +step:55591/57344 train_time:33204830ms step_avg:597.31ms +grad accum step:13898/14336 +step:55592/57344 train_time:33206342ms step_avg:597.32ms +step:55593/57344 train_time:33206358ms step_avg:597.31ms +step:55594/57344 train_time:33206609ms step_avg:597.31ms +step:55595/57344 train_time:33207173ms step_avg:597.31ms +grad accum step:13899/14336 +step:55596/57344 train_time:33208534ms step_avg:597.32ms +step:55597/57344 train_time:33208551ms step_avg:597.31ms +step:55598/57344 train_time:33208793ms step_avg:597.30ms +step:55599/57344 train_time:33209367ms step_avg:597.30ms +grad accum step:13900/14336 +step:55600/57344 train_time:33210756ms step_avg:597.32ms +step:55601/57344 train_time:33210771ms step_avg:597.31ms +step:55602/57344 train_time:33211026ms step_avg:597.30ms +step:55603/57344 train_time:33211611ms step_avg:597.30ms +grad accum step:13901/14336 +step:55604/57344 train_time:33213037ms step_avg:597.31ms +step:55605/57344 train_time:33213052ms step_avg:597.30ms +step:55606/57344 train_time:33213309ms step_avg:597.30ms +step:55607/57344 train_time:33213975ms step_avg:597.30ms +grad accum step:13902/14336 +step:55608/57344 train_time:33215411ms step_avg:597.31ms +step:55609/57344 train_time:33215431ms step_avg:597.30ms +step:55610/57344 train_time:33215678ms step_avg:597.30ms +step:55611/57344 train_time:33216267ms step_avg:597.30ms +grad accum step:13903/14336 +step:55612/57344 train_time:33217635ms step_avg:597.31ms +step:55613/57344 train_time:33217653ms step_avg:597.30ms +step:55614/57344 train_time:33217897ms step_avg:597.29ms +step:55615/57344 train_time:33218475ms step_avg:597.29ms +grad accum step:13904/14336 +step:55616/57344 train_time:33219918ms step_avg:597.31ms +step:55616/57344 val_loss:5.312858 train_time:33219949ms step_avg:597.31ms +step:55617/57344 train_time:33219961ms step_avg:597.30ms +step:55618/57344 train_time:33220184ms step_avg:597.29ms +step:55619/57344 train_time:33220736ms step_avg:597.29ms +grad accum step:13905/14336 +step:55620/57344 train_time:33222094ms step_avg:597.30ms +step:55621/57344 train_time:33222107ms step_avg:597.29ms +step:55622/57344 train_time:33222357ms step_avg:597.29ms +step:55623/57344 train_time:33222945ms step_avg:597.29ms +grad accum step:13906/14336 +step:55624/57344 train_time:33224365ms step_avg:597.30ms +step:55625/57344 train_time:33224378ms step_avg:597.29ms +step:55626/57344 train_time:33224621ms step_avg:597.29ms +step:55627/57344 train_time:33225197ms step_avg:597.29ms +grad accum step:13907/14336 +step:55628/57344 train_time:33226643ms step_avg:597.30ms +step:55629/57344 train_time:33226660ms step_avg:597.29ms +step:55630/57344 train_time:33226915ms step_avg:597.28ms +step:55631/57344 train_time:33227490ms step_avg:597.28ms +grad accum step:13908/14336 +step:55632/57344 train_time:33228838ms step_avg:597.30ms +step:55633/57344 train_time:33228855ms step_avg:597.29ms +step:55634/57344 train_time:33229113ms step_avg:597.28ms +step:55635/57344 train_time:33229708ms step_avg:597.28ms +grad accum step:13909/14336 +step:55636/57344 train_time:33231167ms step_avg:597.30ms +step:55637/57344 train_time:33231179ms step_avg:597.29ms +step:55638/57344 train_time:33231429ms step_avg:597.28ms +step:55639/57344 train_time:33231996ms step_avg:597.28ms +grad accum step:13910/14336 +step:55640/57344 train_time:33233350ms step_avg:597.29ms +step:55641/57344 train_time:33233373ms step_avg:597.28ms +step:55642/57344 train_time:33233630ms step_avg:597.28ms +step:55643/57344 train_time:33234231ms step_avg:597.28ms +grad accum step:13911/14336 +step:55644/57344 train_time:33235616ms step_avg:597.29ms +step:55645/57344 train_time:33235643ms step_avg:597.28ms +step:55646/57344 train_time:33235870ms step_avg:597.27ms +step:55647/57344 train_time:33236435ms step_avg:597.27ms +grad accum step:13912/14336 +step:55648/57344 train_time:33237918ms step_avg:597.29ms +step:55649/57344 train_time:33237934ms step_avg:597.28ms +step:55650/57344 train_time:33238189ms step_avg:597.27ms +step:55651/57344 train_time:33238793ms step_avg:597.27ms +grad accum step:13913/14336 +step:55652/57344 train_time:33240181ms step_avg:597.29ms +step:55653/57344 train_time:33240195ms step_avg:597.28ms +step:55654/57344 train_time:33240456ms step_avg:597.27ms +step:55655/57344 train_time:33241055ms step_avg:597.27ms +grad accum step:13914/14336 +step:55656/57344 train_time:33242539ms step_avg:597.29ms +step:55657/57344 train_time:33242658ms step_avg:597.28ms +step:55658/57344 train_time:33242884ms step_avg:597.27ms +step:55659/57344 train_time:33243464ms step_avg:597.27ms +grad accum step:13915/14336 +step:55660/57344 train_time:33244945ms step_avg:597.29ms +step:55661/57344 train_time:33244991ms step_avg:597.28ms +step:55662/57344 train_time:33245231ms step_avg:597.27ms +step:55663/57344 train_time:33245837ms step_avg:597.27ms +grad accum step:13916/14336 +step:55664/57344 train_time:33247174ms step_avg:597.28ms +step:55665/57344 train_time:33247192ms step_avg:597.27ms +step:55666/57344 train_time:33247439ms step_avg:597.27ms +step:55667/57344 train_time:33248004ms step_avg:597.27ms +grad accum step:13917/14336 +step:55668/57344 train_time:33249403ms step_avg:597.28ms +step:55669/57344 train_time:33249418ms step_avg:597.27ms +step:55670/57344 train_time:33249673ms step_avg:597.26ms +step:55671/57344 train_time:33250289ms step_avg:597.26ms +grad accum step:13918/14336 +step:55672/57344 train_time:33251616ms step_avg:597.28ms +step:55673/57344 train_time:33251640ms step_avg:597.27ms +step:55674/57344 train_time:33251882ms step_avg:597.26ms +step:55675/57344 train_time:33252471ms step_avg:597.26ms +grad accum step:13919/14336 +step:55676/57344 train_time:33253832ms step_avg:597.27ms +step:55677/57344 train_time:33253846ms step_avg:597.26ms +step:55678/57344 train_time:33254101ms step_avg:597.26ms +step:55679/57344 train_time:33254691ms step_avg:597.26ms +grad accum step:13920/14336 +step:55680/57344 train_time:33256100ms step_avg:597.27ms +step:55680/57344 val_loss:5.311756 train_time:33256107ms step_avg:597.27ms +step:55681/57344 train_time:33256748ms step_avg:597.27ms +step:55682/57344 train_time:33256935ms step_avg:597.27ms +step:55683/57344 train_time:33257211ms step_avg:597.26ms +grad accum step:13921/14336 +step:55684/57344 train_time:33259022ms step_avg:597.28ms +step:55685/57344 train_time:33259038ms step_avg:597.27ms +step:55686/57344 train_time:33259262ms step_avg:597.26ms +step:55687/57344 train_time:33259832ms step_avg:597.26ms +grad accum step:13922/14336 +step:55688/57344 train_time:33261240ms step_avg:597.28ms +step:55689/57344 train_time:33261258ms step_avg:597.27ms +step:55690/57344 train_time:33261503ms step_avg:597.26ms +step:55691/57344 train_time:33262129ms step_avg:597.26ms +grad accum step:13923/14336 +step:55692/57344 train_time:33263519ms step_avg:597.28ms +step:55693/57344 train_time:33263539ms step_avg:597.27ms +step:55694/57344 train_time:33263790ms step_avg:597.26ms +step:55695/57344 train_time:33264383ms step_avg:597.26ms +grad accum step:13924/14336 +step:55696/57344 train_time:33265801ms step_avg:597.27ms +step:55697/57344 train_time:33265815ms step_avg:597.26ms +step:55698/57344 train_time:33266070ms step_avg:597.26ms +step:55699/57344 train_time:33266662ms step_avg:597.26ms +grad accum step:13925/14336 +step:55700/57344 train_time:33268058ms step_avg:597.27ms +step:55701/57344 train_time:33268075ms step_avg:597.26ms +step:55702/57344 train_time:33268314ms step_avg:597.26ms +step:55703/57344 train_time:33268897ms step_avg:597.26ms +grad accum step:13926/14336 +step:55704/57344 train_time:33270324ms step_avg:597.27ms +step:55705/57344 train_time:33270368ms step_avg:597.26ms +step:55706/57344 train_time:33270633ms step_avg:597.25ms +step:55707/57344 train_time:33271161ms step_avg:597.25ms +grad accum step:13927/14336 +step:55708/57344 train_time:33272569ms step_avg:597.27ms +step:55709/57344 train_time:33272584ms step_avg:597.26ms +step:55710/57344 train_time:33272834ms step_avg:597.25ms +step:55711/57344 train_time:33273414ms step_avg:597.25ms +grad accum step:13928/14336 +step:55712/57344 train_time:33274926ms step_avg:597.27ms +step:55713/57344 train_time:33274939ms step_avg:597.26ms +step:55714/57344 train_time:33275170ms step_avg:597.25ms +step:55715/57344 train_time:33275745ms step_avg:597.25ms +grad accum step:13929/14336 +step:55716/57344 train_time:33277191ms step_avg:597.26ms +step:55717/57344 train_time:33277215ms step_avg:597.25ms +step:55718/57344 train_time:33277445ms step_avg:597.25ms +step:55719/57344 train_time:33278029ms step_avg:597.25ms +grad accum step:13930/14336 +step:55720/57344 train_time:33279438ms step_avg:597.26ms +step:55721/57344 train_time:33279454ms step_avg:597.25ms +step:55722/57344 train_time:33279720ms step_avg:597.25ms +step:55723/57344 train_time:33280323ms step_avg:597.25ms +grad accum step:13931/14336 +step:55724/57344 train_time:33281641ms step_avg:597.26ms +step:55725/57344 train_time:33281657ms step_avg:597.25ms +step:55726/57344 train_time:33281913ms step_avg:597.24ms +step:55727/57344 train_time:33282512ms step_avg:597.24ms +grad accum step:13932/14336 +step:55728/57344 train_time:33283926ms step_avg:597.26ms +step:55729/57344 train_time:33283958ms step_avg:597.25ms +step:55730/57344 train_time:33284191ms step_avg:597.24ms +step:55731/57344 train_time:33284777ms step_avg:597.24ms +grad accum step:13933/14336 +step:55732/57344 train_time:33286095ms step_avg:597.25ms +step:55733/57344 train_time:33286110ms step_avg:597.24ms +step:55734/57344 train_time:33286361ms step_avg:597.24ms +step:55735/57344 train_time:33286944ms step_avg:597.24ms +grad accum step:13934/14336 +step:55736/57344 train_time:33288332ms step_avg:597.25ms +step:55737/57344 train_time:33288353ms step_avg:597.24ms +step:55738/57344 train_time:33288584ms step_avg:597.23ms +step:55739/57344 train_time:33289144ms step_avg:597.23ms +grad accum step:13935/14336 +step:55740/57344 train_time:33290511ms step_avg:597.25ms +step:55741/57344 train_time:33290530ms step_avg:597.24ms +step:55742/57344 train_time:33290781ms step_avg:597.23ms +step:55743/57344 train_time:33291385ms step_avg:597.23ms +grad accum step:13936/14336 +step:55744/57344 train_time:33293026ms step_avg:597.25ms +step:55744/57344 val_loss:5.311058 train_time:33293112ms step_avg:597.25ms +step:55745/57344 train_time:33293124ms step_avg:597.24ms +step:55746/57344 train_time:33293368ms step_avg:597.23ms +step:55747/57344 train_time:33293991ms step_avg:597.23ms +grad accum step:13937/14336 +step:55748/57344 train_time:33295390ms step_avg:597.25ms +step:55749/57344 train_time:33295406ms step_avg:597.24ms +step:55750/57344 train_time:33295653ms step_avg:597.23ms +step:55751/57344 train_time:33296216ms step_avg:597.23ms +grad accum step:13938/14336 +step:55752/57344 train_time:33297595ms step_avg:597.24ms +step:55753/57344 train_time:33297611ms step_avg:597.23ms +step:55754/57344 train_time:33297869ms step_avg:597.23ms +step:55755/57344 train_time:33298493ms step_avg:597.23ms +grad accum step:13939/14336 +step:55756/57344 train_time:33299988ms step_avg:597.24ms +step:55757/57344 train_time:33300004ms step_avg:597.23ms +step:55758/57344 train_time:33300223ms step_avg:597.23ms +step:55759/57344 train_time:33300786ms step_avg:597.23ms +grad accum step:13940/14336 +step:55760/57344 train_time:33302174ms step_avg:597.24ms +step:55761/57344 train_time:33302193ms step_avg:597.23ms +step:55762/57344 train_time:33302438ms step_avg:597.22ms +step:55763/57344 train_time:33303034ms step_avg:597.22ms +grad accum step:13941/14336 +step:55764/57344 train_time:33304549ms step_avg:597.24ms +step:55765/57344 train_time:33304596ms step_avg:597.23ms +step:55766/57344 train_time:33304815ms step_avg:597.22ms +step:55767/57344 train_time:33305374ms step_avg:597.22ms +grad accum step:13942/14336 +step:55768/57344 train_time:33306858ms step_avg:597.24ms +step:55769/57344 train_time:33306892ms step_avg:597.23ms +step:55770/57344 train_time:33307126ms step_avg:597.22ms +step:55771/57344 train_time:33307709ms step_avg:597.22ms +grad accum step:13943/14336 +step:55772/57344 train_time:33309106ms step_avg:597.24ms +step:55773/57344 train_time:33309120ms step_avg:597.23ms +step:55774/57344 train_time:33309376ms step_avg:597.22ms +step:55775/57344 train_time:33309952ms step_avg:597.22ms +grad accum step:13944/14336 +step:55776/57344 train_time:33311360ms step_avg:597.23ms +step:55777/57344 train_time:33311518ms step_avg:597.23ms +step:55778/57344 train_time:33311735ms step_avg:597.22ms +step:55779/57344 train_time:33312353ms step_avg:597.22ms +grad accum step:13945/14336 +step:55780/57344 train_time:33313944ms step_avg:597.24ms +step:55781/57344 train_time:33313988ms step_avg:597.23ms +step:55782/57344 train_time:33314273ms step_avg:597.22ms +step:55783/57344 train_time:33314956ms step_avg:597.22ms +grad accum step:13946/14336 +step:55784/57344 train_time:33316477ms step_avg:597.24ms +step:55785/57344 train_time:33316498ms step_avg:597.23ms +step:55786/57344 train_time:33316735ms step_avg:597.22ms +step:55787/57344 train_time:33317328ms step_avg:597.22ms +grad accum step:13947/14336 +step:55788/57344 train_time:33318636ms step_avg:597.24ms +step:55789/57344 train_time:33318655ms step_avg:597.23ms +step:55790/57344 train_time:33318904ms step_avg:597.22ms +step:55791/57344 train_time:33319498ms step_avg:597.22ms +grad accum step:13948/14336 +step:55792/57344 train_time:33320859ms step_avg:597.23ms +step:55793/57344 train_time:33320874ms step_avg:597.22ms +step:55794/57344 train_time:33321128ms step_avg:597.22ms +step:55795/57344 train_time:33321700ms step_avg:597.22ms +grad accum step:13949/14336 +step:55796/57344 train_time:33323036ms step_avg:597.23ms +step:55797/57344 train_time:33323053ms step_avg:597.22ms +step:55798/57344 train_time:33323305ms step_avg:597.21ms +step:55799/57344 train_time:33323872ms step_avg:597.21ms +grad accum step:13950/14336 +step:55800/57344 train_time:33325340ms step_avg:597.23ms +step:55801/57344 train_time:33325710ms step_avg:597.22ms +step:55802/57344 train_time:33325928ms step_avg:597.22ms +step:55803/57344 train_time:33326476ms step_avg:597.22ms +grad accum step:13951/14336 +step:55804/57344 train_time:33327909ms step_avg:597.23ms +step:55805/57344 train_time:33327925ms step_avg:597.22ms +step:55806/57344 train_time:33328175ms step_avg:597.21ms +step:55807/57344 train_time:33328740ms step_avg:597.21ms +grad accum step:13952/14336 +step:55808/57344 train_time:33330089ms step_avg:597.23ms +step:55808/57344 val_loss:5.311450 train_time:33330097ms step_avg:597.23ms +step:55809/57344 train_time:33330109ms step_avg:597.22ms +step:55810/57344 train_time:33330342ms step_avg:597.21ms +step:55811/57344 train_time:33330926ms step_avg:597.21ms +grad accum step:13953/14336 +step:55812/57344 train_time:33332286ms step_avg:597.22ms +step:55813/57344 train_time:33332299ms step_avg:597.21ms +step:55814/57344 train_time:33332553ms step_avg:597.21ms +step:55815/57344 train_time:33333130ms step_avg:597.21ms +grad accum step:13954/14336 +step:55816/57344 train_time:33334659ms step_avg:597.22ms +step:55817/57344 train_time:33334676ms step_avg:597.21ms +step:55818/57344 train_time:33334954ms step_avg:597.21ms +step:55819/57344 train_time:33335597ms step_avg:597.21ms +grad accum step:13955/14336 +step:55820/57344 train_time:33337007ms step_avg:597.22ms +step:55821/57344 train_time:33337027ms step_avg:597.21ms +step:55822/57344 train_time:33337248ms step_avg:597.21ms +step:55823/57344 train_time:33337815ms step_avg:597.21ms +grad accum step:13956/14336 +step:55824/57344 train_time:33339215ms step_avg:597.22ms +step:55825/57344 train_time:33339230ms step_avg:597.21ms +step:55826/57344 train_time:33339479ms step_avg:597.20ms +step:55827/57344 train_time:33340048ms step_avg:597.20ms +grad accum step:13957/14336 +step:55828/57344 train_time:33341422ms step_avg:597.22ms +step:55829/57344 train_time:33341440ms step_avg:597.21ms +step:55830/57344 train_time:33341688ms step_avg:597.20ms +step:55831/57344 train_time:33342272ms step_avg:597.20ms +grad accum step:13958/14336 +step:55832/57344 train_time:33343782ms step_avg:597.22ms +step:55833/57344 train_time:33343807ms step_avg:597.21ms +step:55834/57344 train_time:33344033ms step_avg:597.20ms +step:55835/57344 train_time:33344624ms step_avg:597.20ms +grad accum step:13959/14336 +step:55836/57344 train_time:33346036ms step_avg:597.21ms +step:55837/57344 train_time:33346054ms step_avg:597.20ms +step:55838/57344 train_time:33346294ms step_avg:597.20ms +step:55839/57344 train_time:33346860ms step_avg:597.20ms +grad accum step:13960/14336 +step:55840/57344 train_time:33348256ms step_avg:597.21ms +step:55841/57344 train_time:33348274ms step_avg:597.20ms +step:55842/57344 train_time:33348688ms step_avg:597.20ms +step:55843/57344 train_time:33349088ms step_avg:597.19ms +grad accum step:13961/14336 +step:55844/57344 train_time:33350499ms step_avg:597.21ms +step:55845/57344 train_time:33350513ms step_avg:597.20ms +step:55846/57344 train_time:33350750ms step_avg:597.19ms +step:55847/57344 train_time:33351309ms step_avg:597.19ms +grad accum step:13962/14336 +step:55848/57344 train_time:33352725ms step_avg:597.21ms +step:55849/57344 train_time:33352739ms step_avg:597.19ms +step:55850/57344 train_time:33353004ms step_avg:597.19ms +step:55851/57344 train_time:33353617ms step_avg:597.19ms +grad accum step:13963/14336 +step:55852/57344 train_time:33355021ms step_avg:597.20ms +step:55853/57344 train_time:33355113ms step_avg:597.19ms +step:55854/57344 train_time:33355335ms step_avg:597.19ms +step:55855/57344 train_time:33355900ms step_avg:597.19ms +grad accum step:13964/14336 +step:55856/57344 train_time:33357327ms step_avg:597.20ms +step:55857/57344 train_time:33357347ms step_avg:597.19ms +step:55858/57344 train_time:33357572ms step_avg:597.19ms +step:55859/57344 train_time:33358148ms step_avg:597.18ms +grad accum step:13965/14336 +step:55860/57344 train_time:33359528ms step_avg:597.20ms +step:55861/57344 train_time:33359542ms step_avg:597.19ms +step:55862/57344 train_time:33359793ms step_avg:597.18ms +step:55863/57344 train_time:33360365ms step_avg:597.18ms +grad accum step:13966/14336 +step:55864/57344 train_time:33361747ms step_avg:597.20ms +step:55865/57344 train_time:33361760ms step_avg:597.19ms +step:55866/57344 train_time:33362018ms step_avg:597.18ms +step:55867/57344 train_time:33362619ms step_avg:597.18ms +grad accum step:13967/14336 +step:55868/57344 train_time:33364029ms step_avg:597.19ms +step:55869/57344 train_time:33364050ms step_avg:597.18ms +step:55870/57344 train_time:33364283ms step_avg:597.18ms +step:55871/57344 train_time:33364845ms step_avg:597.18ms +grad accum step:13968/14336 +step:55872/57344 train_time:33367038ms step_avg:597.20ms +step:55872/57344 val_loss:5.310041 train_time:33367043ms step_avg:597.21ms +step:55873/57344 train_time:33367055ms step_avg:597.19ms +step:55874/57344 train_time:33367276ms step_avg:597.19ms +step:55875/57344 train_time:33367846ms step_avg:597.19ms +grad accum step:13969/14336 +step:55876/57344 train_time:33369255ms step_avg:597.20ms +step:55877/57344 train_time:33369274ms step_avg:597.19ms +step:55878/57344 train_time:33369520ms step_avg:597.19ms +step:55879/57344 train_time:33370144ms step_avg:597.19ms +grad accum step:13970/14336 +step:55880/57344 train_time:33371551ms step_avg:597.20ms +step:55881/57344 train_time:33371566ms step_avg:597.19ms +step:55882/57344 train_time:33371827ms step_avg:597.18ms +step:55883/57344 train_time:33372431ms step_avg:597.18ms +grad accum step:13971/14336 +step:55884/57344 train_time:33373941ms step_avg:597.20ms +step:55885/57344 train_time:33373967ms step_avg:597.19ms +step:55886/57344 train_time:33374202ms step_avg:597.18ms +step:55887/57344 train_time:33374787ms step_avg:597.18ms +grad accum step:13972/14336 +step:55888/57344 train_time:33376188ms step_avg:597.20ms +step:55889/57344 train_time:33376206ms step_avg:597.19ms +step:55890/57344 train_time:33376608ms step_avg:597.18ms +step:55891/57344 train_time:33377038ms step_avg:597.18ms +grad accum step:13973/14336 +step:55892/57344 train_time:33378444ms step_avg:597.20ms +step:55893/57344 train_time:33378461ms step_avg:597.19ms +step:55894/57344 train_time:33378708ms step_avg:597.18ms +step:55895/57344 train_time:33379297ms step_avg:597.18ms +grad accum step:13974/14336 +step:55896/57344 train_time:33380695ms step_avg:597.19ms +step:55897/57344 train_time:33380712ms step_avg:597.18ms +step:55898/57344 train_time:33380978ms step_avg:597.18ms +step:55899/57344 train_time:33381595ms step_avg:597.18ms +grad accum step:13975/14336 +step:55900/57344 train_time:33383017ms step_avg:597.19ms +step:55901/57344 train_time:33383032ms step_avg:597.18ms +step:55902/57344 train_time:33383278ms step_avg:597.18ms +step:55903/57344 train_time:33383838ms step_avg:597.17ms +grad accum step:13976/14336 +step:55904/57344 train_time:33385262ms step_avg:597.19ms +step:55905/57344 train_time:33385274ms step_avg:597.18ms +step:55906/57344 train_time:33385531ms step_avg:597.17ms +step:55907/57344 train_time:33386121ms step_avg:597.17ms +grad accum step:13977/14336 +step:55908/57344 train_time:33387478ms step_avg:597.19ms +step:55909/57344 train_time:33387493ms step_avg:597.18ms +step:55910/57344 train_time:33387748ms step_avg:597.17ms +step:55911/57344 train_time:33388337ms step_avg:597.17ms +grad accum step:13978/14336 +step:55912/57344 train_time:33389761ms step_avg:597.18ms +step:55913/57344 train_time:33389778ms step_avg:597.17ms +step:55914/57344 train_time:33390030ms step_avg:597.17ms +step:55915/57344 train_time:33390619ms step_avg:597.17ms +grad accum step:13979/14336 +step:55916/57344 train_time:33392275ms step_avg:597.19ms +step:55917/57344 train_time:33392289ms step_avg:597.18ms +step:55918/57344 train_time:33392566ms step_avg:597.17ms +step:55919/57344 train_time:33393206ms step_avg:597.17ms +grad accum step:13980/14336 +step:55920/57344 train_time:33394576ms step_avg:597.18ms +step:55921/57344 train_time:33394592ms step_avg:597.17ms +step:55922/57344 train_time:33394851ms step_avg:597.17ms +step:55923/57344 train_time:33395436ms step_avg:597.17ms +grad accum step:13981/14336 +step:55924/57344 train_time:33396826ms step_avg:597.18ms +step:55925/57344 train_time:33396842ms step_avg:597.17ms +step:55926/57344 train_time:33397097ms step_avg:597.17ms +step:55927/57344 train_time:33397680ms step_avg:597.17ms +grad accum step:13982/14336 +step:55928/57344 train_time:33399097ms step_avg:597.18ms +step:55929/57344 train_time:33399113ms step_avg:597.17ms +step:55930/57344 train_time:33399367ms step_avg:597.16ms +step:55931/57344 train_time:33399946ms step_avg:597.16ms +grad accum step:13983/14336 +step:55932/57344 train_time:33401339ms step_avg:597.18ms +step:55933/57344 train_time:33401356ms step_avg:597.17ms +step:55934/57344 train_time:33401612ms step_avg:597.16ms +step:55935/57344 train_time:33402209ms step_avg:597.16ms +grad accum step:13984/14336 +step:55936/57344 train_time:33403627ms step_avg:597.18ms +step:55936/57344 val_loss:5.309947 train_time:33403635ms step_avg:597.18ms +step:55937/57344 train_time:33403647ms step_avg:597.17ms +step:55938/57344 train_time:33403889ms step_avg:597.16ms +step:55939/57344 train_time:33404502ms step_avg:597.16ms +grad accum step:13985/14336 +step:55940/57344 train_time:33405967ms step_avg:597.17ms +step:55941/57344 train_time:33405983ms step_avg:597.16ms +step:55942/57344 train_time:33406214ms step_avg:597.16ms +step:55943/57344 train_time:33406789ms step_avg:597.16ms +grad accum step:13986/14336 +step:55944/57344 train_time:33408161ms step_avg:597.17ms +step:55945/57344 train_time:33408178ms step_avg:597.16ms +step:55946/57344 train_time:33408404ms step_avg:597.15ms +step:55947/57344 train_time:33408975ms step_avg:597.15ms +grad accum step:13987/14336 +step:55948/57344 train_time:33410315ms step_avg:597.17ms +step:55949/57344 train_time:33410338ms step_avg:597.16ms +step:55950/57344 train_time:33410583ms step_avg:597.15ms +step:55951/57344 train_time:33411179ms step_avg:597.15ms +grad accum step:13988/14336 +step:55952/57344 train_time:33412707ms step_avg:597.17ms +step:55953/57344 train_time:33412762ms step_avg:597.16ms +step:55954/57344 train_time:33412994ms step_avg:597.15ms +step:55955/57344 train_time:33413591ms step_avg:597.15ms +grad accum step:13989/14336 +step:55956/57344 train_time:33414928ms step_avg:597.16ms +step:55957/57344 train_time:33414943ms step_avg:597.15ms +step:55958/57344 train_time:33415195ms step_avg:597.15ms +step:55959/57344 train_time:33415788ms step_avg:597.15ms +grad accum step:13990/14336 +step:55960/57344 train_time:33417265ms step_avg:597.16ms +step:55961/57344 train_time:33417287ms step_avg:597.15ms +step:55962/57344 train_time:33417529ms step_avg:597.15ms +step:55963/57344 train_time:33418108ms step_avg:597.15ms +grad accum step:13991/14336 +step:55964/57344 train_time:33419583ms step_avg:597.16ms +step:55965/57344 train_time:33419617ms step_avg:597.15ms +step:55966/57344 train_time:33419847ms step_avg:597.15ms +step:55967/57344 train_time:33420428ms step_avg:597.15ms +grad accum step:13992/14336 +step:55968/57344 train_time:33421900ms step_avg:597.16ms +step:55969/57344 train_time:33421917ms step_avg:597.15ms +step:55970/57344 train_time:33422137ms step_avg:597.14ms +step:55971/57344 train_time:33422700ms step_avg:597.14ms +grad accum step:13993/14336 +step:55972/57344 train_time:33424118ms step_avg:597.16ms +step:55973/57344 train_time:33424139ms step_avg:597.15ms +step:55974/57344 train_time:33424387ms step_avg:597.14ms +step:55975/57344 train_time:33424969ms step_avg:597.14ms +grad accum step:13994/14336 +step:55976/57344 train_time:33426479ms step_avg:597.16ms +step:55977/57344 train_time:33426495ms step_avg:597.15ms +step:55978/57344 train_time:33426714ms step_avg:597.14ms +step:55979/57344 train_time:33427275ms step_avg:597.14ms +grad accum step:13995/14336 +step:55980/57344 train_time:33428723ms step_avg:597.15ms +step:55981/57344 train_time:33428757ms step_avg:597.14ms +step:55982/57344 train_time:33428989ms step_avg:597.14ms +step:55983/57344 train_time:33429563ms step_avg:597.14ms +grad accum step:13996/14336 +step:55984/57344 train_time:33430980ms step_avg:597.15ms +step:55985/57344 train_time:33431001ms step_avg:597.14ms +step:55986/57344 train_time:33431220ms step_avg:597.14ms +step:55987/57344 train_time:33431785ms step_avg:597.13ms +grad accum step:13997/14336 +step:55988/57344 train_time:33433192ms step_avg:597.15ms +step:55989/57344 train_time:33433206ms step_avg:597.14ms +step:55990/57344 train_time:33433467ms step_avg:597.13ms +step:55991/57344 train_time:33434053ms step_avg:597.13ms +grad accum step:13998/14336 +step:55992/57344 train_time:33435424ms step_avg:597.15ms +step:55993/57344 train_time:33435441ms step_avg:597.14ms +step:55994/57344 train_time:33435684ms step_avg:597.13ms +step:55995/57344 train_time:33436271ms step_avg:597.13ms +grad accum step:13999/14336 +step:55996/57344 train_time:33437673ms step_avg:597.14ms +step:55997/57344 train_time:33437690ms step_avg:597.13ms +step:55998/57344 train_time:33437943ms step_avg:597.13ms +step:55999/57344 train_time:33438515ms step_avg:597.13ms +grad accum step:14000/14336 +step:56000/57344 train_time:33439959ms step_avg:597.14ms +step:56000/57344 val_loss:5.308713 train_time:33439963ms step_avg:597.14ms +step:56001/57344 train_time:33439975ms step_avg:597.13ms +step:56002/57344 train_time:33440208ms step_avg:597.13ms +step:56003/57344 train_time:33440800ms step_avg:597.13ms +grad accum step:14001/14336 +step:56004/57344 train_time:33442362ms step_avg:597.14ms +step:56005/57344 train_time:33442382ms step_avg:597.13ms +step:56006/57344 train_time:33442630ms step_avg:597.13ms +step:56007/57344 train_time:33443212ms step_avg:597.13ms +grad accum step:14002/14336 +step:56008/57344 train_time:33444524ms step_avg:597.14ms +step:56009/57344 train_time:33444540ms step_avg:597.13ms +step:56010/57344 train_time:33444784ms step_avg:597.12ms +step:56011/57344 train_time:33445324ms step_avg:597.12ms +grad accum step:14003/14336 +step:56012/57344 train_time:33446750ms step_avg:597.14ms +step:56013/57344 train_time:33446767ms step_avg:597.13ms +step:56014/57344 train_time:33447015ms step_avg:597.12ms +step:56015/57344 train_time:33447592ms step_avg:597.12ms +grad accum step:14004/14336 +step:56016/57344 train_time:33449226ms step_avg:597.14ms +step:56017/57344 train_time:33449244ms step_avg:597.13ms +step:56018/57344 train_time:33449465ms step_avg:597.12ms +step:56019/57344 train_time:33450024ms step_avg:597.12ms +grad accum step:14005/14336 +step:56020/57344 train_time:33451448ms step_avg:597.13ms +step:56021/57344 train_time:33451469ms step_avg:597.12ms +step:56022/57344 train_time:33451717ms step_avg:597.12ms +step:56023/57344 train_time:33452294ms step_avg:597.12ms +grad accum step:14006/14336 +step:56024/57344 train_time:33453631ms step_avg:597.13ms +step:56025/57344 train_time:33453649ms step_avg:597.12ms +step:56026/57344 train_time:33453898ms step_avg:597.11ms +step:56027/57344 train_time:33454496ms step_avg:597.11ms +grad accum step:14007/14336 +step:56028/57344 train_time:33455933ms step_avg:597.13ms +step:56029/57344 train_time:33455958ms step_avg:597.12ms +step:56030/57344 train_time:33456198ms step_avg:597.11ms +step:56031/57344 train_time:33456766ms step_avg:597.11ms +grad accum step:14008/14336 +step:56032/57344 train_time:33458142ms step_avg:597.13ms +step:56033/57344 train_time:33458158ms step_avg:597.12ms +step:56034/57344 train_time:33458411ms step_avg:597.11ms +step:56035/57344 train_time:33459011ms step_avg:597.11ms +grad accum step:14009/14336 +step:56036/57344 train_time:33460528ms step_avg:597.13ms +step:56037/57344 train_time:33460550ms step_avg:597.12ms +step:56038/57344 train_time:33460801ms step_avg:597.11ms +step:56039/57344 train_time:33461389ms step_avg:597.11ms +grad accum step:14010/14336 +step:56040/57344 train_time:33462758ms step_avg:597.12ms +step:56041/57344 train_time:33462773ms step_avg:597.11ms +step:56042/57344 train_time:33463020ms step_avg:597.11ms +step:56043/57344 train_time:33463571ms step_avg:597.11ms +grad accum step:14011/14336 +step:56044/57344 train_time:33464912ms step_avg:597.12ms +step:56045/57344 train_time:33464927ms step_avg:597.11ms +step:56046/57344 train_time:33465191ms step_avg:597.10ms +step:56047/57344 train_time:33465786ms step_avg:597.10ms +grad accum step:14012/14336 +step:56048/57344 train_time:33467169ms step_avg:597.12ms +step:56049/57344 train_time:33467185ms step_avg:597.11ms +step:56050/57344 train_time:33467435ms step_avg:597.10ms +step:56051/57344 train_time:33468008ms step_avg:597.10ms +grad accum step:14013/14336 +step:56052/57344 train_time:33469411ms step_avg:597.11ms +step:56053/57344 train_time:33469426ms step_avg:597.10ms +step:56054/57344 train_time:33469687ms step_avg:597.10ms +step:56055/57344 train_time:33470286ms step_avg:597.10ms +grad accum step:14014/14336 +step:56056/57344 train_time:33471725ms step_avg:597.11ms +step:56057/57344 train_time:33471744ms step_avg:597.10ms +step:56058/57344 train_time:33471990ms step_avg:597.10ms +step:56059/57344 train_time:33472561ms step_avg:597.10ms +grad accum step:14015/14336 +step:56060/57344 train_time:33473994ms step_avg:597.11ms +step:56061/57344 train_time:33474017ms step_avg:597.10ms +step:56062/57344 train_time:33474243ms step_avg:597.09ms +step:56063/57344 train_time:33474812ms step_avg:597.09ms +grad accum step:14016/14336 +step:56064/57344 train_time:33476173ms step_avg:597.11ms +step:56064/57344 val_loss:5.308554 train_time:33476175ms step_avg:597.11ms +step:56065/57344 train_time:33476187ms step_avg:597.10ms +step:56066/57344 train_time:33476409ms step_avg:597.09ms +step:56067/57344 train_time:33476973ms step_avg:597.09ms +grad accum step:14017/14336 +step:56068/57344 train_time:33478313ms step_avg:597.10ms +step:56069/57344 train_time:33478343ms step_avg:597.09ms +step:56070/57344 train_time:33478576ms step_avg:597.09ms +step:56071/57344 train_time:33479165ms step_avg:597.09ms +grad accum step:14018/14336 +step:56072/57344 train_time:33480568ms step_avg:597.10ms +step:56073/57344 train_time:33480583ms step_avg:597.09ms +step:56074/57344 train_time:33480846ms step_avg:597.08ms +step:56075/57344 train_time:33481440ms step_avg:597.08ms +grad accum step:14019/14336 +step:56076/57344 train_time:33482814ms step_avg:597.10ms +step:56077/57344 train_time:33482833ms step_avg:597.09ms +step:56078/57344 train_time:33483086ms step_avg:597.08ms +step:56079/57344 train_time:33483680ms step_avg:597.08ms +grad accum step:14020/14336 +step:56080/57344 train_time:33485107ms step_avg:597.10ms +step:56081/57344 train_time:33485134ms step_avg:597.09ms +step:56082/57344 train_time:33485358ms step_avg:597.08ms +step:56083/57344 train_time:33485937ms step_avg:597.08ms +grad accum step:14021/14336 +step:56084/57344 train_time:33487486ms step_avg:597.10ms +step:56085/57344 train_time:33487500ms step_avg:597.08ms +step:56086/57344 train_time:33487757ms step_avg:597.08ms +step:56087/57344 train_time:33488341ms step_avg:597.08ms +grad accum step:14022/14336 +step:56088/57344 train_time:33489906ms step_avg:597.10ms +step:56089/57344 train_time:33489924ms step_avg:597.09ms +step:56090/57344 train_time:33490153ms step_avg:597.08ms +step:56091/57344 train_time:33490732ms step_avg:597.08ms +grad accum step:14023/14336 +step:56092/57344 train_time:33492086ms step_avg:597.09ms +step:56093/57344 train_time:33492104ms step_avg:597.08ms +step:56094/57344 train_time:33492356ms step_avg:597.08ms +step:56095/57344 train_time:33492938ms step_avg:597.08ms +grad accum step:14024/14336 +step:56096/57344 train_time:33494271ms step_avg:597.09ms +step:56097/57344 train_time:33494289ms step_avg:597.08ms +step:56098/57344 train_time:33494540ms step_avg:597.07ms +step:56099/57344 train_time:33495125ms step_avg:597.07ms +grad accum step:14025/14336 +step:56100/57344 train_time:33496427ms step_avg:597.08ms +step:56101/57344 train_time:33496446ms step_avg:597.07ms +step:56102/57344 train_time:33496713ms step_avg:597.07ms +step:56103/57344 train_time:33497346ms step_avg:597.07ms +grad accum step:14026/14336 +step:56104/57344 train_time:33498781ms step_avg:597.08ms +step:56105/57344 train_time:33498804ms step_avg:597.07ms +step:56106/57344 train_time:33499049ms step_avg:597.07ms +step:56107/57344 train_time:33499629ms step_avg:597.07ms +grad accum step:14027/14336 +step:56108/57344 train_time:33501095ms step_avg:597.08ms +step:56109/57344 train_time:33501114ms step_avg:597.07ms +step:56110/57344 train_time:33501354ms step_avg:597.07ms +step:56111/57344 train_time:33501903ms step_avg:597.06ms +grad accum step:14028/14336 +step:56112/57344 train_time:33503261ms step_avg:597.08ms +step:56113/57344 train_time:33503278ms step_avg:597.07ms +step:56114/57344 train_time:33503536ms step_avg:597.06ms +step:56115/57344 train_time:33504140ms step_avg:597.06ms +grad accum step:14029/14336 +step:56116/57344 train_time:33505490ms step_avg:597.08ms +step:56117/57344 train_time:33505504ms step_avg:597.07ms +step:56118/57344 train_time:33505756ms step_avg:597.06ms +step:56119/57344 train_time:33506323ms step_avg:597.06ms +grad accum step:14030/14336 +step:56120/57344 train_time:33507804ms step_avg:597.07ms +step:56121/57344 train_time:33507826ms step_avg:597.06ms +step:56122/57344 train_time:33508069ms step_avg:597.06ms +step:56123/57344 train_time:33508640ms step_avg:597.06ms +grad accum step:14031/14336 +step:56124/57344 train_time:33510110ms step_avg:597.07ms +step:56125/57344 train_time:33510124ms step_avg:597.06ms +step:56126/57344 train_time:33510375ms step_avg:597.06ms +step:56127/57344 train_time:33510946ms step_avg:597.06ms +grad accum step:14032/14336 +step:56128/57344 train_time:33512316ms step_avg:597.07ms +step:56128/57344 val_loss:5.307552 train_time:33512326ms step_avg:597.07ms +step:56129/57344 train_time:33512338ms step_avg:597.06ms +step:56130/57344 train_time:33512570ms step_avg:597.05ms +step:56131/57344 train_time:33513157ms step_avg:597.05ms +grad accum step:14033/14336 +step:56132/57344 train_time:33514599ms step_avg:597.07ms +step:56133/57344 train_time:33514615ms step_avg:597.06ms +step:56134/57344 train_time:33514874ms step_avg:597.05ms +step:56135/57344 train_time:33515458ms step_avg:597.05ms +grad accum step:14034/14336 +step:56136/57344 train_time:33516900ms step_avg:597.07ms +step:56137/57344 train_time:33516916ms step_avg:597.06ms +step:56138/57344 train_time:33517172ms step_avg:597.05ms +step:56139/57344 train_time:33517783ms step_avg:597.05ms +grad accum step:14035/14336 +step:56140/57344 train_time:33519284ms step_avg:597.07ms +step:56141/57344 train_time:33519301ms step_avg:597.06ms +step:56142/57344 train_time:33519550ms step_avg:597.05ms +step:56143/57344 train_time:33520112ms step_avg:597.05ms +grad accum step:14036/14336 +step:56144/57344 train_time:33521474ms step_avg:597.06ms +step:56145/57344 train_time:33521491ms step_avg:597.05ms +step:56146/57344 train_time:33521737ms step_avg:597.05ms +step:56147/57344 train_time:33522315ms step_avg:597.05ms +grad accum step:14037/14336 +step:56148/57344 train_time:33523702ms step_avg:597.06ms +step:56149/57344 train_time:33523718ms step_avg:597.05ms +step:56150/57344 train_time:33523979ms step_avg:597.04ms +step:56151/57344 train_time:33524591ms step_avg:597.04ms +grad accum step:14038/14336 +step:56152/57344 train_time:33526051ms step_avg:597.06ms +step:56153/57344 train_time:33526069ms step_avg:597.05ms +step:56154/57344 train_time:33526303ms step_avg:597.04ms +step:56155/57344 train_time:33526862ms step_avg:597.04ms +grad accum step:14039/14336 +step:56156/57344 train_time:33528201ms step_avg:597.05ms +step:56157/57344 train_time:33528273ms step_avg:597.05ms +step:56158/57344 train_time:33528499ms step_avg:597.04ms +step:56159/57344 train_time:33529065ms step_avg:597.04ms +grad accum step:14040/14336 +step:56160/57344 train_time:33530442ms step_avg:597.05ms +step:56161/57344 train_time:33530454ms step_avg:597.04ms +step:56162/57344 train_time:33530702ms step_avg:597.04ms +step:56163/57344 train_time:33531265ms step_avg:597.03ms +grad accum step:14041/14336 +step:56164/57344 train_time:33532756ms step_avg:597.05ms +step:56165/57344 train_time:33532774ms step_avg:597.04ms +step:56166/57344 train_time:33533051ms step_avg:597.03ms +step:56167/57344 train_time:33533719ms step_avg:597.04ms +grad accum step:14042/14336 +step:56168/57344 train_time:33535194ms step_avg:597.05ms +step:56169/57344 train_time:33535214ms step_avg:597.04ms +step:56170/57344 train_time:33535457ms step_avg:597.04ms +step:56171/57344 train_time:33536082ms step_avg:597.04ms +grad accum step:14043/14336 +step:56172/57344 train_time:33537450ms step_avg:597.05ms +step:56173/57344 train_time:33537465ms step_avg:597.04ms +step:56174/57344 train_time:33537715ms step_avg:597.03ms +step:56175/57344 train_time:33538315ms step_avg:597.03ms +grad accum step:14044/14336 +step:56176/57344 train_time:33539692ms step_avg:597.05ms +step:56177/57344 train_time:33539707ms step_avg:597.04ms +step:56178/57344 train_time:33539953ms step_avg:597.03ms +step:56179/57344 train_time:33540514ms step_avg:597.03ms +grad accum step:14045/14336 +step:56180/57344 train_time:33541913ms step_avg:597.04ms +step:56181/57344 train_time:33541940ms step_avg:597.03ms +step:56182/57344 train_time:33542176ms step_avg:597.03ms +step:56183/57344 train_time:33542772ms step_avg:597.03ms +grad accum step:14046/14336 +step:56184/57344 train_time:33544163ms step_avg:597.04ms +step:56185/57344 train_time:33544183ms step_avg:597.03ms +step:56186/57344 train_time:33544433ms step_avg:597.02ms +step:56187/57344 train_time:33545027ms step_avg:597.02ms +grad accum step:14047/14336 +step:56188/57344 train_time:33546476ms step_avg:597.04ms +step:56189/57344 train_time:33546497ms step_avg:597.03ms +step:56190/57344 train_time:33546738ms step_avg:597.02ms +step:56191/57344 train_time:33547310ms step_avg:597.02ms +grad accum step:14048/14336 +step:56192/57344 train_time:33548757ms step_avg:597.04ms +step:56192/57344 val_loss:5.307443 train_time:33548760ms step_avg:597.04ms +step:56193/57344 train_time:33548772ms step_avg:597.03ms +step:56194/57344 train_time:33549005ms step_avg:597.02ms +step:56195/57344 train_time:33549596ms step_avg:597.02ms +grad accum step:14049/14336 +step:56196/57344 train_time:33550959ms step_avg:597.03ms +step:56197/57344 train_time:33550982ms step_avg:597.02ms +step:56198/57344 train_time:33551212ms step_avg:597.02ms +step:56199/57344 train_time:33551800ms step_avg:597.02ms +grad accum step:14050/14336 +step:56200/57344 train_time:33553232ms step_avg:597.03ms +step:56201/57344 train_time:33553253ms step_avg:597.02ms +step:56202/57344 train_time:33553501ms step_avg:597.02ms +step:56203/57344 train_time:33554110ms step_avg:597.02ms +grad accum step:14051/14336 +step:56204/57344 train_time:33555485ms step_avg:597.03ms +step:56205/57344 train_time:33555501ms step_avg:597.02ms +step:56206/57344 train_time:33555751ms step_avg:597.01ms +step:56207/57344 train_time:33556324ms step_avg:597.01ms +grad accum step:14052/14336 +step:56208/57344 train_time:33557658ms step_avg:597.03ms +step:56209/57344 train_time:33557672ms step_avg:597.02ms +step:56210/57344 train_time:33557926ms step_avg:597.01ms +step:56211/57344 train_time:33558508ms step_avg:597.01ms +grad accum step:14053/14336 +step:56212/57344 train_time:33559845ms step_avg:597.02ms +step:56213/57344 train_time:33559864ms step_avg:597.01ms +step:56214/57344 train_time:33560111ms step_avg:597.01ms +step:56215/57344 train_time:33560678ms step_avg:597.01ms +grad accum step:14054/14336 +step:56216/57344 train_time:33562139ms step_avg:597.02ms +step:56217/57344 train_time:33562163ms step_avg:597.01ms +step:56218/57344 train_time:33562388ms step_avg:597.00ms +step:56219/57344 train_time:33562965ms step_avg:597.00ms +grad accum step:14055/14336 +step:56220/57344 train_time:33564730ms step_avg:597.02ms +step:56221/57344 train_time:33564750ms step_avg:597.01ms +step:56222/57344 train_time:33564982ms step_avg:597.01ms +step:56223/57344 train_time:33565541ms step_avg:597.01ms +grad accum step:14056/14336 +step:56224/57344 train_time:33566873ms step_avg:597.02ms +step:56225/57344 train_time:33566892ms step_avg:597.01ms +step:56226/57344 train_time:33567145ms step_avg:597.00ms +step:56227/57344 train_time:33567734ms step_avg:597.00ms +grad accum step:14057/14336 +step:56228/57344 train_time:33569387ms step_avg:597.02ms +step:56229/57344 train_time:33569404ms step_avg:597.01ms +step:56230/57344 train_time:33569631ms step_avg:597.01ms +step:56231/57344 train_time:33570214ms step_avg:597.01ms +grad accum step:14058/14336 +step:56232/57344 train_time:33571565ms step_avg:597.02ms +step:56233/57344 train_time:33571582ms step_avg:597.01ms +step:56234/57344 train_time:33571838ms step_avg:597.00ms +step:56235/57344 train_time:33572431ms step_avg:597.00ms +grad accum step:14059/14336 +step:56236/57344 train_time:33573814ms step_avg:597.02ms +step:56237/57344 train_time:33573830ms step_avg:597.01ms +step:56238/57344 train_time:33574129ms step_avg:597.00ms +step:56239/57344 train_time:33574828ms step_avg:597.00ms +grad accum step:14060/14336 +step:56240/57344 train_time:33576210ms step_avg:597.02ms +step:56241/57344 train_time:33576227ms step_avg:597.01ms +step:56242/57344 train_time:33576480ms step_avg:597.00ms +step:56243/57344 train_time:33577049ms step_avg:597.00ms +grad accum step:14061/14336 +step:56244/57344 train_time:33578632ms step_avg:597.02ms +step:56245/57344 train_time:33578683ms step_avg:597.01ms +step:56246/57344 train_time:33578937ms step_avg:597.00ms +step:56247/57344 train_time:33579606ms step_avg:597.00ms +grad accum step:14062/14336 +step:56248/57344 train_time:33581059ms step_avg:597.02ms +step:56249/57344 train_time:33581078ms step_avg:597.01ms +step:56250/57344 train_time:33581297ms step_avg:597.00ms +step:56251/57344 train_time:33581867ms step_avg:597.00ms +grad accum step:14063/14336 +step:56252/57344 train_time:33583324ms step_avg:597.02ms +step:56253/57344 train_time:33583340ms step_avg:597.01ms +step:56254/57344 train_time:33583575ms step_avg:597.00ms +step:56255/57344 train_time:33584165ms step_avg:597.00ms +grad accum step:14064/14336 +step:56256/57344 train_time:33585615ms step_avg:597.01ms +step:56256/57344 val_loss:5.306889 train_time:33585893ms step_avg:597.02ms +step:56257/57344 train_time:33585905ms step_avg:597.01ms +step:56258/57344 train_time:33586142ms step_avg:597.00ms +step:56259/57344 train_time:33586738ms step_avg:597.00ms +grad accum step:14065/14336 +step:56260/57344 train_time:33588143ms step_avg:597.02ms +step:56261/57344 train_time:33588161ms step_avg:597.01ms +step:56262/57344 train_time:33588402ms step_avg:597.00ms +step:56263/57344 train_time:33588973ms step_avg:597.00ms +grad accum step:14066/14336 +step:56264/57344 train_time:33590418ms step_avg:597.01ms +step:56265/57344 train_time:33590438ms step_avg:597.00ms +step:56266/57344 train_time:33590683ms step_avg:597.00ms +step:56267/57344 train_time:33591255ms step_avg:597.00ms +grad accum step:14067/14336 +step:56268/57344 train_time:33592676ms step_avg:597.01ms +step:56269/57344 train_time:33592694ms step_avg:597.00ms +step:56270/57344 train_time:33592953ms step_avg:597.00ms +step:56271/57344 train_time:33593568ms step_avg:597.00ms +grad accum step:14068/14336 +step:56272/57344 train_time:33594928ms step_avg:597.01ms +step:56273/57344 train_time:33594945ms step_avg:597.00ms +step:56274/57344 train_time:33595195ms step_avg:596.99ms +step:56275/57344 train_time:33595773ms step_avg:596.99ms +grad accum step:14069/14336 +step:56276/57344 train_time:33597131ms step_avg:597.01ms +step:56277/57344 train_time:33597147ms step_avg:597.00ms +step:56278/57344 train_time:33597400ms step_avg:596.99ms +step:56279/57344 train_time:33597984ms step_avg:596.99ms +grad accum step:14070/14336 +step:56280/57344 train_time:33599818ms step_avg:597.01ms +step:56281/57344 train_time:33599831ms step_avg:597.00ms +step:56282/57344 train_time:33600076ms step_avg:597.00ms +step:56283/57344 train_time:33600700ms step_avg:597.00ms +grad accum step:14071/14336 +step:56284/57344 train_time:33602078ms step_avg:597.01ms +step:56285/57344 train_time:33602425ms step_avg:597.00ms +step:56286/57344 train_time:33602640ms step_avg:597.00ms +step:56287/57344 train_time:33603196ms step_avg:597.00ms +grad accum step:14072/14336 +step:56288/57344 train_time:33604593ms step_avg:597.01ms +step:56289/57344 train_time:33604611ms step_avg:597.00ms +step:56290/57344 train_time:33604860ms step_avg:597.00ms +step:56291/57344 train_time:33605490ms step_avg:597.00ms +grad accum step:14073/14336 +step:56292/57344 train_time:33606889ms step_avg:597.01ms +step:56293/57344 train_time:33606906ms step_avg:597.00ms +step:56294/57344 train_time:33607161ms step_avg:596.99ms +step:56295/57344 train_time:33607729ms step_avg:596.99ms +grad accum step:14074/14336 +step:56296/57344 train_time:33609122ms step_avg:597.01ms +step:56297/57344 train_time:33609140ms step_avg:597.00ms +step:56298/57344 train_time:33609382ms step_avg:596.99ms +step:56299/57344 train_time:33609960ms step_avg:596.99ms +grad accum step:14075/14336 +step:56300/57344 train_time:33611326ms step_avg:597.00ms +step:56301/57344 train_time:33611341ms step_avg:596.99ms +step:56302/57344 train_time:33611588ms step_avg:596.99ms +step:56303/57344 train_time:33612147ms step_avg:596.99ms +grad accum step:14076/14336 +step:56304/57344 train_time:33613563ms step_avg:597.00ms +step:56305/57344 train_time:33613579ms step_avg:596.99ms +step:56306/57344 train_time:33613850ms step_avg:596.99ms +step:56307/57344 train_time:33614478ms step_avg:596.99ms +grad accum step:14077/14336 +step:56308/57344 train_time:33615856ms step_avg:597.00ms +step:56309/57344 train_time:33615871ms step_avg:596.99ms +step:56310/57344 train_time:33616139ms step_avg:596.98ms +step:56311/57344 train_time:33616762ms step_avg:596.98ms +grad accum step:14078/14336 +step:56312/57344 train_time:33618160ms step_avg:597.00ms +step:56313/57344 train_time:33618176ms step_avg:596.99ms +step:56314/57344 train_time:33618430ms step_avg:596.98ms +step:56315/57344 train_time:33619001ms step_avg:596.98ms +grad accum step:14079/14336 +step:56316/57344 train_time:33620390ms step_avg:597.00ms +step:56317/57344 train_time:33620405ms step_avg:596.99ms +step:56318/57344 train_time:33620649ms step_avg:596.98ms +step:56319/57344 train_time:33621210ms step_avg:596.98ms +grad accum step:14080/14336 +step:56320/57344 train_time:33622609ms step_avg:596.99ms +step:56320/57344 val_loss:5.306214 train_time:33622613ms step_avg:596.99ms +step:56321/57344 train_time:33622625ms step_avg:596.98ms +step:56322/57344 train_time:33622862ms step_avg:596.98ms +step:56323/57344 train_time:33623477ms step_avg:596.98ms +grad accum step:14081/14336 +step:56324/57344 train_time:33624854ms step_avg:596.99ms +step:56325/57344 train_time:33624869ms step_avg:596.98ms +step:56326/57344 train_time:33625113ms step_avg:596.97ms +step:56327/57344 train_time:33625685ms step_avg:596.97ms +grad accum step:14082/14336 +step:56328/57344 train_time:33627079ms step_avg:596.99ms +step:56329/57344 train_time:33627091ms step_avg:596.98ms +step:56330/57344 train_time:33627331ms step_avg:596.97ms +step:56331/57344 train_time:33627923ms step_avg:596.97ms +grad accum step:14083/14336 +step:56332/57344 train_time:33629358ms step_avg:596.98ms +step:56333/57344 train_time:33629383ms step_avg:596.97ms +step:56334/57344 train_time:33629612ms step_avg:596.97ms +step:56335/57344 train_time:33630205ms step_avg:596.97ms +grad accum step:14084/14336 +step:56336/57344 train_time:33631762ms step_avg:596.99ms +step:56337/57344 train_time:33631779ms step_avg:596.97ms +step:56338/57344 train_time:33632013ms step_avg:596.97ms +step:56339/57344 train_time:33632631ms step_avg:596.97ms +grad accum step:14085/14336 +step:56340/57344 train_time:33634033ms step_avg:596.98ms +step:56341/57344 train_time:33634051ms step_avg:596.97ms +step:56342/57344 train_time:33634292ms step_avg:596.97ms +step:56343/57344 train_time:33634855ms step_avg:596.97ms +grad accum step:14086/14336 +step:56344/57344 train_time:33636222ms step_avg:596.98ms +step:56345/57344 train_time:33636245ms step_avg:596.97ms +step:56346/57344 train_time:33636484ms step_avg:596.96ms +step:56347/57344 train_time:33637056ms step_avg:596.96ms +grad accum step:14087/14336 +step:56348/57344 train_time:33638426ms step_avg:596.98ms +step:56349/57344 train_time:33638440ms step_avg:596.97ms +step:56350/57344 train_time:33638696ms step_avg:596.96ms +step:56351/57344 train_time:33639285ms step_avg:596.96ms +grad accum step:14088/14336 +step:56352/57344 train_time:33640667ms step_avg:596.97ms +step:56353/57344 train_time:33640683ms step_avg:596.96ms +step:56354/57344 train_time:33640944ms step_avg:596.96ms +step:56355/57344 train_time:33641539ms step_avg:596.96ms +grad accum step:14089/14336 +step:56356/57344 train_time:33642920ms step_avg:596.97ms +step:56357/57344 train_time:33642955ms step_avg:596.96ms +step:56358/57344 train_time:33643177ms step_avg:596.95ms +step:56359/57344 train_time:33643729ms step_avg:596.95ms +grad accum step:14090/14336 +step:56360/57344 train_time:33645080ms step_avg:596.97ms +step:56361/57344 train_time:33645096ms step_avg:596.96ms +step:56362/57344 train_time:33645343ms step_avg:596.95ms +step:56363/57344 train_time:33645935ms step_avg:596.95ms +grad accum step:14091/14336 +step:56364/57344 train_time:33647304ms step_avg:596.96ms +step:56365/57344 train_time:33647321ms step_avg:596.95ms +step:56366/57344 train_time:33647591ms step_avg:596.95ms +step:56367/57344 train_time:33648237ms step_avg:596.95ms +grad accum step:14092/14336 +step:56368/57344 train_time:33649712ms step_avg:596.96ms +step:56369/57344 train_time:33649728ms step_avg:596.95ms +step:56370/57344 train_time:33649990ms step_avg:596.95ms +step:56371/57344 train_time:33650583ms step_avg:596.95ms +grad accum step:14093/14336 +step:56372/57344 train_time:33652117ms step_avg:596.97ms +step:56373/57344 train_time:33652135ms step_avg:596.95ms +step:56374/57344 train_time:33652382ms step_avg:596.95ms +step:56375/57344 train_time:33652950ms step_avg:596.95ms +grad accum step:14094/14336 +step:56376/57344 train_time:33654282ms step_avg:596.96ms +step:56377/57344 train_time:33654303ms step_avg:596.95ms +step:56378/57344 train_time:33654542ms step_avg:596.94ms +step:56379/57344 train_time:33655106ms step_avg:596.94ms +grad accum step:14095/14336 +step:56380/57344 train_time:33656525ms step_avg:596.96ms +step:56381/57344 train_time:33656545ms step_avg:596.95ms +step:56382/57344 train_time:33656768ms step_avg:596.94ms +step:56383/57344 train_time:33657337ms step_avg:596.94ms +grad accum step:14096/14336 +step:56384/57344 train_time:33658768ms step_avg:596.96ms +step:56384/57344 val_loss:5.305820 train_time:33658770ms step_avg:596.96ms +step:56385/57344 train_time:33658782ms step_avg:596.95ms +step:56386/57344 train_time:33659011ms step_avg:596.94ms +step:56387/57344 train_time:33659577ms step_avg:596.94ms +grad accum step:14097/14336 +step:56388/57344 train_time:33660960ms step_avg:596.95ms +step:56389/57344 train_time:33660976ms step_avg:596.94ms +step:56390/57344 train_time:33661233ms step_avg:596.94ms +step:56391/57344 train_time:33661836ms step_avg:596.94ms +grad accum step:14098/14336 +step:56392/57344 train_time:33663249ms step_avg:596.95ms +step:56393/57344 train_time:33663263ms step_avg:596.94ms +step:56394/57344 train_time:33663516ms step_avg:596.93ms +step:56395/57344 train_time:33664084ms step_avg:596.93ms +grad accum step:14099/14336 +step:56396/57344 train_time:33665407ms step_avg:596.95ms +step:56397/57344 train_time:33665425ms step_avg:596.94ms +step:56398/57344 train_time:33665673ms step_avg:596.93ms +step:56399/57344 train_time:33666254ms step_avg:596.93ms +grad accum step:14100/14336 +step:56400/57344 train_time:33667614ms step_avg:596.94ms +step:56401/57344 train_time:33667633ms step_avg:596.93ms +step:56402/57344 train_time:33667874ms step_avg:596.93ms +step:56403/57344 train_time:33668445ms step_avg:596.93ms +grad accum step:14101/14336 +step:56404/57344 train_time:33669884ms step_avg:596.94ms +step:56405/57344 train_time:33669903ms step_avg:596.93ms +step:56406/57344 train_time:33670147ms step_avg:596.92ms +step:56407/57344 train_time:33670714ms step_avg:596.92ms +grad accum step:14102/14336 +step:56408/57344 train_time:33672123ms step_avg:596.94ms +step:56409/57344 train_time:33672137ms step_avg:596.93ms +step:56410/57344 train_time:33672362ms step_avg:596.92ms +step:56411/57344 train_time:33672931ms step_avg:596.92ms +grad accum step:14103/14336 +step:56412/57344 train_time:33674376ms step_avg:596.94ms +step:56413/57344 train_time:33674392ms step_avg:596.93ms +step:56414/57344 train_time:33674653ms step_avg:596.92ms +step:56415/57344 train_time:33675270ms step_avg:596.92ms +grad accum step:14104/14336 +step:56416/57344 train_time:33676734ms step_avg:596.94ms +step:56417/57344 train_time:33676756ms step_avg:596.93ms +step:56418/57344 train_time:33677022ms step_avg:596.92ms +step:56419/57344 train_time:33677646ms step_avg:596.92ms +grad accum step:14105/14336 +step:56420/57344 train_time:33679108ms step_avg:596.94ms +step:56421/57344 train_time:33679122ms step_avg:596.93ms +step:56422/57344 train_time:33679387ms step_avg:596.92ms +step:56423/57344 train_time:33680016ms step_avg:596.92ms +grad accum step:14106/14336 +step:56424/57344 train_time:33681494ms step_avg:596.94ms +step:56425/57344 train_time:33681512ms step_avg:596.93ms +step:56426/57344 train_time:33681740ms step_avg:596.92ms +step:56427/57344 train_time:33682334ms step_avg:596.92ms +grad accum step:14107/14336 +step:56428/57344 train_time:33683785ms step_avg:596.93ms +step:56429/57344 train_time:33683802ms step_avg:596.92ms +step:56430/57344 train_time:33684032ms step_avg:596.92ms +step:56431/57344 train_time:33684624ms step_avg:596.92ms +grad accum step:14108/14336 +step:56432/57344 train_time:33686048ms step_avg:596.93ms +step:56433/57344 train_time:33686063ms step_avg:596.92ms +step:56434/57344 train_time:33686311ms step_avg:596.92ms +step:56435/57344 train_time:33686888ms step_avg:596.91ms +grad accum step:14109/14336 +step:56436/57344 train_time:33688431ms step_avg:596.93ms +step:56437/57344 train_time:33688453ms step_avg:596.92ms +step:56438/57344 train_time:33688699ms step_avg:596.92ms +step:56439/57344 train_time:33689296ms step_avg:596.92ms +grad accum step:14110/14336 +step:56440/57344 train_time:33690674ms step_avg:596.93ms +step:56441/57344 train_time:33690689ms step_avg:596.92ms +step:56442/57344 train_time:33690941ms step_avg:596.91ms +step:56443/57344 train_time:33691524ms step_avg:596.91ms +grad accum step:14111/14336 +step:56444/57344 train_time:33692931ms step_avg:596.93ms +step:56445/57344 train_time:33692947ms step_avg:596.92ms +step:56446/57344 train_time:33693196ms step_avg:596.91ms +step:56447/57344 train_time:33693762ms step_avg:596.91ms +grad accum step:14112/14336 +step:56448/57344 train_time:33695086ms step_avg:596.92ms +step:56448/57344 val_loss:5.305137 train_time:33695087ms step_avg:596.92ms +step:56449/57344 train_time:33695099ms step_avg:596.91ms +step:56450/57344 train_time:33695330ms step_avg:596.91ms +step:56451/57344 train_time:33695903ms step_avg:596.91ms +grad accum step:14113/14336 +step:56452/57344 train_time:33697375ms step_avg:596.92ms +step:56453/57344 train_time:33697393ms step_avg:596.91ms +step:56454/57344 train_time:33697635ms step_avg:596.90ms +step:56455/57344 train_time:33698200ms step_avg:596.90ms +grad accum step:14114/14336 +step:56456/57344 train_time:33699606ms step_avg:596.92ms +step:56457/57344 train_time:33699622ms step_avg:596.91ms +step:56458/57344 train_time:33699876ms step_avg:596.90ms +step:56459/57344 train_time:33700528ms step_avg:596.90ms +grad accum step:14115/14336 +step:56460/57344 train_time:33701916ms step_avg:596.92ms +step:56461/57344 train_time:33701933ms step_avg:596.91ms +step:56462/57344 train_time:33702185ms step_avg:596.90ms +step:56463/57344 train_time:33702757ms step_avg:596.90ms +grad accum step:14116/14336 +step:56464/57344 train_time:33704233ms step_avg:596.92ms +step:56465/57344 train_time:33704254ms step_avg:596.91ms +step:56466/57344 train_time:33704497ms step_avg:596.90ms +step:56467/57344 train_time:33705078ms step_avg:596.90ms +grad accum step:14117/14336 +step:56468/57344 train_time:33706456ms step_avg:596.91ms +step:56469/57344 train_time:33706488ms step_avg:596.90ms +step:56470/57344 train_time:33706730ms step_avg:596.90ms +step:56471/57344 train_time:33707331ms step_avg:596.90ms +grad accum step:14118/14336 +step:56472/57344 train_time:33708784ms step_avg:596.91ms +step:56473/57344 train_time:33708804ms step_avg:596.90ms +step:56474/57344 train_time:33709046ms step_avg:596.89ms +step:56475/57344 train_time:33709650ms step_avg:596.90ms +grad accum step:14119/14336 +step:56476/57344 train_time:33711079ms step_avg:596.91ms +step:56477/57344 train_time:33711103ms step_avg:596.90ms +step:56478/57344 train_time:33711348ms step_avg:596.89ms +step:56479/57344 train_time:33711956ms step_avg:596.89ms +grad accum step:14120/14336 +step:56480/57344 train_time:33713345ms step_avg:596.91ms +step:56481/57344 train_time:33713388ms step_avg:596.90ms +step:56482/57344 train_time:33713613ms step_avg:596.89ms +step:56483/57344 train_time:33714184ms step_avg:596.89ms +grad accum step:14121/14336 +step:56484/57344 train_time:33715559ms step_avg:596.90ms +step:56485/57344 train_time:33715574ms step_avg:596.89ms +step:56486/57344 train_time:33715826ms step_avg:596.89ms +step:56487/57344 train_time:33716403ms step_avg:596.89ms +grad accum step:14122/14336 +step:56488/57344 train_time:33717779ms step_avg:596.90ms +step:56489/57344 train_time:33717796ms step_avg:596.89ms +step:56490/57344 train_time:33718046ms step_avg:596.89ms +step:56491/57344 train_time:33718612ms step_avg:596.88ms +grad accum step:14123/14336 +step:56492/57344 train_time:33719938ms step_avg:596.90ms +step:56493/57344 train_time:33719954ms step_avg:596.89ms +step:56494/57344 train_time:33720207ms step_avg:596.88ms +step:56495/57344 train_time:33720775ms step_avg:596.88ms +grad accum step:14124/14336 +step:56496/57344 train_time:33722138ms step_avg:596.89ms +step:56497/57344 train_time:33722150ms step_avg:596.88ms +step:56498/57344 train_time:33722375ms step_avg:596.88ms +step:56499/57344 train_time:33722940ms step_avg:596.88ms +grad accum step:14125/14336 +step:56500/57344 train_time:33724313ms step_avg:596.89ms +step:56501/57344 train_time:33724350ms step_avg:596.88ms +step:56502/57344 train_time:33724572ms step_avg:596.87ms +step:56503/57344 train_time:33725149ms step_avg:596.87ms +grad accum step:14126/14336 +step:56504/57344 train_time:33726622ms step_avg:596.89ms +step:56505/57344 train_time:33726636ms step_avg:596.88ms +step:56506/57344 train_time:33726893ms step_avg:596.87ms +step:56507/57344 train_time:33727493ms step_avg:596.87ms +grad accum step:14127/14336 +step:56508/57344 train_time:33728910ms step_avg:596.89ms +step:56509/57344 train_time:33728929ms step_avg:596.88ms +step:56510/57344 train_time:33729172ms step_avg:596.87ms +step:56511/57344 train_time:33729737ms step_avg:596.87ms +grad accum step:14128/14336 +step:56512/57344 train_time:33731145ms step_avg:596.88ms +step:56512/57344 val_loss:5.304876 train_time:33731160ms step_avg:596.88ms +step:56513/57344 train_time:33731172ms step_avg:596.87ms +step:56514/57344 train_time:33731409ms step_avg:596.87ms +step:56515/57344 train_time:33731994ms step_avg:596.87ms +grad accum step:14129/14336 +step:56516/57344 train_time:33733407ms step_avg:596.88ms +step:56517/57344 train_time:33733427ms step_avg:596.87ms +step:56518/57344 train_time:33733657ms step_avg:596.87ms +step:56519/57344 train_time:33734221ms step_avg:596.87ms +grad accum step:14130/14336 +step:56520/57344 train_time:33735649ms step_avg:596.88ms +step:56521/57344 train_time:33735667ms step_avg:596.87ms +step:56522/57344 train_time:33735926ms step_avg:596.86ms +step:56523/57344 train_time:33736547ms step_avg:596.86ms +grad accum step:14131/14336 +step:56524/57344 train_time:33738005ms step_avg:596.88ms +step:56525/57344 train_time:33738062ms step_avg:596.87ms +step:56526/57344 train_time:33738288ms step_avg:596.86ms +step:56527/57344 train_time:33738854ms step_avg:596.86ms +grad accum step:14132/14336 +step:56528/57344 train_time:33740331ms step_avg:596.88ms +step:56529/57344 train_time:33740347ms step_avg:596.87ms +step:56530/57344 train_time:33740568ms step_avg:596.86ms +step:56531/57344 train_time:33741150ms step_avg:596.86ms +grad accum step:14133/14336 +step:56532/57344 train_time:33742711ms step_avg:596.88ms +step:56533/57344 train_time:33742914ms step_avg:596.87ms +step:56534/57344 train_time:33743136ms step_avg:596.86ms +step:56535/57344 train_time:33743740ms step_avg:596.86ms +grad accum step:14134/14336 +step:56536/57344 train_time:33745126ms step_avg:596.88ms +step:56537/57344 train_time:33745170ms step_avg:596.87ms +step:56538/57344 train_time:33745408ms step_avg:596.86ms +step:56539/57344 train_time:33746012ms step_avg:596.86ms +grad accum step:14135/14336 +step:56540/57344 train_time:33747476ms step_avg:596.88ms +step:56541/57344 train_time:33747489ms step_avg:596.87ms +step:56542/57344 train_time:33747735ms step_avg:596.86ms +step:56543/57344 train_time:33748350ms step_avg:596.86ms +grad accum step:14136/14336 +step:56544/57344 train_time:33749750ms step_avg:596.88ms +step:56545/57344 train_time:33749766ms step_avg:596.87ms +step:56546/57344 train_time:33750013ms step_avg:596.86ms +step:56547/57344 train_time:33750587ms step_avg:596.86ms +grad accum step:14137/14336 +step:56548/57344 train_time:33751949ms step_avg:596.87ms +step:56549/57344 train_time:33751964ms step_avg:596.86ms +step:56550/57344 train_time:33752216ms step_avg:596.86ms +step:56551/57344 train_time:33752779ms step_avg:596.86ms +grad accum step:14138/14336 +step:56552/57344 train_time:33754123ms step_avg:596.87ms +step:56553/57344 train_time:33754138ms step_avg:596.86ms +step:56554/57344 train_time:33754392ms step_avg:596.85ms +step:56555/57344 train_time:33754988ms step_avg:596.85ms +grad accum step:14139/14336 +step:56556/57344 train_time:33756495ms step_avg:596.87ms +step:56557/57344 train_time:33756512ms step_avg:596.86ms +step:56558/57344 train_time:33756757ms step_avg:596.85ms +step:56559/57344 train_time:33757303ms step_avg:596.85ms +grad accum step:14140/14336 +step:56560/57344 train_time:33758654ms step_avg:596.86ms +step:56561/57344 train_time:33758671ms step_avg:596.85ms +step:56562/57344 train_time:33758916ms step_avg:596.85ms +step:56563/57344 train_time:33759467ms step_avg:596.85ms +grad accum step:14141/14336 +step:56564/57344 train_time:33760819ms step_avg:596.86ms +step:56565/57344 train_time:33760840ms step_avg:596.85ms +step:56566/57344 train_time:33761073ms step_avg:596.84ms +step:56567/57344 train_time:33761635ms step_avg:596.84ms +grad accum step:14142/14336 +step:56568/57344 train_time:33762985ms step_avg:596.86ms +step:56569/57344 train_time:33763001ms step_avg:596.85ms +step:56570/57344 train_time:33763255ms step_avg:596.84ms +step:56571/57344 train_time:33763825ms step_avg:596.84ms +grad accum step:14143/14336 +step:56572/57344 train_time:33765220ms step_avg:596.85ms +step:56573/57344 train_time:33765239ms step_avg:596.84ms +step:56574/57344 train_time:33765489ms step_avg:596.84ms +step:56575/57344 train_time:33766072ms step_avg:596.84ms +grad accum step:14144/14336 +step:56576/57344 train_time:33767475ms step_avg:596.85ms +step:56576/57344 val_loss:5.304440 train_time:33767486ms step_avg:596.85ms +step:56577/57344 train_time:33767498ms step_avg:596.84ms +step:56578/57344 train_time:33767722ms step_avg:596.83ms +step:56579/57344 train_time:33768291ms step_avg:596.83ms +grad accum step:14145/14336 +step:56580/57344 train_time:33769689ms step_avg:596.85ms +step:56581/57344 train_time:33769705ms step_avg:596.84ms +step:56582/57344 train_time:33769955ms step_avg:596.83ms +step:56583/57344 train_time:33770548ms step_avg:596.83ms +grad accum step:14146/14336 +step:56584/57344 train_time:33772034ms step_avg:596.85ms +step:56585/57344 train_time:33772049ms step_avg:596.84ms +step:56586/57344 train_time:33772308ms step_avg:596.83ms +step:56587/57344 train_time:33772906ms step_avg:596.83ms +grad accum step:14147/14336 +step:56588/57344 train_time:33774341ms step_avg:596.85ms +step:56589/57344 train_time:33774356ms step_avg:596.84ms +step:56590/57344 train_time:33774611ms step_avg:596.83ms +step:56591/57344 train_time:33775187ms step_avg:596.83ms +grad accum step:14148/14336 +step:56592/57344 train_time:33776483ms step_avg:596.84ms +step:56593/57344 train_time:33776500ms step_avg:596.83ms +step:56594/57344 train_time:33776748ms step_avg:596.83ms +step:56595/57344 train_time:33777320ms step_avg:596.83ms +grad accum step:14149/14336 +step:56596/57344 train_time:33778662ms step_avg:596.84ms +step:56597/57344 train_time:33778679ms step_avg:596.83ms +step:56598/57344 train_time:33778945ms step_avg:596.82ms +step:56599/57344 train_time:33779557ms step_avg:596.82ms +grad accum step:14150/14336 +step:56600/57344 train_time:33780954ms step_avg:596.84ms +step:56601/57344 train_time:33780988ms step_avg:596.83ms +step:56602/57344 train_time:33781216ms step_avg:596.82ms +step:56603/57344 train_time:33782111ms step_avg:596.83ms +grad accum step:14151/14336 +step:56604/57344 train_time:33783106ms step_avg:596.83ms +step:56605/57344 train_time:33783123ms step_avg:596.82ms +step:56606/57344 train_time:33783368ms step_avg:596.82ms +step:56607/57344 train_time:33783940ms step_avg:596.82ms +grad accum step:14152/14336 +step:56608/57344 train_time:33785300ms step_avg:596.83ms +step:56609/57344 train_time:33785314ms step_avg:596.82ms +step:56610/57344 train_time:33785580ms step_avg:596.81ms +step:56611/57344 train_time:33786178ms step_avg:596.81ms +grad accum step:14153/14336 +step:56612/57344 train_time:33787545ms step_avg:596.83ms +step:56613/57344 train_time:33787560ms step_avg:596.82ms +step:56614/57344 train_time:33787808ms step_avg:596.81ms +step:56615/57344 train_time:33788364ms step_avg:596.81ms +grad accum step:14154/14336 +step:56616/57344 train_time:33789737ms step_avg:596.82ms +step:56617/57344 train_time:33789754ms step_avg:596.81ms +step:56618/57344 train_time:33790019ms step_avg:596.81ms +step:56619/57344 train_time:33790640ms step_avg:596.81ms +grad accum step:14155/14336 +step:56620/57344 train_time:33792090ms step_avg:596.82ms +step:56621/57344 train_time:33792108ms step_avg:596.81ms +step:56622/57344 train_time:33792348ms step_avg:596.81ms +step:56623/57344 train_time:33792922ms step_avg:596.81ms +grad accum step:14156/14336 +step:56624/57344 train_time:33794305ms step_avg:596.82ms +step:56625/57344 train_time:33794322ms step_avg:596.81ms +step:56626/57344 train_time:33794567ms step_avg:596.80ms +step:56627/57344 train_time:33795127ms step_avg:596.80ms +grad accum step:14157/14336 +step:56628/57344 train_time:33796541ms step_avg:596.82ms +step:56629/57344 train_time:33796560ms step_avg:596.81ms +step:56630/57344 train_time:33796805ms step_avg:596.80ms +step:56631/57344 train_time:33797384ms step_avg:596.80ms +grad accum step:14158/14336 +step:56632/57344 train_time:33798738ms step_avg:596.81ms +step:56633/57344 train_time:33798754ms step_avg:596.80ms +step:56634/57344 train_time:33799008ms step_avg:596.80ms +step:56635/57344 train_time:33799592ms step_avg:596.80ms +grad accum step:14159/14336 +step:56636/57344 train_time:33800948ms step_avg:596.81ms +step:56637/57344 train_time:33800965ms step_avg:596.80ms +step:56638/57344 train_time:33801224ms step_avg:596.79ms +step:56639/57344 train_time:33801805ms step_avg:596.79ms +grad accum step:14160/14336 +step:56640/57344 train_time:33803152ms step_avg:596.81ms +step:56640/57344 val_loss:5.304251 train_time:33803165ms step_avg:596.81ms +step:56641/57344 train_time:33803176ms step_avg:596.80ms +step:56642/57344 train_time:33803397ms step_avg:596.79ms +step:56643/57344 train_time:33803974ms step_avg:596.79ms +grad accum step:14161/14336 +step:56644/57344 train_time:33805412ms step_avg:596.80ms +step:56645/57344 train_time:33805429ms step_avg:596.79ms +step:56646/57344 train_time:33805686ms step_avg:596.79ms +step:56647/57344 train_time:33806272ms step_avg:596.79ms +grad accum step:14162/14336 +step:56648/57344 train_time:33807631ms step_avg:596.80ms +step:56649/57344 train_time:33807647ms step_avg:596.79ms +step:56650/57344 train_time:33807932ms step_avg:596.79ms +step:56651/57344 train_time:33808605ms step_avg:596.79ms +grad accum step:14163/14336 +step:56652/57344 train_time:33810024ms step_avg:596.80ms +step:56653/57344 train_time:33810036ms step_avg:596.79ms +step:56654/57344 train_time:33810297ms step_avg:596.79ms +step:56655/57344 train_time:33810908ms step_avg:596.79ms +grad accum step:14164/14336 +step:56656/57344 train_time:33812240ms step_avg:596.80ms +step:56657/57344 train_time:33812252ms step_avg:596.79ms +step:56658/57344 train_time:33812503ms step_avg:596.78ms +step:56659/57344 train_time:33813071ms step_avg:596.78ms +grad accum step:14165/14336 +step:56660/57344 train_time:33814458ms step_avg:596.80ms +step:56661/57344 train_time:33814521ms step_avg:596.79ms +step:56662/57344 train_time:33814763ms step_avg:596.78ms +step:56663/57344 train_time:33815389ms step_avg:596.78ms +grad accum step:14166/14336 +step:56664/57344 train_time:33816803ms step_avg:596.80ms +step:56665/57344 train_time:33816819ms step_avg:596.78ms +step:56666/57344 train_time:33817070ms step_avg:596.78ms +step:56667/57344 train_time:33817637ms step_avg:596.78ms +grad accum step:14167/14336 +step:56668/57344 train_time:33819116ms step_avg:596.79ms +step:56669/57344 train_time:33819132ms step_avg:596.78ms +step:56670/57344 train_time:33819367ms step_avg:596.78ms +step:56671/57344 train_time:33819971ms step_avg:596.78ms +grad accum step:14168/14336 +step:56672/57344 train_time:33821523ms step_avg:596.79ms +step:56673/57344 train_time:33821539ms step_avg:596.78ms +step:56674/57344 train_time:33821781ms step_avg:596.78ms +step:56675/57344 train_time:33822342ms step_avg:596.78ms +grad accum step:14169/14336 +step:56676/57344 train_time:33823729ms step_avg:596.79ms +step:56677/57344 train_time:33823747ms step_avg:596.78ms +step:56678/57344 train_time:33823983ms step_avg:596.77ms +step:56679/57344 train_time:33824585ms step_avg:596.77ms +grad accum step:14170/14336 +step:56680/57344 train_time:33825976ms step_avg:596.79ms +step:56681/57344 train_time:33825992ms step_avg:596.78ms +step:56682/57344 train_time:33826239ms step_avg:596.77ms +step:56683/57344 train_time:33826804ms step_avg:596.77ms +grad accum step:14171/14336 +step:56684/57344 train_time:33828245ms step_avg:596.79ms +step:56685/57344 train_time:33828260ms step_avg:596.78ms +step:56686/57344 train_time:33828527ms step_avg:596.77ms +step:56687/57344 train_time:33829136ms step_avg:596.77ms +grad accum step:14172/14336 +step:56688/57344 train_time:33830553ms step_avg:596.79ms +step:56689/57344 train_time:33830574ms step_avg:596.77ms +step:56690/57344 train_time:33830795ms step_avg:596.77ms +step:56691/57344 train_time:33831641ms step_avg:596.77ms +grad accum step:14173/14336 +step:56692/57344 train_time:33832795ms step_avg:596.78ms +step:56693/57344 train_time:33832812ms step_avg:596.77ms +step:56694/57344 train_time:33833047ms step_avg:596.77ms +step:56695/57344 train_time:33833617ms step_avg:596.77ms +grad accum step:14174/14336 +step:56696/57344 train_time:33834982ms step_avg:596.78ms +step:56697/57344 train_time:33834998ms step_avg:596.77ms +step:56698/57344 train_time:33835246ms step_avg:596.76ms +step:56699/57344 train_time:33835837ms step_avg:596.76ms +grad accum step:14175/14336 +step:56700/57344 train_time:33837274ms step_avg:596.78ms +step:56701/57344 train_time:33837291ms step_avg:596.77ms +step:56702/57344 train_time:33837548ms step_avg:596.76ms +step:56703/57344 train_time:33838135ms step_avg:596.76ms +grad accum step:14176/14336 +step:56704/57344 train_time:33839508ms step_avg:596.77ms +step:56704/57344 val_loss:5.303982 train_time:33839509ms step_avg:596.77ms +step:56705/57344 train_time:33839521ms step_avg:596.76ms +step:56706/57344 train_time:33839757ms step_avg:596.76ms +step:56707/57344 train_time:33840360ms step_avg:596.76ms +grad accum step:14177/14336 +step:56708/57344 train_time:33841729ms step_avg:596.77ms +step:56709/57344 train_time:33841744ms step_avg:596.76ms +step:56710/57344 train_time:33842029ms step_avg:596.76ms +step:56711/57344 train_time:33842697ms step_avg:596.76ms +grad accum step:14178/14336 +step:56712/57344 train_time:33844103ms step_avg:596.77ms +step:56713/57344 train_time:33844119ms step_avg:596.76ms +step:56714/57344 train_time:33844356ms step_avg:596.75ms +step:56715/57344 train_time:33844959ms step_avg:596.75ms +grad accum step:14179/14336 +step:56716/57344 train_time:33846563ms step_avg:596.77ms +step:56717/57344 train_time:33846587ms step_avg:596.76ms +step:56718/57344 train_time:33846829ms step_avg:596.76ms +step:56719/57344 train_time:33847460ms step_avg:596.76ms +grad accum step:14180/14336 +step:56720/57344 train_time:33848861ms step_avg:596.77ms +step:56721/57344 train_time:33848893ms step_avg:596.76ms +step:56722/57344 train_time:33849137ms step_avg:596.76ms +step:56723/57344 train_time:33849752ms step_avg:596.76ms +grad accum step:14181/14336 +step:56724/57344 train_time:33851112ms step_avg:596.77ms +step:56725/57344 train_time:33851127ms step_avg:596.76ms +step:56726/57344 train_time:33851382ms step_avg:596.75ms +step:56727/57344 train_time:33851971ms step_avg:596.75ms +grad accum step:14182/14336 +step:56728/57344 train_time:33853366ms step_avg:596.77ms +step:56729/57344 train_time:33853380ms step_avg:596.76ms +step:56730/57344 train_time:33853619ms step_avg:596.75ms +step:56731/57344 train_time:33854186ms step_avg:596.75ms +grad accum step:14183/14336 +step:56732/57344 train_time:33855596ms step_avg:596.76ms +step:56733/57344 train_time:33855611ms step_avg:596.75ms +step:56734/57344 train_time:33855858ms step_avg:596.75ms +step:56735/57344 train_time:33856414ms step_avg:596.75ms +grad accum step:14184/14336 +step:56736/57344 train_time:33858021ms step_avg:596.76ms +step:56737/57344 train_time:33858046ms step_avg:596.75ms +step:56738/57344 train_time:33858263ms step_avg:596.75ms +step:56739/57344 train_time:33858841ms step_avg:596.75ms +grad accum step:14185/14336 +step:56740/57344 train_time:33860217ms step_avg:596.76ms +step:56741/57344 train_time:33860235ms step_avg:596.75ms +step:56742/57344 train_time:33860497ms step_avg:596.74ms +step:56743/57344 train_time:33861118ms step_avg:596.75ms +grad accum step:14186/14336 +step:56744/57344 train_time:33862999ms step_avg:596.77ms +step:56745/57344 train_time:33863013ms step_avg:596.76ms +step:56746/57344 train_time:33863237ms step_avg:596.75ms +step:56747/57344 train_time:33863804ms step_avg:596.75ms +grad accum step:14187/14336 +step:56748/57344 train_time:33865201ms step_avg:596.76ms +step:56749/57344 train_time:33865219ms step_avg:596.75ms +step:56750/57344 train_time:33865467ms step_avg:596.75ms +step:56751/57344 train_time:33866081ms step_avg:596.75ms +grad accum step:14188/14336 +step:56752/57344 train_time:33868047ms step_avg:596.77ms +step:56753/57344 train_time:33868067ms step_avg:596.76ms +step:56754/57344 train_time:33868292ms step_avg:596.76ms +step:56755/57344 train_time:33868882ms step_avg:596.76ms +grad accum step:14189/14336 +step:56756/57344 train_time:33870295ms step_avg:596.77ms +step:56757/57344 train_time:33870312ms step_avg:596.76ms +step:56758/57344 train_time:33870564ms step_avg:596.75ms +step:56759/57344 train_time:33871125ms step_avg:596.75ms +grad accum step:14190/14336 +step:56760/57344 train_time:33872533ms step_avg:596.77ms +step:56761/57344 train_time:33872555ms step_avg:596.76ms +step:56762/57344 train_time:33872779ms step_avg:596.75ms +step:56763/57344 train_time:33873374ms step_avg:596.75ms +grad accum step:14191/14336 +step:56764/57344 train_time:33874873ms step_avg:596.77ms +step:56765/57344 train_time:33874892ms step_avg:596.76ms +step:56766/57344 train_time:33875115ms step_avg:596.75ms +step:56767/57344 train_time:33875679ms step_avg:596.75ms +grad accum step:14192/14336 +step:56768/57344 train_time:33877156ms step_avg:596.77ms +step:56768/57344 val_loss:5.303730 train_time:33877159ms step_avg:596.77ms +step:56769/57344 train_time:33877171ms step_avg:596.75ms +step:56770/57344 train_time:33877397ms step_avg:596.75ms +step:56771/57344 train_time:33877972ms step_avg:596.75ms +grad accum step:14193/14336 +step:56772/57344 train_time:33879432ms step_avg:596.76ms +step:56773/57344 train_time:33879466ms step_avg:596.75ms +step:56774/57344 train_time:33879688ms step_avg:596.75ms +step:56775/57344 train_time:33880289ms step_avg:596.75ms +grad accum step:14194/14336 +step:56776/57344 train_time:33881756ms step_avg:596.76ms +step:56777/57344 train_time:33881773ms step_avg:596.75ms +step:56778/57344 train_time:33882015ms step_avg:596.75ms +step:56779/57344 train_time:33882575ms step_avg:596.74ms +grad accum step:14195/14336 +step:56780/57344 train_time:33883911ms step_avg:596.76ms +step:56781/57344 train_time:33883929ms step_avg:596.75ms +step:56782/57344 train_time:33884180ms step_avg:596.74ms +step:56783/57344 train_time:33884756ms step_avg:596.74ms +grad accum step:14196/14336 +step:56784/57344 train_time:33886286ms step_avg:596.76ms +step:56785/57344 train_time:33886304ms step_avg:596.75ms +step:56786/57344 train_time:33886536ms step_avg:596.74ms +step:56787/57344 train_time:33887122ms step_avg:596.74ms +grad accum step:14197/14336 +step:56788/57344 train_time:33888682ms step_avg:596.76ms +step:56789/57344 train_time:33888697ms step_avg:596.75ms +step:56790/57344 train_time:33888921ms step_avg:596.74ms +step:56791/57344 train_time:33889514ms step_avg:596.74ms +grad accum step:14198/14336 +step:56792/57344 train_time:33890922ms step_avg:596.76ms +step:56793/57344 train_time:33890936ms step_avg:596.74ms +step:56794/57344 train_time:33891179ms step_avg:596.74ms +step:56795/57344 train_time:33891734ms step_avg:596.74ms +grad accum step:14199/14336 +step:56796/57344 train_time:33893163ms step_avg:596.75ms +step:56797/57344 train_time:33893186ms step_avg:596.74ms +step:56798/57344 train_time:33893405ms step_avg:596.74ms +step:56799/57344 train_time:33893966ms step_avg:596.74ms +grad accum step:14200/14336 +step:56800/57344 train_time:33895324ms step_avg:596.75ms +step:56801/57344 train_time:33895340ms step_avg:596.74ms +step:56802/57344 train_time:33895591ms step_avg:596.73ms +step:56803/57344 train_time:33896182ms step_avg:596.73ms +grad accum step:14201/14336 +step:56804/57344 train_time:33897772ms step_avg:596.75ms +step:56805/57344 train_time:33897788ms step_avg:596.74ms +step:56806/57344 train_time:33898038ms step_avg:596.73ms +step:56807/57344 train_time:33898611ms step_avg:596.73ms +grad accum step:14202/14336 +step:56808/57344 train_time:33899979ms step_avg:596.75ms +step:56809/57344 train_time:33899999ms step_avg:596.74ms +step:56810/57344 train_time:33900252ms step_avg:596.73ms +step:56811/57344 train_time:33900847ms step_avg:596.73ms +grad accum step:14203/14336 +step:56812/57344 train_time:33902210ms step_avg:596.74ms +step:56813/57344 train_time:33902234ms step_avg:596.73ms +step:56814/57344 train_time:33902469ms step_avg:596.73ms +step:56815/57344 train_time:33903056ms step_avg:596.73ms +grad accum step:14204/14336 +step:56816/57344 train_time:33904433ms step_avg:596.74ms +step:56817/57344 train_time:33904450ms step_avg:596.73ms +step:56818/57344 train_time:33904704ms step_avg:596.72ms +step:56819/57344 train_time:33905310ms step_avg:596.72ms +grad accum step:14205/14336 +step:56820/57344 train_time:33906771ms step_avg:596.74ms +step:56821/57344 train_time:33906790ms step_avg:596.73ms +step:56822/57344 train_time:33907023ms step_avg:596.72ms +step:56823/57344 train_time:33907607ms step_avg:596.72ms +grad accum step:14206/14336 +step:56824/57344 train_time:33908942ms step_avg:596.74ms +step:56825/57344 train_time:33908957ms step_avg:596.73ms +step:56826/57344 train_time:33909239ms step_avg:596.72ms +step:56827/57344 train_time:33909886ms step_avg:596.72ms +grad accum step:14207/14336 +step:56828/57344 train_time:33911234ms step_avg:596.73ms +step:56829/57344 train_time:33911256ms step_avg:596.72ms +step:56830/57344 train_time:33911490ms step_avg:596.72ms +step:56831/57344 train_time:33912060ms step_avg:596.72ms +grad accum step:14208/14336 +step:56832/57344 train_time:33913512ms step_avg:596.73ms +step:56832/57344 val_loss:5.303309 train_time:33913515ms step_avg:596.73ms +step:56833/57344 train_time:33913527ms step_avg:596.72ms +step:56834/57344 train_time:33913778ms step_avg:596.72ms +step:56835/57344 train_time:33914408ms step_avg:596.72ms +grad accum step:14209/14336 +step:56836/57344 train_time:33915850ms step_avg:596.73ms +step:56837/57344 train_time:33915866ms step_avg:596.72ms +step:56838/57344 train_time:33916111ms step_avg:596.72ms +step:56839/57344 train_time:33916649ms step_avg:596.71ms +grad accum step:14210/14336 +step:56840/57344 train_time:33917980ms step_avg:596.73ms +step:56841/57344 train_time:33917995ms step_avg:596.72ms +step:56842/57344 train_time:33918243ms step_avg:596.71ms +step:56843/57344 train_time:33918825ms step_avg:596.71ms +grad accum step:14211/14336 +step:56844/57344 train_time:33920219ms step_avg:596.72ms +step:56845/57344 train_time:33920244ms step_avg:596.71ms +step:56846/57344 train_time:33920473ms step_avg:596.71ms +step:56847/57344 train_time:33921042ms step_avg:596.71ms +grad accum step:14212/14336 +step:56848/57344 train_time:33922440ms step_avg:596.72ms +step:56849/57344 train_time:33922453ms step_avg:596.71ms +step:56850/57344 train_time:33922710ms step_avg:596.71ms +step:56851/57344 train_time:33923297ms step_avg:596.71ms +grad accum step:14213/14336 +step:56852/57344 train_time:33924756ms step_avg:596.72ms +step:56853/57344 train_time:33924788ms step_avg:596.71ms +step:56854/57344 train_time:33925029ms step_avg:596.70ms +step:56855/57344 train_time:33925654ms step_avg:596.70ms +grad accum step:14214/14336 +step:56856/57344 train_time:33927316ms step_avg:596.72ms +step:56857/57344 train_time:33927342ms step_avg:596.71ms +step:56858/57344 train_time:33927566ms step_avg:596.71ms +step:56859/57344 train_time:33928135ms step_avg:596.71ms +grad accum step:14215/14336 +step:56860/57344 train_time:33929571ms step_avg:596.72ms +step:56861/57344 train_time:33929591ms step_avg:596.71ms +step:56862/57344 train_time:33929831ms step_avg:596.70ms +step:56863/57344 train_time:33930392ms step_avg:596.70ms +grad accum step:14216/14336 +step:56864/57344 train_time:33931771ms step_avg:596.72ms +step:56865/57344 train_time:33931786ms step_avg:596.71ms +step:56866/57344 train_time:33932047ms step_avg:596.70ms +step:56867/57344 train_time:33932639ms step_avg:596.70ms +grad accum step:14217/14336 +step:56868/57344 train_time:33934176ms step_avg:596.72ms +step:56869/57344 train_time:33934200ms step_avg:596.71ms +step:56870/57344 train_time:33934424ms step_avg:596.70ms +step:56871/57344 train_time:33935015ms step_avg:596.70ms +grad accum step:14218/14336 +step:56872/57344 train_time:33936577ms step_avg:596.72ms +step:56873/57344 train_time:33936807ms step_avg:596.71ms +step:56874/57344 train_time:33937020ms step_avg:596.71ms +step:56875/57344 train_time:33937585ms step_avg:596.70ms +grad accum step:14219/14336 +step:56876/57344 train_time:33938929ms step_avg:596.72ms +step:56877/57344 train_time:33938955ms step_avg:596.71ms +step:56878/57344 train_time:33939186ms step_avg:596.70ms +step:56879/57344 train_time:33939752ms step_avg:596.70ms +grad accum step:14220/14336 +step:56880/57344 train_time:33941162ms step_avg:596.72ms +step:56881/57344 train_time:33941179ms step_avg:596.71ms +step:56882/57344 train_time:33941429ms step_avg:596.70ms +step:56883/57344 train_time:33942033ms step_avg:596.70ms +grad accum step:14221/14336 +step:56884/57344 train_time:33943498ms step_avg:596.71ms +step:56885/57344 train_time:33943569ms step_avg:596.71ms +step:56886/57344 train_time:33943814ms step_avg:596.70ms +step:56887/57344 train_time:33944458ms step_avg:596.70ms +grad accum step:14222/14336 +step:56888/57344 train_time:33946135ms step_avg:596.72ms +step:56889/57344 train_time:33946153ms step_avg:596.71ms +step:56890/57344 train_time:33946382ms step_avg:596.70ms +step:56891/57344 train_time:33946985ms step_avg:596.70ms +grad accum step:14223/14336 +step:56892/57344 train_time:33948374ms step_avg:596.72ms +step:56893/57344 train_time:33948391ms step_avg:596.71ms +step:56894/57344 train_time:33948652ms step_avg:596.70ms +step:56895/57344 train_time:33949251ms step_avg:596.70ms +grad accum step:14224/14336 +step:56896/57344 train_time:33950694ms step_avg:596.71ms +step:56896/57344 val_loss:5.302948 train_time:33950699ms step_avg:596.72ms +step:56897/57344 train_time:33950711ms step_avg:596.70ms +step:56898/57344 train_time:33950936ms step_avg:596.70ms +step:56899/57344 train_time:33951504ms step_avg:596.70ms +grad accum step:14225/14336 +step:56900/57344 train_time:33952898ms step_avg:596.71ms +step:56901/57344 train_time:33952912ms step_avg:596.70ms +step:56902/57344 train_time:33953165ms step_avg:596.70ms +step:56903/57344 train_time:33953742ms step_avg:596.70ms +grad accum step:14226/14336 +step:56904/57344 train_time:33955137ms step_avg:596.71ms +step:56905/57344 train_time:33955153ms step_avg:596.70ms +step:56906/57344 train_time:33955401ms step_avg:596.69ms +step:56907/57344 train_time:33955993ms step_avg:596.69ms +grad accum step:14227/14336 +step:56908/57344 train_time:33957400ms step_avg:596.71ms +step:56909/57344 train_time:33957651ms step_avg:596.70ms +step:56910/57344 train_time:33957992ms step_avg:596.70ms +step:56911/57344 train_time:33958515ms step_avg:596.70ms +grad accum step:14228/14336 +step:56912/57344 train_time:33960053ms step_avg:596.71ms +step:56913/57344 train_time:33960289ms step_avg:596.71ms +step:56914/57344 train_time:33960517ms step_avg:596.70ms +step:56915/57344 train_time:33961097ms step_avg:596.70ms +grad accum step:14229/14336 +step:56916/57344 train_time:33962507ms step_avg:596.71ms +step:56917/57344 train_time:33962541ms step_avg:596.70ms +step:56918/57344 train_time:33962764ms step_avg:596.70ms +step:56919/57344 train_time:33963342ms step_avg:596.70ms +grad accum step:14230/14336 +step:56920/57344 train_time:33964731ms step_avg:596.71ms +step:56921/57344 train_time:33964747ms step_avg:596.70ms +step:56922/57344 train_time:33964991ms step_avg:596.69ms +step:56923/57344 train_time:33965578ms step_avg:596.69ms +grad accum step:14231/14336 +step:56924/57344 train_time:33967037ms step_avg:596.71ms +step:56925/57344 train_time:33967059ms step_avg:596.70ms +step:56926/57344 train_time:33967307ms step_avg:596.69ms +step:56927/57344 train_time:33967904ms step_avg:596.69ms +grad accum step:14232/14336 +step:56928/57344 train_time:33969275ms step_avg:596.71ms +step:56929/57344 train_time:33969292ms step_avg:596.70ms +step:56930/57344 train_time:33969545ms step_avg:596.69ms +step:56931/57344 train_time:33970117ms step_avg:596.69ms +grad accum step:14233/14336 +step:56932/57344 train_time:33971518ms step_avg:596.70ms +step:56933/57344 train_time:33971533ms step_avg:596.69ms +step:56934/57344 train_time:33971780ms step_avg:596.69ms +step:56935/57344 train_time:33972355ms step_avg:596.69ms +grad accum step:14234/14336 +step:56936/57344 train_time:33973775ms step_avg:596.70ms +step:56937/57344 train_time:33973792ms step_avg:596.69ms +step:56938/57344 train_time:33974030ms step_avg:596.68ms +step:56939/57344 train_time:33974608ms step_avg:596.68ms +grad accum step:14235/14336 +step:56940/57344 train_time:33976123ms step_avg:596.70ms +step:56941/57344 train_time:33976138ms step_avg:596.69ms +step:56942/57344 train_time:33976413ms step_avg:596.68ms +step:56943/57344 train_time:33977050ms step_avg:596.69ms +grad accum step:14236/14336 +step:56944/57344 train_time:33978656ms step_avg:596.70ms +step:56945/57344 train_time:33978673ms step_avg:596.69ms +step:56946/57344 train_time:33978890ms step_avg:596.69ms +step:56947/57344 train_time:33979449ms step_avg:596.69ms +grad accum step:14237/14336 +step:56948/57344 train_time:33980811ms step_avg:596.70ms +step:56949/57344 train_time:33980826ms step_avg:596.69ms +step:56950/57344 train_time:33981078ms step_avg:596.68ms +step:56951/57344 train_time:33981651ms step_avg:596.68ms +grad accum step:14238/14336 +step:56952/57344 train_time:33983060ms step_avg:596.70ms +step:56953/57344 train_time:33983082ms step_avg:596.69ms +step:56954/57344 train_time:33983333ms step_avg:596.68ms +step:56955/57344 train_time:33983938ms step_avg:596.68ms +grad accum step:14239/14336 +step:56956/57344 train_time:33985439ms step_avg:596.70ms +step:56957/57344 train_time:33985453ms step_avg:596.69ms +step:56958/57344 train_time:33985711ms step_avg:596.68ms +step:56959/57344 train_time:33986297ms step_avg:596.68ms +grad accum step:14240/14336 +step:56960/57344 train_time:33987892ms step_avg:596.70ms +step:56960/57344 val_loss:5.302761 train_time:33987944ms step_avg:596.70ms +step:56961/57344 train_time:33987956ms step_avg:596.69ms +step:56962/57344 train_time:33988174ms step_avg:596.68ms +step:56963/57344 train_time:33988751ms step_avg:596.68ms +grad accum step:14241/14336 +step:56964/57344 train_time:33990324ms step_avg:596.70ms +step:56965/57344 train_time:33990338ms step_avg:596.69ms +step:56966/57344 train_time:33990552ms step_avg:596.68ms +step:56967/57344 train_time:33991129ms step_avg:596.68ms +grad accum step:14242/14336 +step:56968/57344 train_time:33992585ms step_avg:596.70ms +step:56969/57344 train_time:33992603ms step_avg:596.69ms +step:56970/57344 train_time:33992847ms step_avg:596.68ms +step:56971/57344 train_time:33993454ms step_avg:596.68ms +grad accum step:14243/14336 +step:56972/57344 train_time:33994820ms step_avg:596.69ms +step:56973/57344 train_time:33994841ms step_avg:596.68ms +step:56974/57344 train_time:33995086ms step_avg:596.68ms +step:56975/57344 train_time:33995685ms step_avg:596.68ms +grad accum step:14244/14336 +step:56976/57344 train_time:33997123ms step_avg:596.69ms +step:56977/57344 train_time:33997141ms step_avg:596.68ms +step:56978/57344 train_time:33997382ms step_avg:596.68ms +step:56979/57344 train_time:33997952ms step_avg:596.68ms +grad accum step:14245/14336 +step:56980/57344 train_time:33999353ms step_avg:596.69ms +step:56981/57344 train_time:33999371ms step_avg:596.68ms +step:56982/57344 train_time:33999620ms step_avg:596.67ms +step:56983/57344 train_time:34000202ms step_avg:596.67ms +grad accum step:14246/14336 +step:56984/57344 train_time:34001597ms step_avg:596.69ms +step:56985/57344 train_time:34001612ms step_avg:596.68ms +step:56986/57344 train_time:34001864ms step_avg:596.67ms +step:56987/57344 train_time:34002450ms step_avg:596.67ms +grad accum step:14247/14336 +step:56988/57344 train_time:34003863ms step_avg:596.68ms +step:56989/57344 train_time:34003884ms step_avg:596.67ms +step:56990/57344 train_time:34004132ms step_avg:596.67ms +step:56991/57344 train_time:34004730ms step_avg:596.67ms +grad accum step:14248/14336 +step:56992/57344 train_time:34006202ms step_avg:596.68ms +step:56993/57344 train_time:34006226ms step_avg:596.67ms +step:56994/57344 train_time:34006479ms step_avg:596.67ms +step:56995/57344 train_time:34007081ms step_avg:596.67ms +grad accum step:14249/14336 +step:56996/57344 train_time:34008386ms step_avg:596.68ms +step:56997/57344 train_time:34008403ms step_avg:596.67ms +step:56998/57344 train_time:34008645ms step_avg:596.66ms +step:56999/57344 train_time:34009197ms step_avg:596.66ms +grad accum step:14250/14336 +step:57000/57344 train_time:34010588ms step_avg:596.68ms +step:57001/57344 train_time:34010603ms step_avg:596.67ms +step:57002/57344 train_time:34010856ms step_avg:596.66ms +step:57003/57344 train_time:34011439ms step_avg:596.66ms +grad accum step:14251/14336 +step:57004/57344 train_time:34012863ms step_avg:596.68ms +step:57005/57344 train_time:34012884ms step_avg:596.66ms +step:57006/57344 train_time:34013128ms step_avg:596.66ms +step:57007/57344 train_time:34013746ms step_avg:596.66ms +grad accum step:14252/14336 +step:57008/57344 train_time:34015217ms step_avg:596.67ms +step:57009/57344 train_time:34015235ms step_avg:596.66ms +step:57010/57344 train_time:34015480ms step_avg:596.66ms +step:57011/57344 train_time:34016091ms step_avg:596.66ms +grad accum step:14253/14336 +step:57012/57344 train_time:34017506ms step_avg:596.67ms +step:57013/57344 train_time:34017524ms step_avg:596.66ms +step:57014/57344 train_time:34017743ms step_avg:596.66ms +step:57015/57344 train_time:34018307ms step_avg:596.66ms +grad accum step:14254/14336 +step:57016/57344 train_time:34019728ms step_avg:596.67ms +step:57017/57344 train_time:34019743ms step_avg:596.66ms +step:57018/57344 train_time:34019983ms step_avg:596.65ms +step:57019/57344 train_time:34020527ms step_avg:596.65ms +grad accum step:14255/14336 +step:57020/57344 train_time:34021882ms step_avg:596.67ms +step:57021/57344 train_time:34021898ms step_avg:596.66ms +step:57022/57344 train_time:34022155ms step_avg:596.65ms +step:57023/57344 train_time:34022732ms step_avg:596.65ms +grad accum step:14256/14336 +step:57024/57344 train_time:34024069ms step_avg:596.66ms +step:57024/57344 val_loss:5.302407 train_time:34024070ms step_avg:596.66ms +step:57025/57344 train_time:34024082ms step_avg:596.65ms +step:57026/57344 train_time:34024312ms step_avg:596.65ms +step:57027/57344 train_time:34024897ms step_avg:596.65ms +grad accum step:14257/14336 +step:57028/57344 train_time:34026378ms step_avg:596.66ms +step:57029/57344 train_time:34026392ms step_avg:596.65ms +step:57030/57344 train_time:34026635ms step_avg:596.64ms +step:57031/57344 train_time:34027211ms step_avg:596.64ms +grad accum step:14258/14336 +step:57032/57344 train_time:34028584ms step_avg:596.66ms +step:57033/57344 train_time:34028596ms step_avg:596.65ms +step:57034/57344 train_time:34028849ms step_avg:596.64ms +step:57035/57344 train_time:34029436ms step_avg:596.64ms +grad accum step:14259/14336 +step:57036/57344 train_time:34030800ms step_avg:596.65ms +step:57037/57344 train_time:34030816ms step_avg:596.64ms +step:57038/57344 train_time:34031070ms step_avg:596.64ms +step:57039/57344 train_time:34031644ms step_avg:596.64ms +grad accum step:14260/14336 +step:57040/57344 train_time:34033035ms step_avg:596.65ms +step:57041/57344 train_time:34033050ms step_avg:596.64ms +step:57042/57344 train_time:34033307ms step_avg:596.64ms +step:57043/57344 train_time:34033896ms step_avg:596.64ms +grad accum step:14261/14336 +step:57044/57344 train_time:34035319ms step_avg:596.65ms +step:57045/57344 train_time:34035335ms step_avg:596.64ms +step:57046/57344 train_time:34035590ms step_avg:596.63ms +step:57047/57344 train_time:34036177ms step_avg:596.63ms +grad accum step:14262/14336 +step:57048/57344 train_time:34037776ms step_avg:596.65ms +step:57049/57344 train_time:34037791ms step_avg:596.64ms +step:57050/57344 train_time:34038012ms step_avg:596.63ms +step:57051/57344 train_time:34038577ms step_avg:596.63ms +grad accum step:14263/14336 +step:57052/57344 train_time:34040042ms step_avg:596.65ms +step:57053/57344 train_time:34040063ms step_avg:596.64ms +step:57054/57344 train_time:34040301ms step_avg:596.63ms +step:57055/57344 train_time:34040918ms step_avg:596.63ms +grad accum step:14264/14336 +step:57056/57344 train_time:34042535ms step_avg:596.65ms +step:57057/57344 train_time:34042558ms step_avg:596.64ms +step:57058/57344 train_time:34042781ms step_avg:596.63ms +step:57059/57344 train_time:34043379ms step_avg:596.63ms +grad accum step:14265/14336 +step:57060/57344 train_time:34044774ms step_avg:596.65ms +step:57061/57344 train_time:34044818ms step_avg:596.64ms +step:57062/57344 train_time:34045038ms step_avg:596.63ms +step:57063/57344 train_time:34045592ms step_avg:596.63ms +grad accum step:14266/14336 +step:57064/57344 train_time:34046997ms step_avg:596.65ms +step:57065/57344 train_time:34047015ms step_avg:596.64ms +step:57066/57344 train_time:34047267ms step_avg:596.63ms +step:57067/57344 train_time:34047853ms step_avg:596.63ms +grad accum step:14267/14336 +step:57068/57344 train_time:34049238ms step_avg:596.64ms +step:57069/57344 train_time:34049253ms step_avg:596.63ms +step:57070/57344 train_time:34049507ms step_avg:596.63ms +step:57071/57344 train_time:34050094ms step_avg:596.63ms +grad accum step:14268/14336 +step:57072/57344 train_time:34051555ms step_avg:596.64ms +step:57073/57344 train_time:34051571ms step_avg:596.63ms +step:57074/57344 train_time:34051811ms step_avg:596.63ms +step:57075/57344 train_time:34052378ms step_avg:596.63ms +grad accum step:14269/14336 +step:57076/57344 train_time:34053742ms step_avg:596.64ms +step:57077/57344 train_time:34053762ms step_avg:596.63ms +step:57078/57344 train_time:34053996ms step_avg:596.62ms +step:57079/57344 train_time:34054551ms step_avg:596.62ms +grad accum step:14270/14336 +step:57080/57344 train_time:34056102ms step_avg:596.64ms +step:57081/57344 train_time:34056117ms step_avg:596.63ms +step:57082/57344 train_time:34056381ms step_avg:596.62ms +step:57083/57344 train_time:34056995ms step_avg:596.62ms +grad accum step:14271/14336 +step:57084/57344 train_time:34058332ms step_avg:596.64ms +step:57085/57344 train_time:34058350ms step_avg:596.63ms +step:57086/57344 train_time:34058597ms step_avg:596.62ms +step:57087/57344 train_time:34059167ms step_avg:596.62ms +grad accum step:14272/14336 +step:57088/57344 train_time:34060587ms step_avg:596.63ms +step:57088/57344 val_loss:5.302102 train_time:34060595ms step_avg:596.63ms +step:57089/57344 train_time:34060607ms step_avg:596.62ms +step:57090/57344 train_time:34060858ms step_avg:596.62ms +step:57091/57344 train_time:34061475ms step_avg:596.62ms +grad accum step:14273/14336 +step:57092/57344 train_time:34062826ms step_avg:596.63ms +step:57093/57344 train_time:34062865ms step_avg:596.62ms +step:57094/57344 train_time:34063095ms step_avg:596.61ms +step:57095/57344 train_time:34063689ms step_avg:596.61ms +grad accum step:14274/14336 +step:57096/57344 train_time:34065126ms step_avg:596.63ms +step:57097/57344 train_time:34065146ms step_avg:596.62ms +step:57098/57344 train_time:34065398ms step_avg:596.61ms +step:57099/57344 train_time:34065995ms step_avg:596.61ms +grad accum step:14275/14336 +step:57100/57344 train_time:34067533ms step_avg:596.63ms +step:57101/57344 train_time:34067573ms step_avg:596.62ms +step:57102/57344 train_time:34067812ms step_avg:596.61ms +step:57103/57344 train_time:34068450ms step_avg:596.61ms +grad accum step:14276/14336 +step:57104/57344 train_time:34069851ms step_avg:596.63ms +step:57105/57344 train_time:34069867ms step_avg:596.62ms +step:57106/57344 train_time:34070117ms step_avg:596.61ms +step:57107/57344 train_time:34070689ms step_avg:596.61ms +grad accum step:14277/14336 +step:57108/57344 train_time:34072035ms step_avg:596.62ms +step:57109/57344 train_time:34072049ms step_avg:596.61ms +step:57110/57344 train_time:34072308ms step_avg:596.61ms +step:57111/57344 train_time:34072889ms step_avg:596.61ms +grad accum step:14278/14336 +step:57112/57344 train_time:34074432ms step_avg:596.62ms +step:57113/57344 train_time:34074450ms step_avg:596.61ms +step:57114/57344 train_time:34074689ms step_avg:596.61ms +step:57115/57344 train_time:34075267ms step_avg:596.61ms +grad accum step:14279/14336 +step:57116/57344 train_time:34076648ms step_avg:596.62ms +step:57117/57344 train_time:34076663ms step_avg:596.61ms +step:57118/57344 train_time:34076926ms step_avg:596.61ms +step:57119/57344 train_time:34077523ms step_avg:596.61ms +grad accum step:14280/14336 +step:57120/57344 train_time:34078941ms step_avg:596.62ms +step:57121/57344 train_time:34078955ms step_avg:596.61ms +step:57122/57344 train_time:34079229ms step_avg:596.60ms +step:57123/57344 train_time:34079869ms step_avg:596.61ms +grad accum step:14281/14336 +step:57124/57344 train_time:34081339ms step_avg:596.62ms +step:57125/57344 train_time:34081356ms step_avg:596.61ms +step:57126/57344 train_time:34081607ms step_avg:596.60ms +step:57127/57344 train_time:34082183ms step_avg:596.60ms +grad accum step:14282/14336 +step:57128/57344 train_time:34083551ms step_avg:596.62ms +step:57129/57344 train_time:34083568ms step_avg:596.61ms +step:57130/57344 train_time:34083803ms step_avg:596.60ms +step:57131/57344 train_time:34084370ms step_avg:596.60ms +grad accum step:14283/14336 +step:57132/57344 train_time:34085743ms step_avg:596.61ms +step:57133/57344 train_time:34085761ms step_avg:596.60ms +step:57134/57344 train_time:34086008ms step_avg:596.60ms +step:57135/57344 train_time:34086578ms step_avg:596.60ms +grad accum step:14284/14336 +step:57136/57344 train_time:34087953ms step_avg:596.61ms +step:57137/57344 train_time:34087970ms step_avg:596.60ms +step:57138/57344 train_time:34088205ms step_avg:596.59ms +step:57139/57344 train_time:34088807ms step_avg:596.59ms +grad accum step:14285/14336 +step:57140/57344 train_time:34090392ms step_avg:596.61ms +step:57141/57344 train_time:34090409ms step_avg:596.60ms +step:57142/57344 train_time:34090653ms step_avg:596.60ms +step:57143/57344 train_time:34091275ms step_avg:596.60ms +grad accum step:14286/14336 +step:57144/57344 train_time:34092696ms step_avg:596.61ms +step:57145/57344 train_time:34092722ms step_avg:596.60ms +step:57146/57344 train_time:34092962ms step_avg:596.59ms +step:57147/57344 train_time:34093536ms step_avg:596.59ms +grad accum step:14287/14336 +step:57148/57344 train_time:34094970ms step_avg:596.61ms +step:57149/57344 train_time:34094985ms step_avg:596.60ms +step:57150/57344 train_time:34095240ms step_avg:596.59ms +step:57151/57344 train_time:34095838ms step_avg:596.59ms +grad accum step:14288/14336 +step:57152/57344 train_time:34097308ms step_avg:596.61ms +step:57152/57344 val_loss:5.301837 train_time:34097318ms step_avg:596.61ms +step:57153/57344 train_time:34097330ms step_avg:596.60ms +step:57154/57344 train_time:34097559ms step_avg:596.59ms +step:57155/57344 train_time:34098131ms step_avg:596.59ms +grad accum step:14289/14336 +step:57156/57344 train_time:34099551ms step_avg:596.60ms +step:57157/57344 train_time:34099569ms step_avg:596.59ms +step:57158/57344 train_time:34099818ms step_avg:596.59ms +step:57159/57344 train_time:34100400ms step_avg:596.59ms +grad accum step:14290/14336 +step:57160/57344 train_time:34101810ms step_avg:596.60ms +step:57161/57344 train_time:34101829ms step_avg:596.59ms +step:57162/57344 train_time:34102084ms step_avg:596.59ms +step:57163/57344 train_time:34102709ms step_avg:596.59ms +grad accum step:14291/14336 +step:57164/57344 train_time:34104088ms step_avg:596.60ms +step:57165/57344 train_time:34104106ms step_avg:596.59ms +step:57166/57344 train_time:34104364ms step_avg:596.58ms +step:57167/57344 train_time:34104970ms step_avg:596.58ms +grad accum step:14292/14336 +step:57168/57344 train_time:34106475ms step_avg:596.60ms +step:57169/57344 train_time:34106490ms step_avg:596.59ms +step:57170/57344 train_time:34106738ms step_avg:596.58ms +step:57171/57344 train_time:34107323ms step_avg:596.58ms +grad accum step:14293/14336 +step:57172/57344 train_time:34108755ms step_avg:596.60ms +step:57173/57344 train_time:34108775ms step_avg:596.59ms +step:57174/57344 train_time:34109030ms step_avg:596.58ms +step:57175/57344 train_time:34109638ms step_avg:596.58ms +grad accum step:14294/14336 +step:57176/57344 train_time:34111080ms step_avg:596.60ms +step:57177/57344 train_time:34111102ms step_avg:596.59ms +step:57178/57344 train_time:34111326ms step_avg:596.58ms +step:57179/57344 train_time:34111897ms step_avg:596.58ms +grad accum step:14295/14336 +step:57180/57344 train_time:34113267ms step_avg:596.59ms +step:57181/57344 train_time:34113283ms step_avg:596.58ms +step:57182/57344 train_time:34113535ms step_avg:596.58ms +step:57183/57344 train_time:34114133ms step_avg:596.58ms +grad accum step:14296/14336 +step:57184/57344 train_time:34115542ms step_avg:596.59ms +step:57185/57344 train_time:34115557ms step_avg:596.58ms +step:57186/57344 train_time:34115835ms step_avg:596.58ms +step:57187/57344 train_time:34116476ms step_avg:596.58ms +grad accum step:14297/14336 +step:57188/57344 train_time:34117867ms step_avg:596.59ms +step:57189/57344 train_time:34117886ms step_avg:596.58ms +step:57190/57344 train_time:34118135ms step_avg:596.58ms +step:57191/57344 train_time:34118734ms step_avg:596.58ms +grad accum step:14298/14336 +step:57192/57344 train_time:34120160ms step_avg:596.59ms +step:57193/57344 train_time:34120175ms step_avg:596.58ms +step:57194/57344 train_time:34120439ms step_avg:596.57ms +step:57195/57344 train_time:34121043ms step_avg:596.57ms +grad accum step:14299/14336 +step:57196/57344 train_time:34122462ms step_avg:596.59ms +step:57197/57344 train_time:34122485ms step_avg:596.58ms +step:57198/57344 train_time:34122721ms step_avg:596.57ms +step:57199/57344 train_time:34123288ms step_avg:596.57ms +grad accum step:14300/14336 +step:57200/57344 train_time:34124707ms step_avg:596.59ms +step:57201/57344 train_time:34124726ms step_avg:596.58ms +step:57202/57344 train_time:34124986ms step_avg:596.57ms +step:57203/57344 train_time:34125627ms step_avg:596.57ms +grad accum step:14301/14336 +step:57204/57344 train_time:34127005ms step_avg:596.58ms +step:57205/57344 train_time:34127024ms step_avg:596.57ms +step:57206/57344 train_time:34127270ms step_avg:596.57ms +step:57207/57344 train_time:34127838ms step_avg:596.57ms +grad accum step:14302/14336 +step:57208/57344 train_time:34129170ms step_avg:596.58ms +step:57209/57344 train_time:34129185ms step_avg:596.57ms +step:57210/57344 train_time:34129432ms step_avg:596.56ms +step:57211/57344 train_time:34129998ms step_avg:596.56ms +grad accum step:14303/14336 +step:57212/57344 train_time:34131469ms step_avg:596.58ms +step:57213/57344 train_time:34131485ms step_avg:596.57ms +step:57214/57344 train_time:34131769ms step_avg:596.56ms +step:57215/57344 train_time:34132420ms step_avg:596.56ms +grad accum step:14304/14336 +step:57216/57344 train_time:34133858ms step_avg:596.58ms +step:57216/57344 val_loss:5.301659 train_time:34133866ms step_avg:596.58ms +step:57217/57344 train_time:34133878ms step_avg:596.57ms +step:57218/57344 train_time:34134112ms step_avg:596.56ms +step:57219/57344 train_time:34134686ms step_avg:596.56ms +grad accum step:14305/14336 +step:57220/57344 train_time:34136080ms step_avg:596.58ms +step:57221/57344 train_time:34136096ms step_avg:596.57ms +step:57222/57344 train_time:34136346ms step_avg:596.56ms +step:57223/57344 train_time:34136920ms step_avg:596.56ms +grad accum step:14306/14336 +step:57224/57344 train_time:34138358ms step_avg:596.57ms +step:57225/57344 train_time:34138390ms step_avg:596.56ms +step:57226/57344 train_time:34138628ms step_avg:596.56ms +step:57227/57344 train_time:34139225ms step_avg:596.56ms +grad accum step:14307/14336 +step:57228/57344 train_time:34140639ms step_avg:596.57ms +step:57229/57344 train_time:34140657ms step_avg:596.56ms +step:57230/57344 train_time:34140920ms step_avg:596.56ms +step:57231/57344 train_time:34141592ms step_avg:596.56ms +grad accum step:14308/14336 +step:57232/57344 train_time:34143122ms step_avg:596.57ms +step:57233/57344 train_time:34143137ms step_avg:596.56ms +step:57234/57344 train_time:34143401ms step_avg:596.56ms +step:57235/57344 train_time:34143993ms step_avg:596.56ms +grad accum step:14309/14336 +step:57236/57344 train_time:34145360ms step_avg:596.57ms +step:57237/57344 train_time:34145373ms step_avg:596.56ms +step:57238/57344 train_time:34145620ms step_avg:596.56ms +step:57239/57344 train_time:34146185ms step_avg:596.55ms +grad accum step:14310/14336 +step:57240/57344 train_time:34147515ms step_avg:596.57ms +step:57241/57344 train_time:34147554ms step_avg:596.56ms +step:57242/57344 train_time:34147791ms step_avg:596.55ms +step:57243/57344 train_time:34148394ms step_avg:596.55ms +grad accum step:14311/14336 +step:57244/57344 train_time:34149926ms step_avg:596.57ms +step:57245/57344 train_time:34149953ms step_avg:596.56ms +step:57246/57344 train_time:34150170ms step_avg:596.55ms +step:57247/57344 train_time:34150732ms step_avg:596.55ms +grad accum step:14312/14336 +step:57248/57344 train_time:34152219ms step_avg:596.57ms +step:57249/57344 train_time:34152236ms step_avg:596.56ms +step:57250/57344 train_time:34152553ms step_avg:596.55ms +step:57251/57344 train_time:34153230ms step_avg:596.55ms +grad accum step:14313/14336 +step:57252/57344 train_time:34154690ms step_avg:596.57ms +step:57253/57344 train_time:34154736ms step_avg:596.56ms +step:57254/57344 train_time:34154959ms step_avg:596.55ms +step:57255/57344 train_time:34155539ms step_avg:596.55ms +grad accum step:14314/14336 +step:57256/57344 train_time:34156958ms step_avg:596.57ms +step:57257/57344 train_time:34156972ms step_avg:596.56ms +step:57258/57344 train_time:34157227ms step_avg:596.55ms +step:57259/57344 train_time:34157814ms step_avg:596.55ms +grad accum step:14315/14336 +step:57260/57344 train_time:34159229ms step_avg:596.56ms +step:57261/57344 train_time:34159256ms step_avg:596.55ms +step:57262/57344 train_time:34159481ms step_avg:596.55ms +step:57263/57344 train_time:34160048ms step_avg:596.55ms +grad accum step:14316/14336 +step:57264/57344 train_time:34161533ms step_avg:596.56ms +step:57265/57344 train_time:34161554ms step_avg:596.55ms +step:57266/57344 train_time:34161774ms step_avg:596.55ms +step:57267/57344 train_time:34162360ms step_avg:596.55ms +grad accum step:14317/14336 +step:57268/57344 train_time:34163798ms step_avg:596.56ms +step:57269/57344 train_time:34163816ms step_avg:596.55ms +step:57270/57344 train_time:34164073ms step_avg:596.54ms +step:57271/57344 train_time:34164684ms step_avg:596.54ms +grad accum step:14318/14336 +step:57272/57344 train_time:34166125ms step_avg:596.56ms +step:57273/57344 train_time:34166147ms step_avg:596.55ms +step:57274/57344 train_time:34166392ms step_avg:596.54ms +step:57275/57344 train_time:34167005ms step_avg:596.54ms +grad accum step:14319/14336 +step:57276/57344 train_time:34168450ms step_avg:596.56ms +step:57277/57344 train_time:34168469ms step_avg:596.55ms +step:57278/57344 train_time:34168728ms step_avg:596.54ms +step:57279/57344 train_time:34169345ms step_avg:596.54ms +grad accum step:14320/14336 +step:57280/57344 train_time:34170853ms step_avg:596.56ms +step:57280/57344 val_loss:5.301528 train_time:34170859ms step_avg:596.56ms +step:57281/57344 train_time:34170871ms step_avg:596.55ms +step:57282/57344 train_time:34171100ms step_avg:596.54ms +step:57283/57344 train_time:34171674ms step_avg:596.54ms +grad accum step:14321/14336 +step:57284/57344 train_time:34173040ms step_avg:596.55ms +step:57285/57344 train_time:34173057ms step_avg:596.54ms +step:57286/57344 train_time:34173298ms step_avg:596.54ms +step:57287/57344 train_time:34173889ms step_avg:596.54ms +grad accum step:14322/14336 +step:57288/57344 train_time:34175405ms step_avg:596.55ms +step:57289/57344 train_time:34175424ms step_avg:596.54ms +step:57290/57344 train_time:34175647ms step_avg:596.54ms +step:57291/57344 train_time:34176213ms step_avg:596.54ms +grad accum step:14323/14336 +step:57292/57344 train_time:34177620ms step_avg:596.55ms +step:57293/57344 train_time:34177632ms step_avg:596.54ms +step:57294/57344 train_time:34177877ms step_avg:596.54ms +step:57295/57344 train_time:34178426ms step_avg:596.53ms +grad accum step:14324/14336 +step:57296/57344 train_time:34179830ms step_avg:596.55ms +step:57297/57344 train_time:34179846ms step_avg:596.54ms +step:57298/57344 train_time:34180121ms step_avg:596.53ms +step:57299/57344 train_time:34180754ms step_avg:596.53ms +grad accum step:14325/14336 +step:57300/57344 train_time:34182152ms step_avg:596.55ms +step:57301/57344 train_time:34182169ms step_avg:596.54ms +step:57302/57344 train_time:34182402ms step_avg:596.53ms +step:57303/57344 train_time:34182986ms step_avg:596.53ms +grad accum step:14326/14336 +step:57304/57344 train_time:34184341ms step_avg:596.54ms +step:57305/57344 train_time:34184367ms step_avg:596.53ms +step:57306/57344 train_time:34184617ms step_avg:596.53ms +step:57307/57344 train_time:34185231ms step_avg:596.53ms +grad accum step:14327/14336 +step:57308/57344 train_time:34186990ms step_avg:596.55ms +step:57309/57344 train_time:34187006ms step_avg:596.54ms +step:57310/57344 train_time:34187233ms step_avg:596.53ms +step:57311/57344 train_time:34187821ms step_avg:596.53ms +grad accum step:14328/14336 +step:57312/57344 train_time:34189250ms step_avg:596.55ms +step:57313/57344 train_time:34189268ms step_avg:596.54ms +step:57314/57344 train_time:34189541ms step_avg:596.53ms +step:57315/57344 train_time:34190180ms step_avg:596.53ms +grad accum step:14329/14336 +step:57316/57344 train_time:34191554ms step_avg:596.54ms +step:57317/57344 train_time:34191568ms step_avg:596.53ms +step:57318/57344 train_time:34191839ms step_avg:596.53ms +step:57319/57344 train_time:34192487ms step_avg:596.53ms +grad accum step:14330/14336 +step:57320/57344 train_time:34193912ms step_avg:596.54ms +step:57321/57344 train_time:34193927ms step_avg:596.53ms +step:57322/57344 train_time:34194177ms step_avg:596.53ms +step:57323/57344 train_time:34194743ms step_avg:596.53ms +grad accum step:14331/14336 +step:57324/57344 train_time:34196127ms step_avg:596.54ms +step:57325/57344 train_time:34196144ms step_avg:596.53ms +step:57326/57344 train_time:34196400ms step_avg:596.53ms +step:57327/57344 train_time:34197008ms step_avg:596.53ms +grad accum step:14332/14336 +step:57328/57344 train_time:34198422ms step_avg:596.54ms +step:57329/57344 train_time:34198436ms step_avg:596.53ms +step:57330/57344 train_time:34198687ms step_avg:596.52ms +step:57331/57344 train_time:34199268ms step_avg:596.52ms +grad accum step:14333/14336 +step:57332/57344 train_time:34200669ms step_avg:596.54ms +step:57333/57344 train_time:34200685ms step_avg:596.53ms +step:57334/57344 train_time:34200932ms step_avg:596.52ms +step:57335/57344 train_time:34201490ms step_avg:596.52ms +grad accum step:14334/14336 +step:57336/57344 train_time:34202884ms step_avg:596.53ms +step:57337/57344 train_time:34202898ms step_avg:596.52ms +step:57338/57344 train_time:34203153ms step_avg:596.52ms +step:57339/57344 train_time:34203752ms step_avg:596.52ms +grad accum step:14335/14336 +step:57340/57344 train_time:34205176ms step_avg:596.53ms +step:57341/57344 train_time:34205191ms step_avg:596.52ms +step:57342/57344 train_time:34205455ms step_avg:596.52ms +step:57343/57344 train_time:34206054ms step_avg:596.52ms +grad accum step:14336/14336 +step:57344/57344 train_time:34207379ms step_avg:596.53ms +step:57344/57344 val_loss:5.301472 train_time:34207380ms step_avg:596.53ms