diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..8359589bfe7a4ba2cb0e5c26c055b76869d1f204 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +r128_s2_lr5e-4/tokenizer.json filter=lfs diff=lfs merge=lfs -text +r256_s4_lr1e-4/tokenizer.json filter=lfs diff=lfs merge=lfs -text +r32_s4_lr5e-4/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/r128_s2_lr5e-4/config.json b/r128_s2_lr5e-4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..987575daa317c2939b631825630abfd894b2a024 --- /dev/null +++ b/r128_s2_lr5e-4/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.55.0", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/r128_s2_lr5e-4/generation_config.json b/r128_s2_lr5e-4/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..80f7b497637c090c5f61121199e4970ea6ef1f25 --- /dev/null +++ b/r128_s2_lr5e-4/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.55.0" +} diff --git a/r128_s2_lr5e-4/model-00001-of-00030.safetensors b/r128_s2_lr5e-4/model-00001-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..410f37f4511c24e80d21e847dc18281461e11336 --- /dev/null +++ b/r128_s2_lr5e-4/model-00001-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c060098accde629acc69c5eb000b4a3ed801bc3cf23fe0e11470d668215a7de +size 4584408792 diff --git a/r128_s2_lr5e-4/model-00002-of-00030.safetensors b/r128_s2_lr5e-4/model-00002-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..02ada456ae9a5a30500040e47df28b6453b93f99 --- /dev/null +++ b/r128_s2_lr5e-4/model-00002-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c600a5cfe9cb3559e15e31427d43df300e91158f88d04ecb7fd7547a5c04d31e +size 4664167352 diff --git a/r128_s2_lr5e-4/model-00003-of-00030.safetensors b/r128_s2_lr5e-4/model-00003-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a1725149740e9c46a7853e7fad857f4f30fe9d8 --- /dev/null +++ b/r128_s2_lr5e-4/model-00003-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4deec3146d6aa8d46263b5d3f50f80812c23fbefdfae3fb74d6ab49ddc2d5d17 +size 4999711672 diff --git a/r128_s2_lr5e-4/model-00004-of-00030.safetensors b/r128_s2_lr5e-4/model-00004-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0db6e82a2bbee92432d6c11965b5ad14d119fc07 --- /dev/null +++ b/r128_s2_lr5e-4/model-00004-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3deb422e62712728c2a01a6e7ff7e88421716e143f62e0da650199bdb8b34a2 +size 4966157008 diff --git a/r128_s2_lr5e-4/model-00005-of-00030.safetensors b/r128_s2_lr5e-4/model-00005-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15f125ff88ce3d3c87eb8598d85a0463d12dad66 --- /dev/null +++ b/r128_s2_lr5e-4/model-00005-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44d991a5c58927b206eb28d8b6489095d02a621a693bbe32778324b2d426818a +size 4664134384 diff --git a/r128_s2_lr5e-4/model-00006-of-00030.safetensors b/r128_s2_lr5e-4/model-00006-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..740fa841e1131275d1a4167defcc32778dfc2409 --- /dev/null +++ b/r128_s2_lr5e-4/model-00006-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a31a39a7f45379fb3d518126214b564525d26638600f5cb70e3e4d909ae17f1 +size 4664167384 diff --git a/r128_s2_lr5e-4/model-00007-of-00030.safetensors b/r128_s2_lr5e-4/model-00007-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4fad42af208ee421560b95791c3db9b1e1e6dc96 --- /dev/null +++ b/r128_s2_lr5e-4/model-00007-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0271fb9750097420419bf964adcfe0559aaaf2b129b9dc83b756987575d3041 +size 4664167376 diff --git a/r128_s2_lr5e-4/model-00008-of-00030.safetensors b/r128_s2_lr5e-4/model-00008-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42b42f22a551eb446deca4a6fd18dcb828ed14a9 --- /dev/null +++ b/r128_s2_lr5e-4/model-00008-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:391a47e6bb3efecad407c78ecfc55ebe83f999cced9866eba8df9be643938648 +size 4999711704 diff --git a/r128_s2_lr5e-4/model-00009-of-00030.safetensors b/r128_s2_lr5e-4/model-00009-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b84d62d6ecc022d224d833cb658ebccd609adc30 --- /dev/null +++ b/r128_s2_lr5e-4/model-00009-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70c71260f78dc09de1777bf69dc3dd9c9400c6a38e125cde111b09cc2441f2a8 +size 4966157032 diff --git a/r128_s2_lr5e-4/model-00010-of-00030.safetensors b/r128_s2_lr5e-4/model-00010-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4765a98f54d033ff72e98a936054caf908d7fc6a --- /dev/null +++ b/r128_s2_lr5e-4/model-00010-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3266b3112d0bdf8922223a4bc23be87fd0303e9285ec735afed7ff4bf182660 +size 4664134384 diff --git a/r128_s2_lr5e-4/model-00011-of-00030.safetensors b/r128_s2_lr5e-4/model-00011-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..64385b3f7ffb730c0889c8c98be45b8fa9beb602 --- /dev/null +++ b/r128_s2_lr5e-4/model-00011-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c6f4e490ecc3748c7211577531efbeb474806baf095c022c24bbf869ba15049 +size 4664167384 diff --git a/r128_s2_lr5e-4/model-00012-of-00030.safetensors b/r128_s2_lr5e-4/model-00012-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..79678867357f91cdae081173fcb5f731bbaf9e96 --- /dev/null +++ b/r128_s2_lr5e-4/model-00012-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e7a541a0878c7ab3949239405f06bd4f34ae3d82948c538262f0d33e78a6140 +size 4664167376 diff --git a/r128_s2_lr5e-4/model-00013-of-00030.safetensors b/r128_s2_lr5e-4/model-00013-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88207b5c555b359b3e54d182d7440c118bc4ccb7 --- /dev/null +++ b/r128_s2_lr5e-4/model-00013-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:806777f11beff1b329d74a0ce2dd127c613fe4e773728a8614e05b5b107854a3 +size 4999711704 diff --git a/r128_s2_lr5e-4/model-00014-of-00030.safetensors b/r128_s2_lr5e-4/model-00014-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..050bfc52cbb5e93475be7f7545042f7a83836b53 --- /dev/null +++ b/r128_s2_lr5e-4/model-00014-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:387fbeb2343f40c30b8638b3ce798c5a4273aff4659877915c60e17045a92892 +size 4966157032 diff --git a/r128_s2_lr5e-4/model-00015-of-00030.safetensors b/r128_s2_lr5e-4/model-00015-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6736c3ec81f326c896dc2194a44c93c1b011777a --- /dev/null +++ b/r128_s2_lr5e-4/model-00015-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c7d18b64a8e50f4cc2e3771c4ff8f21466b0481f4a5cc8c6b12a433afad9506 +size 4664134384 diff --git a/r128_s2_lr5e-4/model-00016-of-00030.safetensors b/r128_s2_lr5e-4/model-00016-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..718470b4c7ee4115ffb17314aaa8d1209c63fbc0 --- /dev/null +++ b/r128_s2_lr5e-4/model-00016-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98db9d55f7edfe4c63a3b5882269564c12a22c891872dd3a9e72f97aea438357 +size 4664167384 diff --git a/r128_s2_lr5e-4/model-00017-of-00030.safetensors b/r128_s2_lr5e-4/model-00017-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..02e3851d0c743d600f92e893c8efd728de391ae3 --- /dev/null +++ b/r128_s2_lr5e-4/model-00017-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c2360a6eac45ebc65c8542cd70eb2b097676809c8005ffa44e0c36f5e24eef8 +size 4664167376 diff --git a/r128_s2_lr5e-4/model-00018-of-00030.safetensors b/r128_s2_lr5e-4/model-00018-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69fd44db5a45e55063b29d36f0770b78be11784d --- /dev/null +++ b/r128_s2_lr5e-4/model-00018-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e92f2ccedeaabc1187e3686a3c2cc473850b9903ed5346788f58cae20af840e +size 4999711704 diff --git a/r128_s2_lr5e-4/model-00019-of-00030.safetensors b/r128_s2_lr5e-4/model-00019-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad445a189f571618ebde98af0336fc70bb02afab --- /dev/null +++ b/r128_s2_lr5e-4/model-00019-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3df76ff7e24cb7845b4dd9370f6bff29a304cb7582a4091db378e23080c2e06d +size 4966157032 diff --git a/r128_s2_lr5e-4/model-00020-of-00030.safetensors b/r128_s2_lr5e-4/model-00020-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c4c79aab1df021135daddad4927ef4b6d1353bdb --- /dev/null +++ b/r128_s2_lr5e-4/model-00020-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27c48ee8caee796df0b8ef1cf8aa2633a29aa34ddb954f11d6964de1924ddccb +size 4664134384 diff --git a/r128_s2_lr5e-4/model-00021-of-00030.safetensors b/r128_s2_lr5e-4/model-00021-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c77015a43b5bef0b435b86bf696ac70d8c3b5920 --- /dev/null +++ b/r128_s2_lr5e-4/model-00021-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8beb3a216bc74d58503e2bafb0c2d7732a971a143943ff9e587ad76211f848f1 +size 4664167384 diff --git a/r128_s2_lr5e-4/model-00022-of-00030.safetensors b/r128_s2_lr5e-4/model-00022-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b82a9d7a6395176fbf9b005271f60675326b7254 --- /dev/null +++ b/r128_s2_lr5e-4/model-00022-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea8270481c232cf98dd82cdb75df45c6e061f7cc93468d41dd79ff0f0c5fb83 +size 4664167376 diff --git a/r128_s2_lr5e-4/model-00023-of-00030.safetensors b/r128_s2_lr5e-4/model-00023-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ebded8e7b029efd85e0de5ee669ad922deb27f3e --- /dev/null +++ b/r128_s2_lr5e-4/model-00023-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b73dc6fc4f933eb31f05018046c60d5af7b5dd1c0e0f50ec67ec39b8e7775a3 +size 4999711704 diff --git a/r128_s2_lr5e-4/model-00024-of-00030.safetensors b/r128_s2_lr5e-4/model-00024-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d9b2b77b3a97a97e4a96ec6dc3ee29b37ee45ab9 --- /dev/null +++ b/r128_s2_lr5e-4/model-00024-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e97692430a05851f0f6dd40ba04957e2f9824cfcfce121bacea11b6eca96fe7 +size 4966157032 diff --git a/r128_s2_lr5e-4/model-00025-of-00030.safetensors b/r128_s2_lr5e-4/model-00025-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..41059ef23b180bc5ba480d867edfbf52052aeaf3 --- /dev/null +++ b/r128_s2_lr5e-4/model-00025-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe2e6c5bc8c7a395e720e965822b20c32f84a406cd2e3a549a44aa7e656efbd1 +size 4664134384 diff --git a/r128_s2_lr5e-4/model-00026-of-00030.safetensors b/r128_s2_lr5e-4/model-00026-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8740e70a2fa54f996f36ed725dcd67cec9d39b7c --- /dev/null +++ b/r128_s2_lr5e-4/model-00026-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05d2e828fa51689e98ab50c4015fb9da972013b91b552406114e8833cfec11ad +size 4664167384 diff --git a/r128_s2_lr5e-4/model-00027-of-00030.safetensors b/r128_s2_lr5e-4/model-00027-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69af3e7df746688aef5665f4d58e46fabe6b5cf2 --- /dev/null +++ b/r128_s2_lr5e-4/model-00027-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb0d870d347165187f56075ea217082bd460d74ae6295e8cb048d2cef01f7af0 +size 4664167376 diff --git a/r128_s2_lr5e-4/model-00028-of-00030.safetensors b/r128_s2_lr5e-4/model-00028-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de5a15f29ac2586630bdb2258807a8479b0402a7 --- /dev/null +++ b/r128_s2_lr5e-4/model-00028-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c41e14db6386f25fbd4418b6820e51486c74deb8295a5053e682bf60f77afead +size 4999711704 diff --git a/r128_s2_lr5e-4/model-00029-of-00030.safetensors b/r128_s2_lr5e-4/model-00029-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3467229bb65ce0cf57074ddbe39e5f01145ed46a --- /dev/null +++ b/r128_s2_lr5e-4/model-00029-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0fe72357e00866101d165e5566dd62c5c2997ac123a0e4e0bfd7ac6c1c8093d +size 4966173512 diff --git a/r128_s2_lr5e-4/model-00030-of-00030.safetensors b/r128_s2_lr5e-4/model-00030-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a3611c4c41c3d8c9e551befba34877f5daa8f50 --- /dev/null +++ b/r128_s2_lr5e-4/model-00030-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3eac50c2c10b890c008479074e5ac76193b05dd0e0619aa707cdb9233c8bd9d +size 2101346432 diff --git a/r128_s2_lr5e-4/model.safetensors.index.json b/r128_s2_lr5e-4/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..b1522ada6907d0b278c1a1409ee51a3e88273e4a --- /dev/null +++ b/r128_s2_lr5e-4/model.safetensors.index.json @@ -0,0 +1,731 @@ +{ + "metadata": { + "total_parameters": 70553706496, + "total_size": 141107412992 + }, + "weight_map": { + "lm_head.weight": "model-00030-of-00030.safetensors", + "model.embed_tokens.weight": "model-00001-of-00030.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00030.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.input_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.10.input_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.input_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.input_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.13.input_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.input_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.input_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.16.input_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.input_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.18.input_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.19.input_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.2.input_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.20.input_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.21.input_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.22.input_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.input_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.24.input_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.input_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.input_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.27.input_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.input_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.input_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.3.input_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.30.input_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.input_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.32.input_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.33.input_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.input_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.35.input_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.36.input_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.input_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.38.input_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.input_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.4.input_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.40.input_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.mlp.up_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.41.input_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.input_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.42.mlp.down_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.mlp.gate_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.mlp.up_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.input_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.43.mlp.down_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.43.mlp.gate_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.mlp.up_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.44.input_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.44.mlp.down_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.mlp.gate_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.mlp.up_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.input_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.45.mlp.down_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.mlp.gate_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.mlp.up_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.46.input_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.46.mlp.down_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.46.mlp.gate_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.46.mlp.up_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.47.input_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.47.mlp.down_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.mlp.gate_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.mlp.up_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.input_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.48.mlp.down_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.mlp.gate_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.mlp.up_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.49.input_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.49.mlp.down_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.49.mlp.gate_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.49.mlp.up_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.5.input_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.50.input_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.50.mlp.down_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.mlp.gate_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.mlp.up_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.input_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.51.mlp.down_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.mlp.gate_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.mlp.up_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.52.input_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.52.mlp.down_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.mlp.gate_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.mlp.up_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.input_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.53.mlp.down_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.mlp.gate_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.mlp.up_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.input_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.54.mlp.down_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.54.mlp.gate_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.mlp.up_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.55.input_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.55.mlp.down_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.mlp.gate_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.mlp.up_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.input_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.56.mlp.down_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.mlp.gate_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.mlp.up_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.input_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.57.mlp.down_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.57.mlp.gate_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.mlp.up_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.58.input_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.58.mlp.down_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.mlp.gate_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.mlp.up_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.input_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.59.mlp.down_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.mlp.gate_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.mlp.up_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.6.input_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.60.input_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.60.mlp.down_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.60.mlp.gate_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.60.mlp.up_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.60.post_attention_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.61.input_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.61.mlp.down_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.mlp.gate_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.mlp.up_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.post_attention_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.input_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.62.mlp.down_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.mlp.gate_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.mlp.up_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.post_attention_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.63.input_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.63.mlp.down_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.63.mlp.gate_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.63.mlp.up_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.63.post_attention_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.64.input_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.64.mlp.down_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.mlp.gate_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.mlp.up_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.post_attention_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.64.self_attn.k_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.self_attn.o_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.self_attn.q_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.self_attn.v_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.input_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.65.mlp.down_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.mlp.gate_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.mlp.up_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.post_attention_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.65.self_attn.k_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.self_attn.o_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.self_attn.q_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.self_attn.v_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.66.input_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.66.mlp.down_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.mlp.gate_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.mlp.up_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.post_attention_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.66.self_attn.k_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.self_attn.o_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.self_attn.q_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.self_attn.v_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.input_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.67.mlp.down_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.mlp.gate_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.mlp.up_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.post_attention_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.67.self_attn.k_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.self_attn.o_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.self_attn.q_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.self_attn.v_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.input_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.68.mlp.down_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.68.mlp.gate_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.mlp.up_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.post_attention_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.68.self_attn.k_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.self_attn.o_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.self_attn.q_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.self_attn.v_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.69.input_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.69.mlp.down_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.mlp.gate_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.mlp.up_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.post_attention_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.69.self_attn.k_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.self_attn.o_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.self_attn.q_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.self_attn.v_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.7.input_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.70.input_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.70.mlp.down_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.mlp.gate_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.mlp.up_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.post_attention_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.70.self_attn.k_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.self_attn.o_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.self_attn.q_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.self_attn.v_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.input_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.71.mlp.down_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.71.mlp.gate_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.mlp.up_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.71.post_attention_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.71.self_attn.k_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.self_attn.o_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.self_attn.q_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.self_attn.v_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.72.input_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.72.mlp.down_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.mlp.gate_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.mlp.up_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.post_attention_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.72.self_attn.k_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.self_attn.o_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.self_attn.q_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.self_attn.v_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.input_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.73.mlp.down_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.mlp.gate_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.mlp.up_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.post_attention_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.73.self_attn.k_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.self_attn.o_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.self_attn.q_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.self_attn.v_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.74.input_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.74.mlp.down_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.74.mlp.gate_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.74.mlp.up_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.74.post_attention_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.74.self_attn.k_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.74.self_attn.o_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.74.self_attn.q_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.74.self_attn.v_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.75.input_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.75.mlp.down_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.mlp.gate_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.mlp.up_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.post_attention_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.75.self_attn.k_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.self_attn.o_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.self_attn.q_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.self_attn.v_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.input_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.76.mlp.down_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.mlp.gate_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.mlp.up_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.post_attention_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.76.self_attn.k_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.self_attn.o_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.self_attn.q_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.self_attn.v_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.77.input_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.77.mlp.down_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.77.mlp.gate_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.77.mlp.up_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.77.post_attention_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.77.self_attn.k_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.77.self_attn.o_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.77.self_attn.q_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.77.self_attn.v_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.78.input_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.78.mlp.down_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.mlp.gate_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.mlp.up_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.post_attention_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.78.self_attn.k_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.self_attn.o_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.self_attn.q_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.self_attn.v_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.input_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.79.mlp.down_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.mlp.gate_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.mlp.up_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.post_attention_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.79.self_attn.k_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.self_attn.o_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.self_attn.q_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.self_attn.v_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.8.input_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.input_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00004-of-00030.safetensors", + "model.norm.weight": "model-00029-of-00030.safetensors" + } +} diff --git a/r128_s2_lr5e-4/special_tokens_map.json b/r128_s2_lr5e-4/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..14daf4588e61b4e4983af0fccaba4d5500c0977c --- /dev/null +++ b/r128_s2_lr5e-4/special_tokens_map.json @@ -0,0 +1,26 @@ +{ + "additional_special_tokens": [ + { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/r128_s2_lr5e-4/tokenizer.json b/r128_s2_lr5e-4/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/r128_s2_lr5e-4/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/r128_s2_lr5e-4/tokenizer_config.json b/r128_s2_lr5e-4/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d0bfd3305ba1f87f7b435611053cf7715148516c --- /dev/null +++ b/r128_s2_lr5e-4/tokenizer_config.json @@ -0,0 +1,2068 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "padding_side": "left", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/r256_s4_lr1e-4/config.json b/r256_s4_lr1e-4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..987575daa317c2939b631825630abfd894b2a024 --- /dev/null +++ b/r256_s4_lr1e-4/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.55.0", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/r256_s4_lr1e-4/generation_config.json b/r256_s4_lr1e-4/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..80f7b497637c090c5f61121199e4970ea6ef1f25 --- /dev/null +++ b/r256_s4_lr1e-4/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.55.0" +} diff --git a/r256_s4_lr1e-4/model-00001-of-00030.safetensors b/r256_s4_lr1e-4/model-00001-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..265bf2424905c413a2e9df89a7cbb7c325fbc871 --- /dev/null +++ b/r256_s4_lr1e-4/model-00001-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ea9635d48083f7e4535aeb2bb6fca89564ef9712ee73af5c9b6a5a221c026f7 +size 4584408792 diff --git a/r256_s4_lr1e-4/model-00002-of-00030.safetensors b/r256_s4_lr1e-4/model-00002-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f07dd81d95528833d9576e84e88cbfb11d68941 --- /dev/null +++ b/r256_s4_lr1e-4/model-00002-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04e0c3f73a8556ff5279c132696233ab54a7a608e4c7f71c6b25087b475dacac +size 4664167352 diff --git a/r256_s4_lr1e-4/model-00003-of-00030.safetensors b/r256_s4_lr1e-4/model-00003-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cc98babca20e7673221d3ad7ed634a55ed5c5602 --- /dev/null +++ b/r256_s4_lr1e-4/model-00003-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df24a85a722d4a7a06f4d99c5821f43609890cc3770361987f4721f601fc1f4f +size 4999711672 diff --git a/r256_s4_lr1e-4/model-00004-of-00030.safetensors b/r256_s4_lr1e-4/model-00004-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef44425302259eb97f0e050ac2ba4d435ca3c3f6 --- /dev/null +++ b/r256_s4_lr1e-4/model-00004-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33be8efd41e851f9e42ba944489901b4a9db4e285b596f4ae90600864cb205b2 +size 4966157008 diff --git a/r256_s4_lr1e-4/model-00005-of-00030.safetensors b/r256_s4_lr1e-4/model-00005-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..73374bf77ac2c432c41228d0711fb1b0738d420b --- /dev/null +++ b/r256_s4_lr1e-4/model-00005-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66c97c6c5e1b6ec587eb9d0f9daf3faccb5ef57dabd1efc6d039064c819df9fa +size 4664134384 diff --git a/r256_s4_lr1e-4/model-00006-of-00030.safetensors b/r256_s4_lr1e-4/model-00006-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e44f98175f1e71e9c7040b25864b55ea9f1759ff --- /dev/null +++ b/r256_s4_lr1e-4/model-00006-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da1b407dacd5b478b60419c17f6b30dfb568474417958f202250a18a65d80a34 +size 4664167384 diff --git a/r256_s4_lr1e-4/model-00007-of-00030.safetensors b/r256_s4_lr1e-4/model-00007-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d06815bd541d9ba56e5c4ccd754f043546fcc8e --- /dev/null +++ b/r256_s4_lr1e-4/model-00007-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3188bd387c9fe7a40f365e7432b3f6ae5c398d82caa54ec5faf64fb55121b924 +size 4664167376 diff --git a/r256_s4_lr1e-4/model-00008-of-00030.safetensors b/r256_s4_lr1e-4/model-00008-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69d9a3749a91d2ac2756a0e069733f01d0ab4129 --- /dev/null +++ b/r256_s4_lr1e-4/model-00008-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0715ff8cbefc90af6516f6f6754e5fc98fe161ea494e699e3ae487039f259b51 +size 4999711704 diff --git a/r256_s4_lr1e-4/model-00009-of-00030.safetensors b/r256_s4_lr1e-4/model-00009-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb00c6acf9bbc20e0b868b50c34f445b958cba1c --- /dev/null +++ b/r256_s4_lr1e-4/model-00009-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e367b768afda7f28e5a9d1c7bc8ecde1e231df3d8cb8fd13b110ffea4b17b9 +size 4966157032 diff --git a/r256_s4_lr1e-4/model-00010-of-00030.safetensors b/r256_s4_lr1e-4/model-00010-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3939c376c08a2b83fac813ee6fc23ea30b8de195 --- /dev/null +++ b/r256_s4_lr1e-4/model-00010-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5893ab10945d2845defafcc3eb77b3fe3379e4b1d37b87838224c3d85700d2d +size 4664134384 diff --git a/r256_s4_lr1e-4/model-00011-of-00030.safetensors b/r256_s4_lr1e-4/model-00011-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9463442c03a030981c77f2a7f919341519638c0 --- /dev/null +++ b/r256_s4_lr1e-4/model-00011-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d67eb7e7c432eb72c98fffcf936de95e920fa876026bf97ce6524d1cd22c78d5 +size 4664167384 diff --git a/r256_s4_lr1e-4/model-00012-of-00030.safetensors b/r256_s4_lr1e-4/model-00012-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..295391111d8ae7dbe791159e9e456d1d490c2107 --- /dev/null +++ b/r256_s4_lr1e-4/model-00012-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:003aa9dbbe48aedc3e85f7d0bacf587cc44d590cab809f9bcac4c97d8395ea3e +size 4664167376 diff --git a/r256_s4_lr1e-4/model-00013-of-00030.safetensors b/r256_s4_lr1e-4/model-00013-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b1c82e657804c709f750bfd6738128fa1a2a228d --- /dev/null +++ b/r256_s4_lr1e-4/model-00013-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e866114ecec0dd5269b4a9430aaad0e1becc40eeb9adcc203af466fd3aac12 +size 4999711704 diff --git a/r256_s4_lr1e-4/model-00014-of-00030.safetensors b/r256_s4_lr1e-4/model-00014-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e6c24c2ce2f101653e9eac0915c2140ff9fa2efb --- /dev/null +++ b/r256_s4_lr1e-4/model-00014-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86e067c3fdeabd6f3fecbc6fc01c9faa1bde6b39ad9fc82c107d862b26e43f9f +size 4966157032 diff --git a/r256_s4_lr1e-4/model-00015-of-00030.safetensors b/r256_s4_lr1e-4/model-00015-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..504a2531674e6101dee2d290f02e3b2e974e5d53 --- /dev/null +++ b/r256_s4_lr1e-4/model-00015-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:174b2e5c32cab7ee578dcba66c57581771757f4059507cbe38900bae92af0e0b +size 4664134384 diff --git a/r256_s4_lr1e-4/model-00016-of-00030.safetensors b/r256_s4_lr1e-4/model-00016-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43457c4a28db17ca5d88583d77ac18590ba70150 --- /dev/null +++ b/r256_s4_lr1e-4/model-00016-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eda26c30400813ba0e4e272f4e9ec1a1c30473b113bc044e2e67af3ad1df5cb +size 4664167384 diff --git a/r256_s4_lr1e-4/model-00017-of-00030.safetensors b/r256_s4_lr1e-4/model-00017-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6c4a09102770b4710e639406d454de4abcd3094d --- /dev/null +++ b/r256_s4_lr1e-4/model-00017-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66fb9873b05b8045a712225b010039b645cb6e1c026a6511db598c50aabbeac6 +size 4664167376 diff --git a/r256_s4_lr1e-4/model-00018-of-00030.safetensors b/r256_s4_lr1e-4/model-00018-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..873938c9aabc2bce2654066ce209b5a286bb3d09 --- /dev/null +++ b/r256_s4_lr1e-4/model-00018-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:664034e14ba8a62f9030dee69922b5b377bb01385c3ffb2bcfcb95ff56edeed9 +size 4999711704 diff --git a/r256_s4_lr1e-4/model-00019-of-00030.safetensors b/r256_s4_lr1e-4/model-00019-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f7bc5c8d81003ea22f3e90edeeed85f9a05088a --- /dev/null +++ b/r256_s4_lr1e-4/model-00019-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2d679910e15e09493571a43996cad67491cc1a7c942aa6f612dfcd912cd3003 +size 4966157032 diff --git a/r256_s4_lr1e-4/model-00020-of-00030.safetensors b/r256_s4_lr1e-4/model-00020-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..859947108549463ccadcb663914c1fe7891577ed --- /dev/null +++ b/r256_s4_lr1e-4/model-00020-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11e2b140490cf7bb6f950611f99e25af2cb2a5122ee947533f454b4d7e82cbf5 +size 4664134384 diff --git a/r256_s4_lr1e-4/model-00021-of-00030.safetensors b/r256_s4_lr1e-4/model-00021-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15a9681faa726f5e61189d1e4c16b84160d1514c --- /dev/null +++ b/r256_s4_lr1e-4/model-00021-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d838a72d4f4df53b598e04cbce5714c1f011e263d17319007402a270a54d45b +size 4664167384 diff --git a/r256_s4_lr1e-4/model-00022-of-00030.safetensors b/r256_s4_lr1e-4/model-00022-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..163ea756b00a674d47ddd3885b587ee4355d953e --- /dev/null +++ b/r256_s4_lr1e-4/model-00022-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c60fb8e47534257e39f8e034449484e9e1cee42178ad19078211e8c60eeca2e2 +size 4664167376 diff --git a/r256_s4_lr1e-4/model-00023-of-00030.safetensors b/r256_s4_lr1e-4/model-00023-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..862f88eb3fd7a47bd5d5ef9759adfae6932c8f7d --- /dev/null +++ b/r256_s4_lr1e-4/model-00023-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfa5aa353762fd78353a8b843e0215ded491371094c086fa8fdbcac23ba79d8c +size 4999711704 diff --git a/r256_s4_lr1e-4/model-00024-of-00030.safetensors b/r256_s4_lr1e-4/model-00024-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8f39f25e4036a0ae8fb7c5cbcaf24b6deb10a0d --- /dev/null +++ b/r256_s4_lr1e-4/model-00024-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91d3e4a84430fe0e24a1bc64344dd1585a3254d0e40d8488fe00ba97bfac2711 +size 4966157032 diff --git a/r256_s4_lr1e-4/model-00025-of-00030.safetensors b/r256_s4_lr1e-4/model-00025-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f87cbe9b38660c56c02f1bad9144feb232e6e37 --- /dev/null +++ b/r256_s4_lr1e-4/model-00025-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f18a5bb9e15812bb932bbb23b2374e5104f5c6a4e9121d98b7433252cbd7f14a +size 4664134384 diff --git a/r256_s4_lr1e-4/model-00026-of-00030.safetensors b/r256_s4_lr1e-4/model-00026-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fbad27926487c0c31debeda8cc4ea5f0374074c3 --- /dev/null +++ b/r256_s4_lr1e-4/model-00026-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:578f0f965544a506e77d880522f4133a11cd84de53eb91c52eac939f08586559 +size 4664167384 diff --git a/r256_s4_lr1e-4/model-00027-of-00030.safetensors b/r256_s4_lr1e-4/model-00027-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..db6a8a1953b5d651f30eab8c27b0fa3c638d2c1f --- /dev/null +++ b/r256_s4_lr1e-4/model-00027-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22b02a87cf56ce9aed4b664df18965e911741b6e33cedf747ddce56119e96e6e +size 4664167376 diff --git a/r256_s4_lr1e-4/model-00028-of-00030.safetensors b/r256_s4_lr1e-4/model-00028-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33244f722ae5e9a37cf0d05b34719793ab4b16e8 --- /dev/null +++ b/r256_s4_lr1e-4/model-00028-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78d5be1312b9bd5d92610ca0844ebccf07523d5804104f17e0dd53721417e298 +size 4999711704 diff --git a/r256_s4_lr1e-4/model-00029-of-00030.safetensors b/r256_s4_lr1e-4/model-00029-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0acf871ffabbe7d5d0ba0f97972e6d0a88634660 --- /dev/null +++ b/r256_s4_lr1e-4/model-00029-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0317d55a0f815bcfd77264efe8dd8ecb81d2832b5a3303f6d21b0905bcf72a2f +size 4966173512 diff --git a/r256_s4_lr1e-4/model-00030-of-00030.safetensors b/r256_s4_lr1e-4/model-00030-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a3611c4c41c3d8c9e551befba34877f5daa8f50 --- /dev/null +++ b/r256_s4_lr1e-4/model-00030-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3eac50c2c10b890c008479074e5ac76193b05dd0e0619aa707cdb9233c8bd9d +size 2101346432 diff --git a/r256_s4_lr1e-4/model.safetensors.index.json b/r256_s4_lr1e-4/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..b1522ada6907d0b278c1a1409ee51a3e88273e4a --- /dev/null +++ b/r256_s4_lr1e-4/model.safetensors.index.json @@ -0,0 +1,731 @@ +{ + "metadata": { + "total_parameters": 70553706496, + "total_size": 141107412992 + }, + "weight_map": { + "lm_head.weight": "model-00030-of-00030.safetensors", + "model.embed_tokens.weight": "model-00001-of-00030.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00030.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.input_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.10.input_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.input_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.input_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.13.input_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.input_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.input_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.16.input_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.input_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.18.input_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.19.input_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.2.input_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.20.input_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.21.input_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.22.input_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.input_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.24.input_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.input_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.input_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.27.input_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.input_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.input_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.3.input_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.30.input_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.input_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.32.input_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.33.input_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.input_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.35.input_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.36.input_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.input_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.38.input_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.input_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.4.input_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.40.input_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.mlp.up_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.41.input_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.input_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.42.mlp.down_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.mlp.gate_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.mlp.up_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.input_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.43.mlp.down_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.43.mlp.gate_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.mlp.up_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.44.input_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.44.mlp.down_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.mlp.gate_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.mlp.up_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.input_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.45.mlp.down_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.mlp.gate_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.mlp.up_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.46.input_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.46.mlp.down_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.46.mlp.gate_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.46.mlp.up_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.47.input_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.47.mlp.down_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.mlp.gate_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.mlp.up_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.input_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.48.mlp.down_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.mlp.gate_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.mlp.up_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.49.input_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.49.mlp.down_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.49.mlp.gate_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.49.mlp.up_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.5.input_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.50.input_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.50.mlp.down_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.mlp.gate_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.mlp.up_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.input_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.51.mlp.down_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.mlp.gate_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.mlp.up_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.52.input_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.52.mlp.down_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.mlp.gate_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.mlp.up_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.input_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.53.mlp.down_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.mlp.gate_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.mlp.up_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.input_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.54.mlp.down_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.54.mlp.gate_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.mlp.up_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.55.input_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.55.mlp.down_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.mlp.gate_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.mlp.up_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.input_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.56.mlp.down_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.mlp.gate_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.mlp.up_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.input_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.57.mlp.down_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.57.mlp.gate_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.mlp.up_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.58.input_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.58.mlp.down_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.mlp.gate_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.mlp.up_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.input_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.59.mlp.down_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.mlp.gate_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.mlp.up_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.6.input_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.60.input_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.60.mlp.down_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.60.mlp.gate_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.60.mlp.up_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.60.post_attention_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.61.input_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.61.mlp.down_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.mlp.gate_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.mlp.up_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.post_attention_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.input_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.62.mlp.down_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.mlp.gate_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.mlp.up_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.post_attention_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.63.input_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.63.mlp.down_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.63.mlp.gate_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.63.mlp.up_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.63.post_attention_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.64.input_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.64.mlp.down_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.mlp.gate_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.mlp.up_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.post_attention_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.64.self_attn.k_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.self_attn.o_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.self_attn.q_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.self_attn.v_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.input_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.65.mlp.down_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.mlp.gate_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.mlp.up_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.post_attention_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.65.self_attn.k_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.self_attn.o_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.self_attn.q_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.self_attn.v_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.66.input_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.66.mlp.down_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.mlp.gate_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.mlp.up_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.post_attention_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.66.self_attn.k_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.self_attn.o_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.self_attn.q_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.self_attn.v_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.input_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.67.mlp.down_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.mlp.gate_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.mlp.up_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.post_attention_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.67.self_attn.k_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.self_attn.o_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.self_attn.q_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.self_attn.v_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.input_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.68.mlp.down_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.68.mlp.gate_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.mlp.up_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.post_attention_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.68.self_attn.k_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.self_attn.o_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.self_attn.q_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.self_attn.v_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.69.input_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.69.mlp.down_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.mlp.gate_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.mlp.up_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.post_attention_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.69.self_attn.k_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.self_attn.o_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.self_attn.q_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.self_attn.v_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.7.input_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.70.input_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.70.mlp.down_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.mlp.gate_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.mlp.up_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.post_attention_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.70.self_attn.k_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.self_attn.o_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.self_attn.q_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.self_attn.v_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.input_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.71.mlp.down_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.71.mlp.gate_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.mlp.up_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.71.post_attention_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.71.self_attn.k_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.self_attn.o_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.self_attn.q_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.self_attn.v_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.72.input_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.72.mlp.down_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.mlp.gate_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.mlp.up_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.post_attention_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.72.self_attn.k_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.self_attn.o_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.self_attn.q_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.self_attn.v_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.input_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.73.mlp.down_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.mlp.gate_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.mlp.up_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.post_attention_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.73.self_attn.k_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.self_attn.o_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.self_attn.q_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.self_attn.v_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.74.input_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.74.mlp.down_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.74.mlp.gate_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.74.mlp.up_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.74.post_attention_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.74.self_attn.k_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.74.self_attn.o_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.74.self_attn.q_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.74.self_attn.v_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.75.input_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.75.mlp.down_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.mlp.gate_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.mlp.up_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.post_attention_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.75.self_attn.k_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.self_attn.o_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.self_attn.q_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.self_attn.v_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.input_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.76.mlp.down_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.mlp.gate_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.mlp.up_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.post_attention_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.76.self_attn.k_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.self_attn.o_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.self_attn.q_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.self_attn.v_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.77.input_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.77.mlp.down_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.77.mlp.gate_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.77.mlp.up_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.77.post_attention_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.77.self_attn.k_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.77.self_attn.o_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.77.self_attn.q_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.77.self_attn.v_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.78.input_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.78.mlp.down_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.mlp.gate_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.mlp.up_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.post_attention_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.78.self_attn.k_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.self_attn.o_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.self_attn.q_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.self_attn.v_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.input_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.79.mlp.down_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.mlp.gate_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.mlp.up_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.post_attention_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.79.self_attn.k_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.self_attn.o_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.self_attn.q_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.self_attn.v_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.8.input_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.input_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00004-of-00030.safetensors", + "model.norm.weight": "model-00029-of-00030.safetensors" + } +} diff --git a/r256_s4_lr1e-4/special_tokens_map.json b/r256_s4_lr1e-4/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..14daf4588e61b4e4983af0fccaba4d5500c0977c --- /dev/null +++ b/r256_s4_lr1e-4/special_tokens_map.json @@ -0,0 +1,26 @@ +{ + "additional_special_tokens": [ + { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/r256_s4_lr1e-4/tokenizer.json b/r256_s4_lr1e-4/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/r256_s4_lr1e-4/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/r256_s4_lr1e-4/tokenizer_config.json b/r256_s4_lr1e-4/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d0bfd3305ba1f87f7b435611053cf7715148516c --- /dev/null +++ b/r256_s4_lr1e-4/tokenizer_config.json @@ -0,0 +1,2068 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "padding_side": "left", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/r32_s4_lr5e-4/config.json b/r32_s4_lr5e-4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..987575daa317c2939b631825630abfd894b2a024 --- /dev/null +++ b/r32_s4_lr5e-4/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.55.0", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/r32_s4_lr5e-4/generation_config.json b/r32_s4_lr5e-4/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..80f7b497637c090c5f61121199e4970ea6ef1f25 --- /dev/null +++ b/r32_s4_lr5e-4/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.55.0" +} diff --git a/r32_s4_lr5e-4/model-00001-of-00030.safetensors b/r32_s4_lr5e-4/model-00001-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7e36db17a9e1d5f6795f005293d5bb3433599a8d --- /dev/null +++ b/r32_s4_lr5e-4/model-00001-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db0edff43351696433aa3bf6eaf229fbe9e2e114572b4371862affdad3bf31f7 +size 4584408792 diff --git a/r32_s4_lr5e-4/model-00002-of-00030.safetensors b/r32_s4_lr5e-4/model-00002-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..73c51abb94b376b59ee42a4301ab03852490a174 --- /dev/null +++ b/r32_s4_lr5e-4/model-00002-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5ec31a0f0060d205e32e092a0e9a9de625eedc81038bddfc6180e7f1b318477 +size 4664167352 diff --git a/r32_s4_lr5e-4/model-00003-of-00030.safetensors b/r32_s4_lr5e-4/model-00003-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c495a7b272df0d6c5d511e45d4156d436975c600 --- /dev/null +++ b/r32_s4_lr5e-4/model-00003-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ce98f2f5b42ae39e4406e37759fb85282eb13393a8c1cc31569299965cc4bf8 +size 4999711672 diff --git a/r32_s4_lr5e-4/model-00004-of-00030.safetensors b/r32_s4_lr5e-4/model-00004-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7dcb7d56c9ea4068bef6faf9203219a155d26158 --- /dev/null +++ b/r32_s4_lr5e-4/model-00004-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad88c2f8ae9da875af96542c6e0828bef24c240d86ed947636bebe1dfd1e7463 +size 4966157008 diff --git a/r32_s4_lr5e-4/model-00005-of-00030.safetensors b/r32_s4_lr5e-4/model-00005-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..11b10547eee9f77a756bccf620e239c60d4d86d8 --- /dev/null +++ b/r32_s4_lr5e-4/model-00005-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c76773fb62236451d8a56653de2841c11c67f07fae05dc8d0ec773667d5c477 +size 4664134384 diff --git a/r32_s4_lr5e-4/model-00006-of-00030.safetensors b/r32_s4_lr5e-4/model-00006-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..03c99801c14faa8d09517b304a245fa2c8a7d4fa --- /dev/null +++ b/r32_s4_lr5e-4/model-00006-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa12adfc96a7d65fb0367cf7b8e4c8755090f5421096f6e27885326fb4605d0f +size 4664167384 diff --git a/r32_s4_lr5e-4/model-00007-of-00030.safetensors b/r32_s4_lr5e-4/model-00007-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..783a7b7e91713ac423cb0382eb84984d87dc4c3e --- /dev/null +++ b/r32_s4_lr5e-4/model-00007-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:830248a057bb07998eea70525f8f6c6e1ab693480c1ec22fdfe01c80210c48ea +size 4664167376 diff --git a/r32_s4_lr5e-4/model-00008-of-00030.safetensors b/r32_s4_lr5e-4/model-00008-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d03b7bd2254eec46ee6043758ba008d40a93978d --- /dev/null +++ b/r32_s4_lr5e-4/model-00008-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c95ef1c11e7643e878ceb9f624e01f33aab7f577c2f6cb69b4d912492e635f9f +size 4999711704 diff --git a/r32_s4_lr5e-4/model-00009-of-00030.safetensors b/r32_s4_lr5e-4/model-00009-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..108ef173dfd3589d0a2a6843b220b503d8b8d03a --- /dev/null +++ b/r32_s4_lr5e-4/model-00009-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f8e7f1ea5265b214615f127788e1a86f629b2671b2bd1056eea0ec0c2a63ba2 +size 4966157032 diff --git a/r32_s4_lr5e-4/model-00010-of-00030.safetensors b/r32_s4_lr5e-4/model-00010-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..531e3641e44215fa708b5c53c9700a554934ae6a --- /dev/null +++ b/r32_s4_lr5e-4/model-00010-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cc7eb0aca6e6f4c2810a41c068ebcca1dc695a6c4a548ccd93fbeff4f4ac613 +size 4664134384 diff --git a/r32_s4_lr5e-4/model-00011-of-00030.safetensors b/r32_s4_lr5e-4/model-00011-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee4056fa9f882341cea97056adbb051a44a5bbf2 --- /dev/null +++ b/r32_s4_lr5e-4/model-00011-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb6fb283019e5e4472a3893ebed88532b3ca7577f9f944a74813965de2c04aa +size 4664167384 diff --git a/r32_s4_lr5e-4/model-00012-of-00030.safetensors b/r32_s4_lr5e-4/model-00012-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a084d66e997364bbb78da86c4d36e7bd948f25f6 --- /dev/null +++ b/r32_s4_lr5e-4/model-00012-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37c9bf80578968e553b8eb5059664a3f9d6fd311347cdc3fb19e61f9a64f27bc +size 4664167376 diff --git a/r32_s4_lr5e-4/model-00013-of-00030.safetensors b/r32_s4_lr5e-4/model-00013-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24bae5e910ebf94b37ae6464103305e481f46676 --- /dev/null +++ b/r32_s4_lr5e-4/model-00013-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8720b02d8a590273fc49be0731b9f09e5a8991fb258664cc6b6ed6f2861b352f +size 4999711704 diff --git a/r32_s4_lr5e-4/model-00014-of-00030.safetensors b/r32_s4_lr5e-4/model-00014-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9dc9bc887881123da3f6b6fe47c3f06bd3650f24 --- /dev/null +++ b/r32_s4_lr5e-4/model-00014-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f7f8d6e6c222a7c2ab94ff38df73577fd1dbc524dabc1486955323452fe715a +size 4966157032 diff --git a/r32_s4_lr5e-4/model-00015-of-00030.safetensors b/r32_s4_lr5e-4/model-00015-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0cb15a5f870ec60f1a5b075fcd9742c66eaac752 --- /dev/null +++ b/r32_s4_lr5e-4/model-00015-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64f515c0fb073ae592bff6fa5a4c3c123ed422a5cfb9b421952beed2b1dda611 +size 4664134384 diff --git a/r32_s4_lr5e-4/model-00016-of-00030.safetensors b/r32_s4_lr5e-4/model-00016-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d83ab63977e1e9d091a33d5c477e764ab44ed52 --- /dev/null +++ b/r32_s4_lr5e-4/model-00016-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebf5e1ca4112b55d660a5fdbc7ddba30495249646bfa995e4d582d9b7f4d62a3 +size 4664167384 diff --git a/r32_s4_lr5e-4/model-00017-of-00030.safetensors b/r32_s4_lr5e-4/model-00017-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..349bccc4118d2f3c028df2f3c9c1d122a359ff50 --- /dev/null +++ b/r32_s4_lr5e-4/model-00017-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cb20cc4a85dbfe1a9ec1a76562d36319f0bec3e7159f3086fbfdf0c053e3c62 +size 4664167376 diff --git a/r32_s4_lr5e-4/model-00018-of-00030.safetensors b/r32_s4_lr5e-4/model-00018-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e0148c3dccbe6cb6a344b1b14a06b976ffe5f6b --- /dev/null +++ b/r32_s4_lr5e-4/model-00018-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3f6c3586cfa8716856f897aa614e710e75f3464957b5ea2187815798b735bc6 +size 4999711704 diff --git a/r32_s4_lr5e-4/model-00019-of-00030.safetensors b/r32_s4_lr5e-4/model-00019-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b625e4fc7aca368850b0d6d4000aeb0b01d231f3 --- /dev/null +++ b/r32_s4_lr5e-4/model-00019-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e9094d849bd9fe086bde9c250043ba047c7abdffc8179bdaf7b82ac836fc941 +size 4966157032 diff --git a/r32_s4_lr5e-4/model-00020-of-00030.safetensors b/r32_s4_lr5e-4/model-00020-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4f5303afede136873f75a06c98b8252a42a3a54 --- /dev/null +++ b/r32_s4_lr5e-4/model-00020-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:956f9a0e4f41e9131e6603897dfea6f3d5a7786f9d11acb9993e70b1148a791c +size 4664134384 diff --git a/r32_s4_lr5e-4/model-00021-of-00030.safetensors b/r32_s4_lr5e-4/model-00021-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c50f4d6b1835bd6d0de85198c7cf9706f54c8c9 --- /dev/null +++ b/r32_s4_lr5e-4/model-00021-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38a2a872da68588f162aec869f066667adff8567de6ace81c4d22bfaf28ae323 +size 4664167384 diff --git a/r32_s4_lr5e-4/model-00022-of-00030.safetensors b/r32_s4_lr5e-4/model-00022-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f465a85576e10660702f5234cf489a5e9bea7681 --- /dev/null +++ b/r32_s4_lr5e-4/model-00022-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45d8a867e6dac1a486a7603f9214bc575534dbfae32871e099f137ee2492dd10 +size 4664167376 diff --git a/r32_s4_lr5e-4/model-00023-of-00030.safetensors b/r32_s4_lr5e-4/model-00023-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..003e7a8ca1ba5bc6a3fa170e01db535ff7218f0a --- /dev/null +++ b/r32_s4_lr5e-4/model-00023-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d20c66bd759aa1f8a7371c205ec1fc5da5139a8487f5f799018f2d271f767d05 +size 4999711704 diff --git a/r32_s4_lr5e-4/model-00024-of-00030.safetensors b/r32_s4_lr5e-4/model-00024-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..251ff12392ecc75268bf34b33b057cb510dfc343 --- /dev/null +++ b/r32_s4_lr5e-4/model-00024-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92e03ef210e7569e53db606088c211bd6099278b982aeb901350cf5c9a614cbd +size 4966157032 diff --git a/r32_s4_lr5e-4/model-00025-of-00030.safetensors b/r32_s4_lr5e-4/model-00025-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85c44298f2c61b1acac8424b18731396a2d12761 --- /dev/null +++ b/r32_s4_lr5e-4/model-00025-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41e7963b2cf84bab3e63c043ba46fbc30a0fcb0be58bc8fedc01b38173f75258 +size 4664134384 diff --git a/r32_s4_lr5e-4/model-00026-of-00030.safetensors b/r32_s4_lr5e-4/model-00026-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..480f5c88c9010d9421e2dbe75eeff9ced285b140 --- /dev/null +++ b/r32_s4_lr5e-4/model-00026-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f010b11116c5468e0fba46994a993a3a625f6ca6b489c513d52baf75d3c6136e +size 4664167384 diff --git a/r32_s4_lr5e-4/model-00027-of-00030.safetensors b/r32_s4_lr5e-4/model-00027-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d647507e31c929e4861dd42237a0c12e4d59781 --- /dev/null +++ b/r32_s4_lr5e-4/model-00027-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e26b3e1ff379c7abc84d2ec3e04d103f292b6fa3d109f9cdb046cb2ffc673648 +size 4664167376 diff --git a/r32_s4_lr5e-4/model-00028-of-00030.safetensors b/r32_s4_lr5e-4/model-00028-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca865e0f7624e0321dec229dcb6c2f55ec061a03 --- /dev/null +++ b/r32_s4_lr5e-4/model-00028-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56546ddf9566b40ad956d690835a5b329dcde7a9b6ad8c6a01ee15123e5a1878 +size 4999711704 diff --git a/r32_s4_lr5e-4/model-00029-of-00030.safetensors b/r32_s4_lr5e-4/model-00029-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0a858731af7961c15f56f215cd6f9d97b30a35f --- /dev/null +++ b/r32_s4_lr5e-4/model-00029-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8605e47ac8ae4ef5a6c58172048472f721d16935a65b1e4c67c6600158c074c +size 4966173512 diff --git a/r32_s4_lr5e-4/model-00030-of-00030.safetensors b/r32_s4_lr5e-4/model-00030-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a3611c4c41c3d8c9e551befba34877f5daa8f50 --- /dev/null +++ b/r32_s4_lr5e-4/model-00030-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3eac50c2c10b890c008479074e5ac76193b05dd0e0619aa707cdb9233c8bd9d +size 2101346432 diff --git a/r32_s4_lr5e-4/model.safetensors.index.json b/r32_s4_lr5e-4/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..b1522ada6907d0b278c1a1409ee51a3e88273e4a --- /dev/null +++ b/r32_s4_lr5e-4/model.safetensors.index.json @@ -0,0 +1,731 @@ +{ + "metadata": { + "total_parameters": 70553706496, + "total_size": 141107412992 + }, + "weight_map": { + "lm_head.weight": "model-00030-of-00030.safetensors", + "model.embed_tokens.weight": "model-00001-of-00030.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00030.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.input_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.10.input_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.input_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.input_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.13.input_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.input_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.input_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.16.input_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.input_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.18.input_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.19.input_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.2.input_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.20.input_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.21.input_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.22.input_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.input_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.24.input_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.input_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.input_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.27.input_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.input_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.input_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.3.input_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.30.input_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.input_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.32.input_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.33.input_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.input_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.35.input_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.36.input_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.input_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.38.input_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.input_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.4.input_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.40.input_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.mlp.up_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.41.input_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.input_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.42.mlp.down_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.mlp.gate_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.mlp.up_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.input_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.43.mlp.down_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.43.mlp.gate_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.mlp.up_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.44.input_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.44.mlp.down_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.mlp.gate_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.mlp.up_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.input_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.45.mlp.down_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.mlp.gate_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.mlp.up_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.46.input_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.46.mlp.down_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.46.mlp.gate_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.46.mlp.up_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.47.input_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.47.mlp.down_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.mlp.gate_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.mlp.up_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.input_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.48.mlp.down_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.mlp.gate_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.mlp.up_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.49.input_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.49.mlp.down_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.49.mlp.gate_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.49.mlp.up_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.5.input_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.50.input_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.50.mlp.down_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.mlp.gate_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.mlp.up_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.input_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.51.mlp.down_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.mlp.gate_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.mlp.up_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.52.input_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.52.mlp.down_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.mlp.gate_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.mlp.up_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.input_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.53.mlp.down_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.mlp.gate_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.mlp.up_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.input_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.54.mlp.down_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.54.mlp.gate_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.mlp.up_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.55.input_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.55.mlp.down_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.mlp.gate_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.mlp.up_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.input_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.56.mlp.down_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.mlp.gate_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.mlp.up_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.input_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.57.mlp.down_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.57.mlp.gate_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.mlp.up_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.58.input_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.58.mlp.down_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.mlp.gate_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.mlp.up_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.input_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.59.mlp.down_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.mlp.gate_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.mlp.up_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.6.input_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.60.input_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.60.mlp.down_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.60.mlp.gate_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.60.mlp.up_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.60.post_attention_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.61.input_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.61.mlp.down_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.mlp.gate_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.mlp.up_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.post_attention_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.input_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.62.mlp.down_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.mlp.gate_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.mlp.up_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.post_attention_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.63.input_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.63.mlp.down_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.63.mlp.gate_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.63.mlp.up_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.63.post_attention_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.64.input_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.64.mlp.down_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.mlp.gate_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.mlp.up_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.post_attention_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.64.self_attn.k_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.self_attn.o_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.self_attn.q_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.self_attn.v_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.input_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.65.mlp.down_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.mlp.gate_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.mlp.up_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.post_attention_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.65.self_attn.k_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.self_attn.o_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.self_attn.q_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.self_attn.v_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.66.input_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.66.mlp.down_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.mlp.gate_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.mlp.up_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.post_attention_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.66.self_attn.k_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.self_attn.o_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.self_attn.q_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.self_attn.v_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.input_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.67.mlp.down_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.mlp.gate_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.mlp.up_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.post_attention_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.67.self_attn.k_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.self_attn.o_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.self_attn.q_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.self_attn.v_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.input_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.68.mlp.down_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.68.mlp.gate_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.mlp.up_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.post_attention_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.68.self_attn.k_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.self_attn.o_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.self_attn.q_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.self_attn.v_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.69.input_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.69.mlp.down_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.mlp.gate_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.mlp.up_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.post_attention_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.69.self_attn.k_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.self_attn.o_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.self_attn.q_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.self_attn.v_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.7.input_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.70.input_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.70.mlp.down_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.mlp.gate_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.mlp.up_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.post_attention_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.70.self_attn.k_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.self_attn.o_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.self_attn.q_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.self_attn.v_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.input_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.71.mlp.down_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.71.mlp.gate_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.mlp.up_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.71.post_attention_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.71.self_attn.k_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.self_attn.o_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.self_attn.q_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.self_attn.v_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.72.input_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.72.mlp.down_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.mlp.gate_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.mlp.up_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.post_attention_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.72.self_attn.k_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.self_attn.o_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.self_attn.q_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.self_attn.v_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.input_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.73.mlp.down_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.mlp.gate_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.mlp.up_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.post_attention_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.73.self_attn.k_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.self_attn.o_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.self_attn.q_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.self_attn.v_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.74.input_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.74.mlp.down_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.74.mlp.gate_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.74.mlp.up_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.74.post_attention_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.74.self_attn.k_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.74.self_attn.o_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.74.self_attn.q_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.74.self_attn.v_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.75.input_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.75.mlp.down_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.mlp.gate_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.mlp.up_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.post_attention_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.75.self_attn.k_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.self_attn.o_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.self_attn.q_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.self_attn.v_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.input_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.76.mlp.down_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.mlp.gate_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.mlp.up_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.post_attention_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.76.self_attn.k_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.self_attn.o_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.self_attn.q_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.self_attn.v_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.77.input_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.77.mlp.down_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.77.mlp.gate_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.77.mlp.up_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.77.post_attention_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.77.self_attn.k_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.77.self_attn.o_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.77.self_attn.q_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.77.self_attn.v_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.78.input_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.78.mlp.down_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.mlp.gate_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.mlp.up_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.post_attention_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.78.self_attn.k_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.self_attn.o_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.self_attn.q_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.self_attn.v_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.input_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.79.mlp.down_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.mlp.gate_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.mlp.up_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.post_attention_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.79.self_attn.k_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.self_attn.o_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.self_attn.q_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.self_attn.v_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.8.input_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.input_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00004-of-00030.safetensors", + "model.norm.weight": "model-00029-of-00030.safetensors" + } +} diff --git a/r32_s4_lr5e-4/special_tokens_map.json b/r32_s4_lr5e-4/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..14daf4588e61b4e4983af0fccaba4d5500c0977c --- /dev/null +++ b/r32_s4_lr5e-4/special_tokens_map.json @@ -0,0 +1,26 @@ +{ + "additional_special_tokens": [ + { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/r32_s4_lr5e-4/tokenizer.json b/r32_s4_lr5e-4/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/r32_s4_lr5e-4/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/r32_s4_lr5e-4/tokenizer_config.json b/r32_s4_lr5e-4/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d0bfd3305ba1f87f7b435611053cf7715148516c --- /dev/null +++ b/r32_s4_lr5e-4/tokenizer_config.json @@ -0,0 +1,2068 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "padding_side": "left", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +}