Sentence Similarity · sentence-transformers · Safetensors · gemma3_text · Retrieval · STS · Classification · Clustering · Reranking · vllm

Commit edf22f4 (verified) · committed by YanshekWoo · 1 parent: e619bc3
.gitattributes CHANGED
@@ -1,35 +1,3 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
  *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ tokenizer.model filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 3840,
+   "pooling_mode_cls_token": false,
+   "pooling_mode_mean_tokens": false,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": true,
+   "include_prompt": true
+ }
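The pooling config above enables only `pooling_mode_lasttoken`: the sentence embedding is the final hidden state of the last non-padding token, at the full 3840-dimensional hidden size. As an illustrative sketch of what that pooling does (not the repo's own code), assuming right-padded inputs with a standard attention mask:

```python
import torch

def last_token_pool(hidden_states: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    # hidden_states: (batch, seq_len, hidden); attention_mask: (batch, seq_len)
    last_idx = attention_mask.sum(dim=1) - 1  # index of the last real token per sequence
    batch_idx = torch.arange(hidden_states.size(0), device=hidden_states.device)
    return hidden_states[batch_idx, last_idx]  # (batch, hidden)
```

With left-padded batches the same pooling reduces to `hidden_states[:, -1]`.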
2_Normalize/.Normalize ADDED
File without changes
LICENSE.txt ADDED
@@ -0,0 +1,116 @@
+ Tencent is pleased to support the community by making KaLM-Embedding available.
+
+ Copyright (C) 2025 Tencent. All rights reserved.
+
+ The open-source software and/or models included in this distribution may have been modified by Tencent (“Tencent Modifications”). All Tencent Modifications are Copyright (C) Tencent.
+
+ KaLM-Embedding is licensed under License Term of KaLM-Embedding, except for the third-party components listed in the NOTICE file, which remain licensed under their respective original terms. KaLM-Embedding does not impose any additional restrictions beyond those specified in the original licenses of these third-party components. Users are required to comply with all applicable terms and conditions of the original licenses and to ensure that the use of these third-party components conforms to all relevant laws and regulations.
+
+ For the avoidance of doubt, KaLM-Embedding refers solely to training datasets, parameters, and weights made publicly available by Tencent in accordance with License Term of KaLM-Embedding.
+
+ Terms of License Term of KaLM-Embedding:
+ --------------------------------------------------------------------
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+ 1. KaLM-Embedding IS NOT INTENDED FOR USE WITHIN THE EUROPEAN UNION.
+
+ 2. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+
+ Dependencies and Licenses:
+
+ This open-source project, KaLM-Embedding, builds upon the following open-source models and/or software components, each of which remains licensed under its original license. Certain models or software may include modifications made by Tencent (“Tencent Modifications”), which are Copyright (C) Tencent.
+
+ In case you believe there have been errors in the attribution below, you may submit the concerns to us for review and correction.
+
+ Open Source Software Licensed under the Gemma Terms of Use:
+
+ The below software in this distribution may have been modified by Tencent.
+ --------------------------------------------------------------------
+ 1. google/gemma-3-12b-pt
+ Copyright (c) 2025 google/gemma-3-12b-pt original author and authors.
+
+ Gemma is provided under and subject to the Gemma Terms of Use found at ai.google.dev/gemma/terms
+
+ Terms of the Gemma Terms of Use:
+ --------------------------------------------------------------------
+ Gemma Terms of Use
+
+ Last modified: March 24, 2025
+
+ By using, reproducing, modifying, distributing, performing or displaying any portion or element of Gemma, Model Derivatives including via any Hosted Service, (each as defined below) (collectively, the "Gemma Services") or otherwise accepting the terms of this Agreement, you agree to be bound by this Agreement.
+
+ Section 1: DEFINITIONS
+ 1.1 Definitions
+ (a) "Agreement" or "Gemma Terms of Use" means these terms and conditions that govern the use, reproduction, Distribution or modification of the Gemma Services and any terms and conditions incorporated by reference.
+
+ (b) "Distribution" or "Distribute" means any transmission, publication, or other sharing of Gemma or Model Derivatives to a third party, including by providing or making Gemma or its functionality available as a hosted service via API, web access, or any other electronic or remote means ("Hosted Service").
+
+ (c) "Gemma" means the set of machine learning language models, trained model weights and parameters identified in the Appendix, regardless of the source that you obtained it from.
+
+ (d) "Google" means Google LLC.
+
+ (e) "Model Derivatives" means all (i) modifications to Gemma, (ii) works based on Gemma, or (iii) any other machine learning model which is created by transfer of patterns of the weights, parameters, operations, or Output of Gemma, to that model in order to cause that model to perform similarly to Gemma, including distillation methods that use intermediate data representations or methods based on the generation of synthetic data Outputs by Gemma for training that model. For clarity, Outputs are not deemed Model Derivatives.
+
+ (f) "Output" means the information content output of Gemma or a Model Derivative that results from operating or otherwise using Gemma or the Model Derivative, including via a Hosted Service.
+
+ 1.2
+ As used in this Agreement, "including" means "including without limitation".
+
+ Section 2: ELIGIBILITY AND USAGE
+ 2.1 Eligibility
+ You represent and warrant that you have the legal capacity to enter into this Agreement (including being of sufficient age of consent). If you are accessing or using any of the Gemma Services for or on behalf of a legal entity, (a) you are entering into this Agreement on behalf of yourself and that legal entity, (b) you represent and warrant that you have the authority to act on behalf of and bind that entity to this Agreement and (c) references to "you" or "your" in the remainder of this Agreement refers to both you (as an individual) and that entity.
+
+ 2.2 Use
+ You may use, reproduce, modify, Distribute, perform or display any of the Gemma Services only in accordance with the terms of this Agreement, and must not violate (or encourage or permit anyone else to violate) any term of this Agreement.
+
+ Section 3: DISTRIBUTION AND RESTRICTIONS
+ 3.1 Distribution and Redistribution
+ You may reproduce or Distribute copies of Gemma or Model Derivatives if you meet all of the following conditions:
+
+ You must include the use restrictions referenced in Section 3.2 as an enforceable provision in any agreement (e.g., license agreement, terms of use, etc.) governing the use and/or distribution of Gemma or Model Derivatives and you must provide notice to subsequent users you Distribute to that Gemma or Model Derivatives are subject to the use restrictions in Section 3.2.
+ You must provide all third party recipients of Gemma or Model Derivatives a copy of this Agreement.
+ You must cause any modified files to carry prominent notices stating that you modified the files.
+ All Distributions (other than through a Hosted Service) must be accompanied by a "Notice" text file that contains the following notice: "Gemma is provided under and subject to the Gemma Terms of Use found at ai.google.dev/gemma/terms".
+ You may add your own intellectual property statement to your modifications and, except as set forth in this Section, may provide additional or different terms and conditions for use, reproduction, or Distribution of your modifications, or for any such Model Derivatives as a whole, provided your use, reproduction, modification, Distribution, performance, and display of Gemma otherwise complies with the terms and conditions of this Agreement. Any additional or different terms and conditions you impose must not conflict with the terms of this Agreement.
+
+ 3.2 Use Restrictions
+ You must not use any of the Gemma Services:
+
+ for the restricted uses set forth in the Gemma Prohibited Use Policy at ai.google.dev/gemma/prohibited_use_policy ("Prohibited Use Policy"), which is hereby incorporated by reference into this Agreement; or
+ in violation of applicable laws and regulations.
+ To the maximum extent permitted by law, Google reserves the right to restrict (remotely or otherwise) usage of any of the Gemma Services that Google reasonably believes are in violation of this Agreement.
+
+ 3.3 Generated Output
+ Google claims no rights in Outputs you generate using Gemma. You and your users are solely responsible for Outputs and their subsequent uses.
+
+ Section 4: ADDITIONAL PROVISIONS
+ 4.1 Updates
+ Google may update Gemma from time to time.
+
+ 4.2 Trademarks
+ Nothing in this Agreement grants you any rights to use Google's trademarks, trade names, logos or to otherwise suggest endorsement or misrepresent the relationship between you and Google. Google reserves any rights not expressly granted herein.
+
+ 4.3 DISCLAIMER OF WARRANTY
+ UNLESS REQUIRED BY APPLICABLE LAW, THE GEMMA SERVICES, AND OUTPUTS, ARE PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE FOR DETERMINING THE APPROPRIATENESS OF USING, REPRODUCING, MODIFYING, PERFORMING, DISPLAYING OR DISTRIBUTING ANY OF THE GEMMA SERVICES OR OUTPUTS AND ASSUME ANY AND ALL RISKS ASSOCIATED WITH YOUR USE OR DISTRIBUTION OF ANY OF THE GEMMA SERVICES OR OUTPUTS AND YOUR EXERCISE OF RIGHTS AND PERMISSIONS UNDER THIS AGREEMENT.
+
+ 4.4 LIMITATION OF LIABILITY
+ TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY, CONTRACT, OR OTHERWISE, UNLESS REQUIRED BY APPLICABLE LAW, SHALL GOOGLE OR ITS AFFILIATES BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, EXEMPLARY, CONSEQUENTIAL, OR PUNITIVE DAMAGES, OR LOST PROFITS OF ANY KIND ARISING FROM THIS AGREEMENT OR RELATED TO, ANY OF THE GEMMA SERVICES OR OUTPUTS EVEN IF GOOGLE OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+ 4.5 Term, Termination, and Survival
+ The term of this Agreement will commence upon your acceptance of this Agreement (including acceptance by your use, modification, or Distribution, reproduction, performance or display of any portion or element of the Gemma Services) and will continue in full force and effect until terminated in accordance with the terms of this Agreement. Google may terminate this Agreement if you are in breach of any term of this Agreement. Upon termination of this Agreement, you must delete and cease use and Distribution of all copies of Gemma and Model Derivatives in your possession or control. Sections 1, 2.1, 3.3, 4.2 to 4.9 shall survive the termination of this Agreement.
+
+ 4.6 Governing Law and Jurisdiction
+ This Agreement will be governed by the laws of the State of California without regard to choice of law principles. The UN Convention on Contracts for the International Sale of Goods does not apply to this Agreement. The state and federal courts of Santa Clara County, California shall have exclusive jurisdiction of any dispute arising out of this Agreement.
+
+ 4.7 Severability
+ If any provision of this Agreement is held to be invalid, illegal or unenforceable, the remaining provisions shall be unaffected thereby and remain valid as if such provision had not been set forth herein.
+
+ 4.8 Entire Agreement
+ This Agreement states all the terms agreed between the parties and supersedes all other agreements between the parties as of the date of acceptance relating to its subject matter.
+
+ 4.9 No Waiver
+ Google will not be treated as having waived any rights by not exercising (or delaying the exercise of) any rights under this Agreement.
README.md ADDED
@@ -0,0 +1,192 @@
+ ---
+ datasets:
+ - KaLM-Embedding/KaLM-embedding-finetuning-data
+ base_model:
+ - google/gemma-3-12b-pt
+ pipeline_tag: feature-extraction
+ library_name: sentence-transformers
+ tags:
+ - Retrieval
+ - STS
+ - Classification
+ - Clustering
+ - Reranking
+ - vllm
+ license: other
+ license_name: tencent-kalm-embedding-community
+ extra_gated_eu_disallowed: true
+ ---
+
+
+ <h1 align="center">KaLM-Embedding-Gemma3-12B-2511</h1>
+
+ <p align="center">
+     <a href="https://huggingface.co/tencent/KaLM-Embedding-Gemma3-12B-2511">
+         <img src="https://img.shields.io/badge/%F0%9F%A4%97_HuggingFace-Model-ffbd45.svg" alt="HuggingFace">
+     </a>
+     <a href="https://kalm-embedding.github.io/">
+         <img src="https://img.shields.io/badge/Home-Page-purple.svg?logo=github&" alt="Homepage">
+     </a>
+     <a href="https://github.com/Tencent/KaLM-Embedding-Gemma3-12B-2511">
+         <img src="https://img.shields.io/badge/GitHub-Code-blue.svg?logo=github&" alt="GitHub">
+     </a>
+     <a href="https://arxiv.org/abs/2506.20923">
+         <img src="https://img.shields.io/badge/Paper-KaLM--Embedding-d4333f?logo=arxiv&logoColor=white&colorA=cccccc&colorB=d4333f&style=flat" alt="Paper">
+     </a>
+ </p>
+
+
+ ## Short Description
+
+ **KaLM-Embedding-Gemma3-12B-2511** is a versatile and compact embedding model that achieves SOTA performance on MMTEB (as of November 2025).
+
+
+ ## MMTEB Evaluation Results
+
+ | Rank (Borda) | Model | Mean (Task) | Mean (TaskType) | Bitext Mining | Classification | Clustering | Instruction Reranking | Multilabel Classification | Pair Classification | Reranking | Retrieval | STS |
+ | :--- | :--- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
+ | **1** | **KaLM-Embedding-Gemma3-12B-2511** | **72.32** | **62.51** | **83.76** | **77.88** | 55.77 | 5.49 | **33.03** | 84.73 | 67.27 | **75.66** | 79.02 |
+ | 2 | llama-embed-nemotron-8b | 69.46 | 61.09 | 81.72 | 73.21 | 54.35 | 10.82 | 29.86 | 83.97 | **67.78** | 68.69 | 79.41 |
+ | 3 | Qwen3-Embedding-8B | 70.58 | 61.69 | 80.89 | 74.00 | **57.65** | 10.06 | 28.66 | **86.40** | 65.63 | 70.88 | **81.08** |
+ | 4 | gemini-embedding-001 | 68.37 | 59.59 | 79.28 | 71.82 | 54.59 | 5.18 | 29.16 | 83.63 | 65.58 | 67.71 | 79.40 |
+ | 5 | Qwen3-Embedding-4B | 69.45 | 60.86 | 79.36 | 72.33 | 57.15 | **11.56** | 26.77 | 85.05 | 65.08 | 69.60 | 80.86 |
+ | 6 | Qwen3-Embedding-0.6B | 64.34 | 56.01 | 72.23 | 66.83 | 52.33 | 5.09 | 24.59 | 80.83 | 61.41 | 64.65 | 76.17 |
+ | 7 | gte-Qwen2-7B-instruct | 62.51 | 55.93 | 73.92 | 61.55 | 52.77 | 4.94 | 25.48 | 85.13 | 65.55 | 60.08 | 73.98 |
+ | 8 | Linq-Embed-Mistral | 61.47 | 54.14 | 70.34 | 62.24 | 50.60 | 0.94 | 24.77 | 80.43 | 64.37 | 58.69 | 74.86 |
+ | 9 | multilingual-e5-large-instruct | 63.22 | 55.08 | 80.13 | 64.94 | 50.75 | -0.40 | 22.91 | 80.86 | 62.61 | 57.12 | 76.81 |
+ | 10 | embeddinggemma-300m | 61.15 | 54.31 | 64.40 | 60.90 | 51.17 | 5.61 | 24.82 | 81.40 | 63.25 | 62.49 | 74.73 |
+
+
+ ## Model Details
+ - Model Size: 11.76B parameters
+ - Embedding Dimension: 3840
+ - Max Input Tokens: 32k
+ - MRL dimensions: 3840, 2048, 1024, 512, 256, 128, and 64 (see the truncation sketch after this list)
+ - Pooling: last-token pooling
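The MRL dimensions above come from Matryoshka-style training: a full 3840-dim embedding can be truncated to any of the listed sizes and re-normalized. A minimal sketch of that truncation (plain NumPy, illustrative rather than the repo's code; recent sentence-transformers releases expose the same operation via a `truncate_dim` argument):

```python
import numpy as np

def truncate_mrl(embeddings: np.ndarray, dim: int) -> np.ndarray:
    """Keep the first `dim` MRL dimensions and re-normalize to unit length."""
    kept = embeddings[..., :dim]
    return kept / np.linalg.norm(kept, axis=-1, keepdims=True)

# e.g. shrink 3840-dim vectors to 256 dims for a cheaper vector index
full = np.random.randn(2, 3840).astype(np.float32)  # stand-in for model output
small = truncate_mrl(full, 256)                     # shape (2, 256), unit norm
```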
+
+
+ ## Training Recipe
+ - High-quality supervised finetuning
+
+
+ ## 📑 Open-source Plan
+
+ - [x] Model Checkpoint
+     - [x] [KaLM-embedding-multilingual-mini-v1](https://huggingface.co/HIT-TMG/KaLM-embedding-multilingual-mini-v1)
+     - [x] [KaLM-embedding-multilingual-mini-instruct-v1](https://huggingface.co/HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1)
+     - [x] [KaLM-embedding-multilingual-mini-instruct-v1.5](https://huggingface.co/HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5)
+     - [x] [KaLM-embedding-multilingual-mini-instruct-v2](https://huggingface.co/HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v2)
+     - [x] [KaLM-embedding-multilingual-mini-instruct-v2.5](https://huggingface.co/KaLM-Embedding/KaLM-embedding-multilingual-mini-instruct-v2.5)
+     - [x] [KaLM-Embedding-Gemma3-12B-2511](https://huggingface.co/tencent/KaLM-Embedding-Gemma3-12B-2511)
+ - [x] Training and Evaluation Code: [HITsz-TMG/KaLM-Embedding](https://github.com/HITsz-TMG/KaLM-Embedding)
+ - [x] Technical Report: [KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model](https://arxiv.org/abs/2506.20923v4)
+ - [x] Pre-training Data: [Pre-training Data](https://huggingface.co/datasets/HIT-TMG/KaLM-embedding-pretrain-data)
+ - [x] Fine-tuning Data: [Fine-tuning Data](https://huggingface.co/datasets/KaLM-Embedding/KaLM-embedding-finetuning-data)
+
+
+ ## Usage
+ ### sentence-transformers support
+ Using this model is straightforward once you have [sentence-transformers](https://www.SBERT.net) installed:
+
+ ```
+ pip install -U sentence-transformers
+ ```
+
+ You can use the model like this:
+
+ ```python
+ from sentence_transformers import SentenceTransformer
+ import torch
+
+ model = SentenceTransformer(
+     "tencent/KaLM-Embedding-Gemma3-12B-2511",
+     trust_remote_code=True,
+     model_kwargs={
+         "torch_dtype": torch.bfloat16,
+         "attn_implementation": "flash_attention_2",  # Optional
+     },
+ )
+ model.max_seq_length = 512
+
+ sentences = ["This is an example sentence", "Each sentence is converted"]
+ prompt = "Instruct: Classifying the category of french news.\nQuery:"
+ embeddings = model.encode(
+     sentences,
+     prompt=prompt,
+     normalize_embeddings=True,
+     batch_size=256,
+     show_progress_bar=True,
+ )
+ print(embeddings)
+ '''
+ [[-0.01867676  0.02319336  0.00280762 ... -0.02075195  0.00196838
+   -0.0703125 ]
+  [-0.0067749   0.03491211  0.01434326 ... -0.0043335   0.00509644
+   -0.04174805]]
+ '''
+ ```
+
+ Alternatively, you can use `encode_query` and `encode_document`, which automatically add the default prompts for queries (`"Instruct: Given a query, retrieve documents that answer the query \n Query: "`) and for documents (`""`), respectively.
+
+ ```python
+ from sentence_transformers import SentenceTransformer
+ import torch
+
+ model = SentenceTransformer(
+     "tencent/KaLM-Embedding-Gemma3-12B-2511",
+     trust_remote_code=True,
+     model_kwargs={
+         "torch_dtype": torch.bfloat16,
+         "attn_implementation": "flash_attention_2",  # Optional
+     },
+ )
+ model.max_seq_length = 512
+
+ queries = [
+     "What is the capital of China?",
+     "Explain gravity",
+ ]
+ documents = [
+     "The capital of China is Beijing.",
+     "Gravity is a force that attracts two bodies towards each other. It gives weight to physical objects and is responsible for the movement of planets around the sun.",
+ ]
+
+ query_embeddings = model.encode_query(queries)
+ document_embeddings = model.encode_document(documents)
+
+ similarities = model.similarity(query_embeddings, document_embeddings)
+ print(similarities)
+ '''
+ tensor([[0.9034, 0.2563],
+         [0.3153, 0.7396]])
+ '''
+ ```
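The model card also tags `vllm` but ships no vLLM snippet. As a hedged sketch of high-throughput embedding with vLLM's offline pooling API (the exact task flag and output fields vary across vLLM releases, so check the docs for your version; this is not the repo's own example):

```python
from vllm import LLM

# Load the checkpoint for embedding inference; recent vLLM releases use
# task="embed" (older ones used task="embedding").
llm = LLM(
    model="tencent/KaLM-Embedding-Gemma3-12B-2511",
    task="embed",
    trust_remote_code=True,
)

query_prompt = "Instruct: Given a query, retrieve documents that answer the query \n Query: "
outputs = llm.embed([
    query_prompt + "What is the capital of China?",
    "The capital of China is Beijing.",  # documents use an empty prompt
])
vectors = [o.outputs.embedding for o in outputs]  # plain lists of floats
print(len(vectors), len(vectors[0]))
```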
+
+
+ ## Citation
+ If you find this model useful, please consider giving it a star and a citation.
+ ```
+ @misc{zhao2025kalmembeddingv2,
+       title={KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model},
+       author={Xinping Zhao and Xinshuo Hu and Zifei Shan and Shouzheng Huang and Yao Zhou and Xin Zhang and Zetian Sun and Zhenyu Liu and Dongfang Li and Xinyuan Wei and Youcheng Pan and Yang Xiang and Meishan Zhang and Haofen Wang and Jun Yu and Baotian Hu and Min Zhang},
+       year={2025},
+       eprint={2506.20923},
+       archivePrefix={arXiv},
+       primaryClass={cs.CL},
+       url={https://arxiv.org/abs/2506.20923},
+ }
+
+ @misc{hu2025kalmembedding,
+       title={KaLM-Embedding: Superior Training Data Brings A Stronger Embedding Model},
+       author={Xinshuo Hu and Zifei Shan and Xinping Zhao and Zetian Sun and Zhenyu Liu and Dongfang Li and Shaolin Ye and Xinyuan Wei and Qian Chen and Baotian Hu and Haofen Wang and Jun Yu and Min Zhang},
+       year={2025},
+       eprint={2501.01028},
+       archivePrefix={arXiv},
+       primaryClass={cs.CL},
+       url={https://arxiv.org/abs/2501.01028},
+ }
+ ```
+
+
+ ## Contact
+ If you encounter any issues, feel free to contact us via email: <[email protected]>, <[email protected]>
added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "<image_soft_token>": 262144
+ }
config.json ADDED
@@ -0,0 +1,87 @@
+ {
+   "_sliding_window_pattern": 6,
+   "architectures": [
+     "Gemma3TextModel"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "attn_logit_softcapping": null,
+   "bos_token_id": 2,
+   "cache_implementation": "hybrid",
+   "eos_token_id": 1,
+   "final_logit_softcapping": null,
+   "head_dim": 256,
+   "hidden_activation": "gelu_pytorch_tanh",
+   "hidden_size": 3840,
+   "initializer_range": 0.02,
+   "intermediate_size": 15360,
+   "layer_types": [
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "full_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "full_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "full_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "full_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "full_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "full_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "full_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "sliding_attention",
+     "full_attention"
+   ],
+   "max_position_embeddings": 131072,
+   "model_type": "gemma3_text",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 48,
+   "num_key_value_heads": 8,
+   "pad_token_id": 0,
+   "query_pre_attn_scalar": 256,
+   "rms_norm_eps": 1e-06,
+   "rope_local_base_freq": 10000.0,
+   "rope_scaling": {
+     "factor": 8.0,
+     "rope_type": "linear"
+   },
+   "rope_theta": 1000000.0,
+   "sliding_window": 1024,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.55.0",
+   "use_cache": true,
+   "vocab_size": 262208
+ }
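A note on the `layer_types` list in the config above: with `_sliding_window_pattern: 6` and 48 hidden layers, it is simply Gemma 3's 5:1 interleave of sliding-window and full attention repeated eight times. A quick sanity check:

```python
# Reconstruct the 48-entry layer_types list from the 6-layer repeating pattern.
pattern = ["sliding_attention"] * 5 + ["full_attention"]
layer_types = pattern * 8  # num_hidden_layers = 48
assert len(layer_types) == 48 and layer_types[5] == "full_attention"
```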
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "__version__": {
+     "sentence_transformers": "4.1.0",
+     "transformers": "4.55.0",
+     "pytorch": "2.6.0+cu124"
+   },
+   "prompts": {},
+   "default_prompt_name": null,
+   "similarity_fn_name": "cosine"
+ }
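Since `similarity_fn_name` is `"cosine"` and the usage examples encode with `normalize_embeddings=True`, scoring reduces to a dot product on unit vectors. A small illustrative helper (not part of the repo):

```python
import numpy as np

def cosine_scores(queries: np.ndarray, docs: np.ndarray) -> np.ndarray:
    # For L2-normalized embeddings, cosine similarity is a plain dot product.
    return queries @ docs.T  # (n_queries, n_docs)
```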
model-00001-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7852bd7bbe4cd633fd592de8a3ce196ace7010438223cef591c0e8ff4b8852af
+ size 4915892480
model-00002-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f5e688ed56978fef84c0a982abf352a5f6a57feed6a7e6dcc48c748f4e2230ed
+ size 4931293608
model-00003-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8e4a1670dd88a6b2e2b9e0c0c8c8a6582be92bbb3ff5387c3a39e24b4ea9ec36
+ size 4931293664
model-00004-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0609236a4d43126d080b3ba62d24fdb32408e143758bef1535ec22e8d1cfd243
+ size 4931293664
model-00005-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aad6982bec79ec3c0dfae0f19f90369535f59666366edaf42b77b0948e21227f
+ size 3822364144
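The five shard sizes above sum to 23,532,137,560 bytes, a few tens of kilobytes more than the `total_size` of 23,532,068,352 recorded in the index below; the surplus is the per-file safetensors headers. The tensor bytes themselves match the parameter count exactly, since the checkpoint is stored in bfloat16:

```python
# Sanity check on the index metadata (values from model.safetensors.index.json):
total_parameters = 11_766_034_176
total_size = 23_532_068_352
assert total_parameters * 2 == total_size  # bf16 stores 2 bytes per parameter
```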
model.safetensors.index.json ADDED
@@ -0,0 +1,634 @@
+ {
+   "metadata": {
+     "total_parameters": 11766034176,
+     "total_size": 23532068352
+   },
+   "weight_map": {
+     "embed_tokens.weight": "model-00001-of-00005.safetensors",
+     "layers.0.input_layernorm.weight": "model-00001-of-00005.safetensors",
+     "layers.0.mlp.down_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.0.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.0.mlp.up_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.0.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
+     "layers.0.post_feedforward_layernorm.weight": "model-00001-of-00005.safetensors",
+     "layers.0.pre_feedforward_layernorm.weight": "model-00001-of-00005.safetensors",
+     "layers.0.self_attn.k_norm.weight": "model-00001-of-00005.safetensors",
+     "layers.0.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.0.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.0.self_attn.q_norm.weight": "model-00001-of-00005.safetensors",
+     "layers.0.self_attn.q_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.0.self_attn.v_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.1.input_layernorm.weight": "model-00001-of-00005.safetensors",
+     "layers.1.mlp.down_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.1.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.1.mlp.up_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.1.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
+     "layers.1.post_feedforward_layernorm.weight": "model-00001-of-00005.safetensors",
+     "layers.1.pre_feedforward_layernorm.weight": "model-00001-of-00005.safetensors",
+     "layers.1.self_attn.k_norm.weight": "model-00001-of-00005.safetensors",
+     "layers.1.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.1.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.1.self_attn.q_norm.weight": "model-00001-of-00005.safetensors",
+     "layers.1.self_attn.q_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.1.self_attn.v_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.10.input_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.10.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.10.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.10.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.10.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.10.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.10.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.10.self_attn.k_norm.weight": "model-00002-of-00005.safetensors",
+     "layers.10.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.10.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.10.self_attn.q_norm.weight": "model-00002-of-00005.safetensors",
+     "layers.10.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.10.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.11.input_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.11.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.11.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.11.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.11.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.11.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.11.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.11.self_attn.k_norm.weight": "model-00002-of-00005.safetensors",
+     "layers.11.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.11.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.11.self_attn.q_norm.weight": "model-00002-of-00005.safetensors",
+     "layers.11.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.11.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.12.input_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.12.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.12.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.12.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.12.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.12.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.12.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.12.self_attn.k_norm.weight": "model-00002-of-00005.safetensors",
+     "layers.12.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.12.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.12.self_attn.q_norm.weight": "model-00002-of-00005.safetensors",
+     "layers.12.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.12.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.13.input_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.13.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.13.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.13.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.13.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.13.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.13.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.13.self_attn.k_norm.weight": "model-00002-of-00005.safetensors",
+     "layers.13.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.13.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.13.self_attn.q_norm.weight": "model-00002-of-00005.safetensors",
+     "layers.13.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.13.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.14.input_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.14.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.14.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.14.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.14.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.14.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.14.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.14.self_attn.k_norm.weight": "model-00002-of-00005.safetensors",
+     "layers.14.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.14.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.14.self_attn.q_norm.weight": "model-00002-of-00005.safetensors",
+     "layers.14.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.14.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.15.input_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.15.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.15.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.15.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.15.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.15.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.15.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.15.self_attn.k_norm.weight": "model-00002-of-00005.safetensors",
+     "layers.15.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.15.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.15.self_attn.q_norm.weight": "model-00002-of-00005.safetensors",
+     "layers.15.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.15.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.16.input_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.16.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.16.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.16.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.16.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.16.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.16.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
+     "layers.16.self_attn.k_norm.weight": "model-00002-of-00005.safetensors",
+     "layers.16.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.16.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.16.self_attn.q_norm.weight": "model-00002-of-00005.safetensors",
+     "layers.16.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.16.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.17.input_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.17.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.17.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.17.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.17.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.17.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.17.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.17.self_attn.k_norm.weight": "model-00002-of-00005.safetensors",
+     "layers.17.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.17.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.17.self_attn.q_norm.weight": "model-00002-of-00005.safetensors",
+     "layers.17.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.17.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
+     "layers.18.input_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.18.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.18.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.18.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.18.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.18.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.18.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.18.self_attn.k_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.18.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.18.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.18.self_attn.q_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.18.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.18.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.19.input_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.19.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.19.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.19.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.19.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.19.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.19.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.19.self_attn.k_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.19.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.19.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.19.self_attn.q_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.19.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.19.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.2.input_layernorm.weight": "model-00001-of-00005.safetensors",
+     "layers.2.mlp.down_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.2.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.2.mlp.up_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.2.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
+     "layers.2.post_feedforward_layernorm.weight": "model-00001-of-00005.safetensors",
+     "layers.2.pre_feedforward_layernorm.weight": "model-00001-of-00005.safetensors",
+     "layers.2.self_attn.k_norm.weight": "model-00001-of-00005.safetensors",
+     "layers.2.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.2.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.2.self_attn.q_norm.weight": "model-00001-of-00005.safetensors",
+     "layers.2.self_attn.q_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.2.self_attn.v_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.20.input_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.20.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.20.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.20.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.20.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.20.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.20.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.20.self_attn.k_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.20.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.20.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.20.self_attn.q_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.20.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.20.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.21.input_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.21.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.21.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.21.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.21.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.21.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.21.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.21.self_attn.k_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.21.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.21.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.21.self_attn.q_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.21.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.21.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.22.input_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.22.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.22.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.22.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.22.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.22.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.22.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.22.self_attn.k_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.22.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.22.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.22.self_attn.q_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.22.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.22.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.23.input_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.23.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.23.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.23.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.23.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.23.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.23.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.23.self_attn.k_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.23.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.23.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.23.self_attn.q_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.23.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.23.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.24.input_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.24.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.24.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.24.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.24.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.24.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.24.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.24.self_attn.k_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.24.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.24.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.24.self_attn.q_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.24.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.24.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.25.input_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.25.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.25.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.25.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.25.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.25.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.25.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.25.self_attn.k_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.25.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.25.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.25.self_attn.q_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.25.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.25.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.26.input_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.26.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.26.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.26.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.26.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.26.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.26.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.26.self_attn.k_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.26.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.26.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.26.self_attn.q_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.26.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.26.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.27.input_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.27.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.27.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.27.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.27.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.27.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.27.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors",
+     "layers.27.self_attn.k_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.27.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.27.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.27.self_attn.q_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.27.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.27.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.28.input_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.28.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.28.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.28.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.28.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.28.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.28.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.28.self_attn.k_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.28.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.28.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.28.self_attn.q_norm.weight": "model-00003-of-00005.safetensors",
+     "layers.28.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.28.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+     "layers.29.input_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.29.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.29.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.29.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.29.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.29.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.29.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.29.self_attn.k_norm.weight": "model-00004-of-00005.safetensors",
+     "layers.29.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.29.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.29.self_attn.q_norm.weight": "model-00004-of-00005.safetensors",
+     "layers.29.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.29.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.3.input_layernorm.weight": "model-00001-of-00005.safetensors",
+     "layers.3.mlp.down_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.3.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.3.mlp.up_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.3.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
+     "layers.3.post_feedforward_layernorm.weight": "model-00001-of-00005.safetensors",
+     "layers.3.pre_feedforward_layernorm.weight": "model-00001-of-00005.safetensors",
+     "layers.3.self_attn.k_norm.weight": "model-00001-of-00005.safetensors",
+     "layers.3.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.3.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.3.self_attn.q_norm.weight": "model-00001-of-00005.safetensors",
+     "layers.3.self_attn.q_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.3.self_attn.v_proj.weight": "model-00001-of-00005.safetensors",
+     "layers.30.input_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.30.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.30.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.30.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.30.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.30.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.30.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.30.self_attn.k_norm.weight": "model-00004-of-00005.safetensors",
+     "layers.30.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.30.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.30.self_attn.q_norm.weight": "model-00004-of-00005.safetensors",
+     "layers.30.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.30.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.31.input_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.31.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.31.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.31.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.31.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.31.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.31.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.31.self_attn.k_norm.weight": "model-00004-of-00005.safetensors",
+     "layers.31.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.31.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.31.self_attn.q_norm.weight": "model-00004-of-00005.safetensors",
+     "layers.31.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.31.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.32.input_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.32.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.32.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.32.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.32.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.32.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.32.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.32.self_attn.k_norm.weight": "model-00004-of-00005.safetensors",
+     "layers.32.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.32.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.32.self_attn.q_norm.weight": "model-00004-of-00005.safetensors",
+     "layers.32.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.32.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.33.input_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.33.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.33.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.33.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.33.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.33.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.33.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.33.self_attn.k_norm.weight": "model-00004-of-00005.safetensors",
+     "layers.33.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.33.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.33.self_attn.q_norm.weight": "model-00004-of-00005.safetensors",
+     "layers.33.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.33.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.34.input_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.34.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.34.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.34.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.34.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.34.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.34.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.34.self_attn.k_norm.weight": "model-00004-of-00005.safetensors",
+     "layers.34.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.34.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.34.self_attn.q_norm.weight": "model-00004-of-00005.safetensors",
+     "layers.34.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.34.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.35.input_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.35.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.35.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.35.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.35.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.35.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.35.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
+     "layers.35.self_attn.k_norm.weight": "model-00004-of-00005.safetensors",
+     "layers.35.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.35.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.35.self_attn.q_norm.weight": "model-00004-of-00005.safetensors",
+     "layers.35.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
+     "layers.35.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
398
+ "layers.36.input_layernorm.weight": "model-00004-of-00005.safetensors",
399
+ "layers.36.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
400
+ "layers.36.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
401
+ "layers.36.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
402
+ "layers.36.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
403
+ "layers.36.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
404
+ "layers.36.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
405
+ "layers.36.self_attn.k_norm.weight": "model-00004-of-00005.safetensors",
406
+ "layers.36.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
407
+ "layers.36.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
408
+ "layers.36.self_attn.q_norm.weight": "model-00004-of-00005.safetensors",
409
+ "layers.36.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
410
+ "layers.36.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
411
+ "layers.37.input_layernorm.weight": "model-00004-of-00005.safetensors",
412
+ "layers.37.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
413
+ "layers.37.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
414
+ "layers.37.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
415
+ "layers.37.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
416
+ "layers.37.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
417
+ "layers.37.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
418
+ "layers.37.self_attn.k_norm.weight": "model-00004-of-00005.safetensors",
419
+ "layers.37.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
420
+ "layers.37.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
421
+ "layers.37.self_attn.q_norm.weight": "model-00004-of-00005.safetensors",
422
+ "layers.37.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
423
+ "layers.37.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
424
+ "layers.38.input_layernorm.weight": "model-00004-of-00005.safetensors",
425
+ "layers.38.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
426
+ "layers.38.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
427
+ "layers.38.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
428
+ "layers.38.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
429
+ "layers.38.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
430
+ "layers.38.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors",
431
+ "layers.38.self_attn.k_norm.weight": "model-00004-of-00005.safetensors",
432
+ "layers.38.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
433
+ "layers.38.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
434
+ "layers.38.self_attn.q_norm.weight": "model-00004-of-00005.safetensors",
435
+ "layers.38.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
436
+ "layers.38.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
437
+ "layers.39.input_layernorm.weight": "model-00005-of-00005.safetensors",
438
+ "layers.39.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
439
+ "layers.39.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
440
+ "layers.39.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
441
+ "layers.39.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
442
+ "layers.39.post_feedforward_layernorm.weight": "model-00005-of-00005.safetensors",
443
+ "layers.39.pre_feedforward_layernorm.weight": "model-00005-of-00005.safetensors",
444
+ "layers.39.self_attn.k_norm.weight": "model-00004-of-00005.safetensors",
445
+ "layers.39.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
446
+ "layers.39.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
447
+ "layers.39.self_attn.q_norm.weight": "model-00004-of-00005.safetensors",
448
+ "layers.39.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
449
+ "layers.39.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
450
+ "layers.4.input_layernorm.weight": "model-00001-of-00005.safetensors",
451
+ "layers.4.mlp.down_proj.weight": "model-00001-of-00005.safetensors",
452
+ "layers.4.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
453
+ "layers.4.mlp.up_proj.weight": "model-00001-of-00005.safetensors",
454
+ "layers.4.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
455
+ "layers.4.post_feedforward_layernorm.weight": "model-00001-of-00005.safetensors",
456
+ "layers.4.pre_feedforward_layernorm.weight": "model-00001-of-00005.safetensors",
457
+ "layers.4.self_attn.k_norm.weight": "model-00001-of-00005.safetensors",
458
+ "layers.4.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
459
+ "layers.4.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
460
+ "layers.4.self_attn.q_norm.weight": "model-00001-of-00005.safetensors",
461
+ "layers.4.self_attn.q_proj.weight": "model-00001-of-00005.safetensors",
462
+ "layers.4.self_attn.v_proj.weight": "model-00001-of-00005.safetensors",
463
+ "layers.40.input_layernorm.weight": "model-00005-of-00005.safetensors",
464
+ "layers.40.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
465
+ "layers.40.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
466
+ "layers.40.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
467
+ "layers.40.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
468
+ "layers.40.post_feedforward_layernorm.weight": "model-00005-of-00005.safetensors",
469
+ "layers.40.pre_feedforward_layernorm.weight": "model-00005-of-00005.safetensors",
470
+ "layers.40.self_attn.k_norm.weight": "model-00005-of-00005.safetensors",
471
+ "layers.40.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
472
+ "layers.40.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
473
+ "layers.40.self_attn.q_norm.weight": "model-00005-of-00005.safetensors",
474
+ "layers.40.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
475
+ "layers.40.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
476
+ "layers.41.input_layernorm.weight": "model-00005-of-00005.safetensors",
477
+ "layers.41.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
478
+ "layers.41.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
479
+ "layers.41.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
480
+ "layers.41.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
481
+ "layers.41.post_feedforward_layernorm.weight": "model-00005-of-00005.safetensors",
482
+ "layers.41.pre_feedforward_layernorm.weight": "model-00005-of-00005.safetensors",
483
+ "layers.41.self_attn.k_norm.weight": "model-00005-of-00005.safetensors",
484
+ "layers.41.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
485
+ "layers.41.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
486
+ "layers.41.self_attn.q_norm.weight": "model-00005-of-00005.safetensors",
487
+ "layers.41.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
488
+ "layers.41.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
489
+ "layers.42.input_layernorm.weight": "model-00005-of-00005.safetensors",
490
+ "layers.42.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
491
+ "layers.42.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
492
+ "layers.42.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
493
+ "layers.42.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
494
+ "layers.42.post_feedforward_layernorm.weight": "model-00005-of-00005.safetensors",
495
+ "layers.42.pre_feedforward_layernorm.weight": "model-00005-of-00005.safetensors",
496
+ "layers.42.self_attn.k_norm.weight": "model-00005-of-00005.safetensors",
497
+ "layers.42.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
498
+ "layers.42.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
499
+ "layers.42.self_attn.q_norm.weight": "model-00005-of-00005.safetensors",
500
+ "layers.42.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
501
+ "layers.42.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
502
+ "layers.43.input_layernorm.weight": "model-00005-of-00005.safetensors",
503
+ "layers.43.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
504
+ "layers.43.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
505
+ "layers.43.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
506
+ "layers.43.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
507
+ "layers.43.post_feedforward_layernorm.weight": "model-00005-of-00005.safetensors",
508
+ "layers.43.pre_feedforward_layernorm.weight": "model-00005-of-00005.safetensors",
509
+ "layers.43.self_attn.k_norm.weight": "model-00005-of-00005.safetensors",
510
+ "layers.43.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
511
+ "layers.43.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
512
+ "layers.43.self_attn.q_norm.weight": "model-00005-of-00005.safetensors",
513
+ "layers.43.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
514
+ "layers.43.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
515
+ "layers.44.input_layernorm.weight": "model-00005-of-00005.safetensors",
516
+ "layers.44.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
517
+ "layers.44.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
518
+ "layers.44.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
519
+ "layers.44.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
520
+ "layers.44.post_feedforward_layernorm.weight": "model-00005-of-00005.safetensors",
521
+ "layers.44.pre_feedforward_layernorm.weight": "model-00005-of-00005.safetensors",
522
+ "layers.44.self_attn.k_norm.weight": "model-00005-of-00005.safetensors",
523
+ "layers.44.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
524
+ "layers.44.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
525
+ "layers.44.self_attn.q_norm.weight": "model-00005-of-00005.safetensors",
526
+ "layers.44.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
527
+ "layers.44.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
528
+ "layers.45.input_layernorm.weight": "model-00005-of-00005.safetensors",
529
+ "layers.45.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
530
+ "layers.45.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
531
+ "layers.45.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
532
+ "layers.45.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
533
+ "layers.45.post_feedforward_layernorm.weight": "model-00005-of-00005.safetensors",
534
+ "layers.45.pre_feedforward_layernorm.weight": "model-00005-of-00005.safetensors",
535
+ "layers.45.self_attn.k_norm.weight": "model-00005-of-00005.safetensors",
536
+ "layers.45.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
537
+ "layers.45.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
538
+ "layers.45.self_attn.q_norm.weight": "model-00005-of-00005.safetensors",
539
+ "layers.45.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
540
+ "layers.45.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
541
+ "layers.46.input_layernorm.weight": "model-00005-of-00005.safetensors",
542
+ "layers.46.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
543
+ "layers.46.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
544
+ "layers.46.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
545
+ "layers.46.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
546
+ "layers.46.post_feedforward_layernorm.weight": "model-00005-of-00005.safetensors",
547
+ "layers.46.pre_feedforward_layernorm.weight": "model-00005-of-00005.safetensors",
548
+ "layers.46.self_attn.k_norm.weight": "model-00005-of-00005.safetensors",
549
+ "layers.46.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
550
+ "layers.46.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
551
+ "layers.46.self_attn.q_norm.weight": "model-00005-of-00005.safetensors",
552
+ "layers.46.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
553
+ "layers.46.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
554
+ "layers.47.input_layernorm.weight": "model-00005-of-00005.safetensors",
555
+ "layers.47.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
556
+ "layers.47.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
557
+ "layers.47.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
558
+ "layers.47.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
559
+ "layers.47.post_feedforward_layernorm.weight": "model-00005-of-00005.safetensors",
560
+ "layers.47.pre_feedforward_layernorm.weight": "model-00005-of-00005.safetensors",
561
+ "layers.47.self_attn.k_norm.weight": "model-00005-of-00005.safetensors",
562
+ "layers.47.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
563
+ "layers.47.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
564
+ "layers.47.self_attn.q_norm.weight": "model-00005-of-00005.safetensors",
565
+ "layers.47.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
566
+ "layers.47.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
567
+ "layers.5.input_layernorm.weight": "model-00001-of-00005.safetensors",
568
+ "layers.5.mlp.down_proj.weight": "model-00001-of-00005.safetensors",
569
+ "layers.5.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
570
+ "layers.5.mlp.up_proj.weight": "model-00001-of-00005.safetensors",
571
+ "layers.5.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
572
+ "layers.5.post_feedforward_layernorm.weight": "model-00001-of-00005.safetensors",
573
+ "layers.5.pre_feedforward_layernorm.weight": "model-00001-of-00005.safetensors",
574
+ "layers.5.self_attn.k_norm.weight": "model-00001-of-00005.safetensors",
575
+ "layers.5.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
576
+ "layers.5.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
577
+ "layers.5.self_attn.q_norm.weight": "model-00001-of-00005.safetensors",
578
+ "layers.5.self_attn.q_proj.weight": "model-00001-of-00005.safetensors",
579
+ "layers.5.self_attn.v_proj.weight": "model-00001-of-00005.safetensors",
580
+ "layers.6.input_layernorm.weight": "model-00002-of-00005.safetensors",
581
+ "layers.6.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
582
+ "layers.6.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
583
+ "layers.6.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
584
+ "layers.6.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
585
+ "layers.6.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
586
+ "layers.6.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
587
+ "layers.6.self_attn.k_norm.weight": "model-00001-of-00005.safetensors",
588
+ "layers.6.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
589
+ "layers.6.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
590
+ "layers.6.self_attn.q_norm.weight": "model-00001-of-00005.safetensors",
591
+ "layers.6.self_attn.q_proj.weight": "model-00001-of-00005.safetensors",
592
+ "layers.6.self_attn.v_proj.weight": "model-00001-of-00005.safetensors",
593
+ "layers.7.input_layernorm.weight": "model-00002-of-00005.safetensors",
594
+ "layers.7.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
595
+ "layers.7.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
596
+ "layers.7.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
597
+ "layers.7.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
598
+ "layers.7.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
599
+ "layers.7.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
600
+ "layers.7.self_attn.k_norm.weight": "model-00002-of-00005.safetensors",
601
+ "layers.7.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
602
+ "layers.7.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
603
+ "layers.7.self_attn.q_norm.weight": "model-00002-of-00005.safetensors",
604
+ "layers.7.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
605
+ "layers.7.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
606
+ "layers.8.input_layernorm.weight": "model-00002-of-00005.safetensors",
607
+ "layers.8.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
608
+ "layers.8.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
609
+ "layers.8.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
610
+ "layers.8.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
611
+ "layers.8.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
612
+ "layers.8.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
613
+ "layers.8.self_attn.k_norm.weight": "model-00002-of-00005.safetensors",
614
+ "layers.8.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
615
+ "layers.8.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
616
+ "layers.8.self_attn.q_norm.weight": "model-00002-of-00005.safetensors",
617
+ "layers.8.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
618
+ "layers.8.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
619
+ "layers.9.input_layernorm.weight": "model-00002-of-00005.safetensors",
620
+ "layers.9.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
621
+ "layers.9.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
622
+ "layers.9.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
623
+ "layers.9.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
624
+ "layers.9.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
625
+ "layers.9.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors",
626
+ "layers.9.self_attn.k_norm.weight": "model-00002-of-00005.safetensors",
627
+ "layers.9.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
628
+ "layers.9.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
629
+ "layers.9.self_attn.q_norm.weight": "model-00002-of-00005.safetensors",
630
+ "layers.9.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
631
+ "layers.9.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
632
+ "norm.weight": "model-00005-of-00005.safetensors"
633
+ }
634
+ }
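
The weight map above assigns each tensor to one of five shards; note that a layer can straddle a shard boundary (for example, layers.39.mlp.gate_proj.weight sits in shard 4 while the rest of layer 39's MLP sits in shard 5). Below is a minimal sketch of how such an index can be resolved by hand, assuming the shard files have already been downloaded into the working directory; transformers and sentence-transformers perform this resolution automatically.

import json
from collections import defaultdict
from safetensors.torch import load_file

with open("model.safetensors.index.json") as f:
    index = json.load(f)

# tensor name -> shard file name, exactly the mapping shown in the diff above
weight_map = index["weight_map"]

# group tensor names by shard so each shard file is opened only once
shards = defaultdict(list)
for name, shard in weight_map.items():
    shards[shard].append(name)

state_dict = {}
for shard, names in shards.items():
    tensors = load_file(shard)  # loads a single shard, not the whole model
    state_dict.update({name: tensors[name] for name in names})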
modules.json ADDED
@@ -0,0 +1,20 @@
+ [
+ {
+ "idx": 0,
+ "name": "0",
+ "path": "",
+ "type": "sentence_transformers.models.Transformer"
+ },
+ {
+ "idx": 1,
+ "name": "1",
+ "path": "1_Pooling",
+ "type": "sentence_transformers.models.Pooling"
+ },
+ {
+ "idx": 2,
+ "name": "2",
+ "path": "2_Normalize",
+ "type": "sentence_transformers.models.Normalize"
+ }
+ ]
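
modules.json declares the inference pipeline that sentence-transformers assembles in index order: the Transformer encoder at the repo root, the pooling module configured under 1_Pooling, and a final L2 normalization step. A minimal usage sketch; the repo id below is a placeholder, not the actual model name:

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("org/model-name")  # hypothetical repo id
embeddings = model.encode(["an example sentence"])  # Transformer -> Pooling -> Normalize

Because of the Normalize module, the returned vectors are unit-length, so dot product and cosine similarity coincide.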
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+ "max_seq_length": 131072,
+ "do_lower_case": false
+ }
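
max_seq_length here is the upper bound on input length in tokens; it can be lowered at load time to trade context length for memory. A short sketch, again with a placeholder repo id:

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("org/model-name")  # hypothetical repo id
model.max_seq_length = 4096  # inputs longer than 4096 tokens are truncated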
special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
+ {
+ "boi_token": "<start_of_image>",
+ "bos_token": {
+ "content": "<bos>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eoi_token": "<end_of_image>",
+ "eos_token": {
+ "content": "<eos>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "image_token": "<image_soft_token>",
+ "pad_token": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
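
These entries are what AutoTokenizer uses to resolve the model's special tokens. A quick sanity check, assuming a placeholder repo id:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("org/model-name")  # hypothetical repo id
print(tok.bos_token, tok.eos_token, tok.pad_token, tok.unk_token)  # <bos> <eos> <pad> <unk>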
task_prompts.json ADDED
@@ -0,0 +1,273 @@
+ {
+ "AmazonCounterfactualClassification": "Classify a given Amazon customer review text as either counterfactual or not-counterfactual",
+ "AmazonPolarityClassification": "Classify Amazon reviews into positive or negative sentiment",
+ "AmazonReviewsClassification": "Classify the given Amazon review into its appropriate rating category",
+ "Banking77Classification": "Given an online banking query, find the corresponding intents",
+ "EmotionClassification": "Classify the emotion expressed in the given Twitter message into one of the six emotions: anger, fear, joy, love, sadness, and surprise",
+ "ImdbClassification": "Classify the sentiment expressed in the given movie review text from the IMDB dataset",
+ "MassiveIntentClassification": "Given a user utterance as query, find the user intents",
+ "MassiveScenarioClassification": "Given a user utterance as query, find the user scenarios",
+ "MTOPDomainClassification": "Classify the intent domain of the given utterance in task-oriented conversation",
+ "MTOPIntentClassification": "Classify the intent of the given utterance in task-oriented conversation",
+ "ToxicConversationsClassification": "Classify the given comments as either toxic or not toxic",
+ "TweetSentimentExtractionClassification": "Classify the sentiment of a given tweet as either positive, negative, or neutral",
+ "TNews": "Classify the fine-grained category of the given news title",
+ "IFlyTek": "Given an App description text, find the appropriate fine-grained category",
+ "MultilingualSentiment": "Classify sentiment of the customer review into positive, neutral, or negative",
+ "JDReview": "Classify the customer review for iPhone on an e-commerce platform into positive or negative",
+ "OnlineShopping": "Classify the customer review for online shopping into positive or negative",
+ "Waimai": "Classify the customer review from a food takeaway platform into positive or negative",
+ "ArxivClusteringP2P": "Identify the main and secondary category of Arxiv papers based on the titles and abstracts",
+ "ArxivClusteringS2S": "Identify the main and secondary category of Arxiv papers based on the titles",
+ "BiorxivClusteringP2P": "Identify the main category of Biorxiv papers based on the titles and abstracts",
+ "BiorxivClusteringP2P.v2": "Identify the main category of Biorxiv papers based on the titles and abstracts",
+ "BiorxivClusteringS2S": "Identify the main category of Biorxiv papers based on the titles",
+ "BiorxivClusteringS2S.v2": "Identify the main category of Biorxiv papers based on the titles",
+ "MedrxivClusteringP2P": "Identify the main category of Medrxiv papers based on the titles and abstracts",
+ "MedrxivClusteringP2P.v2": "Identify the main category of Medrxiv papers based on the titles and abstracts",
+ "MedrxivClusteringS2S": "Identify the main category of Medrxiv papers based on the titles",
+ "MedrxivClusteringS2S.v2": "Identify the main category of Medrxiv papers based on the titles",
+ "RedditClustering": "Identify the topic or theme of Reddit posts based on the titles",
+ "RedditClusteringP2P": "Identify the topic or theme of Reddit posts based on the titles and posts",
+ "StackExchangeClustering": "Identify the topic or theme of StackExchange posts based on the titles",
+ "StackExchangeClustering.v2": "Identify the topic or theme of StackExchange posts based on the titles",
+ "StackExchangeClusteringP2P": "Identify the topic or theme of StackExchange posts based on the given paragraphs",
+ "StackExchangeClusteringP2P.v2": "Identify the topic or theme of StackExchange posts based on the given paragraphs",
+ "TwentyNewsgroupsClustering": "Identify the topic or theme of the given news articles",
+ "CLSClusteringS2S": "Identify the main category of scholarly papers based on the titles",
+ "CLSClusteringP2P": "Identify the main category of scholarly papers based on the titles and abstracts",
+ "CLSClusteringP2P.v2": "Identify the main category of scholarly papers based on the titles and abstracts",
+ "ThuNewsClusteringS2S": "Identify the topic or theme of the given news articles based on the titles",
+ "ThuNewsClusteringP2P": "Identify the topic or theme of the given news articles based on the titles and contents",
+ "AskUbuntuDupQuestions": "Retrieve duplicate questions from AskUbuntu forum",
+ "MindSmallReranking": "Retrieve relevant news articles based on user browsing history",
+ "SciDocsRR": "Given a title of a scientific paper, retrieve the titles of other relevant papers",
+ "StackOverflowDupQuestions": "Retrieve duplicate questions from StackOverflow forum",
+ "SprintDuplicateQuestions": "Retrieve semantically duplicate questions",
+ "TwitterSemEval2015": "Retrieve tweets that are semantically similar to the given tweet",
+ "TwitterURLCorpus": "Retrieve tweets that are semantically similar to the given tweet",
+ "T2Reranking": "Given a Chinese search query, retrieve web passages that answer the question",
+ "MMarcoReranking": "Given a Chinese search query, retrieve web passages that answer the question",
+ "CMedQAv1-reranking": "Given a Chinese community medical question, retrieve replies that best answer the question",
+ "CMedQAv2-reranking": "Given a Chinese community medical question, retrieve replies that best answer the question",
+ "Ocnli": "Retrieve semantically similar text.",
+ "Cmnli": "Retrieve semantically similar text.",
+ "ArguAna": {"query": "Given a claim, find documents that refute the claim", "passage": "Given a claim, find documents that refute the claim"},
+ "ClimateFEVER": "Given a claim about climate change, retrieve documents that support or refute the claim",
+ "ClimateFEVERHardNegatives": "Given a claim about climate change, retrieve documents that support or refute the claim",
+ "DBPedia": "Given a query, retrieve relevant entity descriptions from DBPedia",
+ "FEVER": "Given a claim, retrieve documents that support or refute the claim",
+ "FEVERHardNegatives": "Given a claim, retrieve documents that support or refute the claim",
+ "FiQA2018": "Given a financial question, retrieve user replies that best answer the question",
+ "HotpotQA": "Given a multi-hop question, retrieve documents that can help answer the question",
+ "HotpotQAHardNegatives": "Given a multi-hop question, retrieve documents that can help answer the question",
+ "MSMARCO": "Given a web search query, retrieve relevant passages that answer the query",
+ "NFCorpus": "Given a question, retrieve relevant documents that best answer the question",
+ "NQ": "Given a question, retrieve Wikipedia passages that answer the question",
+ "QuoraRetrieval": "Given a question, retrieve questions that are semantically equivalent to the given question",
+ "SCIDOCS": "Given a title of a scientific paper, retrieve the titles of other relevant papers",
+ "SciFact": "Given a scientific claim, retrieve documents that support or refute the claim",
+ "Touche2020": "Given a question, retrieve detailed and persuasive arguments that answer the question",
+ "Touche2020Retrieval.v3": "Given a question, retrieve detailed and persuasive arguments that answer the question",
+ "TRECCOVID": "Given a medical query, retrieve documents that answer the query",
+ "T2Retrieval": "Given a Chinese search query, retrieve web passages that answer the question",
+ "MMarcoRetrieval": "Given a web search query, retrieve relevant passages that answer the query",
+ "VoyageMMarcoReranking": "Given a Japanese search query, retrieve web passages that answer the question",
+ "DuRetrieval": "Given a Chinese search query, retrieve web passages that answer the question",
+ "CovidRetrieval": "Given a question on COVID-19, retrieve news articles that answer the question",
+ "CmedqaRetrieval": "Given a Chinese community medical question, retrieve replies that best answer the question",
+ "EcomRetrieval": "Given a user query from an e-commerce website, retrieve description sentences of relevant products",
+ "MedicalRetrieval": "Given a medical question, retrieve user replies that best answer the question",
+ "VideoRetrieval": "Given a video search query, retrieve the titles of relevant videos",
+ "STSBenchmarkMultilingualSTS": "Retrieve semantically similar text",
+ "SICKFr": "Retrieve semantically similar text",
+ "SummEvalFr": "Given a news summary, retrieve other semantically similar summaries",
+ "MasakhaNEWSClassification": "Classify the news in the given texts into one of the seven categories: politics, sports, health, business, entertainment, technology, religion",
+ "OpusparcusPC": "Retrieve semantically similar text",
+ "PAWSX": "Retrieve semantically similar text",
+ "HALClusteringS2S": "Identify the main category of academic passages based on the titles and contents",
+ "MasakhaNEWSClusteringP2P": "Identify the topic or theme of the given news articles based on the titles and contents",
+ "MasakhaNEWSClusteringS2S": "Identify the topic or theme of the given news articles based on the titles",
+ "MLSUMClusteringP2P": "Identify the topic or theme of the given articles based on the titles and contents",
+ "MLSUMClusteringS2S": "Identify the topic or theme of the given articles based on the titles",
+ "SyntecReranking": "Given a question, retrieve passages that answer the question",
+ "AlloprofReranking": "Given a question, retrieve passages that answer the question",
+ "AlloprofRetrieval": "Given a question, retrieve passages that answer the question",
+ "BSARDRetrieval": "Given a question, retrieve passages that answer the question",
+ "SyntecRetrieval": "Given a question, retrieve passages that answer the question",
+ "XPQARetrieval": "Given a question, retrieve passages that answer the question",
+ "MintakaRetrieval": "Given a question, retrieve passages that answer the question",
+ "CBD": "Classify the sentiment of Polish tweet reviews",
+ "PolEmo2.0-IN": "Classify the sentiment of online medicine and hotel reviews",
+ "PolEmo2.0-OUT": "Classify the sentiment of online product and school reviews",
+ "AllegroReviews": "Classify the sentiment of reviews from the e-commerce marketplace Allegro",
+ "PAC": "Classify Polish contract clauses into one of the following two types: \"Safe Contract Clauses\" and \"Unfair Contract Clauses\".",
+ "SICK-E-PL": "Retrieve semantically similar text",
+ "SICK-R-PL": "Retrieve semantically similar text",
+ "STS22": "Retrieve semantically similar text",
+ "AFQMC": "Retrieve semantically similar text",
+ "AFQMC": "Retrieve semantically similar text",
+ "BQ": "Retrieve semantically similar text",
+ "LCQMC": "Retrieve semantically similar text",
+ "PAWSX": "Retrieve semantically similar text",
+ "QBQTC": "Retrieve semantically similar text",
+ "STS12": "Retrieve semantically similar text",
+ "PpcPC": "Retrieve semantically similar text",
+ "CDSC-E": "Retrieve semantically similar text",
+ "BornholmBitextMining": "Retrieve parallel sentences",
+ "NorwegianCourtsBitextMining": "Retrieve parallel sentences",
+ "PSC": "Retrieve semantically similar text",
+ "EightTagsClustering": "Identify the category of headlines from social media posts in Polish into 8 categories: film, history, food, medicine, motorization, work, sport and technology",
+ "ArguAna-PL": "Given a claim, find documents that refute the claim",
+ "DBPedia-PL": "Given a query, retrieve relevant entity descriptions from DBPedia",
+ "FiQA-PL": "Given a financial question, retrieve user replies that best answer the question",
+ "HotpotQA-PL": "Given a multi-hop question, retrieve documents that can help answer the question",
+ "MSMARCO-PL": "Given a web search query, retrieve relevant passages that answer the query",
+ "NFCorpus-PL": "Given a question, retrieve relevant documents that best answer the question",
+ "NQ-PL": "Given a question, retrieve Wikipedia passages that answer the question",
+ "Quora-PL": "Given a question, retrieve questions that are semantically equivalent to the given question",
+ "SCIDOCS-PL": "Given a title of a scientific paper, retrieve the titles of other relevant papers",
+ "SciFact-PL": "Given a scientific claim, retrieve documents that support or refute the claim",
+ "TRECCOVID-PL": "Given a medical query, retrieve documents that answer the query",
+ "GeoreviewClassification": "Classify the organization rating based on the reviews",
+ "HeadlineClassification": "Classify the topic or theme of the given news headline",
+ "InappropriatenessClassification": "Classify the given message as either a sensitive topic or not",
+ "KinopoiskClassification": "Classify the sentiment expressed in the given movie review text",
+ "RuReviewsClassification": "Classify product reviews into positive, negative or neutral sentiment",
+ "RuSciBenchGRNTIClassification": "Classify the category of scientific papers based on the titles and abstracts",
+ "RuSciBenchOECDClassification": "Classify the category of scientific papers based on the titles and abstracts",
+ "GeoreviewClusteringP2P": "Identify the organization category based on the reviews",
+ "RuSciBenchGRNTIClusteringP2P": "Identify the category of scientific papers based on the titles and abstracts",
+ "RuSciBenchOECDClusteringP2P": "Identify the category of scientific papers based on the titles and abstracts",
+ "TERRa": "Given a premise, retrieve a hypothesis that is entailed by the premise",
+ "RuBQReranking": "Given a question, retrieve Wikipedia passages that answer the question",
+ "RiaNewsRetrieval": "Given a headline, retrieve relevant articles",
+ "RuBQRetrieval": "Given a question, retrieve Wikipedia passages that answer the question",
+ "RUParaPhraserSTS": "Retrieve semantically similar text",
+ "RuSTSBenchmarkSTS": "Retrieve semantically similar text",
+ "AppsRetrieval": "Given a question about a code problem, retrieve code that can solve the user's problem",
+ "COIRCodeSearchNetRetrieval": "Given a code snippet, retrieve the comment corresponding to that code.",
+ "CodeEditSearchRetrieval": "Given a piece of code, retrieve code that in the ",
+ "CodeFeedbackMT": "Given a question about coding, retrieve code or a passage that can solve the user's question",
+ "CodeFeedbackST": "Given a question about coding, retrieve code or a passage that can solve the user's question",
+ "CodeSearchNetCCRetrieval": "Given a code comment, retrieve the code snippet corresponding to that comment.",
+ "CodeSearchNetRetrieval": "Given a code snippet, retrieve the comment corresponding to that code.",
+ "CodeTransOceanContest": "Given a piece of code, retrieve semantically similar code",
+ "CodeTransOceanDL": "Given a piece of code, retrieve semantically similar code",
+ "CosQA": "Given a question about coding, retrieve code or a passage that can solve the user's question",
+ "StackOverflowQA": "Given a question about coding, retrieve code or a passage that can solve the user's question",
+ "SyntheticText2SQL": "Given a user's question, retrieve SQL queries that are appropriate responses to the question",
+ "BibleNLPBitextMining": "Retrieve parallel sentences",
+ "BUCC.v2": "Retrieve parallel sentences",
+ "DiaBlaBitextMining": "Retrieve parallel sentences",
+ "FloresBitextMining": "Retrieve parallel sentences",
+ "IN22GenBitextMining": "Retrieve parallel sentences",
+ "IndicGenBenchFloresBitextMining": "Retrieve parallel sentences",
+ "NollySentiBitextMining": "Retrieve parallel sentences",
+ "NTREXBitextMining": "Retrieve parallel sentences",
+ "NusaTranslationBitextMining": "Retrieve parallel sentences",
+ "NusaXBitextMining": "Retrieve parallel sentences",
+ "Tatoeba": "Retrieve parallel sentences",
+ "BulgarianStoreReviewSentimentClassfication": "Classify user reviews into positive, negative or mixed sentiment",
+ "CzechProductReviewSentimentClassification": "Classify product reviews into positive, neutral, or negative sentiment",
+ "GreekLegalCodeClassification": "Given a Greek legal text, classify its topic",
+ "DBpediaClassification": "Given a Wikipedia article, categorize it into classes based on its DBpedia ontology",
+ "FinancialPhrasebankClassification": "Given financial news, categorize it by sentiment into positive, negative, or neutral",
+ "PoemSentimentClassification": "Given a poem, categorize it by sentiment into positive, no_impact, negative or mixed",
+ "TweetTopicSingleClassification": "Given a tweet, classify its topic",
+ "EstonianValenceClassification": "Given a news article, categorize it by sentiment into negatiivne, positiivne, neutraalne or vastuolulin",
+ "FilipinoShopeeReviewsClassification": "Given a shop review, classify its rating on a scale from 1 to 5",
+ "GujaratiNewsClassification": "Given a Gujarati news article, classify its topic",
+ "SentimentAnalysisHindi": "Given a Hindi text, categorize it by sentiment into positive, negative or neutral",
+ "IndonesianIdClickbaitClassification": "Given an Indonesian news headline, classify it into clickbait or non-clickbait",
+ "ItaCaseholdClassification": "Given a judgment, classify its topic",
+ "KorSarcasmClassification": "Given a tweet, categorize it into sarcasm or not_sarcasm",
+ "KurdishSentimentClassification": "Given a text, categorize it by sentiment into positive or negative",
+ "MacedonianTweetSentimentClassification": "Given a Macedonian tweet, categorize it by sentiment into positive, negative, or neutral",
+ "AfriSentiClassification": "Given a text, categorize it by sentiment into positive, negative, or neutral",
+ "CataloniaTweetClassification": "Given a tweet, categorize it by stance into AGAINST, FAVOR or NEUTRAL",
+ "CyrillicTurkicLangClassification": "Given a text, classify its language",
+ "IndicLangClassification": "Given a text, classify its language",
+ "MultiHateClassification": "Given a text, categorize it into hate or non-hate",
+ "NusaParagraphEmotionClassification": "Given a paragraph, classify its emotion",
+ "NusaX-senti": "Given a text, categorize it by sentiment into positive or negative",
+ "SwissJudgementClassification": "Given a news article, categorize it into approval or dismissal",
+ "NepaliNewsClassification": "Given a news article, categorize it into business, entertainment or sports",
+ "OdiaNewsClassification": "Given a news article, categorize it into business, entertainment or sports",
+ "PunjabiNewsClassification": "Given a news article, categorize it into two classes",
+ "SinhalaNewsClassification": "Given a news article, categorize it into political, business, technology, sports and entertainment",
+ "CSFDSKMovieReviewSentimentClassification": "Given a movie review, classify its rating on a scale from 0 to 5",
+ "SiswatiNewsClassification": "Given a news article in Siswati, classify its topic",
+ "SlovakMovieReviewSentimentClassification": "Given a movie review, categorize it into positive or negative",
+ "SwahiliNewsClassification": "Given a news article, classify its domain",
+ "TswanaNewsClassification": "Given a news article, classify its topic",
+ "IsiZuluNewsClassification": "Given a news article, classify its topic",
+ "WikiCitiesClustering": "Identify Wikipedia articles of cities by country",
+ "RomaniBibleClustering": "Identify verses from the Bible in Kalderash Romani by book.",
+ "ArXivHierarchicalClusteringP2P": "Identify the main and secondary category of Arxiv papers based on the titles and abstracts",
+ "ArXivHierarchicalClusteringS2S": "Identify the main and secondary category of Arxiv papers based on the titles",
+ "BigPatentClustering.v2": "Identify the category of documents from the Big Patent dataset",
+ "AlloProfClusteringS2S": "Identify the topic of document titles from the Allo Prof dataset",
+ "AlloProfClusteringS2S.v2": "Identify the topic of document titles from the Allo Prof dataset",
+ "AlloProfClusteringP2P": "Identify the topic of document titles and descriptions from the Allo Prof dataset",
+ "HALClusteringS2S.v2": "Identify the topic of titles from HAL",
+ "SIB200ClusteringS2S": "Identify the category of documents",
+ "WikiClusteringP2P.v2": "Identify the category of wiki passages",
+ "PlscClusteringP2P.v2": "Identify the category of titles+abstracts from the Library of Science",
+ "KorHateSpeechMLClassification": "Given a Korean online news comment, classify its fine-grained hate speech classes",
+ "MalteseNewsClassification": "Given a Maltese news article, classify its topic",
+ "MultiEURLEXMultilabelClassification": "Given a text, classify its topic",
+ "BrazilianToxicTweetsClassification": "Classify the toxic tweets in Brazilian Portuguese into one of the six categories: LGBTQ+phobia, Xenophobia, Obscene, Insult, Misogyny and Racism.",
+ "CTKFactsNLI": "Retrieve semantically similar text",
+ "indonli": "Retrieve semantically similar text",
+ "ArmenianParaphrasePC": "Retrieve semantically similar text",
+ "PawsXPairClassification": "Retrieve semantically similar text",
+ "RTE3": "Retrieve semantically similar text",
+ "XNLI": "Retrieve semantically similar text",
+ "PpcPC": "Retrieve semantically similar text",
+ "GermanSTSBenchmark": "Retrieve semantically similar text",
+ "SICK-R": "Retrieve semantically similar text",
+ "STS13": "Retrieve semantically similar text",
+ "STS14": "Retrieve semantically similar text",
+ "STSBenchmark": "Retrieve semantically similar text",
+ "FaroeseSTS": "Retrieve semantically similar text",
+ "FinParaSTS": "Retrieve semantically similar text",
+ "JSICK": "Retrieve semantically similar text",
+ "IndicCrosslingualSTS": "Retrieve parallel sentences",
+ "SemRel24STS": "Retrieve semantically similar text",
+ "STS17": "Retrieve semantically similar text",
+ "STS22.v2": "Retrieve semantically similar text",
+ "STSES": "Retrieve semantically similar text",
+ "STSB": "Retrieve semantically similar text",
+ "AILAStatutes": "Identify the most relevant statutes for a given situation",
+ "HagridRetrieval": "Given an information-seeking question, retrieve the best replies to answer the question",
+ "LegalBenchCorporateLobbying": "Given a query, retrieve relevant legal bill summaries",
+ "LEMBPasskeyRetrieval": "Retrieve the relevant passage for the given query",
+ "BelebeleRetrieval": "Retrieve the relevant passage for the given query",
+ "MLQARetrieval": "Retrieve the relevant passage for the given query",
+ "StatcanDialogueDatasetRetrieval": "Retrieve the relevant passage for the given query",
+ "WikipediaRetrievalMultilingual": "Retrieve the relevant passage for the given query",
+ "Core17InstructionRetrieval": "Retrieve the relevant passage for the given query with conditions",
+ "News21InstructionRetrieval": "Retrieve the relevant passage for the given query with conditions",
+ "Robust04InstructionRetrieval": "Retrieve the relevant passage for the given query with conditions",
+ "WebLINXCandidatesReranking": "Retrieve the relevant passage for the given query",
+ "WikipediaRerankingMultilingual": "Retrieve the relevant passage for the given query",
+ "STS15": "Retrieve semantically similar text",
+ "MIRACLRetrievalHardNegatives": "Retrieve the relevant passage for the given query",
+ "BIOSSES": "Retrieve semantically similar text",
+ "CQADupstackRetrieval": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+ "CQADupstackGamingRetrieval": {"query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question", "passage": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question"},
+ "CQADupstackUnixRetrieval": {"query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question", "passage": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question"},
+ "STS16": "Retrieve semantically similar text",
+ "SummEval": "Retrieve semantically similar text",
+ "ATEC": "Retrieve semantically similar text",
+ "ScalaClassification": "Classify passages as correct or incorrect in Scandinavian languages based on linguistic acceptability",
+ "SpartQA": "Given the following spatial reasoning question, retrieve the right answer.",
+ "CEDRClassification": "Given a comment as query, classify expressed emotions into joy, sadness, surprise, fear, and anger",
+ "DalajClassification": "Classify texts based on linguistic acceptability in Swedish",
+ "TempReasonL1": "Given the following question about time, retrieve the correct answer.",
+ "WinoGrande": "Given the following sentence, retrieve an appropriate answer to fill in the missing underscored part.",
+ "NordicLangClassification": "Classify texts based on language",
+ "TwitterHjerneRetrieval": "Retrieve answers to questions asked in Danish tweets",
+ "SwednClusteringP2P": "Identify news categories in Swedish passages"
+ }
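
task_prompts.json maps MTEB task names to natural-language instructions that are prepended to the query side at encode time; passages are typically embedded without an instruction. The exact prompt template this model expects is an assumption here; the sketch below uses the generic prompt= hook in sentence-transformers, again with a placeholder repo id:

import json
from sentence_transformers import SentenceTransformer

with open("task_prompts.json") as f:
    prompts = json.load(f)

model = SentenceTransformer("org/model-name")  # hypothetical repo id
instruction = prompts["NQ"]  # "Given a question, retrieve Wikipedia passages that answer the question"

# Hypothetical template: prepend the instruction to queries only.
query_emb = model.encode(["who wrote the odyssey"], prompt=instruction + " ")
doc_emb = model.encode(["The Odyssey is attributed to Homer."])  # passages: no prompt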
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
+ size 33384568
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+ size 4689074
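
tokenizer.json and tokenizer.model are Git LFS pointer files: only the three lines above are stored in git, while the actual blobs are fetched from LFS storage and can be verified against the pointer's sha256 oid. A small check in Python, using the oid from the pointer above:

import hashlib

def sha256_of(path):
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

assert sha256_of("tokenizer.model") == "1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c"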
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff