LauraLaureus committed on
Commit cd00809 · verified · 1 Parent(s): 02c5114

Upload folder using huggingface_hub

Files changed (43)
  1. .gitattributes +3 -0
  2. checkpoint-148/1_Pooling/config.json +10 -0
  3. checkpoint-148/README.md +425 -0
  4. checkpoint-148/config.json +27 -0
  5. checkpoint-148/config_sentence_transformers.json +10 -0
  6. checkpoint-148/model.safetensors +3 -0
  7. checkpoint-148/modules.json +20 -0
  8. checkpoint-148/optimizer.pt +3 -0
  9. checkpoint-148/rng_state.pth +3 -0
  10. checkpoint-148/scheduler.pt +3 -0
  11. checkpoint-148/sentence_bert_config.json +4 -0
  12. checkpoint-148/special_tokens_map.json +51 -0
  13. checkpoint-148/tokenizer.json +3 -0
  14. checkpoint-148/tokenizer_config.json +55 -0
  15. checkpoint-148/trainer_state.json +537 -0
  16. checkpoint-148/training_args.bin +3 -0
  17. checkpoint-20/1_Pooling/config.json +10 -0
  18. checkpoint-20/README.md +400 -0
  19. checkpoint-20/config.json +27 -0
  20. checkpoint-20/config_sentence_transformers.json +10 -0
  21. checkpoint-20/model.safetensors +3 -0
  22. checkpoint-20/modules.json +20 -0
  23. checkpoint-20/optimizer.pt +3 -0
  24. checkpoint-20/rng_state.pth +3 -0
  25. checkpoint-20/scheduler.pt +3 -0
  26. checkpoint-20/sentence_bert_config.json +4 -0
  27. checkpoint-20/special_tokens_map.json +51 -0
  28. checkpoint-20/tokenizer.json +3 -0
  29. checkpoint-20/tokenizer_config.json +55 -0
  30. checkpoint-20/trainer_state.json +112 -0
  31. checkpoint-20/training_args.bin +3 -0
  32. eval/similarity_evaluation_results.csv +39 -0
  33. latest/1_Pooling/config.json +10 -0
  34. latest/README.md +404 -0
  35. latest/config.json +27 -0
  36. latest/config_sentence_transformers.json +10 -0
  37. latest/model.safetensors +3 -0
  38. latest/modules.json +20 -0
  39. latest/sentence_bert_config.json +4 -0
  40. latest/special_tokens_map.json +51 -0
  41. latest/tokenizer.json +3 -0
  42. latest/tokenizer_config.json +55 -0
  43. latest/training_args.bin +3 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ checkpoint-148/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ checkpoint-20/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ latest/tokenizer.json filter=lfs diff=lfs merge=lfs -text
checkpoint-148/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+ "word_embedding_dimension": 1024,
+ "pooling_mode_cls_token": false,
+ "pooling_mode_mean_tokens": true,
+ "pooling_mode_max_tokens": false,
+ "pooling_mode_mean_sqrt_len_tokens": false,
+ "pooling_mode_weightedmean_tokens": false,
+ "pooling_mode_lasttoken": false,
+ "include_prompt": true
+ }
checkpoint-148/README.md ADDED
@@ -0,0 +1,425 @@
+ ---
+ tags:
+ - sentence-transformers
+ - sentence-similarity
+ - feature-extraction
+ - generated_from_trainer
+ - dataset_size:290
+ - loss:OnlineContrastiveLoss
+ base_model: intfloat/multilingual-e5-large
+ widget:
+ - source_sentence: Antes se coge al mentiroso que al cojo
+ sentences:
+ - A escudero pobre, taza de plata y cántaro de cobre
+ - En río revuelto, pesca abundante
+ - Se ayuda primero al necesitado que al engañador.
+ - source_sentence: Asno de muchos, lobos lo comen
+ sentences:
+ - Sabio entre sabios, amigos lo respetan.
+ - El que mucho madruga más hace que el que Dios ayuda.
+ - Se pilla antes a un mentiroso que a un cojo
+ - source_sentence: Al buey por el asta, y al hombre por la palabra
+ sentences:
+ - Si no quieres arroz con leche, toma tres tazas
+ - Al hombre por la palabra, y al buey por el cuerno ata
+ - Ese no es tu amigo, sino alguien que siempre busca estar rodeado de bullicio y
+ actividad.
+ - source_sentence: Al médico, confesor y letrado, hablarles claro
+ sentences:
+ - Al médico, confesor y letrado, no le hayas engañado
+ - Más vale a quien Dios ayuda que quien mucho madruga
+ - Al que anda entre la miel, algo se le pega
+ - source_sentence: A muertos y a idos, no hay amigos
+ sentences:
+ - Al buen callar llaman santo
+ - A los vivos y presentes, siempre hay amigos.
+ - Al que de prestado se viste, en la calle lo desnudan
+ pipeline_tag: sentence-similarity
+ library_name: sentence-transformers
+ metrics:
+ - pearson_cosine
+ - spearman_cosine
+ model-index:
+ - name: SentenceTransformer based on intfloat/multilingual-e5-large
+ results:
+ - task:
+ type: semantic-similarity
+ name: Semantic Similarity
+ dataset:
+ name: Unknown
+ type: unknown
+ metrics:
+ - type: pearson_cosine
+ value: 0.8334934833047165
+ name: Pearson Cosine
+ - type: spearman_cosine
+ value: 0.8261353280714282
+ name: Spearman Cosine
+ ---
+
+ # SentenceTransformer based on intfloat/multilingual-e5-large
+
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [intfloat/multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large) on the csv dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+
+ ## Model Details
+
+ ### Model Description
+ - **Model Type:** Sentence Transformer
+ - **Base model:** [intfloat/multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large) <!-- at revision 0dc5580a448e4284468b8909bae50fa925907bc5 -->
+ - **Maximum Sequence Length:** 512 tokens
+ - **Output Dimensionality:** 1024 dimensions
+ - **Similarity Function:** Cosine Similarity
+ - **Training Dataset:**
+ - csv
+ <!-- - **Language:** Unknown -->
+ <!-- - **License:** Unknown -->
+
+ ### Model Sources
+
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+
+ ### Full Model Architecture
+
+ ```
+ SentenceTransformer(
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: XLMRobertaModel
+ (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+ (2): Normalize()
+ )
+ ```
+
+ ## Usage
+
+ ### Direct Usage (Sentence Transformers)
+
+ First install the Sentence Transformers library:
+
+ ```bash
+ pip install -U sentence-transformers
+ ```
+
+ Then you can load this model and run inference.
+ ```python
+ from sentence_transformers import SentenceTransformer
+
+ # Download from the 🤗 Hub
+ model = SentenceTransformer("sentence_transformers_model_id")
+ # Run inference
+ sentences = [
+ 'A muertos y a idos, no hay amigos',
+ 'A los vivos y presentes, siempre hay amigos.',
+ 'Al buen callar llaman santo',
+ ]
+ embeddings = model.encode(sentences)
+ print(embeddings.shape)
+ # [3, 1024]
+
+ # Get the similarity scores for the embeddings
+ similarities = model.similarity(embeddings, embeddings)
+ print(similarities.shape)
+ # [3, 3]
+ ```
+
+ <!--
+ ### Direct Usage (Transformers)
+
+ <details><summary>Click to see the direct usage in Transformers</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Downstream Usage (Sentence Transformers)
+
+ You can finetune this model on your own dataset.
+
+ <details><summary>Click to expand</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Out-of-Scope Use
+
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
+ -->
+
+ ## Evaluation
+
+ ### Metrics
+
+ #### Semantic Similarity
+
+ * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
+
+ | Metric | Value |
+ |:--------------------|:-----------|
+ | pearson_cosine | 0.8335 |
+ | **spearman_cosine** | **0.8261** |
+
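+ For reference, these correlations can be recomputed with the same evaluator. The following is a minimal sketch, assuming the model has been published under a real model ID; the single pair and score below are illustrative placeholders, not the actual 1,006-pair evaluation split:
+
+ ```python
+ from sentence_transformers import SentenceTransformer
+ from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
+
+ model = SentenceTransformer("sentence_transformers_model_id")  # placeholder ID
+
+ # EmbeddingSimilarityEvaluator correlates cosine similarities of the pairs
+ # with the given gold scores; binary 0/1 labels work as similarity scores.
+ evaluator = EmbeddingSimilarityEvaluator(
+     sentences1=["¿Adónde irá el buey que no are?"],
+     sentences2=["¿A dó irá el buey que no are?"],
+     scores=[1.0],
+     name="dev",
+ )
+ print(evaluator(model))  # includes dev_pearson_cosine and dev_spearman_cosine
+ ```
+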
+ <!--
+ ## Bias, Risks and Limitations
+
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+ -->
+
+ <!--
+ ### Recommendations
+
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+ -->
+
+ ## Training Details
+
+ ### Training Dataset
+
+ #### csv
+
+ * Dataset: csv
+ * Size: 290 training samples
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
+ * Approximate statistics based on the first 290 samples:
+ | | sentence1 | sentence2 | label |
+ |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:------------------------------------------------|
+ | type | string | string | int |
+ | details | <ul><li>min: 7 tokens</li><li>mean: 11.68 tokens</li><li>max: 22 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 17.01 tokens</li><li>max: 44 tokens</li></ul> | <ul><li>0: ~50.00%</li><li>1: ~50.00%</li></ul> |
+ * Samples:
+ | sentence1 | sentence2 | label |
+ |:------------------------------------------------------------|:--------------------------------------------------------------------------------------------------|:---------------|
+ | <code>Gota a gota, la mar se agota.</code> | <code>Con el pasar del tiempo se llega a alcanzar cualquier meta.</code> | <code>1</code> |
+ | <code>Dime de qué presumes y te diré de qué careces.</code> | <code>Dime de qué careces y te diré de qué dispones.</code> | <code>0</code> |
+ | <code>Cómo se vive, se muere.</code> | <code>De aquella forma que hemos vivido nuestra vida será de la forma en la que moriremos.</code> | <code>1</code> |
+ * Loss: [<code>OnlineContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#onlinecontrastiveloss)
+
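+ As a rough sketch of this objective: for a pair with label $y \in \{0, 1\}$ and embedding distance $d$, the underlying contrastive loss is
+
+ $$\mathcal{L} = y \cdot d^2 + (1 - y) \cdot \max(0,\; m - d)^2$$
+
+ where $m$ is the margin (0.5 by default in sentence-transformers, with cosine distance). The "online" variant computes the loss only over the hard pairs in each batch: positive pairs farther apart than the closest negative pair, and negative pairs closer than the farthest positive pair.
+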
+ ### Evaluation Dataset
+
+ #### Unnamed Dataset
+
+ * Size: 1,006 evaluation samples
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
+ * Approximate statistics based on the first 1000 samples:
+ | | sentence1 | sentence2 | label |
+ |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:------------------------------------------------|
+ | type | string | string | int |
+ | details | <ul><li>min: 7 tokens</li><li>mean: 12.51 tokens</li><li>max: 25 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 14.82 tokens</li><li>max: 38 tokens</li></ul> | <ul><li>0: ~49.70%</li><li>1: ~50.30%</li></ul> |
+ * Samples:
+ | sentence1 | sentence2 | label |
+ |:---------------------------------------------|:-----------------------------------------------------------------------|:---------------|
+ | <code>¿Adónde irá el buey que no are?</code> | <code>¿A dó irá el buey que no are?</code> | <code>1</code> |
+ | <code>¿Adónde irá el buey que no are?</code> | <code>¿Adónde irá el buey que no are ni la mula que no cargue?</code> | <code>1</code> |
+ | <code>¿Adónde irá el buey que no are?</code> | <code>¿Adónde irá el buey que no are, sino al matadero?</code> | <code>1</code> |
+ * Loss: [<code>OnlineContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#onlinecontrastiveloss)
+
+ ### Training Hyperparameters
+ #### Non-Default Hyperparameters
+
+ - `eval_strategy`: steps
+ - `learning_rate`: 1e-05
+ - `num_train_epochs`: 4
+ - `lr_scheduler_type`: constant
+ - `load_best_model_at_end`: True
+ - `eval_on_start`: True
+ - `batch_sampler`: no_duplicates
+
+ #### All Hyperparameters
+ <details><summary>Click to expand</summary>
+
+ - `overwrite_output_dir`: False
+ - `do_predict`: False
+ - `eval_strategy`: steps
+ - `prediction_loss_only`: True
+ - `per_device_train_batch_size`: 8
+ - `per_device_eval_batch_size`: 8
+ - `per_gpu_train_batch_size`: None
+ - `per_gpu_eval_batch_size`: None
+ - `gradient_accumulation_steps`: 1
+ - `eval_accumulation_steps`: None
+ - `torch_empty_cache_steps`: None
+ - `learning_rate`: 1e-05
+ - `weight_decay`: 0.0
+ - `adam_beta1`: 0.9
+ - `adam_beta2`: 0.999
+ - `adam_epsilon`: 1e-08
+ - `max_grad_norm`: 1.0
+ - `num_train_epochs`: 4
+ - `max_steps`: -1
+ - `lr_scheduler_type`: constant
+ - `lr_scheduler_kwargs`: {}
+ - `warmup_ratio`: 0.0
+ - `warmup_steps`: 0
+ - `log_level`: passive
+ - `log_level_replica`: warning
+ - `log_on_each_node`: True
+ - `logging_nan_inf_filter`: True
+ - `save_safetensors`: True
+ - `save_on_each_node`: False
+ - `save_only_model`: False
+ - `restore_callback_states_from_checkpoint`: False
+ - `no_cuda`: False
+ - `use_cpu`: False
+ - `use_mps_device`: False
+ - `seed`: 42
+ - `data_seed`: None
+ - `jit_mode_eval`: False
+ - `use_ipex`: False
+ - `bf16`: False
+ - `fp16`: False
+ - `fp16_opt_level`: O1
+ - `half_precision_backend`: auto
+ - `bf16_full_eval`: False
+ - `fp16_full_eval`: False
+ - `tf32`: None
+ - `local_rank`: 0
+ - `ddp_backend`: None
+ - `tpu_num_cores`: None
+ - `tpu_metrics_debug`: False
+ - `debug`: []
+ - `dataloader_drop_last`: False
+ - `dataloader_num_workers`: 0
+ - `dataloader_prefetch_factor`: None
+ - `past_index`: -1
+ - `disable_tqdm`: False
+ - `remove_unused_columns`: True
+ - `label_names`: None
+ - `load_best_model_at_end`: True
+ - `ignore_data_skip`: False
+ - `fsdp`: []
+ - `fsdp_min_num_params`: 0
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+ - `tp_size`: 0
+ - `fsdp_transformer_layer_cls_to_wrap`: None
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+ - `deepspeed`: None
+ - `label_smoothing_factor`: 0.0
+ - `optim`: adamw_torch
+ - `optim_args`: None
+ - `adafactor`: False
+ - `group_by_length`: False
+ - `length_column_name`: length
+ - `ddp_find_unused_parameters`: None
+ - `ddp_bucket_cap_mb`: None
+ - `ddp_broadcast_buffers`: False
+ - `dataloader_pin_memory`: True
+ - `dataloader_persistent_workers`: False
+ - `skip_memory_metrics`: True
+ - `use_legacy_prediction_loop`: False
+ - `push_to_hub`: False
+ - `resume_from_checkpoint`: None
+ - `hub_model_id`: None
+ - `hub_strategy`: every_save
+ - `hub_private_repo`: None
+ - `hub_always_push`: False
+ - `gradient_checkpointing`: False
+ - `gradient_checkpointing_kwargs`: None
+ - `include_inputs_for_metrics`: False
+ - `include_for_metrics`: []
+ - `eval_do_concat_batches`: True
+ - `fp16_backend`: auto
+ - `push_to_hub_model_id`: None
+ - `push_to_hub_organization`: None
+ - `mp_parameters`:
+ - `auto_find_batch_size`: False
+ - `full_determinism`: False
+ - `torchdynamo`: None
+ - `ray_scope`: last
+ - `ddp_timeout`: 1800
+ - `torch_compile`: False
+ - `torch_compile_backend`: None
+ - `torch_compile_mode`: None
+ - `dispatch_batches`: None
+ - `split_batches`: None
+ - `include_tokens_per_second`: False
+ - `include_num_input_tokens_seen`: False
+ - `neftune_noise_alpha`: None
+ - `optim_target_modules`: None
+ - `batch_eval_metrics`: False
+ - `eval_on_start`: True
+ - `use_liger_kernel`: False
+ - `eval_use_gather_object`: False
+ - `average_tokens_across_devices`: False
+ - `prompts`: None
+ - `batch_sampler`: no_duplicates
+ - `multi_dataset_batch_sampler`: proportional
+
+ </details>
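+
+ As a minimal sketch, a run with these non-default hyperparameters could be set up as follows; the data file names are assumptions for illustration (the output directory matches `trainer_state.json`), not the original training script:
+
+ ```python
+ from datasets import load_dataset
+ from sentence_transformers import (
+     SentenceTransformer,
+     SentenceTransformerTrainer,
+     SentenceTransformerTrainingArguments,
+ )
+ from sentence_transformers.losses import OnlineContrastiveLoss
+ from sentence_transformers.training_args import BatchSamplers
+
+ model = SentenceTransformer("intfloat/multilingual-e5-large")
+
+ # Hypothetical csv files with sentence1, sentence2, label columns.
+ train_dataset = load_dataset("csv", data_files="train.csv", split="train")
+ eval_dataset = load_dataset("csv", data_files="eval.csv", split="train")
+
+ args = SentenceTransformerTrainingArguments(
+     output_dir="models/me5-large-retraining",
+     num_train_epochs=4,
+     learning_rate=1e-5,
+     lr_scheduler_type="constant",
+     eval_strategy="steps",
+     eval_steps=5,
+     save_steps=10,
+     eval_on_start=True,
+     load_best_model_at_end=True,
+     batch_sampler=BatchSamplers.NO_DUPLICATES,
+ )
+
+ trainer = SentenceTransformerTrainer(
+     model=model,
+     args=args,
+     train_dataset=train_dataset,
+     eval_dataset=eval_dataset,
+     loss=OnlineContrastiveLoss(model),  # hard-pair contrastive objective
+ )
+ trainer.train()
+ ```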
+
+ ### Training Logs
+ | Epoch | Step | Training Loss | Validation Loss | spearman_cosine |
+ |:------:|:----:|:-------------:|:---------------:|:---------------:|
+ | 0 | 0 | - | 0.1095 | 0.7843 |
+ | 0.1351 | 5 | 0.6784 | 0.0765 | 0.8123 |
+ | 0.2703 | 10 | 0.5088 | 0.0533 | 0.8303 |
+ | 0.4054 | 15 | 0.4364 | 0.0475 | 0.8339 |
+ | 0.5405 | 20 | 0.3456 | 0.0435 | 0.8345 |
+ | 0.6757 | 25 | 0.1423 | 0.0424 | 0.8324 |
+ | 0.8108 | 30 | 0.2852 | 0.0443 | 0.8271 |
+ | 0.9459 | 35 | 0.2616 | 0.0514 | 0.8262 |
+ | 1.0811 | 40 | 0.1451 | 0.0521 | 0.8232 |
+ | 1.2162 | 45 | 0.2046 | 0.0496 | 0.8221 |
+ | 1.3514 | 50 | 0.055 | 0.0516 | 0.8197 |
+ | 1.4865 | 55 | 0.0956 | 0.0545 | 0.8190 |
+ | 1.6216 | 60 | 0.1213 | 0.0533 | 0.8213 |
+ | 1.7568 | 65 | 0.2378 | 0.0464 | 0.8253 |
+ | 1.8919 | 70 | 0.2723 | 0.0458 | 0.8249 |
+ | 2.0270 | 75 | 0.0603 | 0.0467 | 0.8226 |
+ | 2.1622 | 80 | 0.1089 | 0.0415 | 0.8263 |
+ | 2.2973 | 85 | 0.0813 | 0.0417 | 0.8270 |
+ | 2.4324 | 90 | 0.0 | 0.0437 | 0.8250 |
+ | 2.5676 | 95 | 0.0436 | 0.0467 | 0.8242 |
+ | 2.7027 | 100 | 0.0 | 0.0451 | 0.8242 |
+ | 2.8378 | 105 | 0.0 | 0.0451 | 0.8243 |
+ | 2.9730 | 110 | 0.0271 | 0.0433 | 0.8243 |
+ | 3.1081 | 115 | 0.007 | 0.0502 | 0.8195 |
+ | 3.2432 | 120 | 0.1025 | 0.0523 | 0.8195 |
+ | 3.3784 | 125 | 0.1244 | 0.0527 | 0.8251 |
+ | 3.5135 | 130 | 0.0 | 0.0534 | 0.8262 |
+ | 3.6486 | 135 | 0.0259 | 0.0571 | 0.8262 |
+ | 3.7838 | 140 | 0.0939 | 0.0526 | 0.8273 |
+ | 3.9189 | 145 | 0.1038 | 0.0527 | 0.8261 |
+
+
+ ### Framework Versions
+ - Python: 3.12.9
+ - Sentence Transformers: 3.4.1
+ - Transformers: 4.50.0
+ - PyTorch: 2.6.0+cpu
+ - Accelerate: 1.6.0
+ - Datasets: 3.5.0
+ - Tokenizers: 0.21.1
+
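+ A matching environment can be pinned from these versions (a convenience sketch, not an official requirements file):
+
+ ```bash
+ pip install "sentence-transformers==3.4.1" "transformers==4.50.0" \
+     "accelerate==1.6.0" "datasets==3.5.0" "tokenizers==0.21.1"
+ ```
+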
+ ## Citation
+
+ ### BibTeX
+
+ #### Sentence Transformers
+ ```bibtex
+ @inproceedings{reimers-2019-sentence-bert,
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+ author = "Reimers, Nils and Gurevych, Iryna",
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+ month = "11",
+ year = "2019",
+ publisher = "Association for Computational Linguistics",
+ url = "https://arxiv.org/abs/1908.10084",
+ }
+ ```
+
+ <!--
+ ## Glossary
+
+ *Clearly define terms in order to be accessible across audiences.*
+ -->
+
+ <!--
+ ## Model Card Authors
+
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+ -->
+
+ <!--
+ ## Model Card Contact
+
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+ -->
checkpoint-148/config.json ADDED
@@ -0,0 +1,27 @@
+ {
+ "architectures": [
+ "XLMRobertaModel"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "bos_token_id": 0,
+ "classifier_dropout": null,
+ "eos_token_id": 2,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 1024,
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "layer_norm_eps": 1e-05,
+ "max_position_embeddings": 514,
+ "model_type": "xlm-roberta",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 24,
+ "output_past": true,
+ "pad_token_id": 1,
+ "position_embedding_type": "absolute",
+ "torch_dtype": "float32",
+ "transformers_version": "4.50.0",
+ "type_vocab_size": 1,
+ "use_cache": true,
+ "vocab_size": 250002
+ }
checkpoint-148/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
+ {
+ "__version__": {
+ "sentence_transformers": "3.4.1",
+ "transformers": "4.50.0",
+ "pytorch": "2.6.0+cpu"
+ },
+ "prompts": {},
+ "default_prompt_name": null,
+ "similarity_fn_name": "cosine"
+ }
checkpoint-148/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:184f00394770de2bd5dbb0d1d15cc87926d44ed5251e869dd7a22b49b48a9100
+ size 2239607176
checkpoint-148/modules.json ADDED
@@ -0,0 +1,20 @@
+ [
+ {
+ "idx": 0,
+ "name": "0",
+ "path": "",
+ "type": "sentence_transformers.models.Transformer"
+ },
+ {
+ "idx": 1,
+ "name": "1",
+ "path": "1_Pooling",
+ "type": "sentence_transformers.models.Pooling"
+ },
+ {
+ "idx": 2,
+ "name": "2",
+ "path": "2_Normalize",
+ "type": "sentence_transformers.models.Normalize"
+ }
+ ]
checkpoint-148/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:34abf973396f5c82ed62ced5f3ed810e8fdac393d8282da77c875424ddc22161
+ size 4471044921
checkpoint-148/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:289acee9b20675fcf33130539b2d32f8fe82f5b8a76ff22e77e5702e32d167f1
+ size 13990
checkpoint-148/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:146ef9b5e5cd6fc6dfefe2364ebf158e1371c154a444a4d236aaa6ca0953aa1b
+ size 1064
checkpoint-148/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+ "max_seq_length": 512,
+ "do_lower_case": false
+ }
checkpoint-148/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "cls_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "mask_token": {
+ "content": "<mask>",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "sep_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoint-148/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085
+ size 17082987
checkpoint-148/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "<mask>",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<s>",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "<s>",
+ "eos_token": "</s>",
+ "extra_special_tokens": {},
+ "mask_token": "<mask>",
+ "model_max_length": 512,
+ "pad_token": "<pad>",
+ "sep_token": "</s>",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": "<unk>"
+ }
checkpoint-148/trainer_state.json ADDED
@@ -0,0 +1,537 @@
+ {
+ "best_global_step": 80,
+ "best_metric": 0.04149133339524269,
+ "best_model_checkpoint": "models/me5-large-retraining\\checkpoint-80",
+ "epoch": 4.0,
+ "eval_steps": 5,
+ "global_step": 148,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0,
+ "eval_loss": 0.10950354486703873,
+ "eval_pearson_cosine": 0.7806081242807599,
+ "eval_runtime": 154.7907,
+ "eval_samples_per_second": 6.499,
+ "eval_spearman_cosine": 0.7843279594448466,
+ "eval_steps_per_second": 0.814,
+ "step": 0
+ },
+ {
+ "epoch": 0.13513513513513514,
+ "grad_norm": 9.38223934173584,
+ "learning_rate": 1e-05,
+ "loss": 0.6784,
+ "step": 5
+ },
+ {
+ "epoch": 0.13513513513513514,
+ "eval_loss": 0.07647562772035599,
+ "eval_pearson_cosine": 0.8193313583571735,
+ "eval_runtime": 154.4069,
+ "eval_samples_per_second": 6.515,
+ "eval_spearman_cosine": 0.8122999445241028,
+ "eval_steps_per_second": 0.816,
+ "step": 5
+ },
+ {
+ "epoch": 0.2702702702702703,
+ "grad_norm": 8.665059089660645,
+ "learning_rate": 1e-05,
+ "loss": 0.5088,
+ "step": 10
+ },
+ {
+ "epoch": 0.2702702702702703,
+ "eval_loss": 0.05329965054988861,
+ "eval_pearson_cosine": 0.846437709431274,
+ "eval_runtime": 157.2408,
+ "eval_samples_per_second": 6.398,
+ "eval_spearman_cosine": 0.8303112726354404,
+ "eval_steps_per_second": 0.801,
+ "step": 10
+ },
+ {
+ "epoch": 0.40540540540540543,
+ "grad_norm": 21.96602439880371,
+ "learning_rate": 1e-05,
+ "loss": 0.4364,
+ "step": 15
+ },
+ {
+ "epoch": 0.40540540540540543,
+ "eval_loss": 0.047497160732746124,
+ "eval_pearson_cosine": 0.8497791216636505,
+ "eval_runtime": 153.8649,
+ "eval_samples_per_second": 6.538,
+ "eval_spearman_cosine": 0.8338916292060913,
+ "eval_steps_per_second": 0.819,
+ "step": 15
+ },
+ {
+ "epoch": 0.5405405405405406,
+ "grad_norm": 10.698002815246582,
+ "learning_rate": 1e-05,
+ "loss": 0.3456,
+ "step": 20
+ },
+ {
+ "epoch": 0.5405405405405406,
+ "eval_loss": 0.043546345084905624,
+ "eval_pearson_cosine": 0.8481768671521202,
+ "eval_runtime": 154.4013,
+ "eval_samples_per_second": 6.515,
+ "eval_spearman_cosine": 0.8344803713886917,
+ "eval_steps_per_second": 0.816,
+ "step": 20
+ },
+ {
+ "epoch": 0.6756756756756757,
+ "grad_norm": 0.0,
+ "learning_rate": 1e-05,
+ "loss": 0.1423,
+ "step": 25
+ },
+ {
+ "epoch": 0.6756756756756757,
+ "eval_loss": 0.042350731790065765,
+ "eval_pearson_cosine": 0.8419294236114762,
+ "eval_runtime": 153.4308,
+ "eval_samples_per_second": 6.557,
+ "eval_spearman_cosine": 0.8324197823497284,
+ "eval_steps_per_second": 0.821,
+ "step": 25
+ },
+ {
+ "epoch": 0.8108108108108109,
+ "grad_norm": 11.502903938293457,
+ "learning_rate": 1e-05,
+ "loss": 0.2852,
+ "step": 30
+ },
+ {
+ "epoch": 0.8108108108108109,
+ "eval_loss": 0.04431174322962761,
+ "eval_pearson_cosine": 0.8311833878571917,
+ "eval_runtime": 153.1117,
+ "eval_samples_per_second": 6.57,
+ "eval_spearman_cosine": 0.8270800450821792,
+ "eval_steps_per_second": 0.823,
+ "step": 30
+ },
+ {
+ "epoch": 0.9459459459459459,
+ "grad_norm": 13.956666946411133,
+ "learning_rate": 1e-05,
+ "loss": 0.2616,
+ "step": 35
+ },
+ {
+ "epoch": 0.9459459459459459,
+ "eval_loss": 0.05144113302230835,
+ "eval_pearson_cosine": 0.8323883964862309,
+ "eval_runtime": 153.6257,
+ "eval_samples_per_second": 6.548,
+ "eval_spearman_cosine": 0.8261627064456549,
+ "eval_steps_per_second": 0.82,
+ "step": 35
+ },
+ {
+ "epoch": 1.0810810810810811,
+ "grad_norm": 0.0,
+ "learning_rate": 1e-05,
+ "loss": 0.1451,
+ "step": 40
+ },
+ {
+ "epoch": 1.0810810810810811,
+ "eval_loss": 0.052108317613601685,
+ "eval_pearson_cosine": 0.8322596480425719,
+ "eval_runtime": 152.7732,
+ "eval_samples_per_second": 6.585,
+ "eval_spearman_cosine": 0.8232463910787938,
+ "eval_steps_per_second": 0.825,
+ "step": 40
+ },
+ {
+ "epoch": 1.2162162162162162,
+ "grad_norm": 11.031551361083984,
+ "learning_rate": 1e-05,
+ "loss": 0.2046,
+ "step": 45
+ },
+ {
+ "epoch": 1.2162162162162162,
+ "eval_loss": 0.049648430198431015,
+ "eval_pearson_cosine": 0.8345968763719022,
+ "eval_runtime": 151.5419,
+ "eval_samples_per_second": 6.638,
+ "eval_spearman_cosine": 0.8220928743948337,
+ "eval_steps_per_second": 0.831,
+ "step": 45
+ },
+ {
+ "epoch": 1.3513513513513513,
+ "grad_norm": 0.0,
+ "learning_rate": 1e-05,
+ "loss": 0.055,
+ "step": 50
+ },
+ {
+ "epoch": 1.3513513513513513,
+ "eval_loss": 0.05155247077345848,
+ "eval_pearson_cosine": 0.8295312877735412,
+ "eval_runtime": 152.4645,
+ "eval_samples_per_second": 6.598,
+ "eval_spearman_cosine": 0.8197002635410278,
+ "eval_steps_per_second": 0.826,
+ "step": 50
+ },
+ {
+ "epoch": 1.4864864864864864,
+ "grad_norm": 0.0,
+ "learning_rate": 1e-05,
+ "loss": 0.0956,
+ "step": 55
+ },
+ {
+ "epoch": 1.4864864864864864,
+ "eval_loss": 0.05453842505812645,
+ "eval_pearson_cosine": 0.8258612961974536,
+ "eval_runtime": 151.2464,
+ "eval_samples_per_second": 6.651,
+ "eval_spearman_cosine": 0.8190362223126076,
+ "eval_steps_per_second": 0.833,
+ "step": 55
+ },
+ {
+ "epoch": 1.6216216216216215,
+ "grad_norm": 10.073129653930664,
+ "learning_rate": 1e-05,
+ "loss": 0.1213,
+ "step": 60
+ },
+ {
+ "epoch": 1.6216216216216215,
+ "eval_loss": 0.05331311747431755,
+ "eval_pearson_cosine": 0.8280490163439709,
+ "eval_runtime": 151.9002,
+ "eval_samples_per_second": 6.623,
+ "eval_spearman_cosine": 0.8212679517806181,
+ "eval_steps_per_second": 0.829,
+ "step": 60
+ },
+ {
+ "epoch": 1.7567567567567568,
+ "grad_norm": 8.610294342041016,
+ "learning_rate": 1e-05,
+ "loss": 0.2378,
+ "step": 65
+ },
+ {
+ "epoch": 1.7567567567567568,
+ "eval_loss": 0.04638493061065674,
+ "eval_pearson_cosine": 0.8348764332747338,
+ "eval_runtime": 151.3483,
+ "eval_samples_per_second": 6.647,
+ "eval_spearman_cosine": 0.8253343633484949,
+ "eval_steps_per_second": 0.833,
+ "step": 65
+ },
+ {
+ "epoch": 1.8918918918918919,
+ "grad_norm": 7.2265729904174805,
+ "learning_rate": 1e-05,
+ "loss": 0.2723,
+ "step": 70
+ },
+ {
+ "epoch": 1.8918918918918919,
+ "eval_loss": 0.04580773040652275,
+ "eval_pearson_cosine": 0.8327080612027193,
+ "eval_runtime": 151.0951,
+ "eval_samples_per_second": 6.658,
+ "eval_spearman_cosine": 0.8249373111883099,
+ "eval_steps_per_second": 0.834,
+ "step": 70
+ },
+ {
+ "epoch": 2.027027027027027,
+ "grad_norm": 0.0,
+ "learning_rate": 1e-05,
+ "loss": 0.0603,
+ "step": 75
+ },
+ {
+ "epoch": 2.027027027027027,
+ "eval_loss": 0.04667947068810463,
+ "eval_pearson_cosine": 0.8292172905350813,
+ "eval_runtime": 150.817,
+ "eval_samples_per_second": 6.67,
+ "eval_spearman_cosine": 0.8226234223032437,
+ "eval_steps_per_second": 0.835,
+ "step": 75
+ },
+ {
+ "epoch": 2.1621621621621623,
+ "grad_norm": 7.278922080993652,
+ "learning_rate": 1e-05,
+ "loss": 0.1089,
+ "step": 80
+ },
+ {
+ "epoch": 2.1621621621621623,
+ "eval_loss": 0.04149133339524269,
+ "eval_pearson_cosine": 0.8348787263877363,
+ "eval_runtime": 151.9077,
+ "eval_samples_per_second": 6.622,
+ "eval_spearman_cosine": 0.8262517044196894,
+ "eval_steps_per_second": 0.829,
+ "step": 80
+ },
+ {
+ "epoch": 2.2972972972972974,
+ "grad_norm": 0.0,
+ "learning_rate": 1e-05,
+ "loss": 0.0813,
+ "step": 85
+ },
+ {
+ "epoch": 2.2972972972972974,
+ "eval_loss": 0.041691090911626816,
+ "eval_pearson_cosine": 0.834739922043313,
+ "eval_runtime": 151.4069,
+ "eval_samples_per_second": 6.644,
+ "eval_spearman_cosine": 0.8269978977904043,
+ "eval_steps_per_second": 0.832,
+ "step": 85
+ },
+ {
+ "epoch": 2.4324324324324325,
+ "grad_norm": 0.0,
+ "learning_rate": 1e-05,
+ "loss": 0.0,
+ "step": 90
+ },
+ {
+ "epoch": 2.4324324324324325,
+ "eval_loss": 0.043681543320417404,
+ "eval_pearson_cosine": 0.8300953895591171,
+ "eval_runtime": 151.2024,
+ "eval_samples_per_second": 6.653,
+ "eval_spearman_cosine": 0.8249920776743955,
+ "eval_steps_per_second": 0.833,
+ "step": 90
+ },
+ {
+ "epoch": 2.5675675675675675,
+ "grad_norm": 0.0,
+ "learning_rate": 1e-05,
+ "loss": 0.0436,
+ "step": 95
+ },
+ {
+ "epoch": 2.5675675675675675,
+ "eval_loss": 0.04666070267558098,
+ "eval_pearson_cosine": 0.8280036278693699,
+ "eval_runtime": 150.6994,
+ "eval_samples_per_second": 6.676,
+ "eval_spearman_cosine": 0.8241911129581995,
+ "eval_steps_per_second": 0.836,
+ "step": 95
+ },
+ {
+ "epoch": 2.7027027027027026,
+ "grad_norm": 0.0,
+ "learning_rate": 1e-05,
+ "loss": 0.0,
+ "step": 100
+ },
+ {
+ "epoch": 2.7027027027027026,
+ "eval_loss": 0.04513184353709221,
+ "eval_pearson_cosine": 0.827662426497365,
+ "eval_runtime": 151.7254,
+ "eval_samples_per_second": 6.63,
+ "eval_spearman_cosine": 0.8241911153867979,
+ "eval_steps_per_second": 0.83,
+ "step": 100
+ },
+ {
+ "epoch": 2.8378378378378377,
+ "grad_norm": 0.0,
+ "learning_rate": 1e-05,
+ "loss": 0.0,
+ "step": 105
+ },
+ {
+ "epoch": 2.8378378378378377,
+ "eval_loss": 0.045100126415491104,
+ "eval_pearson_cosine": 0.8271686930217653,
+ "eval_runtime": 150.6791,
+ "eval_samples_per_second": 6.676,
+ "eval_spearman_cosine": 0.8242595734942031,
+ "eval_steps_per_second": 0.836,
+ "step": 105
+ },
+ {
+ "epoch": 2.972972972972973,
+ "grad_norm": 0.0,
+ "learning_rate": 1e-05,
+ "loss": 0.0271,
+ "step": 110
+ },
+ {
+ "epoch": 2.972972972972973,
+ "eval_loss": 0.04326998442411423,
+ "eval_pearson_cosine": 0.8242998475213165,
+ "eval_runtime": 151.5644,
+ "eval_samples_per_second": 6.637,
+ "eval_spearman_cosine": 0.8243348749833265,
+ "eval_steps_per_second": 0.831,
+ "step": 110
+ },
+ {
+ "epoch": 3.108108108108108,
+ "grad_norm": 0.0,
+ "learning_rate": 1e-05,
+ "loss": 0.007,
+ "step": 115
+ },
+ {
+ "epoch": 3.108108108108108,
+ "eval_loss": 0.050163887441158295,
+ "eval_pearson_cosine": 0.8100599157021782,
+ "eval_runtime": 149.8939,
+ "eval_samples_per_second": 6.711,
+ "eval_spearman_cosine": 0.8195085832550942,
+ "eval_steps_per_second": 0.841,
+ "step": 115
+ },
+ {
+ "epoch": 3.2432432432432434,
+ "grad_norm": 5.909173011779785,
+ "learning_rate": 1e-05,
+ "loss": 0.1025,
+ "step": 120
+ },
+ {
+ "epoch": 3.2432432432432434,
+ "eval_loss": 0.052336592227220535,
+ "eval_pearson_cosine": 0.8092739374985023,
+ "eval_runtime": 151.2544,
+ "eval_samples_per_second": 6.651,
+ "eval_spearman_cosine": 0.8194743493718913,
+ "eval_steps_per_second": 0.833,
+ "step": 120
+ },
+ {
+ "epoch": 3.3783783783783785,
+ "grad_norm": 0.0,
+ "learning_rate": 1e-05,
+ "loss": 0.1244,
+ "step": 125
+ },
+ {
+ "epoch": 3.3783783783783785,
+ "eval_loss": 0.05269436165690422,
+ "eval_pearson_cosine": 0.8212737827789367,
+ "eval_runtime": 150.843,
+ "eval_samples_per_second": 6.669,
+ "eval_spearman_cosine": 0.8250605382131625,
+ "eval_steps_per_second": 0.835,
+ "step": 125
+ },
+ {
+ "epoch": 3.5135135135135136,
+ "grad_norm": 0.0,
+ "learning_rate": 1e-05,
+ "loss": 0.0,
+ "step": 130
+ },
+ {
+ "epoch": 3.5135135135135136,
+ "eval_loss": 0.05343884229660034,
+ "eval_pearson_cosine": 0.8257663666576973,
+ "eval_runtime": 150.9582,
+ "eval_samples_per_second": 6.664,
+ "eval_spearman_cosine": 0.8261900896885362,
+ "eval_steps_per_second": 0.835,
+ "step": 130
+ },
+ {
+ "epoch": 3.6486486486486487,
+ "grad_norm": 0.0,
+ "learning_rate": 1e-05,
+ "loss": 0.0259,
+ "step": 135
+ },
+ {
+ "epoch": 3.6486486486486487,
+ "eval_loss": 0.05709109827876091,
+ "eval_pearson_cosine": 0.8294106767298322,
+ "eval_runtime": 151.0358,
+ "eval_samples_per_second": 6.661,
+ "eval_spearman_cosine": 0.826217475365987,
+ "eval_steps_per_second": 0.834,
+ "step": 135
+ },
+ {
+ "epoch": 3.7837837837837838,
+ "grad_norm": 0.0,
+ "learning_rate": 1e-05,
+ "loss": 0.0939,
+ "step": 140
+ },
+ {
+ "epoch": 3.7837837837837838,
+ "eval_loss": 0.05264894291758537,
+ "eval_pearson_cosine": 0.8333850683405307,
+ "eval_runtime": 151.069,
+ "eval_samples_per_second": 6.659,
+ "eval_spearman_cosine": 0.8273128026466706,
+ "eval_steps_per_second": 0.834,
+ "step": 140
+ },
+ {
+ "epoch": 3.918918918918919,
+ "grad_norm": 7.900262832641602,
+ "learning_rate": 1e-05,
+ "loss": 0.1038,
+ "step": 145
+ },
+ {
+ "epoch": 3.918918918918919,
+ "eval_loss": 0.0527377724647522,
+ "eval_pearson_cosine": 0.8334934833047165,
+ "eval_runtime": 150.8011,
+ "eval_samples_per_second": 6.671,
+ "eval_spearman_cosine": 0.8261353280714282,
+ "eval_steps_per_second": 0.836,
+ "step": 145
+ }
+ ],
+ "logging_steps": 5,
+ "max_steps": 148,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 4,
+ "save_steps": 10,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 0.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": null
+ }
checkpoint-148/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c6195991b123e7d9abf5d6fe39d2acc574f894288d505cb696178ae95f5cb07f
+ size 5624
checkpoint-20/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+ "word_embedding_dimension": 1024,
+ "pooling_mode_cls_token": false,
+ "pooling_mode_mean_tokens": true,
+ "pooling_mode_max_tokens": false,
+ "pooling_mode_mean_sqrt_len_tokens": false,
+ "pooling_mode_weightedmean_tokens": false,
+ "pooling_mode_lasttoken": false,
+ "include_prompt": true
+ }
checkpoint-20/README.md ADDED
@@ -0,0 +1,400 @@
+ ---
+ tags:
+ - sentence-transformers
+ - sentence-similarity
+ - feature-extraction
+ - generated_from_trainer
+ - dataset_size:290
+ - loss:OnlineContrastiveLoss
+ base_model: intfloat/multilingual-e5-large
+ widget:
+ - source_sentence: Antes se coge al mentiroso que al cojo
+ sentences:
+ - A escudero pobre, taza de plata y cántaro de cobre
+ - En río revuelto, pesca abundante
+ - Se ayuda primero al necesitado que al engañador.
+ - source_sentence: Asno de muchos, lobos lo comen
+ sentences:
+ - Sabio entre sabios, amigos lo respetan.
+ - El que mucho madruga más hace que el que Dios ayuda.
+ - Se pilla antes a un mentiroso que a un cojo
+ - source_sentence: Al buey por el asta, y al hombre por la palabra
+ sentences:
+ - Si no quieres arroz con leche, toma tres tazas
+ - Al hombre por la palabra, y al buey por el cuerno ata
+ - Ese no es tu amigo, sino alguien que siempre busca estar rodeado de bullicio y
+ actividad.
+ - source_sentence: Al médico, confesor y letrado, hablarles claro
+ sentences:
+ - Al médico, confesor y letrado, no le hayas engañado
+ - Más vale a quien Dios ayuda que quien mucho madruga
+ - Al que anda entre la miel, algo se le pega
+ - source_sentence: A muertos y a idos, no hay amigos
+ sentences:
+ - Al buen callar llaman santo
+ - A los vivos y presentes, siempre hay amigos.
+ - Al que de prestado se viste, en la calle lo desnudan
+ pipeline_tag: sentence-similarity
+ library_name: sentence-transformers
+ metrics:
+ - pearson_cosine
+ - spearman_cosine
+ model-index:
+ - name: SentenceTransformer based on intfloat/multilingual-e5-large
+ results:
+ - task:
+ type: semantic-similarity
+ name: Semantic Similarity
+ dataset:
+ name: Unknown
+ type: unknown
+ metrics:
+ - type: pearson_cosine
+ value: 0.8481768671521202
+ name: Pearson Cosine
+ - type: spearman_cosine
+ value: 0.8344803713886917
+ name: Spearman Cosine
+ ---
+
+ # SentenceTransformer based on intfloat/multilingual-e5-large
+
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [intfloat/multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large) on the csv dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+
+ ## Model Details
+
+ ### Model Description
+ - **Model Type:** Sentence Transformer
+ - **Base model:** [intfloat/multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large) <!-- at revision 0dc5580a448e4284468b8909bae50fa925907bc5 -->
+ - **Maximum Sequence Length:** 512 tokens
+ - **Output Dimensionality:** 1024 dimensions
+ - **Similarity Function:** Cosine Similarity
+ - **Training Dataset:**
+ - csv
+ <!-- - **Language:** Unknown -->
+ <!-- - **License:** Unknown -->
+
+ ### Model Sources
+
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+
+ ### Full Model Architecture
+
+ ```
+ SentenceTransformer(
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: XLMRobertaModel
+ (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+ (2): Normalize()
+ )
+ ```
+
+ ## Usage
+
+ ### Direct Usage (Sentence Transformers)
+
+ First install the Sentence Transformers library:
+
+ ```bash
+ pip install -U sentence-transformers
+ ```
+
+ Then you can load this model and run inference.
+ ```python
+ from sentence_transformers import SentenceTransformer
+
+ # Download from the 🤗 Hub
+ model = SentenceTransformer("sentence_transformers_model_id")
+ # Run inference
+ sentences = [
+ 'A muertos y a idos, no hay amigos',
+ 'A los vivos y presentes, siempre hay amigos.',
+ 'Al buen callar llaman santo',
+ ]
+ embeddings = model.encode(sentences)
+ print(embeddings.shape)
+ # [3, 1024]
+
+ # Get the similarity scores for the embeddings
+ similarities = model.similarity(embeddings, embeddings)
+ print(similarities.shape)
+ # [3, 3]
+ ```
+
+ <!--
+ ### Direct Usage (Transformers)
+
+ <details><summary>Click to see the direct usage in Transformers</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Downstream Usage (Sentence Transformers)
+
+ You can finetune this model on your own dataset.
+
+ <details><summary>Click to expand</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Out-of-Scope Use
+
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
+ -->
+
+ ## Evaluation
+
+ ### Metrics
+
+ #### Semantic Similarity
+
+ * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
+
+ | Metric | Value |
+ |:--------------------|:-----------|
+ | pearson_cosine | 0.8482 |
+ | **spearman_cosine** | **0.8345** |
+
+ <!--
+ ## Bias, Risks and Limitations
+
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+ -->
+
+ <!--
+ ### Recommendations
+
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+ -->
+
+ ## Training Details
+
+ ### Training Dataset
+
+ #### csv
+
+ * Dataset: csv
+ * Size: 290 training samples
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
+ * Approximate statistics based on the first 290 samples:
+ | | sentence1 | sentence2 | label |
+ |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:------------------------------------------------|
+ | type | string | string | int |
+ | details | <ul><li>min: 7 tokens</li><li>mean: 11.68 tokens</li><li>max: 22 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 17.01 tokens</li><li>max: 44 tokens</li></ul> | <ul><li>0: ~50.00%</li><li>1: ~50.00%</li></ul> |
+ * Samples:
+ | sentence1 | sentence2 | label |
+ |:------------------------------------------------------------|:--------------------------------------------------------------------------------------------------|:---------------|
+ | <code>Gota a gota, la mar se agota.</code> | <code>Con el pasar del tiempo se llega a alcanzar cualquier meta.</code> | <code>1</code> |
+ | <code>Dime de qué presumes y te diré de qué careces.</code> | <code>Dime de qué careces y te diré de qué dispones.</code> | <code>0</code> |
+ | <code>Cómo se vive, se muere.</code> | <code>De aquella forma que hemos vivido nuestra vida será de la forma en la que moriremos.</code> | <code>1</code> |
+ * Loss: [<code>OnlineContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#onlinecontrastiveloss)
+
+ ### Evaluation Dataset
+
+ #### Unnamed Dataset
+
+ * Size: 1,006 evaluation samples
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
+ * Approximate statistics based on the first 1000 samples:
+ | | sentence1 | sentence2 | label |
+ |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:------------------------------------------------|
+ | type | string | string | int |
+ | details | <ul><li>min: 7 tokens</li><li>mean: 12.51 tokens</li><li>max: 25 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 14.82 tokens</li><li>max: 38 tokens</li></ul> | <ul><li>0: ~49.70%</li><li>1: ~50.30%</li></ul> |
+ * Samples:
+ | sentence1 | sentence2 | label |
+ |:---------------------------------------------|:-----------------------------------------------------------------------|:---------------|
+ | <code>¿Adónde irá el buey que no are?</code> | <code>¿A dó irá el buey que no are?</code> | <code>1</code> |
+ | <code>¿Adónde irá el buey que no are?</code> | <code>¿Adónde irá el buey que no are ni la mula que no cargue?</code> | <code>1</code> |
+ | <code>¿Adónde irá el buey que no are?</code> | <code>¿Adónde irá el buey que no are, sino al matadero?</code> | <code>1</code> |
+ * Loss: [<code>OnlineContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#onlinecontrastiveloss)
+
+ ### Training Hyperparameters
+ #### Non-Default Hyperparameters
+
+ - `eval_strategy`: steps
+ - `learning_rate`: 1e-05
+ - `num_train_epochs`: 1
+ - `lr_scheduler_type`: constant
+ - `load_best_model_at_end`: True
+ - `eval_on_start`: True
+ - `batch_sampler`: no_duplicates
+
+ #### All Hyperparameters
+ <details><summary>Click to expand</summary>
+
+ - `overwrite_output_dir`: False
+ - `do_predict`: False
+ - `eval_strategy`: steps
+ - `prediction_loss_only`: True
+ - `per_device_train_batch_size`: 8
+ - `per_device_eval_batch_size`: 8
+ - `per_gpu_train_batch_size`: None
+ - `per_gpu_eval_batch_size`: None
+ - `gradient_accumulation_steps`: 1
+ - `eval_accumulation_steps`: None
+ - `torch_empty_cache_steps`: None
+ - `learning_rate`: 1e-05
+ - `weight_decay`: 0.0
+ - `adam_beta1`: 0.9
+ - `adam_beta2`: 0.999
+ - `adam_epsilon`: 1e-08
+ - `max_grad_norm`: 1.0
+ - `num_train_epochs`: 1
+ - `max_steps`: -1
+ - `lr_scheduler_type`: constant
+ - `lr_scheduler_kwargs`: {}
+ - `warmup_ratio`: 0.0
+ - `warmup_steps`: 0
+ - `log_level`: passive
+ - `log_level_replica`: warning
+ - `log_on_each_node`: True
+ - `logging_nan_inf_filter`: True
+ - `save_safetensors`: True
+ - `save_on_each_node`: False
+ - `save_only_model`: False
+ - `restore_callback_states_from_checkpoint`: False
+ - `no_cuda`: False
+ - `use_cpu`: False
+ - `use_mps_device`: False
+ - `seed`: 42
+ - `data_seed`: None
+ - `jit_mode_eval`: False
+ - `use_ipex`: False
+ - `bf16`: False
+ - `fp16`: False
+ - `fp16_opt_level`: O1
+ - `half_precision_backend`: auto
+ - `bf16_full_eval`: False
+ - `fp16_full_eval`: False
+ - `tf32`: None
+ - `local_rank`: 0
+ - `ddp_backend`: None
+ - `tpu_num_cores`: None
+ - `tpu_metrics_debug`: False
+ - `debug`: []
+ - `dataloader_drop_last`: False
+ - `dataloader_num_workers`: 0
+ - `dataloader_prefetch_factor`: None
+ - `past_index`: -1
+ - `disable_tqdm`: False
+ - `remove_unused_columns`: True
+ - `label_names`: None
+ - `load_best_model_at_end`: True
+ - `ignore_data_skip`: False
+ - `fsdp`: []
+ - `fsdp_min_num_params`: 0
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+ - `tp_size`: 0
+ - `fsdp_transformer_layer_cls_to_wrap`: None
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+ - `deepspeed`: None
+ - `label_smoothing_factor`: 0.0
+ - `optim`: adamw_torch
+ - `optim_args`: None
+ - `adafactor`: False
+ - `group_by_length`: False
+ - `length_column_name`: length
+ - `ddp_find_unused_parameters`: None
+ - `ddp_bucket_cap_mb`: None
+ - `ddp_broadcast_buffers`: False
+ - `dataloader_pin_memory`: True
+ - `dataloader_persistent_workers`: False
+ - `skip_memory_metrics`: True
+ - `use_legacy_prediction_loop`: False
+ - `push_to_hub`: False
+ - `resume_from_checkpoint`: None
+ - `hub_model_id`: None
+ - `hub_strategy`: every_save
+ - `hub_private_repo`: None
+ - `hub_always_push`: False
+ - `gradient_checkpointing`: False
+ - `gradient_checkpointing_kwargs`: None
+ - `include_inputs_for_metrics`: False
+ - `include_for_metrics`: []
+ - `eval_do_concat_batches`: True
+ - `fp16_backend`: auto
+ - `push_to_hub_model_id`: None
+ - `push_to_hub_organization`: None
+ - `mp_parameters`:
+ - `auto_find_batch_size`: False
+ - `full_determinism`: False
+ - `torchdynamo`: None
+ - `ray_scope`: last
+ - `ddp_timeout`: 1800
+ - `torch_compile`: False
+ - `torch_compile_backend`: None
+ - `torch_compile_mode`: None
+ - `dispatch_batches`: None
+ - `split_batches`: None
+ - `include_tokens_per_second`: False
+ - `include_num_input_tokens_seen`: False
+ - `neftune_noise_alpha`: None
+ - `optim_target_modules`: None
+ - `batch_eval_metrics`: False
+ - `eval_on_start`: True
+ - `use_liger_kernel`: False
+ - `eval_use_gather_object`: False
+ - `average_tokens_across_devices`: False
+ - `prompts`: None
+ - `batch_sampler`: no_duplicates
+ - `multi_dataset_batch_sampler`: proportional
+
+ </details>
+
+ ### Training Logs
+ | Epoch | Step | Training Loss | Validation Loss | spearman_cosine |
+ |:------:|:----:|:-------------:|:---------------:|:---------------:|
+ | 0 | 0 | - | 0.1095 | 0.7843 |
+ | 0.1351 | 5 | 0.6784 | 0.0765 | 0.8123 |
+ | 0.2703 | 10 | 0.5088 | 0.0533 | 0.8303 |
+ | 0.4054 | 15 | 0.4364 | 0.0475 | 0.8339 |
+ | 0.5405 | 20 | 0.3456 | 0.0435 | 0.8345 |
+
+
+ ### Framework Versions
+ - Python: 3.12.9
+ - Sentence Transformers: 3.4.1
+ - Transformers: 4.50.0
+ - PyTorch: 2.6.0+cpu
+ - Accelerate: 1.6.0
+ - Datasets: 3.5.0
+ - Tokenizers: 0.21.1
+
+ ## Citation
+
+ ### BibTeX
+
+ #### Sentence Transformers
+ ```bibtex
+ @inproceedings{reimers-2019-sentence-bert,
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+ author = "Reimers, Nils and Gurevych, Iryna",
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+ month = "11",
+ year = "2019",
+ publisher = "Association for Computational Linguistics",
+ url = "https://arxiv.org/abs/1908.10084",
+ }
+ ```
+
+ <!--
+ ## Glossary
+
+ *Clearly define terms in order to be accessible across audiences.*
+ -->
+
+ <!--
+ ## Model Card Authors
+
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+ -->
+
+ <!--
+ ## Model Card Contact
+
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+ -->
checkpoint-20/config.json ADDED
@@ -0,0 +1,27 @@
1
+ {
2
+ "architectures": [
3
+ "XLMRobertaModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 4096,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 514,
16
+ "model_type": "xlm-roberta",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 24,
19
+ "output_past": true,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.50.0",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 250002
27
+ }
checkpoint-20/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.4.1",
4
+ "transformers": "4.50.0",
5
+ "pytorch": "2.6.0+cpu"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": "cosine"
10
+ }
checkpoint-20/model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d8db9ca7a34661a8ab59ec5df97ad40f2c4e75973337e38d6910ecf9c1a527f
3
+ size 2239607176
checkpoint-20/modules.json ADDED
@@ -0,0 +1,20 @@
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
checkpoint-20/optimizer.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9a2e5de59454cb420f37bc12a0f0c4f45d5c2ba401324295394063e96b747af
3
+ size 4471044921
checkpoint-20/rng_state.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6f5fc60f03b02c0e0a76c701d090afac8e288367825dce0a6ff8fc8463b25ee
3
+ size 13990
checkpoint-20/scheduler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66649393eb0b497626eaceb0dc7ef324c049601175c3adb7f3d8bf15359013ec
3
+ size 1064
checkpoint-20/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
checkpoint-20/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
checkpoint-20/tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085
3
+ size 17082987
checkpoint-20/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
+ "mask_token": "<mask>",
50
+ "model_max_length": 512,
51
+ "pad_token": "<pad>",
52
+ "sep_token": "</s>",
53
+ "tokenizer_class": "XLMRobertaTokenizer",
54
+ "unk_token": "<unk>"
55
+ }
checkpoint-20/trainer_state.json ADDED
@@ -0,0 +1,112 @@
1
+ {
2
+ "best_global_step": 20,
3
+ "best_metric": 0.043546345084905624,
4
+ "best_model_checkpoint": "models/me5-large-retraining\\checkpoint-20",
5
+ "epoch": 0.5405405405405406,
6
+ "eval_steps": 5,
7
+ "global_step": 20,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0,
14
+ "eval_loss": 0.10950354486703873,
15
+ "eval_pearson_cosine": 0.7806081242807599,
16
+ "eval_runtime": 151.6579,
17
+ "eval_samples_per_second": 6.633,
18
+ "eval_spearman_cosine": 0.7843279594448466,
19
+ "eval_steps_per_second": 0.831,
20
+ "step": 0
21
+ },
22
+ {
23
+ "epoch": 0.13513513513513514,
24
+ "grad_norm": 9.38223934173584,
25
+ "learning_rate": 1e-05,
26
+ "loss": 0.6784,
27
+ "step": 5
28
+ },
29
+ {
30
+ "epoch": 0.13513513513513514,
31
+ "eval_loss": 0.07647562772035599,
32
+ "eval_pearson_cosine": 0.8193313583571735,
33
+ "eval_runtime": 156.2929,
34
+ "eval_samples_per_second": 6.437,
35
+ "eval_spearman_cosine": 0.8122999445241028,
36
+ "eval_steps_per_second": 0.806,
37
+ "step": 5
38
+ },
39
+ {
40
+ "epoch": 0.2702702702702703,
41
+ "grad_norm": 8.665059089660645,
42
+ "learning_rate": 1e-05,
43
+ "loss": 0.5088,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.2702702702702703,
48
+ "eval_loss": 0.05329965054988861,
49
+ "eval_pearson_cosine": 0.846437709431274,
50
+ "eval_runtime": 152.6705,
51
+ "eval_samples_per_second": 6.589,
52
+ "eval_spearman_cosine": 0.8303112726354404,
53
+ "eval_steps_per_second": 0.825,
54
+ "step": 10
55
+ },
56
+ {
57
+ "epoch": 0.40540540540540543,
58
+ "grad_norm": 21.96602439880371,
59
+ "learning_rate": 1e-05,
60
+ "loss": 0.4364,
61
+ "step": 15
62
+ },
63
+ {
64
+ "epoch": 0.40540540540540543,
65
+ "eval_loss": 0.047497160732746124,
66
+ "eval_pearson_cosine": 0.8497791216636505,
67
+ "eval_runtime": 150.8647,
68
+ "eval_samples_per_second": 6.668,
69
+ "eval_spearman_cosine": 0.8338916292060913,
70
+ "eval_steps_per_second": 0.835,
71
+ "step": 15
72
+ },
73
+ {
74
+ "epoch": 0.5405405405405406,
75
+ "grad_norm": 10.698002815246582,
76
+ "learning_rate": 1e-05,
77
+ "loss": 0.3456,
78
+ "step": 20
79
+ },
80
+ {
81
+ "epoch": 0.5405405405405406,
82
+ "eval_loss": 0.043546345084905624,
83
+ "eval_pearson_cosine": 0.8481768671521202,
84
+ "eval_runtime": 151.8674,
85
+ "eval_samples_per_second": 6.624,
86
+ "eval_spearman_cosine": 0.8344803713886917,
87
+ "eval_steps_per_second": 0.83,
88
+ "step": 20
89
+ }
90
+ ],
91
+ "logging_steps": 5,
92
+ "max_steps": 37,
93
+ "num_input_tokens_seen": 0,
94
+ "num_train_epochs": 1,
95
+ "save_steps": 10,
96
+ "stateful_callbacks": {
97
+ "TrainerControl": {
98
+ "args": {
99
+ "should_epoch_stop": false,
100
+ "should_evaluate": false,
101
+ "should_log": false,
102
+ "should_save": true,
103
+ "should_training_stop": false
104
+ },
105
+ "attributes": {}
106
+ }
107
+ },
108
+ "total_flos": 0.0,
109
+ "train_batch_size": 8,
110
+ "trial_name": null,
111
+ "trial_params": null
112
+ }
checkpoint-20/training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b41c6ac4c736e654c2036409a6676535c43e7b515e2e3c97cfc2b06f8c81bf3
3
+ size 5624
eval/similarity_evaluation_results.csv ADDED
@@ -0,0 +1,39 @@
1
+ epoch,steps,cosine_pearson,cosine_spearman
2
+ 0,0,0.7806081242807599,0.7843279594448466
3
+ 0.13513513513513514,5,0.8193313583571735,0.8122999445241028
4
+ 0.2702702702702703,10,0.846437709431274,0.8303112726354404
5
+ 0.40540540540540543,15,0.8497791216636505,0.8338916292060913
6
+ 0.5405405405405406,20,0.8481768671521202,0.8344803713886917
7
+ 0.6756756756756757,25,0.8419294236114762,0.8324197823497284
8
+ 0.8108108108108109,30,0.8311833878571917,0.8270800450821792
9
+ 0.9459459459459459,35,0.8323883964862309,0.8261627064456549
10
+ 1.0810810810810811,40,0.8322596480425719,0.8232463910787938
11
+ 1.2162162162162162,45,0.8345968763719022,0.8220928743948337
12
+ 1.3513513513513513,50,0.8295312877735412,0.8197002635410278
13
+ 1.4864864864864864,55,0.8258612961974536,0.8190362223126076
14
+ 1.6216216216216215,60,0.8280490163439709,0.8212679517806181
15
+ 1.7567567567567568,65,0.8348764332747338,0.8253343633484949
16
+ 1.8918918918918919,70,0.8327080612027193,0.8249373111883099
17
+ 2.027027027027027,75,0.8292172905350813,0.8226234223032437
18
+ 2.1621621621621623,80,0.8348787263877363,0.8262517044196894
19
+ 2.2972972972972974,85,0.834739922043313,0.8269978977904043
20
+ 2.4324324324324325,90,0.8300953895591171,0.8249920776743955
21
+ 2.5675675675675675,95,0.8280036278693699,0.8241911129581995
22
+ 2.7027027027027026,100,0.827662426497365,0.8241911153867979
23
+ 2.8378378378378377,105,0.8271686930217653,0.8242595734942031
24
+ 2.972972972972973,110,0.8242998475213165,0.8243348749833265
25
+ 3.108108108108108,115,0.8100599157021782,0.8195085832550942
26
+ 3.2432432432432434,120,0.8092739374985023,0.8194743493718913
27
+ 3.3783783783783785,125,0.8212737827789367,0.8250605382131625
28
+ 3.5135135135135136,130,0.8257663666576973,0.8261900896885362
29
+ 3.6486486486486487,135,0.8294106767298322,0.826217475365987
30
+ 3.7837837837837838,140,0.8333850683405307,0.8273128026466706
31
+ 3.918918918918919,145,0.8334934833047165,0.8261353280714282
32
+ 0,0,0.7806081242807599,0.7843279594448466
33
+ 0.13513513513513514,5,0.8193313583571735,0.8122999445241028
34
+ 0.2702702702702703,10,0.846437709431274,0.8303112726354404
35
+ 0.40540540540540543,15,0.8497791216636505,0.8338916292060913
36
+ 0.5405405405405406,20,0.8481768671521202,0.8344803713886917
37
+ 0.6756756756756757,25,0.8419294236114762,0.8324197823497284
38
+ 0.8108108108108109,30,0.8311833878571917,0.8270800450821792
39
+ 0.9459459459459459,35,0.8323883964862309,0.8261627064456549
latest/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
latest/README.md ADDED
@@ -0,0 +1,404 @@
1
+ ---
2
+ tags:
3
+ - sentence-transformers
4
+ - sentence-similarity
5
+ - feature-extraction
6
+ - generated_from_trainer
7
+ - dataset_size:290
8
+ - loss:OnlineContrastiveLoss
9
+ base_model: intfloat/multilingual-e5-large
10
+ widget:
11
+ - source_sentence: Antes se coge al mentiroso que al cojo
12
+ sentences:
13
+ - A escudero pobre, taza de plata y cántaro de cobre
14
+ - En río revuelto, pesca abundante
15
+ - Se ayuda primero al necesitado que al engañador.
16
+ - source_sentence: Asno de muchos, lobos lo comen
17
+ sentences:
18
+ - Sabio entre sabios, amigos lo respetan.
19
+ - El que mucho madruga más hace que el que Dios ayuda.
20
+ - Se pilla antes a un mentiroso que a un cojo
21
+ - source_sentence: Al buey por el asta, y al hombre por la palabra
22
+ sentences:
23
+ - Si no quieres arroz con leche, toma tres tazas
24
+ - Al hombre por la palabra, y al buey por el cuerno ata
25
+ - Ese no es tu amigo, sino alguien que siempre busca estar rodeado de bullicio y
26
+ actividad.
27
+ - source_sentence: Al médico, confesor y letrado, hablarles claro
28
+ sentences:
29
+ - Al médico, confesor y letrado, no le hayas engañado
30
+ - Más vale a quien Dios ayuda que quien mucho madruga
31
+ - Al que anda entre la miel, algo se le pega
32
+ - source_sentence: A muertos y a idos, no hay amigos
33
+ sentences:
34
+ - Al buen callar llaman santo
35
+ - A los vivos y presentes, siempre hay amigos.
36
+ - Al que de prestado se viste, en la calle lo desnudan
37
+ pipeline_tag: sentence-similarity
38
+ library_name: sentence-transformers
39
+ metrics:
40
+ - pearson_cosine
41
+ - spearman_cosine
42
+ model-index:
43
+ - name: SentenceTransformer based on intfloat/multilingual-e5-large
44
+ results:
45
+ - task:
46
+ type: semantic-similarity
47
+ name: Semantic Similarity
48
+ dataset:
49
+ name: Unknown
50
+ type: unknown
51
+ metrics:
52
+ - type: pearson_cosine
53
+ value: 0.8323883964862309
54
+ name: Pearson Cosine
55
+ - type: spearman_cosine
56
+ value: 0.8261627064456549
57
+ name: Spearman Cosine
58
+ ---
59
+
60
+ # SentenceTransformer based on intfloat/multilingual-e5-large
61
+
62
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [intfloat/multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large) on the csv dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
63
+
64
+ ## Model Details
65
+
66
+ ### Model Description
67
+ - **Model Type:** Sentence Transformer
68
+ - **Base model:** [intfloat/multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large) <!-- at revision 0dc5580a448e4284468b8909bae50fa925907bc5 -->
69
+ - **Maximum Sequence Length:** 512 tokens
70
+ - **Output Dimensionality:** 1024 dimensions
71
+ - **Similarity Function:** Cosine Similarity
72
+ - **Training Dataset:**
73
+ - csv
74
+ <!-- - **Language:** Unknown -->
75
+ <!-- - **License:** Unknown -->
76
+
77
+ ### Model Sources
78
+
79
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
80
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
81
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
82
+
83
+ ### Full Model Architecture
84
+
85
+ ```
86
+ SentenceTransformer(
87
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: XLMRobertaModel
88
+ (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
89
+ (2): Normalize()
90
+ )
91
+ ```
92
+
93
+ ## Usage
94
+
95
+ ### Direct Usage (Sentence Transformers)
96
+
97
+ First install the Sentence Transformers library:
98
+
99
+ ```bash
100
+ pip install -U sentence-transformers
101
+ ```
102
+
103
+ Then you can load this model and run inference.
104
+ ```python
105
+ from sentence_transformers import SentenceTransformer
106
+
107
+ # Download from the 🤗 Hub
108
+ model = SentenceTransformer("sentence_transformers_model_id")
109
+ # Run inference
110
+ sentences = [
111
+ 'A muertos y a idos, no hay amigos',
112
+ 'A los vivos y presentes, siempre hay amigos.',
113
+ 'Al buen callar llaman santo',
114
+ ]
115
+ embeddings = model.encode(sentences)
116
+ print(embeddings.shape)
117
+ # [3, 1024]
118
+
119
+ # Get the similarity scores for the embeddings
120
+ similarities = model.similarity(embeddings, embeddings)
121
+ print(similarities.shape)
122
+ # [3, 3]
123
+ ```
124
+
125
+ <!--
126
+ ### Direct Usage (Transformers)
127
+
128
+ <details><summary>Click to see the direct usage in Transformers</summary>
129
+
130
+ </details>
131
+ -->
132
+
133
+ <!--
134
+ ### Downstream Usage (Sentence Transformers)
135
+
136
+ You can finetune this model on your own dataset.
137
+
138
+ <details><summary>Click to expand</summary>
139
+
140
+ </details>
141
+ -->
142
+
143
+ <!--
144
+ ### Out-of-Scope Use
145
+
146
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
147
+ -->
148
+
149
+ ## Evaluation
150
+
151
+ ### Metrics
152
+
153
+ #### Semantic Similarity
154
+
155
+ * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
156
+
157
+ | Metric | Value |
158
+ |:--------------------|:-----------|
159
+ | pearson_cosine | 0.8324 |
160
+ | **spearman_cosine** | **0.8262** |
161
+
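+ The scores above were logged by the evaluator during training. As a minimal sketch, the same evaluator can be re-run on labeled pairs like this (the pair and score below are illustrative placeholders taken from the widget examples, not rows from the actual 1,006-pair evaluation set, and the model id is the placeholder used throughout this card):
+ 
+ ```python
+ from sentence_transformers import SentenceTransformer
+ from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator, SimilarityFunction
+ 
+ model = SentenceTransformer("sentence_transformers_model_id")
+ 
+ # Placeholder sentence pairs with gold similarity scores in [0, 1]
+ evaluator = EmbeddingSimilarityEvaluator(
+     sentences1=["A muertos y a idos, no hay amigos"],
+     sentences2=["A los vivos y presentes, siempre hay amigos."],
+     scores=[1.0],
+     main_similarity=SimilarityFunction.COSINE,
+ )
+ print(evaluator(model))  # e.g. {'pearson_cosine': ..., 'spearman_cosine': ...}
+ ```
+ 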
162
+ <!--
163
+ ## Bias, Risks and Limitations
164
+
165
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
166
+ -->
167
+
168
+ <!--
169
+ ### Recommendations
170
+
171
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
172
+ -->
173
+
174
+ ## Training Details
175
+
176
+ ### Training Dataset
177
+
178
+ #### csv
179
+
180
+ * Dataset: csv
181
+ * Size: 290 training samples
182
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
183
+ * Approximate statistics based on the first 290 samples:
184
+ | | sentence1 | sentence2 | label |
185
+ |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:------------------------------------------------|
186
+ | type | string | string | int |
187
+ | details | <ul><li>min: 7 tokens</li><li>mean: 11.68 tokens</li><li>max: 22 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 17.01 tokens</li><li>max: 44 tokens</li></ul> | <ul><li>0: ~50.00%</li><li>1: ~50.00%</li></ul> |
188
+ * Samples:
189
+ | sentence1 | sentence2 | label |
190
+ |:------------------------------------------------------------|:--------------------------------------------------------------------------------------------------|:---------------|
191
+ | <code>Gota a gota, la mar se agota.</code> | <code>Con el pasar del tiempo se llega a alcanzar cualquier meta.</code> | <code>1</code> |
192
+ | <code>Dime de qué presumes y te diré de qué careces.</code> | <code>Dime de qué careces y te diré de qué dispones.</code> | <code>0</code> |
193
+ | <code>Cómo se vive, se muere.</code> | <code>De aquella forma que hemos vivido nuestra vida será de la forma en la que moriremos.</code> | <code>1</code> |
194
+ * Loss: [<code>OnlineContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#onlinecontrastiveloss)
195
+
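+ As a minimal sketch, rows in the shape above feed the loss like this (the in-memory dataset below is a placeholder standing in for the actual csv; binary labels mark paraphrase pairs as 1 and non-paraphrases as 0):
+ 
+ ```python
+ from datasets import Dataset
+ from sentence_transformers import SentenceTransformer, losses
+ 
+ # Placeholder for the 290-sample csv training set described above
+ train_dataset = Dataset.from_dict({
+     "sentence1": ["Gota a gota, la mar se agota."],
+     "sentence2": ["Con el pasar del tiempo se llega a alcanzar cualquier meta."],
+     "label": [1],
+ })
+ 
+ model = SentenceTransformer("intfloat/multilingual-e5-large")
+ # OnlineContrastiveLoss only backpropagates through the hard positives and
+ # hard negatives found within each batch
+ loss = losses.OnlineContrastiveLoss(model)
+ ```
+ 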
196
+ ### Evaluation Dataset
197
+
198
+ #### Unnamed Dataset
199
+
200
+ * Size: 1,006 evaluation samples
201
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
202
+ * Approximate statistics based on the first 1000 samples:
203
+ | | sentence1 | sentence2 | label |
204
+ |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:------------------------------------------------|
205
+ | type | string | string | int |
206
+ | details | <ul><li>min: 7 tokens</li><li>mean: 12.51 tokens</li><li>max: 25 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 14.82 tokens</li><li>max: 38 tokens</li></ul> | <ul><li>0: ~49.70%</li><li>1: ~50.30%</li></ul> |
207
+ * Samples:
208
+ | sentence1 | sentence2 | label |
209
+ |:---------------------------------------------|:-----------------------------------------------------------------------|:---------------|
210
+ | <code>¿Adónde irá el buey que no are?</code> | <code>¿A dó irá el buey que no are?</code> | <code>1</code> |
211
+ | <code>¿Adónde irá el buey que no are?</code> | <code>¿Adónde irá el buey que no are ni la mula que no cargue?</code> | <code>1</code> |
212
+ | <code>¿Adónde irá el buey que no are?</code> | <code>¿Adónde irá el buey que no are, sino al matadero?</code> | <code>1</code> |
213
+ * Loss: [<code>OnlineContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#onlinecontrastiveloss)
214
+
215
+ ### Training Hyperparameters
216
+ #### Non-Default Hyperparameters
217
+
218
+ - `eval_strategy`: steps
219
+ - `learning_rate`: 1e-05
220
+ - `num_train_epochs`: 1
221
+ - `lr_scheduler_type`: constant
222
+ - `load_best_model_at_end`: True
223
+ - `eval_on_start`: True
224
+ - `batch_sampler`: no_duplicates
225
+
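+ As a sketch, the non-default values above map onto `SentenceTransformerTrainingArguments` like this (the `output_dir` is taken from the checkpoint paths recorded in this repository; everything else mirrors the list):
+ 
+ ```python
+ from sentence_transformers import SentenceTransformerTrainingArguments
+ from sentence_transformers.training_args import BatchSamplers
+ 
+ args = SentenceTransformerTrainingArguments(
+     output_dir="models/me5-large-retraining",
+     eval_strategy="steps",
+     learning_rate=1e-5,
+     num_train_epochs=1,
+     lr_scheduler_type="constant",
+     load_best_model_at_end=True,
+     eval_on_start=True,
+     batch_sampler=BatchSamplers.NO_DUPLICATES,
+ )
+ ```
+ 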
226
+ #### All Hyperparameters
227
+ <details><summary>Click to expand</summary>
228
+
229
+ - `overwrite_output_dir`: False
230
+ - `do_predict`: False
231
+ - `eval_strategy`: steps
232
+ - `prediction_loss_only`: True
233
+ - `per_device_train_batch_size`: 8
234
+ - `per_device_eval_batch_size`: 8
235
+ - `per_gpu_train_batch_size`: None
236
+ - `per_gpu_eval_batch_size`: None
237
+ - `gradient_accumulation_steps`: 1
238
+ - `eval_accumulation_steps`: None
239
+ - `torch_empty_cache_steps`: None
240
+ - `learning_rate`: 1e-05
241
+ - `weight_decay`: 0.0
242
+ - `adam_beta1`: 0.9
243
+ - `adam_beta2`: 0.999
244
+ - `adam_epsilon`: 1e-08
245
+ - `max_grad_norm`: 1.0
246
+ - `num_train_epochs`: 1
247
+ - `max_steps`: -1
248
+ - `lr_scheduler_type`: constant
249
+ - `lr_scheduler_kwargs`: {}
250
+ - `warmup_ratio`: 0.0
251
+ - `warmup_steps`: 0
252
+ - `log_level`: passive
253
+ - `log_level_replica`: warning
254
+ - `log_on_each_node`: True
255
+ - `logging_nan_inf_filter`: True
256
+ - `save_safetensors`: True
257
+ - `save_on_each_node`: False
258
+ - `save_only_model`: False
259
+ - `restore_callback_states_from_checkpoint`: False
260
+ - `no_cuda`: False
261
+ - `use_cpu`: False
262
+ - `use_mps_device`: False
263
+ - `seed`: 42
264
+ - `data_seed`: None
265
+ - `jit_mode_eval`: False
266
+ - `use_ipex`: False
267
+ - `bf16`: False
268
+ - `fp16`: False
269
+ - `fp16_opt_level`: O1
270
+ - `half_precision_backend`: auto
271
+ - `bf16_full_eval`: False
272
+ - `fp16_full_eval`: False
273
+ - `tf32`: None
274
+ - `local_rank`: 0
275
+ - `ddp_backend`: None
276
+ - `tpu_num_cores`: None
277
+ - `tpu_metrics_debug`: False
278
+ - `debug`: []
279
+ - `dataloader_drop_last`: False
280
+ - `dataloader_num_workers`: 0
281
+ - `dataloader_prefetch_factor`: None
282
+ - `past_index`: -1
283
+ - `disable_tqdm`: False
284
+ - `remove_unused_columns`: True
285
+ - `label_names`: None
286
+ - `load_best_model_at_end`: True
287
+ - `ignore_data_skip`: False
288
+ - `fsdp`: []
289
+ - `fsdp_min_num_params`: 0
290
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
291
+ - `tp_size`: 0
292
+ - `fsdp_transformer_layer_cls_to_wrap`: None
293
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
294
+ - `deepspeed`: None
295
+ - `label_smoothing_factor`: 0.0
296
+ - `optim`: adamw_torch
297
+ - `optim_args`: None
298
+ - `adafactor`: False
299
+ - `group_by_length`: False
300
+ - `length_column_name`: length
301
+ - `ddp_find_unused_parameters`: None
302
+ - `ddp_bucket_cap_mb`: None
303
+ - `ddp_broadcast_buffers`: False
304
+ - `dataloader_pin_memory`: True
305
+ - `dataloader_persistent_workers`: False
306
+ - `skip_memory_metrics`: True
307
+ - `use_legacy_prediction_loop`: False
308
+ - `push_to_hub`: False
309
+ - `resume_from_checkpoint`: None
310
+ - `hub_model_id`: None
311
+ - `hub_strategy`: every_save
312
+ - `hub_private_repo`: None
313
+ - `hub_always_push`: False
314
+ - `gradient_checkpointing`: False
315
+ - `gradient_checkpointing_kwargs`: None
316
+ - `include_inputs_for_metrics`: False
317
+ - `include_for_metrics`: []
318
+ - `eval_do_concat_batches`: True
319
+ - `fp16_backend`: auto
320
+ - `push_to_hub_model_id`: None
321
+ - `push_to_hub_organization`: None
322
+ - `mp_parameters`:
323
+ - `auto_find_batch_size`: False
324
+ - `full_determinism`: False
325
+ - `torchdynamo`: None
326
+ - `ray_scope`: last
327
+ - `ddp_timeout`: 1800
328
+ - `torch_compile`: False
329
+ - `torch_compile_backend`: None
330
+ - `torch_compile_mode`: None
331
+ - `dispatch_batches`: None
332
+ - `split_batches`: None
333
+ - `include_tokens_per_second`: False
334
+ - `include_num_input_tokens_seen`: False
335
+ - `neftune_noise_alpha`: None
336
+ - `optim_target_modules`: None
337
+ - `batch_eval_metrics`: False
338
+ - `eval_on_start`: True
339
+ - `use_liger_kernel`: False
340
+ - `eval_use_gather_object`: False
341
+ - `average_tokens_across_devices`: False
342
+ - `prompts`: None
343
+ - `batch_sampler`: no_duplicates
344
+ - `multi_dataset_batch_sampler`: proportional
345
+
346
+ </details>
347
+
348
+ ### Training Logs
349
+ | Epoch | Step | Training Loss | Validation Loss | spearman_cosine |
350
+ |:----------:|:------:|:-------------:|:---------------:|:---------------:|
351
+ | 0 | 0 | - | 0.1095 | 0.7843 |
352
+ | 0.1351 | 5 | 0.6784 | 0.0765 | 0.8123 |
353
+ | 0.2703 | 10 | 0.5088 | 0.0533 | 0.8303 |
354
+ | 0.4054 | 15 | 0.4364 | 0.0475 | 0.8339 |
355
+ | **0.5405** | **20** | **0.3456** | **0.0435** | **0.8345** |
356
+ | 0.6757 | 25 | 0.1423 | 0.0424 | 0.8324 |
357
+ | 0.8108 | 30 | 0.2852 | 0.0443 | 0.8271 |
358
+ | 0.9459 | 35 | 0.2616 | 0.0514 | 0.8262 |
359
+
360
+ * The bold row denotes the saved checkpoint.
361
+
362
+ ### Framework Versions
363
+ - Python: 3.12.9
364
+ - Sentence Transformers: 3.4.1
365
+ - Transformers: 4.50.0
366
+ - PyTorch: 2.6.0+cpu
367
+ - Accelerate: 1.6.0
368
+ - Datasets: 3.5.0
369
+ - Tokenizers: 0.21.1
370
+
371
+ ## Citation
372
+
373
+ ### BibTeX
374
+
375
+ #### Sentence Transformers
376
+ ```bibtex
377
+ @inproceedings{reimers-2019-sentence-bert,
378
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
379
+ author = "Reimers, Nils and Gurevych, Iryna",
380
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
381
+ month = "11",
382
+ year = "2019",
383
+ publisher = "Association for Computational Linguistics",
384
+ url = "https://arxiv.org/abs/1908.10084",
385
+ }
386
+ ```
387
+
388
+ <!--
389
+ ## Glossary
390
+
391
+ *Clearly define terms in order to be accessible across audiences.*
392
+ -->
393
+
394
+ <!--
395
+ ## Model Card Authors
396
+
397
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
398
+ -->
399
+
400
+ <!--
401
+ ## Model Card Contact
402
+
403
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
404
+ -->
latest/config.json ADDED
@@ -0,0 +1,27 @@
1
+ {
2
+ "architectures": [
3
+ "XLMRobertaModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 4096,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 514,
16
+ "model_type": "xlm-roberta",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 24,
19
+ "output_past": true,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.50.0",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 250002
27
+ }
latest/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.4.1",
4
+ "transformers": "4.50.0",
5
+ "pytorch": "2.6.0+cpu"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": "cosine"
10
+ }
latest/model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d8db9ca7a34661a8ab59ec5df97ad40f2c4e75973337e38d6910ecf9c1a527f
3
+ size 2239607176
latest/modules.json ADDED
@@ -0,0 +1,20 @@
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
latest/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
latest/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
latest/tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085
3
+ size 17082987
latest/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
+ "mask_token": "<mask>",
50
+ "model_max_length": 512,
51
+ "pad_token": "<pad>",
52
+ "sep_token": "</s>",
53
+ "tokenizer_class": "XLMRobertaTokenizer",
54
+ "unk_token": "<unk>"
55
+ }
latest/training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b41c6ac4c736e654c2036409a6676535c43e7b515e2e3c97cfc2b06f8c81bf3
3
+ size 5624