xiaorui638 committed on
Commit
9d6bc38
·
verified ·
1 Parent(s): 629f0d1

Upload LoRA adapter from dpo_40k_abla_one_cat_one

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +8 -0
  2. README.md +88 -0
  3. README.md.bak +88 -0
  4. adapter_config.json +37 -0
  5. adapter_model.safetensors +3 -0
  6. added_tokens.json +24 -0
  7. all_results.json +20 -0
  8. chat_template.json +3 -0
  9. checkpoint-100/README.md +208 -0
  10. checkpoint-100/adapter_config.json +37 -0
  11. checkpoint-100/adapter_model.safetensors +3 -0
  12. checkpoint-100/added_tokens.json +24 -0
  13. checkpoint-100/chat_template.json +3 -0
  14. checkpoint-100/merges.txt +0 -0
  15. checkpoint-100/preprocessor_config.json +32 -0
  16. checkpoint-100/rng_state_0.pth +3 -0
  17. checkpoint-100/rng_state_1.pth +3 -0
  18. checkpoint-100/rng_state_2.pth +3 -0
  19. checkpoint-100/rng_state_3.pth +3 -0
  20. checkpoint-100/special_tokens_map.json +31 -0
  21. checkpoint-100/tokenizer.json +3 -0
  22. checkpoint-100/tokenizer_config.json +210 -0
  23. checkpoint-100/trainer_state.json +815 -0
  24. checkpoint-100/training_args.bin +3 -0
  25. checkpoint-100/vocab.json +0 -0
  26. checkpoint-200/README.md +208 -0
  27. checkpoint-200/adapter_config.json +37 -0
  28. checkpoint-200/adapter_model.safetensors +3 -0
  29. checkpoint-200/added_tokens.json +24 -0
  30. checkpoint-200/chat_template.json +3 -0
  31. checkpoint-200/merges.txt +0 -0
  32. checkpoint-200/preprocessor_config.json +32 -0
  33. checkpoint-200/rng_state_0.pth +3 -0
  34. checkpoint-200/rng_state_1.pth +3 -0
  35. checkpoint-200/rng_state_2.pth +3 -0
  36. checkpoint-200/rng_state_3.pth +3 -0
  37. checkpoint-200/special_tokens_map.json +31 -0
  38. checkpoint-200/tokenizer.json +3 -0
  39. checkpoint-200/tokenizer_config.json +210 -0
  40. checkpoint-200/trainer_state.json +1597 -0
  41. checkpoint-200/training_args.bin +3 -0
  42. checkpoint-200/vocab.json +0 -0
  43. checkpoint-300/README.md +208 -0
  44. checkpoint-300/adapter_config.json +37 -0
  45. checkpoint-300/adapter_model.safetensors +3 -0
  46. checkpoint-300/added_tokens.json +24 -0
  47. checkpoint-300/chat_template.json +3 -0
  48. checkpoint-300/merges.txt +0 -0
  49. checkpoint-300/preprocessor_config.json +32 -0
  50. checkpoint-300/rng_state_0.pth +3 -0
.gitattributes CHANGED
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
+ checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
41
+ checkpoint-600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
42
+ checkpoint-621/tokenizer.json filter=lfs diff=lfs merge=lfs -text
43
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ license: other
4
+ base_model: Qwen/Qwen2.5-VL-7B-Instruct
5
+ tags:
6
+ - base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct
7
+ - llama-factory
8
+ - lora
9
+ - transformers
10
+ pipeline_tag: text-generation
11
+ model-index:
12
+ - name: dpo_40k_abla_one_cat_one
13
+ results: []
14
+ ---
15
+
16
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
17
+ should probably proofread and complete it, then remove this comment. -->
18
+
19
+ # dpo_40k_abla_one_cat_one
20
+
21
+ This model is a fine-tuned version of [Qwen/Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) on the dpo_ablation_one_cat_one dataset.
22
+ It achieves the following results on the evaluation set:
23
+ - Loss: 0.5072
24
+ - Rewards/chosen: -0.4765
25
+ - Rewards/rejected: -1.1458
26
+ - Rewards/accuracies: 0.7700
27
+ - Rewards/margins: 0.6692
28
+ - Logps/chosen: -35.2662
29
+ - Logps/rejected: -46.7822
30
+ - Logits/chosen: 0.2949
31
+ - Logits/rejected: 0.2973
32
+
33
+ ## Model description
34
+
35
+ More information needed
36
+
37
+ ## Intended uses & limitations
38
+
39
+ More information needed
40
+
41
+ ## Training and evaluation data
42
+
43
+ More information needed
44
+
45
+ ## Training procedure
46
+
47
+ ### Training hyperparameters
48
+
49
+ The following hyperparameters were used during training:
50
+ - learning_rate: 5e-06
51
+ - train_batch_size: 2
52
+ - eval_batch_size: 1
53
+ - seed: 42
54
+ - distributed_type: multi-GPU
55
+ - num_devices: 4
56
+ - gradient_accumulation_steps: 8
57
+ - total_train_batch_size: 64
58
+ - total_eval_batch_size: 4
59
+ - optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
60
+ - lr_scheduler_type: cosine
61
+ - lr_scheduler_warmup_ratio: 0.1
62
+ - num_epochs: 1.0
63
+
64
+ ### Training results
65
+
66
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/chosen | Logps/rejected | Logits/chosen | Logits/rejected |
67
+ |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:------------:|:--------------:|:-------------:|:---------------:|
68
+ | 0.6882 | 0.0804 | 50 | 0.6898 | -0.0044 | -0.0115 | 0.5350 | 0.0071 | -30.5444 | -35.4393 | 0.5502 | 0.5581 |
69
+ | 0.6603 | 0.1608 | 100 | 0.6593 | -0.0806 | -0.1570 | 0.6850 | 0.0763 | -31.3070 | -36.8941 | 0.5384 | 0.5446 |
70
+ | 0.6387 | 0.2412 | 150 | 0.6298 | -0.1917 | -0.3455 | 0.7250 | 0.1538 | -32.4175 | -38.7793 | 0.5058 | 0.5080 |
71
+ | 0.5986 | 0.3216 | 200 | 0.5988 | -0.2330 | -0.4814 | 0.7050 | 0.2485 | -32.8302 | -40.1388 | 0.4847 | 0.4844 |
72
+ | 0.5368 | 0.4020 | 250 | 0.5667 | -0.2959 | -0.6688 | 0.7200 | 0.3728 | -33.4601 | -42.0120 | 0.4258 | 0.4350 |
73
+ | 0.5416 | 0.4824 | 300 | 0.5450 | -0.3299 | -0.8038 | 0.7450 | 0.4739 | -33.8000 | -43.3626 | 0.3828 | 0.3894 |
74
+ | 0.5141 | 0.5628 | 350 | 0.5301 | -0.3794 | -0.9226 | 0.7450 | 0.5432 | -34.2943 | -44.5501 | 0.3541 | 0.3622 |
75
+ | 0.5122 | 0.6432 | 400 | 0.5206 | -0.4136 | -1.0123 | 0.7550 | 0.5987 | -34.6362 | -45.4474 | 0.3284 | 0.3337 |
76
+ | 0.4817 | 0.7236 | 450 | 0.5165 | -0.4476 | -1.0766 | 0.7750 | 0.6290 | -34.9764 | -46.0903 | 0.3096 | 0.3177 |
77
+ | 0.4709 | 0.8040 | 500 | 0.5102 | -0.4623 | -1.1173 | 0.7800 | 0.6550 | -35.1233 | -46.4975 | 0.3006 | 0.3063 |
78
+ | 0.4759 | 0.8844 | 550 | 0.5098 | -0.4751 | -1.1359 | 0.7800 | 0.6609 | -35.2515 | -46.6838 | 0.2987 | 0.3002 |
79
+ | 0.4342 | 0.9648 | 600 | 0.5086 | -0.4804 | -1.1453 | 0.7800 | 0.6649 | -35.3051 | -46.7775 | 0.2947 | 0.2991 |
80
+
81
+
82
+ ### Framework versions
83
+
84
+ - PEFT 0.17.1
85
+ - Transformers 4.49.0
86
+ - Pytorch 2.5.1+cu124
87
+ - Datasets 4.0.0
88
+ - Tokenizers 0.21.0
README.md.bak ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ license: other
4
+ base_model: /p/scratch/taco-vlm/xiao4/models/Qwen2.5-VL-7B-Instruct
5
+ tags:
6
+ - base_model:adapter:/p/scratch/taco-vlm/xiao4/models/Qwen2.5-VL-7B-Instruct
7
+ - llama-factory
8
+ - lora
9
+ - transformers
10
+ pipeline_tag: text-generation
11
+ model-index:
12
+ - name: dpo_40k_abla_one_cat_one
13
+ results: []
14
+ ---
15
+
16
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
17
+ should probably proofread and complete it, then remove this comment. -->
18
+
19
+ # dpo_40k_abla_one_cat_one
20
+
21
+ This model is a fine-tuned version of [/p/scratch/taco-vlm/xiao4/models/Qwen2.5-VL-7B-Instruct](https://huggingface.co//p/scratch/taco-vlm/xiao4/models/Qwen2.5-VL-7B-Instruct) on the dpo_ablation_one_cat_one dataset.
22
+ It achieves the following results on the evaluation set:
23
+ - Loss: 0.5072
24
+ - Rewards/chosen: -0.4765
25
+ - Rewards/rejected: -1.1458
26
+ - Rewards/accuracies: 0.7700
27
+ - Rewards/margins: 0.6692
28
+ - Logps/chosen: -35.2662
29
+ - Logps/rejected: -46.7822
30
+ - Logits/chosen: 0.2949
31
+ - Logits/rejected: 0.2973
32
+
33
+ ## Model description
34
+
35
+ More information needed
36
+
37
+ ## Intended uses & limitations
38
+
39
+ More information needed
40
+
41
+ ## Training and evaluation data
42
+
43
+ More information needed
44
+
45
+ ## Training procedure
46
+
47
+ ### Training hyperparameters
48
+
49
+ The following hyperparameters were used during training:
50
+ - learning_rate: 5e-06
51
+ - train_batch_size: 2
52
+ - eval_batch_size: 1
53
+ - seed: 42
54
+ - distributed_type: multi-GPU
55
+ - num_devices: 4
56
+ - gradient_accumulation_steps: 8
57
+ - total_train_batch_size: 64
58
+ - total_eval_batch_size: 4
59
+ - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
60
+ - lr_scheduler_type: cosine
61
+ - lr_scheduler_warmup_ratio: 0.1
62
+ - num_epochs: 1.0
63
+
64
+ ### Training results
65
+
66
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/chosen | Logps/rejected | Logits/chosen | Logits/rejected |
67
+ |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:------------:|:--------------:|:-------------:|:---------------:|
68
+ | 0.6882 | 0.0804 | 50 | 0.6898 | -0.0044 | -0.0115 | 0.5350 | 0.0071 | -30.5444 | -35.4393 | 0.5502 | 0.5581 |
69
+ | 0.6603 | 0.1608 | 100 | 0.6593 | -0.0806 | -0.1570 | 0.6850 | 0.0763 | -31.3070 | -36.8941 | 0.5384 | 0.5446 |
70
+ | 0.6387 | 0.2412 | 150 | 0.6298 | -0.1917 | -0.3455 | 0.7250 | 0.1538 | -32.4175 | -38.7793 | 0.5058 | 0.5080 |
71
+ | 0.5986 | 0.3216 | 200 | 0.5988 | -0.2330 | -0.4814 | 0.7050 | 0.2485 | -32.8302 | -40.1388 | 0.4847 | 0.4844 |
72
+ | 0.5368 | 0.4020 | 250 | 0.5667 | -0.2959 | -0.6688 | 0.7200 | 0.3728 | -33.4601 | -42.0120 | 0.4258 | 0.4350 |
73
+ | 0.5416 | 0.4824 | 300 | 0.5450 | -0.3299 | -0.8038 | 0.7450 | 0.4739 | -33.8000 | -43.3626 | 0.3828 | 0.3894 |
74
+ | 0.5141 | 0.5628 | 350 | 0.5301 | -0.3794 | -0.9226 | 0.7450 | 0.5432 | -34.2943 | -44.5501 | 0.3541 | 0.3622 |
75
+ | 0.5122 | 0.6432 | 400 | 0.5206 | -0.4136 | -1.0123 | 0.7550 | 0.5987 | -34.6362 | -45.4474 | 0.3284 | 0.3337 |
76
+ | 0.4817 | 0.7236 | 450 | 0.5165 | -0.4476 | -1.0766 | 0.7750 | 0.6290 | -34.9764 | -46.0903 | 0.3096 | 0.3177 |
77
+ | 0.4709 | 0.8040 | 500 | 0.5102 | -0.4623 | -1.1173 | 0.7800 | 0.6550 | -35.1233 | -46.4975 | 0.3006 | 0.3063 |
78
+ | 0.4759 | 0.8844 | 550 | 0.5098 | -0.4751 | -1.1359 | 0.7800 | 0.6609 | -35.2515 | -46.6838 | 0.2987 | 0.3002 |
79
+ | 0.4342 | 0.9648 | 600 | 0.5086 | -0.4804 | -1.1453 | 0.7800 | 0.6649 | -35.3051 | -46.7775 | 0.2947 | 0.2991 |
80
+
81
+
82
+ ### Framework versions
83
+
84
+ - PEFT 0.17.1
85
+ - Transformers 4.49.0
86
+ - Pytorch 2.5.1+cu124
87
+ - Datasets 4.0.0
88
+ - Tokenizers 0.21.0
adapter_config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/p/scratch/taco-vlm/xiao4/models/Qwen2.5-VL-7B-Instruct",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 64,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.0,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 32,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "v_proj",
29
+ "q_proj"
30
+ ],
31
+ "target_parameters": null,
32
+ "task_type": "CAUSAL_LM",
33
+ "trainable_token_indices": null,
34
+ "use_dora": false,
35
+ "use_qalora": false,
36
+ "use_rslora": false
37
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25e363363cf61168751ff5e9f947016b10f167fd728689648f705ccd98039bbd
3
+ size 40385208
added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
all_results.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9985929648241206,
3
+ "eval_logits/chosen": 0.2949379086494446,
4
+ "eval_logits/rejected": 0.2972874939441681,
5
+ "eval_logps/chosen": -35.26618194580078,
6
+ "eval_logps/rejected": -46.7822380065918,
7
+ "eval_loss": 0.5072444677352905,
8
+ "eval_rewards/accuracies": 0.7699999809265137,
9
+ "eval_rewards/chosen": -0.4765482246875763,
10
+ "eval_rewards/margins": 0.669231653213501,
11
+ "eval_rewards/rejected": -1.1457798480987549,
12
+ "eval_runtime": 48.1974,
13
+ "eval_samples_per_second": 4.15,
14
+ "eval_steps_per_second": 1.037,
15
+ "total_flos": 1.595144247000957e+18,
16
+ "train_loss": 0.5550001341363658,
17
+ "train_runtime": 15024.8122,
18
+ "train_samples_per_second": 2.649,
19
+ "train_steps_per_second": 0.041
20
+ }
chat_template.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
3
+ }
checkpoint-100/README.md ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: /p/scratch/taco-vlm/xiao4/models/Qwen2.5-VL-7B-Instruct
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:/p/scratch/taco-vlm/xiao4/models/Qwen2.5-VL-7B-Instruct
7
+ - llama-factory
8
+ - lora
9
+ - transformers
10
+ ---
11
+
12
+ # Model Card for Model ID
13
+
14
+ <!-- Provide a quick summary of what the model is/does. -->
15
+
16
+
17
+
18
+ ## Model Details
19
+
20
+ ### Model Description
21
+
22
+ <!-- Provide a longer summary of what this model is. -->
23
+
24
+
25
+
26
+ - **Developed by:** [More Information Needed]
27
+ - **Funded by [optional]:** [More Information Needed]
28
+ - **Shared by [optional]:** [More Information Needed]
29
+ - **Model type:** [More Information Needed]
30
+ - **Language(s) (NLP):** [More Information Needed]
31
+ - **License:** [More Information Needed]
32
+ - **Finetuned from model [optional]:** [More Information Needed]
33
+
34
+ ### Model Sources [optional]
35
+
36
+ <!-- Provide the basic links for the model. -->
37
+
38
+ - **Repository:** [More Information Needed]
39
+ - **Paper [optional]:** [More Information Needed]
40
+ - **Demo [optional]:** [More Information Needed]
41
+
42
+ ## Uses
43
+
44
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
45
+
46
+ ### Direct Use
47
+
48
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Downstream Use [optional]
53
+
54
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
55
+
56
+ [More Information Needed]
57
+
58
+ ### Out-of-Scope Use
59
+
60
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ## Bias, Risks, and Limitations
65
+
66
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
67
+
68
+ [More Information Needed]
69
+
70
+ ### Recommendations
71
+
72
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
73
+
74
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
75
+
76
+ ## How to Get Started with the Model
77
+
78
+ Use the code below to get started with the model.
79
+
80
+ [More Information Needed]
81
+
82
+ ## Training Details
83
+
84
+ ### Training Data
85
+
86
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
87
+
88
+ [More Information Needed]
89
+
90
+ ### Training Procedure
91
+
92
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
93
+
94
+ #### Preprocessing [optional]
95
+
96
+ [More Information Needed]
97
+
98
+
99
+ #### Training Hyperparameters
100
+
101
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
102
+
103
+ #### Speeds, Sizes, Times [optional]
104
+
105
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
106
+
107
+ [More Information Needed]
108
+
109
+ ## Evaluation
110
+
111
+ <!-- This section describes the evaluation protocols and provides the results. -->
112
+
113
+ ### Testing Data, Factors & Metrics
114
+
115
+ #### Testing Data
116
+
117
+ <!-- This should link to a Dataset Card if possible. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Factors
122
+
123
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
124
+
125
+ [More Information Needed]
126
+
127
+ #### Metrics
128
+
129
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
130
+
131
+ [More Information Needed]
132
+
133
+ ### Results
134
+
135
+ [More Information Needed]
136
+
137
+ #### Summary
138
+
139
+
140
+
141
+ ## Model Examination [optional]
142
+
143
+ <!-- Relevant interpretability work for the model goes here -->
144
+
145
+ [More Information Needed]
146
+
147
+ ## Environmental Impact
148
+
149
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
150
+
151
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
152
+
153
+ - **Hardware Type:** [More Information Needed]
154
+ - **Hours used:** [More Information Needed]
155
+ - **Cloud Provider:** [More Information Needed]
156
+ - **Compute Region:** [More Information Needed]
157
+ - **Carbon Emitted:** [More Information Needed]
158
+
159
+ ## Technical Specifications [optional]
160
+
161
+ ### Model Architecture and Objective
162
+
163
+ [More Information Needed]
164
+
165
+ ### Compute Infrastructure
166
+
167
+ [More Information Needed]
168
+
169
+ #### Hardware
170
+
171
+ [More Information Needed]
172
+
173
+ #### Software
174
+
175
+ [More Information Needed]
176
+
177
+ ## Citation [optional]
178
+
179
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
180
+
181
+ **BibTeX:**
182
+
183
+ [More Information Needed]
184
+
185
+ **APA:**
186
+
187
+ [More Information Needed]
188
+
189
+ ## Glossary [optional]
190
+
191
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
192
+
193
+ [More Information Needed]
194
+
195
+ ## More Information [optional]
196
+
197
+ [More Information Needed]
198
+
199
+ ## Model Card Authors [optional]
200
+
201
+ [More Information Needed]
202
+
203
+ ## Model Card Contact
204
+
205
+ [More Information Needed]
206
+ ### Framework versions
207
+
208
+ - PEFT 0.17.1
checkpoint-100/adapter_config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/p/scratch/taco-vlm/xiao4/models/Qwen2.5-VL-7B-Instruct",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 64,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.0,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 32,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "v_proj",
29
+ "q_proj"
30
+ ],
31
+ "target_parameters": null,
32
+ "task_type": "CAUSAL_LM",
33
+ "trainable_token_indices": null,
34
+ "use_dora": false,
35
+ "use_qalora": false,
36
+ "use_rslora": false
37
+ }
checkpoint-100/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eb092e07b0837ef2c3510e82ad21ec89eb026ab6c39f7a9e23c8392d893d45a
3
+ size 40385208
checkpoint-100/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
checkpoint-100/chat_template.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
3
+ }
checkpoint-100/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-100/preprocessor_config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": null,
3
+ "default_to_square": true,
4
+ "do_center_crop": null,
5
+ "do_convert_rgb": true,
6
+ "do_normalize": true,
7
+ "do_rescale": true,
8
+ "do_resize": true,
9
+ "image_mean": [
10
+ 0.48145466,
11
+ 0.4578275,
12
+ 0.40821073
13
+ ],
14
+ "image_processor_type": "Qwen2VLImageProcessorFast",
15
+ "image_std": [
16
+ 0.26862954,
17
+ 0.26130258,
18
+ 0.27577711
19
+ ],
20
+ "max_pixels": 12845056,
21
+ "merge_size": 2,
22
+ "min_pixels": 3136,
23
+ "patch_size": 14,
24
+ "processor_class": "Qwen2_5_VLProcessor",
25
+ "resample": 3,
26
+ "rescale_factor": 0.00392156862745098,
27
+ "size": {
28
+ "longest_edge": 1003520,
29
+ "shortest_edge": 3136
30
+ },
31
+ "temporal_patch_size": 2
32
+ }
checkpoint-100/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:308f94f9a5c24e1bad5c393d56ae7af7782600f4e791d9c6ac35b22fff2105b6
3
+ size 15024
checkpoint-100/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b056f3c23cb32dc77a2ec9e7651e0b64e4440e21f0fdf969b86bfc56a1cbdf06
3
+ size 15024
checkpoint-100/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3f8a05714bc528f4885a2816181652f2303b3e8150f89b56aaee6bec56aa520
3
+ size 15024
checkpoint-100/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f755bd3c330281961e5c03af9d10ce8c1e1678619d384f6f1fd5fd7dce2ff50
3
+ size 15024
checkpoint-100/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
checkpoint-100/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
checkpoint-100/tokenizer_config.json ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
+ "errors": "replace",
202
+ "extra_special_tokens": {},
203
+ "model_max_length": 131072,
204
+ "pad_token": "<|endoftext|>",
205
+ "padding_side": "right",
206
+ "processor_class": "Qwen2_5_VLProcessor",
207
+ "split_special_tokens": false,
208
+ "tokenizer_class": "Qwen2Tokenizer",
209
+ "unk_token": null
210
+ }
checkpoint-100/trainer_state.json ADDED
@@ -0,0 +1,815 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.16080402010050251,
5
+ "eval_steps": 50,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0032160804020100503,
13
+ "grad_norm": 0.398336261510849,
14
+ "learning_rate": 1.5873015873015874e-07,
15
+ "logits/chosen": 0.9754317998886108,
16
+ "logits/rejected": 1.027288556098938,
17
+ "logps/chosen": -31.835588455200195,
18
+ "logps/rejected": -34.915592193603516,
19
+ "loss": 0.6931,
20
+ "rewards/accuracies": 0.0,
21
+ "rewards/chosen": 0.0,
22
+ "rewards/margins": 0.0,
23
+ "rewards/rejected": 0.0,
24
+ "step": 2
25
+ },
26
+ {
27
+ "epoch": 0.006432160804020101,
28
+ "grad_norm": 0.4530814290046692,
29
+ "learning_rate": 3.174603174603175e-07,
30
+ "logits/chosen": 1.4162323474884033,
31
+ "logits/rejected": 1.4583587646484375,
32
+ "logps/chosen": -30.352081298828125,
33
+ "logps/rejected": -33.31166076660156,
34
+ "loss": 0.6938,
35
+ "rewards/accuracies": 0.5078125,
36
+ "rewards/chosen": -0.003089881967753172,
37
+ "rewards/margins": -0.0008391638984903693,
38
+ "rewards/rejected": -0.0022507174871861935,
39
+ "step": 4
40
+ },
41
+ {
42
+ "epoch": 0.009648241206030151,
43
+ "grad_norm": 0.46197977662086487,
44
+ "learning_rate": 4.7619047619047623e-07,
45
+ "logits/chosen": 1.2524590492248535,
46
+ "logits/rejected": 1.3010387420654297,
47
+ "logps/chosen": -29.417587280273438,
48
+ "logps/rejected": -33.4231071472168,
49
+ "loss": 0.6936,
50
+ "rewards/accuracies": 0.4609375,
51
+ "rewards/chosen": 0.0004551531164906919,
52
+ "rewards/margins": -0.00048057991079986095,
53
+ "rewards/rejected": 0.0009357327362522483,
54
+ "step": 6
55
+ },
56
+ {
57
+ "epoch": 0.012864321608040201,
58
+ "grad_norm": 0.4546354115009308,
59
+ "learning_rate": 6.34920634920635e-07,
60
+ "logits/chosen": 1.4723472595214844,
61
+ "logits/rejected": 1.5091776847839355,
62
+ "logps/chosen": -32.08555221557617,
63
+ "logps/rejected": -34.23310089111328,
64
+ "loss": 0.6917,
65
+ "rewards/accuracies": 0.484375,
66
+ "rewards/chosen": 0.004024952184408903,
67
+ "rewards/margins": 0.0033740843646228313,
68
+ "rewards/rejected": 0.0006508685764856637,
69
+ "step": 8
70
+ },
71
+ {
72
+ "epoch": 0.016080402010050253,
73
+ "grad_norm": 0.4574313759803772,
74
+ "learning_rate": 7.936507936507937e-07,
75
+ "logits/chosen": 1.278916597366333,
76
+ "logits/rejected": 1.3133082389831543,
77
+ "logps/chosen": -31.22161102294922,
78
+ "logps/rejected": -34.931907653808594,
79
+ "loss": 0.6929,
80
+ "rewards/accuracies": 0.4453125,
81
+ "rewards/chosen": 0.0015166704542934895,
82
+ "rewards/margins": 0.0008749825647100806,
83
+ "rewards/rejected": 0.0006416881224140525,
84
+ "step": 10
85
+ },
86
+ {
87
+ "epoch": 0.019296482412060303,
88
+ "grad_norm": 0.44448181986808777,
89
+ "learning_rate": 9.523809523809525e-07,
90
+ "logits/chosen": 1.3098528385162354,
91
+ "logits/rejected": 1.3612464666366577,
92
+ "logps/chosen": -30.1417179107666,
93
+ "logps/rejected": -35.877037048339844,
94
+ "loss": 0.6958,
95
+ "rewards/accuracies": 0.4296875,
96
+ "rewards/chosen": -0.00687691755592823,
97
+ "rewards/margins": -0.004914135672152042,
98
+ "rewards/rejected": -0.001962781883776188,
99
+ "step": 12
100
+ },
101
+ {
102
+ "epoch": 0.022512562814070353,
103
+ "grad_norm": 0.4568133056163788,
104
+ "learning_rate": 1.111111111111111e-06,
105
+ "logits/chosen": 1.4617085456848145,
106
+ "logits/rejected": 1.4705244302749634,
107
+ "logps/chosen": -30.95242691040039,
108
+ "logps/rejected": -36.34254455566406,
109
+ "loss": 0.6939,
110
+ "rewards/accuracies": 0.4921875,
111
+ "rewards/chosen": -0.0029178508557379246,
112
+ "rewards/margins": -0.0011518384562805295,
113
+ "rewards/rejected": -0.001766012515872717,
114
+ "step": 14
115
+ },
116
+ {
117
+ "epoch": 0.025728643216080403,
118
+ "grad_norm": 0.4778600037097931,
119
+ "learning_rate": 1.26984126984127e-06,
120
+ "logits/chosen": 1.3174049854278564,
121
+ "logits/rejected": 1.3094675540924072,
122
+ "logps/chosen": -28.937427520751953,
123
+ "logps/rejected": -36.366676330566406,
124
+ "loss": 0.6932,
125
+ "rewards/accuracies": 0.4765625,
126
+ "rewards/chosen": -0.0009857461554929614,
127
+ "rewards/margins": 0.00013500778004527092,
128
+ "rewards/rejected": -0.001120753469876945,
129
+ "step": 16
130
+ },
131
+ {
132
+ "epoch": 0.028944723618090452,
133
+ "grad_norm": 0.421551913022995,
134
+ "learning_rate": 1.4285714285714286e-06,
135
+ "logits/chosen": 1.3308428525924683,
136
+ "logits/rejected": 1.4079976081848145,
137
+ "logps/chosen": -33.79199981689453,
138
+ "logps/rejected": -36.55875778198242,
139
+ "loss": 0.6941,
140
+ "rewards/accuracies": 0.4765625,
141
+ "rewards/chosen": -0.002126973122358322,
142
+ "rewards/margins": -0.001552498433738947,
143
+ "rewards/rejected": -0.0005744747468270361,
144
+ "step": 18
145
+ },
146
+ {
147
+ "epoch": 0.032160804020100506,
148
+ "grad_norm": 0.4264390468597412,
149
+ "learning_rate": 1.5873015873015873e-06,
150
+ "logits/chosen": 1.4721007347106934,
151
+ "logits/rejected": 1.485215663909912,
152
+ "logps/chosen": -31.09832000732422,
153
+ "logps/rejected": -36.288822174072266,
154
+ "loss": 0.6951,
155
+ "rewards/accuracies": 0.4765625,
156
+ "rewards/chosen": -0.005364666692912579,
157
+ "rewards/margins": -0.003565119579434395,
158
+ "rewards/rejected": -0.0017995464149862528,
159
+ "step": 20
160
+ },
161
+ {
162
+ "epoch": 0.03537688442211055,
163
+ "grad_norm": 0.40373197197914124,
164
+ "learning_rate": 1.746031746031746e-06,
165
+ "logits/chosen": 1.3501726388931274,
166
+ "logits/rejected": 1.370957374572754,
167
+ "logps/chosen": -31.461055755615234,
168
+ "logps/rejected": -34.38249588012695,
169
+ "loss": 0.6938,
170
+ "rewards/accuracies": 0.46875,
171
+ "rewards/chosen": -0.003749656490981579,
172
+ "rewards/margins": -0.0010161390528082848,
173
+ "rewards/rejected": -0.0027335172053426504,
174
+ "step": 22
175
+ },
176
+ {
177
+ "epoch": 0.038592964824120606,
178
+ "grad_norm": 0.4503360390663147,
179
+ "learning_rate": 1.904761904761905e-06,
180
+ "logits/chosen": 1.1474275588989258,
181
+ "logits/rejected": 1.136375904083252,
182
+ "logps/chosen": -32.1020393371582,
183
+ "logps/rejected": -35.39215087890625,
184
+ "loss": 0.695,
185
+ "rewards/accuracies": 0.4765625,
186
+ "rewards/chosen": -0.0007312107481993735,
187
+ "rewards/margins": -0.003288470208644867,
188
+ "rewards/rejected": 0.002557259751483798,
189
+ "step": 24
190
+ },
191
+ {
192
+ "epoch": 0.04180904522613065,
193
+ "grad_norm": 0.39122092723846436,
194
+ "learning_rate": 2.0634920634920634e-06,
195
+ "logits/chosen": 1.0779863595962524,
196
+ "logits/rejected": 1.0993211269378662,
197
+ "logps/chosen": -31.845603942871094,
198
+ "logps/rejected": -35.04288864135742,
199
+ "loss": 0.6949,
200
+ "rewards/accuracies": 0.46875,
201
+ "rewards/chosen": -0.0012633285950869322,
202
+ "rewards/margins": -0.0032839984633028507,
203
+ "rewards/rejected": 0.002020669635385275,
204
+ "step": 26
205
+ },
206
+ {
207
+ "epoch": 0.045025125628140705,
208
+ "grad_norm": 0.4362376630306244,
209
+ "learning_rate": 2.222222222222222e-06,
210
+ "logits/chosen": 1.4343101978302002,
211
+ "logits/rejected": 1.4324826002120972,
212
+ "logps/chosen": -29.318994522094727,
213
+ "logps/rejected": -31.928974151611328,
214
+ "loss": 0.6941,
215
+ "rewards/accuracies": 0.53125,
216
+ "rewards/chosen": -0.00553980004042387,
217
+ "rewards/margins": -0.0015490761725232005,
218
+ "rewards/rejected": -0.003990724217146635,
219
+ "step": 28
220
+ },
221
+ {
222
+ "epoch": 0.04824120603015075,
223
+ "grad_norm": 0.4269290864467621,
224
+ "learning_rate": 2.380952380952381e-06,
225
+ "logits/chosen": 1.29887056350708,
226
+ "logits/rejected": 1.3346788883209229,
227
+ "logps/chosen": -32.30498123168945,
228
+ "logps/rejected": -35.8641357421875,
229
+ "loss": 0.694,
230
+ "rewards/accuracies": 0.46875,
231
+ "rewards/chosen": -0.00222906400449574,
232
+ "rewards/margins": -0.0013898832257837057,
233
+ "rewards/rejected": -0.0008391811279579997,
234
+ "step": 30
235
+ },
236
+ {
237
+ "epoch": 0.051457286432160805,
238
+ "grad_norm": 0.4749397337436676,
239
+ "learning_rate": 2.53968253968254e-06,
240
+ "logits/chosen": 1.2239577770233154,
241
+ "logits/rejected": 1.2360297441482544,
242
+ "logps/chosen": -30.756057739257812,
243
+ "logps/rejected": -35.22294616699219,
244
+ "loss": 0.6937,
245
+ "rewards/accuracies": 0.46875,
246
+ "rewards/chosen": 0.0008076138328760862,
247
+ "rewards/margins": -0.0006099215243011713,
248
+ "rewards/rejected": 0.0014175355900079012,
249
+ "step": 32
250
+ },
251
+ {
252
+ "epoch": 0.05467336683417085,
253
+ "grad_norm": 0.41970404982566833,
254
+ "learning_rate": 2.6984126984126986e-06,
255
+ "logits/chosen": 1.4691964387893677,
256
+ "logits/rejected": 1.5019806623458862,
257
+ "logps/chosen": -30.574474334716797,
258
+ "logps/rejected": -33.60447311401367,
259
+ "loss": 0.695,
260
+ "rewards/accuracies": 0.453125,
261
+ "rewards/chosen": -0.004354957491159439,
262
+ "rewards/margins": -0.00334002822637558,
263
+ "rewards/rejected": -0.001014929497614503,
264
+ "step": 34
265
+ },
266
+ {
267
+ "epoch": 0.057889447236180905,
268
+ "grad_norm": 0.4544912278652191,
269
+ "learning_rate": 2.8571428571428573e-06,
270
+ "logits/chosen": 1.2973356246948242,
271
+ "logits/rejected": 1.3056020736694336,
272
+ "logps/chosen": -29.477462768554688,
273
+ "logps/rejected": -33.272056579589844,
274
+ "loss": 0.6925,
275
+ "rewards/accuracies": 0.5390625,
276
+ "rewards/chosen": -0.0026571941561996937,
277
+ "rewards/margins": 0.001574229565449059,
278
+ "rewards/rejected": -0.0042314231395721436,
279
+ "step": 36
280
+ },
281
+ {
282
+ "epoch": 0.06110552763819096,
283
+ "grad_norm": 0.4139344096183777,
284
+ "learning_rate": 3.015873015873016e-06,
285
+ "logits/chosen": 1.3676331043243408,
286
+ "logits/rejected": 1.4585086107254028,
287
+ "logps/chosen": -31.87004280090332,
288
+ "logps/rejected": -33.22168731689453,
289
+ "loss": 0.6944,
290
+ "rewards/accuracies": 0.4453125,
291
+ "rewards/chosen": -0.006321355234831572,
292
+ "rewards/margins": -0.0021584928035736084,
293
+ "rewards/rejected": -0.004162862431257963,
294
+ "step": 38
295
+ },
296
+ {
297
+ "epoch": 0.06432160804020101,
298
+ "grad_norm": 0.4309309422969818,
299
+ "learning_rate": 3.1746031746031746e-06,
300
+ "logits/chosen": 1.4414952993392944,
301
+ "logits/rejected": 1.4826974868774414,
302
+ "logps/chosen": -29.79780387878418,
303
+ "logps/rejected": -33.88084030151367,
304
+ "loss": 0.6933,
305
+ "rewards/accuracies": 0.4765625,
306
+ "rewards/chosen": -0.001039503258652985,
307
+ "rewards/margins": -7.016677409410477e-05,
308
+ "rewards/rejected": -0.0009693363681435585,
309
+ "step": 40
310
+ },
311
+ {
312
+ "epoch": 0.06753768844221106,
313
+ "grad_norm": 0.4449164569377899,
314
+ "learning_rate": 3.3333333333333333e-06,
315
+ "logits/chosen": 1.2832252979278564,
316
+ "logits/rejected": 1.2548108100891113,
317
+ "logps/chosen": -30.628887176513672,
318
+ "logps/rejected": -36.61481475830078,
319
+ "loss": 0.6917,
320
+ "rewards/accuracies": 0.53125,
321
+ "rewards/chosen": -0.0037453575059771538,
322
+ "rewards/margins": 0.003167451359331608,
323
+ "rewards/rejected": -0.006912808865308762,
324
+ "step": 42
325
+ },
326
+ {
327
+ "epoch": 0.0707537688442211,
328
+ "grad_norm": 0.45570507645606995,
329
+ "learning_rate": 3.492063492063492e-06,
330
+ "logits/chosen": 1.5801219940185547,
331
+ "logits/rejected": 1.5999751091003418,
332
+ "logps/chosen": -31.53429412841797,
333
+ "logps/rejected": -36.12282943725586,
334
+ "loss": 0.693,
335
+ "rewards/accuracies": 0.453125,
336
+ "rewards/chosen": -0.0012303784023970366,
337
+ "rewards/margins": 0.000611663912422955,
338
+ "rewards/rejected": -0.0018420428968966007,
339
+ "step": 44
340
+ },
341
+ {
342
+ "epoch": 0.07396984924623115,
343
+ "grad_norm": 0.44526174664497375,
344
+ "learning_rate": 3.6507936507936507e-06,
345
+ "logits/chosen": 1.2573561668395996,
346
+ "logits/rejected": 1.2938108444213867,
347
+ "logps/chosen": -30.280303955078125,
348
+ "logps/rejected": -34.33146667480469,
349
+ "loss": 0.6913,
350
+ "rewards/accuracies": 0.5,
351
+ "rewards/chosen": -0.0019697281531989574,
352
+ "rewards/margins": 0.004040538799017668,
353
+ "rewards/rejected": -0.006010266952216625,
354
+ "step": 46
355
+ },
356
+ {
357
+ "epoch": 0.07718592964824121,
358
+ "grad_norm": 1.2515525817871094,
359
+ "learning_rate": 3.80952380952381e-06,
360
+ "logits/chosen": 0.8721216917037964,
361
+ "logits/rejected": 0.8969432711601257,
362
+ "logps/chosen": -32.89268493652344,
363
+ "logps/rejected": -34.27225875854492,
364
+ "loss": 0.6897,
365
+ "rewards/accuracies": 0.59375,
366
+ "rewards/chosen": -0.0003448700299486518,
367
+ "rewards/margins": 0.0074737779796123505,
368
+ "rewards/rejected": -0.007818647660315037,
369
+ "step": 48
370
+ },
371
+ {
372
+ "epoch": 0.08040201005025126,
373
+ "grad_norm": 0.6490676999092102,
374
+ "learning_rate": 3.968253968253968e-06,
375
+ "logits/chosen": 1.3292715549468994,
376
+ "logits/rejected": 1.3583744764328003,
377
+ "logps/chosen": -27.783782958984375,
378
+ "logps/rejected": -34.58745574951172,
379
+ "loss": 0.6882,
380
+ "rewards/accuracies": 0.625,
381
+ "rewards/chosen": 0.0020002706442028284,
382
+ "rewards/margins": 0.010215929709374905,
383
+ "rewards/rejected": -0.00821565929800272,
384
+ "step": 50
385
+ },
386
+ {
387
+ "epoch": 0.08040201005025126,
388
+ "eval_logits/chosen": 0.5501546263694763,
389
+ "eval_logits/rejected": 0.5580804944038391,
390
+ "eval_logps/chosen": -30.54437255859375,
391
+ "eval_logps/rejected": -35.43931198120117,
392
+ "eval_loss": 0.6898252964019775,
393
+ "eval_rewards/accuracies": 0.5349999666213989,
394
+ "eval_rewards/chosen": -0.0043669044971466064,
395
+ "eval_rewards/margins": 0.0071200719103217125,
396
+ "eval_rewards/rejected": -0.011486975476145744,
397
+ "eval_runtime": 48.2016,
398
+ "eval_samples_per_second": 4.149,
399
+ "eval_steps_per_second": 1.037,
400
+ "step": 50
401
+ },
402
+ {
403
+ "epoch": 0.0836180904522613,
404
+ "grad_norm": 0.4490942358970642,
405
+ "learning_rate": 4.126984126984127e-06,
406
+ "logits/chosen": 1.4922609329223633,
407
+ "logits/rejected": 1.5168870687484741,
408
+ "logps/chosen": -30.933542251586914,
409
+ "logps/rejected": -32.83856964111328,
410
+ "loss": 0.6887,
411
+ "rewards/accuracies": 0.5859375,
412
+ "rewards/chosen": -0.0028348369523882866,
413
+ "rewards/margins": 0.009395316243171692,
414
+ "rewards/rejected": -0.012230154126882553,
415
+ "step": 52
416
+ },
417
+ {
418
+ "epoch": 0.08683417085427135,
419
+ "grad_norm": 0.4455580413341522,
420
+ "learning_rate": 4.2857142857142855e-06,
421
+ "logits/chosen": 1.2956047058105469,
422
+ "logits/rejected": 1.3195202350616455,
423
+ "logps/chosen": -33.25441360473633,
424
+ "logps/rejected": -34.96155548095703,
425
+ "loss": 0.6888,
426
+ "rewards/accuracies": 0.609375,
427
+ "rewards/chosen": -0.011137772351503372,
428
+ "rewards/margins": 0.009241417050361633,
429
+ "rewards/rejected": -0.020379189401865005,
430
+ "step": 54
431
+ },
432
+ {
433
+ "epoch": 0.09005025125628141,
434
+ "grad_norm": 0.46649816632270813,
435
+ "learning_rate": 4.444444444444444e-06,
436
+ "logits/chosen": 1.442166805267334,
437
+ "logits/rejected": 1.4832069873809814,
438
+ "logps/chosen": -30.720643997192383,
439
+ "logps/rejected": -34.70309829711914,
440
+ "loss": 0.6936,
441
+ "rewards/accuracies": 0.5,
442
+ "rewards/chosen": -0.010124221444129944,
443
+ "rewards/margins": -0.0002470978070050478,
444
+ "rewards/rejected": -0.009877122938632965,
445
+ "step": 56
446
+ },
447
+ {
448
+ "epoch": 0.09326633165829146,
449
+ "grad_norm": 0.467673122882843,
450
+ "learning_rate": 4.603174603174604e-06,
451
+ "logits/chosen": 1.3763530254364014,
452
+ "logits/rejected": 1.4356203079223633,
453
+ "logps/chosen": -31.76504135131836,
454
+ "logps/rejected": -33.25387954711914,
455
+ "loss": 0.6874,
456
+ "rewards/accuracies": 0.5546875,
457
+ "rewards/chosen": -0.008322305977344513,
458
+ "rewards/margins": 0.012220000848174095,
459
+ "rewards/rejected": -0.020542306825518608,
460
+ "step": 58
461
+ },
462
+ {
463
+ "epoch": 0.0964824120603015,
464
+ "grad_norm": 0.5383568406105042,
465
+ "learning_rate": 4.761904761904762e-06,
466
+ "logits/chosen": 1.2813664674758911,
467
+ "logits/rejected": 1.3347243070602417,
468
+ "logps/chosen": -33.40138626098633,
469
+ "logps/rejected": -34.50080871582031,
470
+ "loss": 0.6869,
471
+ "rewards/accuracies": 0.5859375,
472
+ "rewards/chosen": -0.014261961914598942,
473
+ "rewards/margins": 0.013282392174005508,
474
+ "rewards/rejected": -0.027544351294636726,
475
+ "step": 60
476
+ },
477
+ {
478
+ "epoch": 0.09969849246231156,
479
+ "grad_norm": 0.4995841383934021,
480
+ "learning_rate": 4.920634920634921e-06,
481
+ "logits/chosen": 1.3010649681091309,
482
+ "logits/rejected": 1.3390882015228271,
483
+ "logps/chosen": -29.836774826049805,
484
+ "logps/rejected": -34.70252990722656,
485
+ "loss": 0.6843,
486
+ "rewards/accuracies": 0.625,
487
+ "rewards/chosen": -0.010679999366402626,
488
+ "rewards/margins": 0.018656719475984573,
489
+ "rewards/rejected": -0.0293367151170969,
490
+ "step": 62
491
+ },
492
+ {
493
+ "epoch": 0.10291457286432161,
494
+ "grad_norm": 0.4841148555278778,
495
+ "learning_rate": 4.999960377651517e-06,
496
+ "logits/chosen": 1.422184705734253,
497
+ "logits/rejected": 1.4505563974380493,
498
+ "logps/chosen": -34.65579605102539,
499
+ "logps/rejected": -33.47809600830078,
500
+ "loss": 0.6897,
501
+ "rewards/accuracies": 0.5390625,
502
+ "rewards/chosen": -0.01736070215702057,
503
+ "rewards/margins": 0.007918944582343102,
504
+ "rewards/rejected": -0.02527964487671852,
505
+ "step": 64
506
+ },
507
+ {
508
+ "epoch": 0.10613065326633166,
509
+ "grad_norm": 0.5981670022010803,
510
+ "learning_rate": 4.999643406399275e-06,
511
+ "logits/chosen": 1.1232147216796875,
512
+ "logits/rejected": 1.145438313484192,
513
+ "logps/chosen": -29.947956085205078,
514
+ "logps/rejected": -33.95368194580078,
515
+ "loss": 0.6838,
516
+ "rewards/accuracies": 0.5703125,
517
+ "rewards/chosen": -0.018678227439522743,
518
+ "rewards/margins": 0.019824998453259468,
519
+ "rewards/rejected": -0.03850322589278221,
520
+ "step": 66
521
+ },
522
+ {
523
+ "epoch": 0.1093467336683417,
524
+ "grad_norm": 0.571692705154419,
525
+ "learning_rate": 4.999009504083738e-06,
526
+ "logits/chosen": 1.3420298099517822,
527
+ "logits/rejected": 1.338175654411316,
528
+ "logps/chosen": -30.924522399902344,
529
+ "logps/rejected": -37.51594161987305,
530
+ "loss": 0.6828,
531
+ "rewards/accuracies": 0.609375,
532
+ "rewards/chosen": -0.020221160724759102,
533
+ "rewards/margins": 0.02198150008916855,
534
+ "rewards/rejected": -0.0422026626765728,
535
+ "step": 68
536
+ },
537
+ {
538
+ "epoch": 0.11256281407035176,
539
+ "grad_norm": 0.5207106471061707,
540
+ "learning_rate": 4.998058751077705e-06,
541
+ "logits/chosen": 1.3332023620605469,
542
+ "logits/rejected": 1.3814537525177002,
543
+ "logps/chosen": -31.7532958984375,
544
+ "logps/rejected": -34.753013610839844,
545
+ "loss": 0.6822,
546
+ "rewards/accuracies": 0.59375,
547
+ "rewards/chosen": -0.031598228961229324,
548
+ "rewards/margins": 0.023511648178100586,
549
+ "rewards/rejected": -0.05510988086462021,
550
+ "step": 70
551
+ },
552
+ {
553
+ "epoch": 0.11577889447236181,
554
+ "grad_norm": 0.47247663140296936,
555
+ "learning_rate": 4.996791267927632e-06,
556
+ "logits/chosen": 1.3693702220916748,
557
+ "logits/rejected": 1.3828941583633423,
558
+ "logps/chosen": -30.8132266998291,
559
+ "logps/rejected": -35.77416229248047,
560
+ "loss": 0.6827,
561
+ "rewards/accuracies": 0.6015625,
562
+ "rewards/chosen": -0.030194712802767754,
563
+ "rewards/margins": 0.022484488785266876,
564
+ "rewards/rejected": -0.05267919972538948,
565
+ "step": 72
566
+ },
567
+ {
568
+ "epoch": 0.11899497487437186,
569
+ "grad_norm": 0.48132631182670593,
570
+ "learning_rate": 4.995207215338358e-06,
571
+ "logits/chosen": 1.490431785583496,
572
+ "logits/rejected": 1.5298147201538086,
573
+ "logps/chosen": -34.107643127441406,
574
+ "logps/rejected": -34.63951110839844,
575
+ "loss": 0.687,
576
+ "rewards/accuracies": 0.515625,
577
+ "rewards/chosen": -0.036115553230047226,
578
+ "rewards/margins": 0.014621476642787457,
579
+ "rewards/rejected": -0.05073703080415726,
580
+ "step": 74
581
+ },
582
+ {
583
+ "epoch": 0.12221105527638192,
584
+ "grad_norm": 0.5402019619941711,
585
+ "learning_rate": 4.9933067941527175e-06,
586
+ "logits/chosen": 1.3514180183410645,
587
+ "logits/rejected": 1.3422389030456543,
588
+ "logps/chosen": -36.069549560546875,
589
+ "logps/rejected": -33.26152038574219,
590
+ "loss": 0.6841,
591
+ "rewards/accuracies": 0.5625,
592
+ "rewards/chosen": -0.04465916007757187,
593
+ "rewards/margins": 0.02099396474659443,
594
+ "rewards/rejected": -0.06565312296152115,
595
+ "step": 76
596
+ },
597
+ {
598
+ "epoch": 0.12542713567839195,
599
+ "grad_norm": 0.5146540999412537,
600
+ "learning_rate": 4.991090245326083e-06,
601
+ "logits/chosen": 1.0872302055358887,
602
+ "logits/rejected": 1.1015247106552124,
603
+ "logps/chosen": -29.906904220581055,
604
+ "logps/rejected": -38.96195983886719,
605
+ "loss": 0.6758,
606
+ "rewards/accuracies": 0.609375,
607
+ "rewards/chosen": -0.038489095866680145,
608
+ "rewards/margins": 0.037541136145591736,
609
+ "rewards/rejected": -0.07603023201227188,
610
+ "step": 78
611
+ },
612
+ {
613
+ "epoch": 0.12864321608040202,
614
+ "grad_norm": 0.5551427602767944,
615
+ "learning_rate": 4.988557849895811e-06,
616
+ "logits/chosen": 1.3899340629577637,
617
+ "logits/rejected": 1.4685105085372925,
618
+ "logps/chosen": -34.21710205078125,
619
+ "logps/rejected": -38.152099609375,
620
+ "loss": 0.6695,
621
+ "rewards/accuracies": 0.65625,
622
+ "rewards/chosen": -0.03813210129737854,
623
+ "rewards/margins": 0.05099986493587494,
624
+ "rewards/rejected": -0.08913196623325348,
625
+ "step": 80
626
+ },
627
+ {
628
+ "epoch": 0.13185929648241207,
629
+ "grad_norm": 0.5468294620513916,
630
+ "learning_rate": 4.9857099289456115e-06,
631
+ "logits/chosen": 1.4150495529174805,
632
+ "logits/rejected": 1.4033807516098022,
633
+ "logps/chosen": -30.979236602783203,
634
+ "logps/rejected": -36.2646484375,
635
+ "loss": 0.6759,
636
+ "rewards/accuracies": 0.6484375,
637
+ "rewards/chosen": -0.04814925417304039,
638
+ "rewards/margins": 0.0380784347653389,
639
+ "rewards/rejected": -0.08622768521308899,
640
+ "step": 82
641
+ },
642
+ {
643
+ "epoch": 0.13507537688442212,
644
+ "grad_norm": 0.47936275601387024,
645
+ "learning_rate": 4.9825468435648345e-06,
646
+ "logits/chosen": 1.1383363008499146,
647
+ "logits/rejected": 1.1823409795761108,
648
+ "logps/chosen": -29.871156692504883,
649
+ "logps/rejected": -32.21099090576172,
650
+ "loss": 0.6789,
651
+ "rewards/accuracies": 0.578125,
652
+ "rewards/chosen": -0.04363500326871872,
653
+ "rewards/margins": 0.03290008008480072,
654
+ "rewards/rejected": -0.07653507590293884,
655
+ "step": 84
656
+ },
657
+ {
658
+ "epoch": 0.13829145728643216,
659
+ "grad_norm": 0.48245131969451904,
660
+ "learning_rate": 4.97906899480269e-06,
661
+ "logits/chosen": 1.4419957399368286,
662
+ "logits/rejected": 1.475970983505249,
663
+ "logps/chosen": -30.77682113647461,
664
+ "logps/rejected": -35.76774215698242,
665
+ "loss": 0.6698,
666
+ "rewards/accuracies": 0.6484375,
667
+ "rewards/chosen": -0.04197033494710922,
668
+ "rewards/margins": 0.05154535919427872,
669
+ "rewards/rejected": -0.09351569414138794,
670
+ "step": 86
671
+ },
672
+ {
673
+ "epoch": 0.1415075376884422,
674
+ "grad_norm": 0.5745317935943604,
675
+ "learning_rate": 4.975276823617399e-06,
676
+ "logits/chosen": 1.4063524007797241,
677
+ "logits/rejected": 1.439096450805664,
678
+ "logps/chosen": -29.64651870727539,
679
+ "logps/rejected": -35.58737564086914,
680
+ "loss": 0.6644,
681
+ "rewards/accuracies": 0.6484375,
682
+ "rewards/chosen": -0.045151449739933014,
683
+ "rewards/margins": 0.06304246932268143,
684
+ "rewards/rejected": -0.10819391906261444,
685
+ "step": 88
686
+ },
687
+ {
688
+ "epoch": 0.14472361809045226,
689
+ "grad_norm": 0.5324569344520569,
690
+ "learning_rate": 4.971170810820279e-06,
691
+ "logits/chosen": 1.3591543436050415,
692
+ "logits/rejected": 1.3323087692260742,
693
+ "logps/chosen": -34.168128967285156,
694
+ "logps/rejected": -37.837242126464844,
695
+ "loss": 0.6722,
696
+ "rewards/accuracies": 0.640625,
697
+ "rewards/chosen": -0.05879779905080795,
698
+ "rewards/margins": 0.04740719497203827,
699
+ "rewards/rejected": -0.10620499402284622,
700
+ "step": 90
701
+ },
702
+ {
703
+ "epoch": 0.1479396984924623,
704
+ "grad_norm": 0.6153348684310913,
705
+ "learning_rate": 4.96675147701479e-06,
706
+ "logits/chosen": 1.3236520290374756,
707
+ "logits/rejected": 1.3638286590576172,
708
+ "logps/chosen": -33.26496887207031,
709
+ "logps/rejected": -35.09466552734375,
710
+ "loss": 0.678,
711
+ "rewards/accuracies": 0.609375,
712
+ "rewards/chosen": -0.07028309255838394,
713
+ "rewards/margins": 0.037059590220451355,
714
+ "rewards/rejected": -0.1073426827788353,
715
+ "step": 92
716
+ },
717
+ {
718
+ "epoch": 0.15115577889447235,
719
+ "grad_norm": 0.6804412603378296,
720
+ "learning_rate": 4.962019382530521e-06,
721
+ "logits/chosen": 1.1772613525390625,
722
+ "logits/rejected": 1.2550945281982422,
723
+ "logps/chosen": -34.816246032714844,
724
+ "logps/rejected": -34.04191970825195,
725
+ "loss": 0.676,
726
+ "rewards/accuracies": 0.5859375,
727
+ "rewards/chosen": -0.0760345533490181,
728
+ "rewards/margins": 0.041960567235946655,
729
+ "rewards/rejected": -0.11799512058496475,
730
+ "step": 94
731
+ },
732
+ {
733
+ "epoch": 0.15437185929648242,
734
+ "grad_norm": 0.5287646055221558,
735
+ "learning_rate": 4.956975127352146e-06,
736
+ "logits/chosen": 1.1335078477859497,
737
+ "logits/rejected": 1.0898680686950684,
738
+ "logps/chosen": -32.298492431640625,
739
+ "logps/rejected": -38.93742370605469,
740
+ "loss": 0.6661,
741
+ "rewards/accuracies": 0.6328125,
742
+ "rewards/chosen": -0.07041005790233612,
743
+ "rewards/margins": 0.06344542652368546,
744
+ "rewards/rejected": -0.13385546207427979,
745
+ "step": 96
746
+ },
747
+ {
748
+ "epoch": 0.15758793969849247,
749
+ "grad_norm": 0.5636870265007019,
750
+ "learning_rate": 4.951619351043353e-06,
751
+ "logits/chosen": 1.3233567476272583,
752
+ "logits/rejected": 1.356798768043518,
753
+ "logps/chosen": -31.532367706298828,
754
+ "logps/rejected": -33.19044494628906,
755
+ "loss": 0.6698,
756
+ "rewards/accuracies": 0.6015625,
757
+ "rewards/chosen": -0.08084239810705185,
758
+ "rewards/margins": 0.05414910987019539,
759
+ "rewards/rejected": -0.13499149680137634,
760
+ "step": 98
761
+ },
762
+ {
763
+ "epoch": 0.16080402010050251,
764
+ "grad_norm": 0.5570967197418213,
765
+ "learning_rate": 4.945952732665755e-06,
766
+ "logits/chosen": 1.301795244216919,
767
+ "logits/rejected": 1.3351643085479736,
768
+ "logps/chosen": -32.771270751953125,
769
+ "logps/rejected": -41.387725830078125,
770
+ "loss": 0.6603,
771
+ "rewards/accuracies": 0.6015625,
772
+ "rewards/chosen": -0.07904825359582901,
773
+ "rewards/margins": 0.07616038620471954,
774
+ "rewards/rejected": -0.15520863234996796,
775
+ "step": 100
776
+ },
777
+ {
778
+ "epoch": 0.16080402010050251,
779
+ "eval_logits/chosen": 0.5383610725402832,
780
+ "eval_logits/rejected": 0.544625997543335,
781
+ "eval_logps/chosen": -31.30695343017578,
782
+ "eval_logps/rejected": -36.894073486328125,
783
+ "eval_loss": 0.6592575311660767,
784
+ "eval_rewards/accuracies": 0.6850000023841858,
785
+ "eval_rewards/chosen": -0.08062513917684555,
786
+ "eval_rewards/margins": 0.07633791118860245,
787
+ "eval_rewards/rejected": -0.156963050365448,
788
+ "eval_runtime": 48.1277,
789
+ "eval_samples_per_second": 4.156,
790
+ "eval_steps_per_second": 1.039,
791
+ "step": 100
792
+ }
793
+ ],
794
+ "logging_steps": 2,
795
+ "max_steps": 621,
796
+ "num_input_tokens_seen": 0,
797
+ "num_train_epochs": 1,
798
+ "save_steps": 100,
799
+ "stateful_callbacks": {
800
+ "TrainerControl": {
801
+ "args": {
802
+ "should_epoch_stop": false,
803
+ "should_evaluate": false,
804
+ "should_log": false,
805
+ "should_save": true,
806
+ "should_training_stop": false
807
+ },
808
+ "attributes": {}
809
+ }
810
+ },
811
+ "total_flos": 2.5711860103774208e+17,
812
+ "train_batch_size": 2,
813
+ "trial_name": null,
814
+ "trial_params": null
815
+ }
checkpoint-100/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcf7d186e3568d77f55778a26c607e4da9c0a96dd9d37a4828aaf8e3780e13fb
3
+ size 5752
checkpoint-100/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-200/README.md ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: /p/scratch/taco-vlm/xiao4/models/Qwen2.5-VL-7B-Instruct
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:/p/scratch/taco-vlm/xiao4/models/Qwen2.5-VL-7B-Instruct
7
+ - llama-factory
8
+ - lora
9
+ - transformers
10
+ ---
11
+
12
+ # Model Card for Model ID
13
+
14
+ <!-- Provide a quick summary of what the model is/does. -->
15
+
16
+
17
+
18
+ ## Model Details
19
+
20
+ ### Model Description
21
+
22
+ <!-- Provide a longer summary of what this model is. -->
23
+
24
+
25
+
26
+ - **Developed by:** [More Information Needed]
27
+ - **Funded by [optional]:** [More Information Needed]
28
+ - **Shared by [optional]:** [More Information Needed]
29
+ - **Model type:** [More Information Needed]
30
+ - **Language(s) (NLP):** [More Information Needed]
31
+ - **License:** [More Information Needed]
32
+ - **Finetuned from model [optional]:** [More Information Needed]
33
+
34
+ ### Model Sources [optional]
35
+
36
+ <!-- Provide the basic links for the model. -->
37
+
38
+ - **Repository:** [More Information Needed]
39
+ - **Paper [optional]:** [More Information Needed]
40
+ - **Demo [optional]:** [More Information Needed]
41
+
42
+ ## Uses
43
+
44
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
45
+
46
+ ### Direct Use
47
+
48
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Downstream Use [optional]
53
+
54
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
55
+
56
+ [More Information Needed]
57
+
58
+ ### Out-of-Scope Use
59
+
60
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ## Bias, Risks, and Limitations
65
+
66
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
67
+
68
+ [More Information Needed]
69
+
70
+ ### Recommendations
71
+
72
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
73
+
74
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
75
+
76
+ ## How to Get Started with the Model
77
+
78
+ Use the code below to get started with the model.
79
+
80
+ [More Information Needed]
81
+
82
+ ## Training Details
83
+
84
+ ### Training Data
85
+
86
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
87
+
88
+ [More Information Needed]
89
+
90
+ ### Training Procedure
91
+
92
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
93
+
94
+ #### Preprocessing [optional]
95
+
96
+ [More Information Needed]
97
+
98
+
99
+ #### Training Hyperparameters
100
+
101
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
102
+
103
+ #### Speeds, Sizes, Times [optional]
104
+
105
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
106
+
107
+ [More Information Needed]
108
+
109
+ ## Evaluation
110
+
111
+ <!-- This section describes the evaluation protocols and provides the results. -->
112
+
113
+ ### Testing Data, Factors & Metrics
114
+
115
+ #### Testing Data
116
+
117
+ <!-- This should link to a Dataset Card if possible. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Factors
122
+
123
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
124
+
125
+ [More Information Needed]
126
+
127
+ #### Metrics
128
+
129
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
130
+
131
+ [More Information Needed]
132
+
133
+ ### Results
134
+
135
+ [More Information Needed]
136
+
137
+ #### Summary
138
+
139
+
140
+
141
+ ## Model Examination [optional]
142
+
143
+ <!-- Relevant interpretability work for the model goes here -->
144
+
145
+ [More Information Needed]
146
+
147
+ ## Environmental Impact
148
+
149
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
150
+
151
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
152
+
153
+ - **Hardware Type:** [More Information Needed]
154
+ - **Hours used:** [More Information Needed]
155
+ - **Cloud Provider:** [More Information Needed]
156
+ - **Compute Region:** [More Information Needed]
157
+ - **Carbon Emitted:** [More Information Needed]
158
+
159
+ ## Technical Specifications [optional]
160
+
161
+ ### Model Architecture and Objective
162
+
163
+ [More Information Needed]
164
+
165
+ ### Compute Infrastructure
166
+
167
+ [More Information Needed]
168
+
169
+ #### Hardware
170
+
171
+ [More Information Needed]
172
+
173
+ #### Software
174
+
175
+ [More Information Needed]
176
+
177
+ ## Citation [optional]
178
+
179
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
180
+
181
+ **BibTeX:**
182
+
183
+ [More Information Needed]
184
+
185
+ **APA:**
186
+
187
+ [More Information Needed]
188
+
189
+ ## Glossary [optional]
190
+
191
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
192
+
193
+ [More Information Needed]
194
+
195
+ ## More Information [optional]
196
+
197
+ [More Information Needed]
198
+
199
+ ## Model Card Authors [optional]
200
+
201
+ [More Information Needed]
202
+
203
+ ## Model Card Contact
204
+
205
+ [More Information Needed]
206
+ ### Framework versions
207
+
208
+ - PEFT 0.17.1
checkpoint-200/adapter_config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/p/scratch/taco-vlm/xiao4/models/Qwen2.5-VL-7B-Instruct",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 64,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.0,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 32,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "v_proj",
29
+ "q_proj"
30
+ ],
31
+ "target_parameters": null,
32
+ "task_type": "CAUSAL_LM",
33
+ "trainable_token_indices": null,
34
+ "use_dora": false,
35
+ "use_qalora": false,
36
+ "use_rslora": false
37
+ }
checkpoint-200/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9dac64d83cb235284c26a460f27c061eed2600489ff67edf5842e43eea76ab7
3
+ size 40385208
checkpoint-200/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
checkpoint-200/chat_template.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
3
+ }
checkpoint-200/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-200/preprocessor_config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": null,
3
+ "default_to_square": true,
4
+ "do_center_crop": null,
5
+ "do_convert_rgb": true,
6
+ "do_normalize": true,
7
+ "do_rescale": true,
8
+ "do_resize": true,
9
+ "image_mean": [
10
+ 0.48145466,
11
+ 0.4578275,
12
+ 0.40821073
13
+ ],
14
+ "image_processor_type": "Qwen2VLImageProcessorFast",
15
+ "image_std": [
16
+ 0.26862954,
17
+ 0.26130258,
18
+ 0.27577711
19
+ ],
20
+ "max_pixels": 12845056,
21
+ "merge_size": 2,
22
+ "min_pixels": 3136,
23
+ "patch_size": 14,
24
+ "processor_class": "Qwen2_5_VLProcessor",
25
+ "resample": 3,
26
+ "rescale_factor": 0.00392156862745098,
27
+ "size": {
28
+ "longest_edge": 1003520,
29
+ "shortest_edge": 3136
30
+ },
31
+ "temporal_patch_size": 2
32
+ }
checkpoint-200/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c5e18f922d0af74d820247ae97bee506ab412554a58345ddf2558abc94ee3e3
3
+ size 15024
checkpoint-200/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a2dcca6d9741f46592359768ea2212b9321da6408d1fd7d3a80b017bf37f434
3
+ size 15024
checkpoint-200/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69420ece2c255923c5cbb3c6c9c4a6b9cb38fb57e5d3033c8b7d436a1faf6f13
3
+ size 15024
checkpoint-200/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66f278b40a1e23b88a657c4e5d03afa8dbbbe14dfeb16f6b4beedaece6cdd0b9
3
+ size 15024
checkpoint-200/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
checkpoint-200/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
checkpoint-200/tokenizer_config.json ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
+ "errors": "replace",
202
+ "extra_special_tokens": {},
203
+ "model_max_length": 131072,
204
+ "pad_token": "<|endoftext|>",
205
+ "padding_side": "right",
206
+ "processor_class": "Qwen2_5_VLProcessor",
207
+ "split_special_tokens": false,
208
+ "tokenizer_class": "Qwen2Tokenizer",
209
+ "unk_token": null
210
+ }
checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,1597 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.32160804020100503,
5
+ "eval_steps": 50,
6
+ "global_step": 200,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0032160804020100503,
13
+ "grad_norm": 0.398336261510849,
14
+ "learning_rate": 1.5873015873015874e-07,
15
+ "logits/chosen": 0.9754317998886108,
16
+ "logits/rejected": 1.027288556098938,
17
+ "logps/chosen": -31.835588455200195,
18
+ "logps/rejected": -34.915592193603516,
19
+ "loss": 0.6931,
20
+ "rewards/accuracies": 0.0,
21
+ "rewards/chosen": 0.0,
22
+ "rewards/margins": 0.0,
23
+ "rewards/rejected": 0.0,
24
+ "step": 2
25
+ },
26
+ {
27
+ "epoch": 0.006432160804020101,
28
+ "grad_norm": 0.4530814290046692,
29
+ "learning_rate": 3.174603174603175e-07,
30
+ "logits/chosen": 1.4162323474884033,
31
+ "logits/rejected": 1.4583587646484375,
32
+ "logps/chosen": -30.352081298828125,
33
+ "logps/rejected": -33.31166076660156,
34
+ "loss": 0.6938,
35
+ "rewards/accuracies": 0.5078125,
36
+ "rewards/chosen": -0.003089881967753172,
37
+ "rewards/margins": -0.0008391638984903693,
38
+ "rewards/rejected": -0.0022507174871861935,
39
+ "step": 4
40
+ },
41
+ {
42
+ "epoch": 0.009648241206030151,
43
+ "grad_norm": 0.46197977662086487,
44
+ "learning_rate": 4.7619047619047623e-07,
45
+ "logits/chosen": 1.2524590492248535,
46
+ "logits/rejected": 1.3010387420654297,
47
+ "logps/chosen": -29.417587280273438,
48
+ "logps/rejected": -33.4231071472168,
49
+ "loss": 0.6936,
50
+ "rewards/accuracies": 0.4609375,
51
+ "rewards/chosen": 0.0004551531164906919,
52
+ "rewards/margins": -0.00048057991079986095,
53
+ "rewards/rejected": 0.0009357327362522483,
54
+ "step": 6
55
+ },
56
+ {
57
+ "epoch": 0.012864321608040201,
58
+ "grad_norm": 0.4546354115009308,
59
+ "learning_rate": 6.34920634920635e-07,
60
+ "logits/chosen": 1.4723472595214844,
61
+ "logits/rejected": 1.5091776847839355,
62
+ "logps/chosen": -32.08555221557617,
63
+ "logps/rejected": -34.23310089111328,
64
+ "loss": 0.6917,
65
+ "rewards/accuracies": 0.484375,
66
+ "rewards/chosen": 0.004024952184408903,
67
+ "rewards/margins": 0.0033740843646228313,
68
+ "rewards/rejected": 0.0006508685764856637,
69
+ "step": 8
70
+ },
71
+ {
72
+ "epoch": 0.016080402010050253,
73
+ "grad_norm": 0.4574313759803772,
74
+ "learning_rate": 7.936507936507937e-07,
75
+ "logits/chosen": 1.278916597366333,
76
+ "logits/rejected": 1.3133082389831543,
77
+ "logps/chosen": -31.22161102294922,
78
+ "logps/rejected": -34.931907653808594,
79
+ "loss": 0.6929,
80
+ "rewards/accuracies": 0.4453125,
81
+ "rewards/chosen": 0.0015166704542934895,
82
+ "rewards/margins": 0.0008749825647100806,
83
+ "rewards/rejected": 0.0006416881224140525,
84
+ "step": 10
85
+ },
86
+ {
87
+ "epoch": 0.019296482412060303,
88
+ "grad_norm": 0.44448181986808777,
89
+ "learning_rate": 9.523809523809525e-07,
90
+ "logits/chosen": 1.3098528385162354,
91
+ "logits/rejected": 1.3612464666366577,
92
+ "logps/chosen": -30.1417179107666,
93
+ "logps/rejected": -35.877037048339844,
94
+ "loss": 0.6958,
95
+ "rewards/accuracies": 0.4296875,
96
+ "rewards/chosen": -0.00687691755592823,
97
+ "rewards/margins": -0.004914135672152042,
98
+ "rewards/rejected": -0.001962781883776188,
99
+ "step": 12
100
+ },
101
+ {
102
+ "epoch": 0.022512562814070353,
103
+ "grad_norm": 0.4568133056163788,
104
+ "learning_rate": 1.111111111111111e-06,
105
+ "logits/chosen": 1.4617085456848145,
106
+ "logits/rejected": 1.4705244302749634,
107
+ "logps/chosen": -30.95242691040039,
108
+ "logps/rejected": -36.34254455566406,
109
+ "loss": 0.6939,
110
+ "rewards/accuracies": 0.4921875,
111
+ "rewards/chosen": -0.0029178508557379246,
112
+ "rewards/margins": -0.0011518384562805295,
113
+ "rewards/rejected": -0.001766012515872717,
114
+ "step": 14
115
+ },
116
+ {
117
+ "epoch": 0.025728643216080403,
118
+ "grad_norm": 0.4778600037097931,
119
+ "learning_rate": 1.26984126984127e-06,
120
+ "logits/chosen": 1.3174049854278564,
121
+ "logits/rejected": 1.3094675540924072,
122
+ "logps/chosen": -28.937427520751953,
123
+ "logps/rejected": -36.366676330566406,
124
+ "loss": 0.6932,
125
+ "rewards/accuracies": 0.4765625,
126
+ "rewards/chosen": -0.0009857461554929614,
127
+ "rewards/margins": 0.00013500778004527092,
128
+ "rewards/rejected": -0.001120753469876945,
129
+ "step": 16
130
+ },
131
+ {
132
+ "epoch": 0.028944723618090452,
133
+ "grad_norm": 0.421551913022995,
134
+ "learning_rate": 1.4285714285714286e-06,
135
+ "logits/chosen": 1.3308428525924683,
136
+ "logits/rejected": 1.4079976081848145,
137
+ "logps/chosen": -33.79199981689453,
138
+ "logps/rejected": -36.55875778198242,
139
+ "loss": 0.6941,
140
+ "rewards/accuracies": 0.4765625,
141
+ "rewards/chosen": -0.002126973122358322,
142
+ "rewards/margins": -0.001552498433738947,
143
+ "rewards/rejected": -0.0005744747468270361,
144
+ "step": 18
145
+ },
146
+ {
147
+ "epoch": 0.032160804020100506,
148
+ "grad_norm": 0.4264390468597412,
149
+ "learning_rate": 1.5873015873015873e-06,
150
+ "logits/chosen": 1.4721007347106934,
151
+ "logits/rejected": 1.485215663909912,
152
+ "logps/chosen": -31.09832000732422,
153
+ "logps/rejected": -36.288822174072266,
154
+ "loss": 0.6951,
155
+ "rewards/accuracies": 0.4765625,
156
+ "rewards/chosen": -0.005364666692912579,
157
+ "rewards/margins": -0.003565119579434395,
158
+ "rewards/rejected": -0.0017995464149862528,
159
+ "step": 20
160
+ },
161
+ {
162
+ "epoch": 0.03537688442211055,
163
+ "grad_norm": 0.40373197197914124,
164
+ "learning_rate": 1.746031746031746e-06,
165
+ "logits/chosen": 1.3501726388931274,
166
+ "logits/rejected": 1.370957374572754,
167
+ "logps/chosen": -31.461055755615234,
168
+ "logps/rejected": -34.38249588012695,
169
+ "loss": 0.6938,
170
+ "rewards/accuracies": 0.46875,
171
+ "rewards/chosen": -0.003749656490981579,
172
+ "rewards/margins": -0.0010161390528082848,
173
+ "rewards/rejected": -0.0027335172053426504,
174
+ "step": 22
175
+ },
176
+ {
177
+ "epoch": 0.038592964824120606,
178
+ "grad_norm": 0.4503360390663147,
179
+ "learning_rate": 1.904761904761905e-06,
180
+ "logits/chosen": 1.1474275588989258,
181
+ "logits/rejected": 1.136375904083252,
182
+ "logps/chosen": -32.1020393371582,
183
+ "logps/rejected": -35.39215087890625,
184
+ "loss": 0.695,
185
+ "rewards/accuracies": 0.4765625,
186
+ "rewards/chosen": -0.0007312107481993735,
187
+ "rewards/margins": -0.003288470208644867,
188
+ "rewards/rejected": 0.002557259751483798,
189
+ "step": 24
190
+ },
191
+ {
192
+ "epoch": 0.04180904522613065,
193
+ "grad_norm": 0.39122092723846436,
194
+ "learning_rate": 2.0634920634920634e-06,
195
+ "logits/chosen": 1.0779863595962524,
196
+ "logits/rejected": 1.0993211269378662,
197
+ "logps/chosen": -31.845603942871094,
198
+ "logps/rejected": -35.04288864135742,
199
+ "loss": 0.6949,
200
+ "rewards/accuracies": 0.46875,
201
+ "rewards/chosen": -0.0012633285950869322,
202
+ "rewards/margins": -0.0032839984633028507,
203
+ "rewards/rejected": 0.002020669635385275,
204
+ "step": 26
205
+ },
206
+ {
207
+ "epoch": 0.045025125628140705,
208
+ "grad_norm": 0.4362376630306244,
209
+ "learning_rate": 2.222222222222222e-06,
210
+ "logits/chosen": 1.4343101978302002,
211
+ "logits/rejected": 1.4324826002120972,
212
+ "logps/chosen": -29.318994522094727,
213
+ "logps/rejected": -31.928974151611328,
214
+ "loss": 0.6941,
215
+ "rewards/accuracies": 0.53125,
216
+ "rewards/chosen": -0.00553980004042387,
217
+ "rewards/margins": -0.0015490761725232005,
218
+ "rewards/rejected": -0.003990724217146635,
219
+ "step": 28
220
+ },
221
+ {
222
+ "epoch": 0.04824120603015075,
223
+ "grad_norm": 0.4269290864467621,
224
+ "learning_rate": 2.380952380952381e-06,
225
+ "logits/chosen": 1.29887056350708,
226
+ "logits/rejected": 1.3346788883209229,
227
+ "logps/chosen": -32.30498123168945,
228
+ "logps/rejected": -35.8641357421875,
229
+ "loss": 0.694,
230
+ "rewards/accuracies": 0.46875,
231
+ "rewards/chosen": -0.00222906400449574,
232
+ "rewards/margins": -0.0013898832257837057,
233
+ "rewards/rejected": -0.0008391811279579997,
234
+ "step": 30
235
+ },
236
+ {
237
+ "epoch": 0.051457286432160805,
238
+ "grad_norm": 0.4749397337436676,
239
+ "learning_rate": 2.53968253968254e-06,
240
+ "logits/chosen": 1.2239577770233154,
241
+ "logits/rejected": 1.2360297441482544,
242
+ "logps/chosen": -30.756057739257812,
243
+ "logps/rejected": -35.22294616699219,
244
+ "loss": 0.6937,
245
+ "rewards/accuracies": 0.46875,
246
+ "rewards/chosen": 0.0008076138328760862,
247
+ "rewards/margins": -0.0006099215243011713,
248
+ "rewards/rejected": 0.0014175355900079012,
249
+ "step": 32
250
+ },
251
+ {
252
+ "epoch": 0.05467336683417085,
253
+ "grad_norm": 0.41970404982566833,
254
+ "learning_rate": 2.6984126984126986e-06,
255
+ "logits/chosen": 1.4691964387893677,
256
+ "logits/rejected": 1.5019806623458862,
257
+ "logps/chosen": -30.574474334716797,
258
+ "logps/rejected": -33.60447311401367,
259
+ "loss": 0.695,
260
+ "rewards/accuracies": 0.453125,
261
+ "rewards/chosen": -0.004354957491159439,
262
+ "rewards/margins": -0.00334002822637558,
263
+ "rewards/rejected": -0.001014929497614503,
264
+ "step": 34
265
+ },
266
+ {
267
+ "epoch": 0.057889447236180905,
268
+ "grad_norm": 0.4544912278652191,
269
+ "learning_rate": 2.8571428571428573e-06,
270
+ "logits/chosen": 1.2973356246948242,
271
+ "logits/rejected": 1.3056020736694336,
272
+ "logps/chosen": -29.477462768554688,
273
+ "logps/rejected": -33.272056579589844,
274
+ "loss": 0.6925,
275
+ "rewards/accuracies": 0.5390625,
276
+ "rewards/chosen": -0.0026571941561996937,
277
+ "rewards/margins": 0.001574229565449059,
278
+ "rewards/rejected": -0.0042314231395721436,
279
+ "step": 36
280
+ },
281
+ {
282
+ "epoch": 0.06110552763819096,
283
+ "grad_norm": 0.4139344096183777,
284
+ "learning_rate": 3.015873015873016e-06,
285
+ "logits/chosen": 1.3676331043243408,
286
+ "logits/rejected": 1.4585086107254028,
287
+ "logps/chosen": -31.87004280090332,
288
+ "logps/rejected": -33.22168731689453,
289
+ "loss": 0.6944,
290
+ "rewards/accuracies": 0.4453125,
291
+ "rewards/chosen": -0.006321355234831572,
292
+ "rewards/margins": -0.0021584928035736084,
293
+ "rewards/rejected": -0.004162862431257963,
294
+ "step": 38
295
+ },
296
+ {
297
+ "epoch": 0.06432160804020101,
298
+ "grad_norm": 0.4309309422969818,
299
+ "learning_rate": 3.1746031746031746e-06,
300
+ "logits/chosen": 1.4414952993392944,
301
+ "logits/rejected": 1.4826974868774414,
302
+ "logps/chosen": -29.79780387878418,
303
+ "logps/rejected": -33.88084030151367,
304
+ "loss": 0.6933,
305
+ "rewards/accuracies": 0.4765625,
306
+ "rewards/chosen": -0.001039503258652985,
307
+ "rewards/margins": -7.016677409410477e-05,
308
+ "rewards/rejected": -0.0009693363681435585,
309
+ "step": 40
310
+ },
311
+ {
312
+ "epoch": 0.06753768844221106,
313
+ "grad_norm": 0.4449164569377899,
314
+ "learning_rate": 3.3333333333333333e-06,
315
+ "logits/chosen": 1.2832252979278564,
316
+ "logits/rejected": 1.2548108100891113,
317
+ "logps/chosen": -30.628887176513672,
318
+ "logps/rejected": -36.61481475830078,
319
+ "loss": 0.6917,
320
+ "rewards/accuracies": 0.53125,
321
+ "rewards/chosen": -0.0037453575059771538,
322
+ "rewards/margins": 0.003167451359331608,
323
+ "rewards/rejected": -0.006912808865308762,
324
+ "step": 42
325
+ },
326
+ {
327
+ "epoch": 0.0707537688442211,
328
+ "grad_norm": 0.45570507645606995,
329
+ "learning_rate": 3.492063492063492e-06,
330
+ "logits/chosen": 1.5801219940185547,
331
+ "logits/rejected": 1.5999751091003418,
332
+ "logps/chosen": -31.53429412841797,
333
+ "logps/rejected": -36.12282943725586,
334
+ "loss": 0.693,
335
+ "rewards/accuracies": 0.453125,
336
+ "rewards/chosen": -0.0012303784023970366,
337
+ "rewards/margins": 0.000611663912422955,
338
+ "rewards/rejected": -0.0018420428968966007,
339
+ "step": 44
340
+ },
341
+ {
342
+ "epoch": 0.07396984924623115,
343
+ "grad_norm": 0.44526174664497375,
344
+ "learning_rate": 3.6507936507936507e-06,
345
+ "logits/chosen": 1.2573561668395996,
346
+ "logits/rejected": 1.2938108444213867,
347
+ "logps/chosen": -30.280303955078125,
348
+ "logps/rejected": -34.33146667480469,
349
+ "loss": 0.6913,
350
+ "rewards/accuracies": 0.5,
351
+ "rewards/chosen": -0.0019697281531989574,
352
+ "rewards/margins": 0.004040538799017668,
353
+ "rewards/rejected": -0.006010266952216625,
354
+ "step": 46
355
+ },
356
+ {
357
+ "epoch": 0.07718592964824121,
358
+ "grad_norm": 1.2515525817871094,
359
+ "learning_rate": 3.80952380952381e-06,
360
+ "logits/chosen": 0.8721216917037964,
361
+ "logits/rejected": 0.8969432711601257,
362
+ "logps/chosen": -32.89268493652344,
363
+ "logps/rejected": -34.27225875854492,
364
+ "loss": 0.6897,
365
+ "rewards/accuracies": 0.59375,
366
+ "rewards/chosen": -0.0003448700299486518,
367
+ "rewards/margins": 0.0074737779796123505,
368
+ "rewards/rejected": -0.007818647660315037,
369
+ "step": 48
370
+ },
371
+ {
372
+ "epoch": 0.08040201005025126,
373
+ "grad_norm": 0.6490676999092102,
374
+ "learning_rate": 3.968253968253968e-06,
375
+ "logits/chosen": 1.3292715549468994,
376
+ "logits/rejected": 1.3583744764328003,
377
+ "logps/chosen": -27.783782958984375,
378
+ "logps/rejected": -34.58745574951172,
379
+ "loss": 0.6882,
380
+ "rewards/accuracies": 0.625,
381
+ "rewards/chosen": 0.0020002706442028284,
382
+ "rewards/margins": 0.010215929709374905,
383
+ "rewards/rejected": -0.00821565929800272,
384
+ "step": 50
385
+ },
386
+ {
387
+ "epoch": 0.08040201005025126,
388
+ "eval_logits/chosen": 0.5501546263694763,
389
+ "eval_logits/rejected": 0.5580804944038391,
390
+ "eval_logps/chosen": -30.54437255859375,
391
+ "eval_logps/rejected": -35.43931198120117,
392
+ "eval_loss": 0.6898252964019775,
393
+ "eval_rewards/accuracies": 0.5349999666213989,
394
+ "eval_rewards/chosen": -0.0043669044971466064,
395
+ "eval_rewards/margins": 0.0071200719103217125,
396
+ "eval_rewards/rejected": -0.011486975476145744,
397
+ "eval_runtime": 48.2016,
398
+ "eval_samples_per_second": 4.149,
399
+ "eval_steps_per_second": 1.037,
400
+ "step": 50
401
+ },
402
+ {
403
+ "epoch": 0.0836180904522613,
404
+ "grad_norm": 0.4490942358970642,
405
+ "learning_rate": 4.126984126984127e-06,
406
+ "logits/chosen": 1.4922609329223633,
407
+ "logits/rejected": 1.5168870687484741,
408
+ "logps/chosen": -30.933542251586914,
409
+ "logps/rejected": -32.83856964111328,
410
+ "loss": 0.6887,
411
+ "rewards/accuracies": 0.5859375,
412
+ "rewards/chosen": -0.0028348369523882866,
413
+ "rewards/margins": 0.009395316243171692,
414
+ "rewards/rejected": -0.012230154126882553,
415
+ "step": 52
416
+ },
417
+ {
418
+ "epoch": 0.08683417085427135,
419
+ "grad_norm": 0.4455580413341522,
420
+ "learning_rate": 4.2857142857142855e-06,
421
+ "logits/chosen": 1.2956047058105469,
422
+ "logits/rejected": 1.3195202350616455,
423
+ "logps/chosen": -33.25441360473633,
424
+ "logps/rejected": -34.96155548095703,
425
+ "loss": 0.6888,
426
+ "rewards/accuracies": 0.609375,
427
+ "rewards/chosen": -0.011137772351503372,
428
+ "rewards/margins": 0.009241417050361633,
429
+ "rewards/rejected": -0.020379189401865005,
430
+ "step": 54
431
+ },
432
+ {
433
+ "epoch": 0.09005025125628141,
434
+ "grad_norm": 0.46649816632270813,
435
+ "learning_rate": 4.444444444444444e-06,
436
+ "logits/chosen": 1.442166805267334,
437
+ "logits/rejected": 1.4832069873809814,
438
+ "logps/chosen": -30.720643997192383,
439
+ "logps/rejected": -34.70309829711914,
440
+ "loss": 0.6936,
441
+ "rewards/accuracies": 0.5,
442
+ "rewards/chosen": -0.010124221444129944,
443
+ "rewards/margins": -0.0002470978070050478,
444
+ "rewards/rejected": -0.009877122938632965,
445
+ "step": 56
446
+ },
447
+ {
448
+ "epoch": 0.09326633165829146,
449
+ "grad_norm": 0.467673122882843,
450
+ "learning_rate": 4.603174603174604e-06,
451
+ "logits/chosen": 1.3763530254364014,
452
+ "logits/rejected": 1.4356203079223633,
453
+ "logps/chosen": -31.76504135131836,
454
+ "logps/rejected": -33.25387954711914,
455
+ "loss": 0.6874,
456
+ "rewards/accuracies": 0.5546875,
457
+ "rewards/chosen": -0.008322305977344513,
458
+ "rewards/margins": 0.012220000848174095,
459
+ "rewards/rejected": -0.020542306825518608,
460
+ "step": 58
461
+ },
462
+ {
463
+ "epoch": 0.0964824120603015,
464
+ "grad_norm": 0.5383568406105042,
465
+ "learning_rate": 4.761904761904762e-06,
466
+ "logits/chosen": 1.2813664674758911,
467
+ "logits/rejected": 1.3347243070602417,
468
+ "logps/chosen": -33.40138626098633,
469
+ "logps/rejected": -34.50080871582031,
470
+ "loss": 0.6869,
471
+ "rewards/accuracies": 0.5859375,
472
+ "rewards/chosen": -0.014261961914598942,
473
+ "rewards/margins": 0.013282392174005508,
474
+ "rewards/rejected": -0.027544351294636726,
475
+ "step": 60
476
+ },
477
+ {
478
+ "epoch": 0.09969849246231156,
479
+ "grad_norm": 0.4995841383934021,
480
+ "learning_rate": 4.920634920634921e-06,
481
+ "logits/chosen": 1.3010649681091309,
482
+ "logits/rejected": 1.3390882015228271,
483
+ "logps/chosen": -29.836774826049805,
484
+ "logps/rejected": -34.70252990722656,
485
+ "loss": 0.6843,
486
+ "rewards/accuracies": 0.625,
487
+ "rewards/chosen": -0.010679999366402626,
488
+ "rewards/margins": 0.018656719475984573,
489
+ "rewards/rejected": -0.0293367151170969,
490
+ "step": 62
491
+ },
492
+ {
493
+ "epoch": 0.10291457286432161,
494
+ "grad_norm": 0.4841148555278778,
495
+ "learning_rate": 4.999960377651517e-06,
496
+ "logits/chosen": 1.422184705734253,
497
+ "logits/rejected": 1.4505563974380493,
498
+ "logps/chosen": -34.65579605102539,
499
+ "logps/rejected": -33.47809600830078,
500
+ "loss": 0.6897,
501
+ "rewards/accuracies": 0.5390625,
502
+ "rewards/chosen": -0.01736070215702057,
503
+ "rewards/margins": 0.007918944582343102,
504
+ "rewards/rejected": -0.02527964487671852,
505
+ "step": 64
506
+ },
507
+ {
508
+ "epoch": 0.10613065326633166,
509
+ "grad_norm": 0.5981670022010803,
510
+ "learning_rate": 4.999643406399275e-06,
511
+ "logits/chosen": 1.1232147216796875,
512
+ "logits/rejected": 1.145438313484192,
513
+ "logps/chosen": -29.947956085205078,
514
+ "logps/rejected": -33.95368194580078,
515
+ "loss": 0.6838,
516
+ "rewards/accuracies": 0.5703125,
517
+ "rewards/chosen": -0.018678227439522743,
518
+ "rewards/margins": 0.019824998453259468,
519
+ "rewards/rejected": -0.03850322589278221,
520
+ "step": 66
521
+ },
522
+ {
523
+ "epoch": 0.1093467336683417,
524
+ "grad_norm": 0.571692705154419,
525
+ "learning_rate": 4.999009504083738e-06,
526
+ "logits/chosen": 1.3420298099517822,
527
+ "logits/rejected": 1.338175654411316,
528
+ "logps/chosen": -30.924522399902344,
529
+ "logps/rejected": -37.51594161987305,
530
+ "loss": 0.6828,
531
+ "rewards/accuracies": 0.609375,
532
+ "rewards/chosen": -0.020221160724759102,
533
+ "rewards/margins": 0.02198150008916855,
534
+ "rewards/rejected": -0.0422026626765728,
535
+ "step": 68
536
+ },
537
+ {
538
+ "epoch": 0.11256281407035176,
539
+ "grad_norm": 0.5207106471061707,
540
+ "learning_rate": 4.998058751077705e-06,
541
+ "logits/chosen": 1.3332023620605469,
542
+ "logits/rejected": 1.3814537525177002,
543
+ "logps/chosen": -31.7532958984375,
544
+ "logps/rejected": -34.753013610839844,
545
+ "loss": 0.6822,
546
+ "rewards/accuracies": 0.59375,
547
+ "rewards/chosen": -0.031598228961229324,
548
+ "rewards/margins": 0.023511648178100586,
549
+ "rewards/rejected": -0.05510988086462021,
550
+ "step": 70
551
+ },
552
+ {
553
+ "epoch": 0.11577889447236181,
554
+ "grad_norm": 0.47247663140296936,
555
+ "learning_rate": 4.996791267927632e-06,
556
+ "logits/chosen": 1.3693702220916748,
557
+ "logits/rejected": 1.3828941583633423,
558
+ "logps/chosen": -30.8132266998291,
559
+ "logps/rejected": -35.77416229248047,
560
+ "loss": 0.6827,
561
+ "rewards/accuracies": 0.6015625,
562
+ "rewards/chosen": -0.030194712802767754,
563
+ "rewards/margins": 0.022484488785266876,
564
+ "rewards/rejected": -0.05267919972538948,
565
+ "step": 72
566
+ },
567
+ {
568
+ "epoch": 0.11899497487437186,
569
+ "grad_norm": 0.48132631182670593,
570
+ "learning_rate": 4.995207215338358e-06,
571
+ "logits/chosen": 1.490431785583496,
572
+ "logits/rejected": 1.5298147201538086,
573
+ "logps/chosen": -34.107643127441406,
574
+ "logps/rejected": -34.63951110839844,
575
+ "loss": 0.687,
576
+ "rewards/accuracies": 0.515625,
577
+ "rewards/chosen": -0.036115553230047226,
578
+ "rewards/margins": 0.014621476642787457,
579
+ "rewards/rejected": -0.05073703080415726,
580
+ "step": 74
581
+ },
582
+ {
583
+ "epoch": 0.12221105527638192,
584
+ "grad_norm": 0.5402019619941711,
585
+ "learning_rate": 4.9933067941527175e-06,
586
+ "logits/chosen": 1.3514180183410645,
587
+ "logits/rejected": 1.3422389030456543,
588
+ "logps/chosen": -36.069549560546875,
589
+ "logps/rejected": -33.26152038574219,
590
+ "loss": 0.6841,
591
+ "rewards/accuracies": 0.5625,
592
+ "rewards/chosen": -0.04465916007757187,
593
+ "rewards/margins": 0.02099396474659443,
594
+ "rewards/rejected": -0.06565312296152115,
595
+ "step": 76
596
+ },
597
+ {
598
+ "epoch": 0.12542713567839195,
599
+ "grad_norm": 0.5146540999412537,
600
+ "learning_rate": 4.991090245326083e-06,
601
+ "logits/chosen": 1.0872302055358887,
602
+ "logits/rejected": 1.1015247106552124,
603
+ "logps/chosen": -29.906904220581055,
604
+ "logps/rejected": -38.96195983886719,
605
+ "loss": 0.6758,
606
+ "rewards/accuracies": 0.609375,
607
+ "rewards/chosen": -0.038489095866680145,
608
+ "rewards/margins": 0.037541136145591736,
609
+ "rewards/rejected": -0.07603023201227188,
610
+ "step": 78
611
+ },
612
+ {
613
+ "epoch": 0.12864321608040202,
614
+ "grad_norm": 0.5551427602767944,
615
+ "learning_rate": 4.988557849895811e-06,
616
+ "logits/chosen": 1.3899340629577637,
617
+ "logits/rejected": 1.4685105085372925,
618
+ "logps/chosen": -34.21710205078125,
619
+ "logps/rejected": -38.152099609375,
620
+ "loss": 0.6695,
621
+ "rewards/accuracies": 0.65625,
622
+ "rewards/chosen": -0.03813210129737854,
623
+ "rewards/margins": 0.05099986493587494,
624
+ "rewards/rejected": -0.08913196623325348,
625
+ "step": 80
626
+ },
627
+ {
628
+ "epoch": 0.13185929648241207,
629
+ "grad_norm": 0.5468294620513916,
630
+ "learning_rate": 4.9857099289456115e-06,
631
+ "logits/chosen": 1.4150495529174805,
632
+ "logits/rejected": 1.4033807516098022,
633
+ "logps/chosen": -30.979236602783203,
634
+ "logps/rejected": -36.2646484375,
635
+ "loss": 0.6759,
636
+ "rewards/accuracies": 0.6484375,
637
+ "rewards/chosen": -0.04814925417304039,
638
+ "rewards/margins": 0.0380784347653389,
639
+ "rewards/rejected": -0.08622768521308899,
640
+ "step": 82
641
+ },
642
+ {
643
+ "epoch": 0.13507537688442212,
644
+ "grad_norm": 0.47936275601387024,
645
+ "learning_rate": 4.9825468435648345e-06,
646
+ "logits/chosen": 1.1383363008499146,
647
+ "logits/rejected": 1.1823409795761108,
648
+ "logps/chosen": -29.871156692504883,
649
+ "logps/rejected": -32.21099090576172,
650
+ "loss": 0.6789,
651
+ "rewards/accuracies": 0.578125,
652
+ "rewards/chosen": -0.04363500326871872,
653
+ "rewards/margins": 0.03290008008480072,
654
+ "rewards/rejected": -0.07653507590293884,
655
+ "step": 84
656
+ },
657
+ {
658
+ "epoch": 0.13829145728643216,
659
+ "grad_norm": 0.48245131969451904,
660
+ "learning_rate": 4.97906899480269e-06,
661
+ "logits/chosen": 1.4419957399368286,
662
+ "logits/rejected": 1.475970983505249,
663
+ "logps/chosen": -30.77682113647461,
664
+ "logps/rejected": -35.76774215698242,
665
+ "loss": 0.6698,
666
+ "rewards/accuracies": 0.6484375,
667
+ "rewards/chosen": -0.04197033494710922,
668
+ "rewards/margins": 0.05154535919427872,
669
+ "rewards/rejected": -0.09351569414138794,
670
+ "step": 86
671
+ },
672
+ {
673
+ "epoch": 0.1415075376884422,
674
+ "grad_norm": 0.5745317935943604,
675
+ "learning_rate": 4.975276823617399e-06,
676
+ "logits/chosen": 1.4063524007797241,
677
+ "logits/rejected": 1.439096450805664,
678
+ "logps/chosen": -29.64651870727539,
679
+ "logps/rejected": -35.58737564086914,
680
+ "loss": 0.6644,
681
+ "rewards/accuracies": 0.6484375,
682
+ "rewards/chosen": -0.045151449739933014,
683
+ "rewards/margins": 0.06304246932268143,
684
+ "rewards/rejected": -0.10819391906261444,
685
+ "step": 88
686
+ },
687
+ {
688
+ "epoch": 0.14472361809045226,
689
+ "grad_norm": 0.5324569344520569,
690
+ "learning_rate": 4.971170810820279e-06,
691
+ "logits/chosen": 1.3591543436050415,
692
+ "logits/rejected": 1.3323087692260742,
693
+ "logps/chosen": -34.168128967285156,
694
+ "logps/rejected": -37.837242126464844,
695
+ "loss": 0.6722,
696
+ "rewards/accuracies": 0.640625,
697
+ "rewards/chosen": -0.05879779905080795,
698
+ "rewards/margins": 0.04740719497203827,
699
+ "rewards/rejected": -0.10620499402284622,
700
+ "step": 90
701
+ },
702
+ {
703
+ "epoch": 0.1479396984924623,
704
+ "grad_norm": 0.6153348684310913,
705
+ "learning_rate": 4.96675147701479e-06,
706
+ "logits/chosen": 1.3236520290374756,
707
+ "logits/rejected": 1.3638286590576172,
708
+ "logps/chosen": -33.26496887207031,
709
+ "logps/rejected": -35.09466552734375,
710
+ "loss": 0.678,
711
+ "rewards/accuracies": 0.609375,
712
+ "rewards/chosen": -0.07028309255838394,
713
+ "rewards/margins": 0.037059590220451355,
714
+ "rewards/rejected": -0.1073426827788353,
715
+ "step": 92
716
+ },
717
+ {
718
+ "epoch": 0.15115577889447235,
719
+ "grad_norm": 0.6804412603378296,
720
+ "learning_rate": 4.962019382530521e-06,
721
+ "logits/chosen": 1.1772613525390625,
722
+ "logits/rejected": 1.2550945281982422,
723
+ "logps/chosen": -34.816246032714844,
724
+ "logps/rejected": -34.04191970825195,
725
+ "loss": 0.676,
726
+ "rewards/accuracies": 0.5859375,
727
+ "rewards/chosen": -0.0760345533490181,
728
+ "rewards/margins": 0.041960567235946655,
729
+ "rewards/rejected": -0.11799512058496475,
730
+ "step": 94
731
+ },
732
+ {
733
+ "epoch": 0.15437185929648242,
734
+ "grad_norm": 0.5287646055221558,
735
+ "learning_rate": 4.956975127352146e-06,
736
+ "logits/chosen": 1.1335078477859497,
737
+ "logits/rejected": 1.0898680686950684,
738
+ "logps/chosen": -32.298492431640625,
739
+ "logps/rejected": -38.93742370605469,
740
+ "loss": 0.6661,
741
+ "rewards/accuracies": 0.6328125,
742
+ "rewards/chosen": -0.07041005790233612,
743
+ "rewards/margins": 0.06344542652368546,
744
+ "rewards/rejected": -0.13385546207427979,
745
+ "step": 96
746
+ },
747
+ {
748
+ "epoch": 0.15758793969849247,
749
+ "grad_norm": 0.5636870265007019,
750
+ "learning_rate": 4.951619351043353e-06,
751
+ "logits/chosen": 1.3233567476272583,
752
+ "logits/rejected": 1.356798768043518,
753
+ "logps/chosen": -31.532367706298828,
754
+ "logps/rejected": -33.19044494628906,
755
+ "loss": 0.6698,
756
+ "rewards/accuracies": 0.6015625,
757
+ "rewards/chosen": -0.08084239810705185,
758
+ "rewards/margins": 0.05414910987019539,
759
+ "rewards/rejected": -0.13499149680137634,
760
+ "step": 98
761
+ },
762
+ {
763
+ "epoch": 0.16080402010050251,
764
+ "grad_norm": 0.5570967197418213,
765
+ "learning_rate": 4.945952732665755e-06,
766
+ "logits/chosen": 1.301795244216919,
767
+ "logits/rejected": 1.3351643085479736,
768
+ "logps/chosen": -32.771270751953125,
769
+ "logps/rejected": -41.387725830078125,
770
+ "loss": 0.6603,
771
+ "rewards/accuracies": 0.6015625,
772
+ "rewards/chosen": -0.07904825359582901,
773
+ "rewards/margins": 0.07616038620471954,
774
+ "rewards/rejected": -0.15520863234996796,
775
+ "step": 100
776
+ },
777
+ {
778
+ "epoch": 0.16080402010050251,
779
+ "eval_logits/chosen": 0.5383610725402832,
780
+ "eval_logits/rejected": 0.544625997543335,
781
+ "eval_logps/chosen": -31.30695343017578,
782
+ "eval_logps/rejected": -36.894073486328125,
783
+ "eval_loss": 0.6592575311660767,
784
+ "eval_rewards/accuracies": 0.6850000023841858,
785
+ "eval_rewards/chosen": -0.08062513917684555,
786
+ "eval_rewards/margins": 0.07633791118860245,
787
+ "eval_rewards/rejected": -0.156963050365448,
788
+ "eval_runtime": 48.1277,
789
+ "eval_samples_per_second": 4.156,
790
+ "eval_steps_per_second": 1.039,
791
+ "step": 100
792
+ },
793
+ {
794
+ "epoch": 0.16402010050251256,
795
+ "grad_norm": 0.56045001745224,
796
+ "learning_rate": 4.939975990692789e-06,
797
+ "logits/chosen": 1.2697747945785522,
798
+ "logits/rejected": 1.2618439197540283,
799
+ "logps/chosen": -32.927146911621094,
800
+ "logps/rejected": -35.013206481933594,
801
+ "loss": 0.6671,
802
+ "rewards/accuracies": 0.65625,
803
+ "rewards/chosen": -0.08042748272418976,
804
+ "rewards/margins": 0.060588739812374115,
805
+ "rewards/rejected": -0.14101621508598328,
806
+ "step": 102
807
+ },
808
+ {
809
+ "epoch": 0.1672361809045226,
810
+ "grad_norm": 0.5901036858558655,
811
+ "learning_rate": 4.9336898829186185e-06,
812
+ "logits/chosen": 1.0983755588531494,
813
+ "logits/rejected": 1.1659495830535889,
814
+ "logps/chosen": -34.55509567260742,
815
+ "logps/rejected": -37.56676483154297,
816
+ "loss": 0.6575,
817
+ "rewards/accuracies": 0.671875,
818
+ "rewards/chosen": -0.08760575950145721,
819
+ "rewards/margins": 0.08133453875780106,
820
+ "rewards/rejected": -0.16894027590751648,
821
+ "step": 104
822
+ },
823
+ {
824
+ "epoch": 0.17045226130653265,
825
+ "grad_norm": 0.7394624352455139,
826
+ "learning_rate": 4.927095206362057e-06,
827
+ "logits/chosen": 1.1626837253570557,
828
+ "logits/rejected": 1.1353670358657837,
829
+ "logps/chosen": -33.66203308105469,
830
+ "logps/rejected": -35.955352783203125,
831
+ "loss": 0.6693,
832
+ "rewards/accuracies": 0.6328125,
833
+ "rewards/chosen": -0.10034358501434326,
834
+ "rewards/margins": 0.05721643567085266,
835
+ "rewards/rejected": -0.15756002068519592,
836
+ "step": 106
837
+ },
838
+ {
839
+ "epoch": 0.1736683417085427,
840
+ "grad_norm": 0.7603115439414978,
841
+ "learning_rate": 4.920192797165511e-06,
842
+ "logits/chosen": 1.2644569873809814,
843
+ "logits/rejected": 1.2957494258880615,
844
+ "logps/chosen": -33.66388702392578,
845
+ "logps/rejected": -33.66148376464844,
846
+ "loss": 0.6784,
847
+ "rewards/accuracies": 0.5703125,
848
+ "rewards/chosen": -0.11911429464817047,
849
+ "rewards/margins": 0.039303235709667206,
850
+ "rewards/rejected": -0.15841752290725708,
851
+ "step": 108
852
+ },
853
+ {
854
+ "epoch": 0.17688442211055277,
855
+ "grad_norm": 0.5353249907493591,
856
+ "learning_rate": 4.912983530488966e-06,
857
+ "logits/chosen": 1.4405113458633423,
858
+ "logits/rejected": 1.4680776596069336,
859
+ "logps/chosen": -32.20611572265625,
860
+ "logps/rejected": -35.25544738769531,
861
+ "loss": 0.6688,
862
+ "rewards/accuracies": 0.609375,
863
+ "rewards/chosen": -0.1222277581691742,
864
+ "rewards/margins": 0.056810542941093445,
865
+ "rewards/rejected": -0.17903831601142883,
866
+ "step": 110
867
+ },
868
+ {
869
+ "epoch": 0.18010050251256282,
870
+ "grad_norm": 0.7354335784912109,
871
+ "learning_rate": 4.905468320399023e-06,
872
+ "logits/chosen": 1.2397936582565308,
873
+ "logits/rejected": 1.2630605697631836,
874
+ "logps/chosen": -29.642383575439453,
875
+ "logps/rejected": -37.407413482666016,
876
+ "loss": 0.6483,
877
+ "rewards/accuracies": 0.71875,
878
+ "rewards/chosen": -0.09827758371829987,
879
+ "rewards/margins": 0.1014958843588829,
880
+ "rewards/rejected": -0.19977346062660217,
881
+ "step": 112
882
+ },
883
+ {
884
+ "epoch": 0.18331658291457287,
885
+ "grad_norm": 0.6473943591117859,
886
+ "learning_rate": 4.897648119753006e-06,
887
+ "logits/chosen": 1.752604603767395,
888
+ "logits/rejected": 1.7992733716964722,
889
+ "logps/chosen": -34.81816101074219,
890
+ "logps/rejected": -34.017303466796875,
891
+ "loss": 0.6765,
892
+ "rewards/accuracies": 0.5546875,
893
+ "rewards/chosen": -0.11875072121620178,
894
+ "rewards/margins": 0.04180861636996269,
895
+ "rewards/rejected": -0.16055932641029358,
896
+ "step": 114
897
+ },
898
+ {
899
+ "epoch": 0.18653266331658291,
900
+ "grad_norm": 0.7461528778076172,
901
+ "learning_rate": 4.889523920078144e-06,
902
+ "logits/chosen": 1.528036117553711,
903
+ "logits/rejected": 1.5142472982406616,
904
+ "logps/chosen": -29.153812408447266,
905
+ "logps/rejected": -37.897003173828125,
906
+ "loss": 0.6421,
907
+ "rewards/accuracies": 0.7265625,
908
+ "rewards/chosen": -0.11313571035861969,
909
+ "rewards/margins": 0.1143951416015625,
910
+ "rewards/rejected": -0.227530837059021,
911
+ "step": 116
912
+ },
913
+ {
914
+ "epoch": 0.18974874371859296,
915
+ "grad_norm": 0.6040567755699158,
916
+ "learning_rate": 4.881096751445863e-06,
917
+ "logits/chosen": 1.0826690196990967,
918
+ "logits/rejected": 1.132314682006836,
919
+ "logps/chosen": -34.31199645996094,
920
+ "logps/rejected": -37.10968017578125,
921
+ "loss": 0.649,
922
+ "rewards/accuracies": 0.6875,
923
+ "rewards/chosen": -0.11125093698501587,
924
+ "rewards/margins": 0.09869204461574554,
925
+ "rewards/rejected": -0.2099429816007614,
926
+ "step": 118
927
+ },
928
+ {
929
+ "epoch": 0.192964824120603,
930
+ "grad_norm": 0.6605610251426697,
931
+ "learning_rate": 4.8723676823411734e-06,
932
+ "logits/chosen": 1.3900730609893799,
933
+ "logits/rejected": 1.399127721786499,
934
+ "logps/chosen": -31.501989364624023,
935
+ "logps/rejected": -38.05152893066406,
936
+ "loss": 0.6518,
937
+ "rewards/accuracies": 0.7265625,
938
+ "rewards/chosen": -0.12589490413665771,
939
+ "rewards/margins": 0.09352517873048782,
940
+ "rewards/rejected": -0.21942007541656494,
941
+ "step": 120
942
+ },
943
+ {
944
+ "epoch": 0.19618090452261305,
945
+ "grad_norm": 0.7710764408111572,
946
+ "learning_rate": 4.8633378195272015e-06,
947
+ "logits/chosen": 1.6575126647949219,
948
+ "logits/rejected": 1.6922370195388794,
949
+ "logps/chosen": -31.622278213500977,
950
+ "logps/rejected": -34.734527587890625,
951
+ "loss": 0.6565,
952
+ "rewards/accuracies": 0.65625,
953
+ "rewards/chosen": -0.12990064918994904,
954
+ "rewards/margins": 0.08599180728197098,
955
+ "rewards/rejected": -0.2158924639225006,
956
+ "step": 122
957
+ },
958
+ {
959
+ "epoch": 0.19939698492462313,
960
+ "grad_norm": 0.5700326561927795,
961
+ "learning_rate": 4.854008307904865e-06,
962
+ "logits/chosen": 1.2142469882965088,
963
+ "logits/rejected": 1.2496553659439087,
964
+ "logps/chosen": -31.798629760742188,
965
+ "logps/rejected": -38.08250045776367,
966
+ "loss": 0.6458,
967
+ "rewards/accuracies": 0.6875,
968
+ "rewards/chosen": -0.11889669299125671,
969
+ "rewards/margins": 0.10755539685487747,
970
+ "rewards/rejected": -0.22645209729671478,
971
+ "step": 124
972
+ },
973
+ {
974
+ "epoch": 0.20261306532663317,
975
+ "grad_norm": 0.7354090213775635,
976
+ "learning_rate": 4.844380330367701e-06,
977
+ "logits/chosen": 1.4388645887374878,
978
+ "logits/rejected": 1.4413161277770996,
979
+ "logps/chosen": -28.97043228149414,
980
+ "logps/rejected": -37.475364685058594,
981
+ "loss": 0.6321,
982
+ "rewards/accuracies": 0.7421875,
983
+ "rewards/chosen": -0.10781995952129364,
984
+ "rewards/margins": 0.136630117893219,
985
+ "rewards/rejected": -0.24445009231567383,
986
+ "step": 126
987
+ },
988
+ {
989
+ "epoch": 0.20582914572864322,
990
+ "grad_norm": 0.6026410460472107,
991
+ "learning_rate": 4.8344551076518985e-06,
992
+ "logits/chosen": 1.167945146560669,
993
+ "logits/rejected": 1.181955099105835,
994
+ "logps/chosen": -33.69917297363281,
995
+ "logps/rejected": -41.300926208496094,
996
+ "loss": 0.6472,
997
+ "rewards/accuracies": 0.6484375,
998
+ "rewards/chosen": -0.11048279702663422,
999
+ "rewards/margins": 0.10788710415363312,
1000
+ "rewards/rejected": -0.21836990118026733,
1001
+ "step": 128
1002
+ },
1003
+ {
1004
+ "epoch": 0.20904522613065327,
1005
+ "grad_norm": 0.6147257089614868,
1006
+ "learning_rate": 4.824233898181509e-06,
1007
+ "logits/chosen": 1.3260626792907715,
1008
+ "logits/rejected": 1.3723877668380737,
1009
+ "logps/chosen": -32.34976577758789,
1010
+ "logps/rejected": -36.80198669433594,
1011
+ "loss": 0.6356,
1012
+ "rewards/accuracies": 0.6875,
1013
+ "rewards/chosen": -0.13550475239753723,
1014
+ "rewards/margins": 0.13105657696723938,
1015
+ "rewards/rejected": -0.2665613293647766,
1016
+ "step": 130
1017
+ },
1018
+ {
1019
+ "epoch": 0.2122613065326633,
1020
+ "grad_norm": 0.8525822162628174,
1021
+ "learning_rate": 4.8137179979088995e-06,
1022
+ "logits/chosen": 1.1474467515945435,
1023
+ "logits/rejected": 1.179036259651184,
1024
+ "logps/chosen": -32.16166305541992,
1025
+ "logps/rejected": -35.32681655883789,
1026
+ "loss": 0.6498,
1027
+ "rewards/accuracies": 0.6171875,
1028
+ "rewards/chosen": -0.1576756238937378,
1029
+ "rewards/margins": 0.10601034015417099,
1030
+ "rewards/rejected": -0.2636859714984894,
1031
+ "step": 132
1032
+ },
1033
+ {
1034
+ "epoch": 0.21547738693467336,
1035
+ "grad_norm": 0.6118230223655701,
1036
+ "learning_rate": 4.8029087401504315e-06,
1037
+ "logits/chosen": 1.3549935817718506,
1038
+ "logits/rejected": 1.4064905643463135,
1039
+ "logps/chosen": -32.3475456237793,
1040
+ "logps/rejected": -36.25346374511719,
1041
+ "loss": 0.635,
1042
+ "rewards/accuracies": 0.7265625,
1043
+ "rewards/chosen": -0.11831659078598022,
1044
+ "rewards/margins": 0.13686415553092957,
1045
+ "rewards/rejected": -0.2551807463169098,
1046
+ "step": 134
1047
+ },
1048
+ {
1049
+ "epoch": 0.2186934673366834,
1050
+ "grad_norm": 0.6388143301010132,
1051
+ "learning_rate": 4.791807495417415e-06,
1052
+ "logits/chosen": 1.3601171970367432,
1053
+ "logits/rejected": 1.3299559354782104,
1054
+ "logps/chosen": -33.817562103271484,
1055
+ "logps/rejected": -42.62956237792969,
1056
+ "loss": 0.6258,
1057
+ "rewards/accuracies": 0.7265625,
1058
+ "rewards/chosen": -0.1584807187318802,
1059
+ "rewards/margins": 0.1535823941230774,
1060
+ "rewards/rejected": -0.3120630979537964,
1061
+ "step": 136
1062
+ },
1063
+ {
1064
+ "epoch": 0.22190954773869348,
1065
+ "grad_norm": 0.6631596088409424,
1066
+ "learning_rate": 4.780415671242334e-06,
1067
+ "logits/chosen": 1.1997196674346924,
1068
+ "logits/rejected": 1.24537193775177,
1069
+ "logps/chosen": -31.997718811035156,
1070
+ "logps/rejected": -38.69951248168945,
1071
+ "loss": 0.6454,
1072
+ "rewards/accuracies": 0.703125,
1073
+ "rewards/chosen": -0.1721266806125641,
1074
+ "rewards/margins": 0.11670082062482834,
1075
+ "rewards/rejected": -0.288827508687973,
1076
+ "step": 138
1077
+ },
1078
+ {
1079
+ "epoch": 0.22512562814070353,
1080
+ "grad_norm": 1.039876103401184,
1081
+ "learning_rate": 4.76873471200039e-06,
1082
+ "logits/chosen": 1.4914782047271729,
1083
+ "logits/rejected": 1.5159029960632324,
1084
+ "logps/chosen": -31.84731101989746,
1085
+ "logps/rejected": -38.26771545410156,
1086
+ "loss": 0.6436,
1087
+ "rewards/accuracies": 0.703125,
1088
+ "rewards/chosen": -0.16641682386398315,
1089
+ "rewards/margins": 0.12111803144216537,
1090
+ "rewards/rejected": -0.2875348627567291,
1091
+ "step": 140
1092
+ },
1093
+ {
1094
+ "epoch": 0.22834170854271357,
1095
+ "grad_norm": 0.7036734223365784,
1096
+ "learning_rate": 4.756766098726368e-06,
1097
+ "logits/chosen": 1.2412619590759277,
1098
+ "logits/rejected": 1.2428562641143799,
1099
+ "logps/chosen": -32.30570602416992,
1100
+ "logps/rejected": -37.51960754394531,
1101
+ "loss": 0.6218,
1102
+ "rewards/accuracies": 0.734375,
1103
+ "rewards/chosen": -0.15528711676597595,
1104
+ "rewards/margins": 0.1665349304676056,
1105
+ "rewards/rejected": -0.32182204723358154,
1106
+ "step": 142
1107
+ },
1108
+ {
1109
+ "epoch": 0.23155778894472362,
1110
+ "grad_norm": 0.7187156081199646,
1111
+ "learning_rate": 4.744511348926855e-06,
1112
+ "logits/chosen": 1.284571647644043,
1113
+ "logits/rejected": 1.3178225755691528,
1114
+ "logps/chosen": -31.669775009155273,
1115
+ "logps/rejected": -37.961936950683594,
1116
+ "loss": 0.6329,
1117
+ "rewards/accuracies": 0.6328125,
1118
+ "rewards/chosen": -0.19643092155456543,
1119
+ "rewards/margins": 0.15072323381900787,
1120
+ "rewards/rejected": -0.3471541404724121,
1121
+ "step": 144
1122
+ },
1123
+ {
1124
+ "epoch": 0.23477386934673367,
1125
+ "grad_norm": 0.8957286477088928,
1126
+ "learning_rate": 4.73197201638783e-06,
1127
+ "logits/chosen": 1.2843809127807617,
1128
+ "logits/rejected": 1.2913951873779297,
1129
+ "logps/chosen": -33.99455261230469,
1130
+ "logps/rejected": -38.29543685913086,
1131
+ "loss": 0.6243,
1132
+ "rewards/accuracies": 0.734375,
1133
+ "rewards/chosen": -0.16770941019058228,
1134
+ "rewards/margins": 0.16542242467403412,
1135
+ "rewards/rejected": -0.3331318199634552,
1136
+ "step": 146
1137
+ },
1138
+ {
1139
+ "epoch": 0.2379899497487437,
1140
+ "grad_norm": 0.6802579164505005,
1141
+ "learning_rate": 4.7191496909776665e-06,
1142
+ "logits/chosen": 1.3031625747680664,
1143
+ "logits/rejected": 1.318772554397583,
1144
+ "logps/chosen": -31.817935943603516,
1145
+ "logps/rejected": -38.92062759399414,
1146
+ "loss": 0.6381,
1147
+ "rewards/accuracies": 0.65625,
1148
+ "rewards/chosen": -0.16274359822273254,
1149
+ "rewards/margins": 0.13347209990024567,
1150
+ "rewards/rejected": -0.2962157130241394,
1151
+ "step": 148
1152
+ },
1153
+ {
1154
+ "epoch": 0.24120603015075376,
1155
+ "grad_norm": 0.8087608218193054,
1156
+ "learning_rate": 4.706045998445548e-06,
1157
+ "logits/chosen": 1.1990268230438232,
1158
+ "logits/rejected": 1.19586181640625,
1159
+ "logps/chosen": -35.0596809387207,
1160
+ "logps/rejected": -38.830718994140625,
1161
+ "loss": 0.6387,
1162
+ "rewards/accuracies": 0.6484375,
1163
+ "rewards/chosen": -0.20017872750759125,
1164
+ "rewards/margins": 0.1404918134212494,
1165
+ "rewards/rejected": -0.3406705856323242,
1166
+ "step": 150
1167
+ },
1168
+ {
1169
+ "epoch": 0.24120603015075376,
1170
+ "eval_logits/chosen": 0.505786657333374,
1171
+ "eval_logits/rejected": 0.5080003142356873,
1172
+ "eval_logps/chosen": -32.41748046875,
1173
+ "eval_logps/rejected": -38.779293060302734,
1174
+ "eval_loss": 0.6298141479492188,
1175
+ "eval_rewards/accuracies": 0.7249999642372131,
1176
+ "eval_rewards/chosen": -0.19167786836624146,
1177
+ "eval_rewards/margins": 0.15380695462226868,
1178
+ "eval_rewards/rejected": -0.3454848527908325,
1179
+ "eval_runtime": 48.1694,
1180
+ "eval_samples_per_second": 4.152,
1181
+ "eval_steps_per_second": 1.038,
1182
+ "step": 150
1183
+ },
1184
+ {
1185
+ "epoch": 0.24442211055276383,
1186
+ "grad_norm": 0.6448603272438049,
1187
+ "learning_rate": 4.692662600215339e-06,
1188
+ "logits/chosen": 1.2040199041366577,
1189
+ "logits/rejected": 1.2359912395477295,
1190
+ "logps/chosen": -31.330707550048828,
1191
+ "logps/rejected": -37.42987060546875,
1192
+ "loss": 0.6424,
1193
+ "rewards/accuracies": 0.671875,
1194
+ "rewards/chosen": -0.20019026100635529,
1195
+ "rewards/margins": 0.1243181824684143,
1196
+ "rewards/rejected": -0.3245084285736084,
1197
+ "step": 152
1198
+ },
1199
+ {
1200
+ "epoch": 0.24763819095477388,
1201
+ "grad_norm": 0.7376160025596619,
1202
+ "learning_rate": 4.679001193174931e-06,
1203
+ "logits/chosen": 1.1904234886169434,
1204
+ "logits/rejected": 1.2631607055664062,
1205
+ "logps/chosen": -35.694915771484375,
1206
+ "logps/rejected": -39.23067855834961,
1207
+ "loss": 0.6407,
1208
+ "rewards/accuracies": 0.6640625,
1209
+ "rewards/chosen": -0.20610052347183228,
1210
+ "rewards/margins": 0.13499653339385986,
1211
+ "rewards/rejected": -0.34109705686569214,
1212
+ "step": 154
1213
+ },
1214
+ {
1215
+ "epoch": 0.2508542713567839,
1216
+ "grad_norm": 0.848278284072876,
1217
+ "learning_rate": 4.665063509461098e-06,
1218
+ "logits/chosen": 1.4529380798339844,
1219
+ "logits/rejected": 1.4410022497177124,
1220
+ "logps/chosen": -33.822208404541016,
1221
+ "logps/rejected": -37.43674850463867,
1222
+ "loss": 0.6339,
1223
+ "rewards/accuracies": 0.6953125,
1224
+ "rewards/chosen": -0.2073659896850586,
1225
+ "rewards/margins": 0.14442923665046692,
1226
+ "rewards/rejected": -0.3517952263355255,
1227
+ "step": 156
1228
+ },
1229
+ {
1230
+ "epoch": 0.25407035175879394,
1231
+ "grad_norm": 0.7130846381187439,
1232
+ "learning_rate": 4.650851316239867e-06,
1233
+ "logits/chosen": 1.2848093509674072,
1234
+ "logits/rejected": 1.3309288024902344,
1235
+ "logps/chosen": -33.921539306640625,
1236
+ "logps/rejected": -36.744384765625,
1237
+ "loss": 0.6192,
1238
+ "rewards/accuracies": 0.75,
1239
+ "rewards/chosen": -0.1995392143726349,
1240
+ "rewards/margins": 0.17559002339839935,
1241
+ "rewards/rejected": -0.37512922286987305,
1242
+ "step": 158
1243
+ },
1244
+ {
1245
+ "epoch": 0.25728643216080405,
1246
+ "grad_norm": 0.889614462852478,
1247
+ "learning_rate": 4.636366415482474e-06,
1248
+ "logits/chosen": 1.4534492492675781,
1249
+ "logits/rejected": 1.4741634130477905,
1250
+ "logps/chosen": -37.4984130859375,
1251
+ "logps/rejected": -40.52190017700195,
1252
+ "loss": 0.6113,
1253
+ "rewards/accuracies": 0.75,
1254
+ "rewards/chosen": -0.2035304307937622,
1255
+ "rewards/margins": 0.19503936171531677,
1256
+ "rewards/rejected": -0.39856982231140137,
1257
+ "step": 160
1258
+ },
1259
+ {
1260
+ "epoch": 0.2605025125628141,
1261
+ "grad_norm": 0.797125518321991,
1262
+ "learning_rate": 4.621610643736878e-06,
1263
+ "logits/chosen": 1.1096376180648804,
1264
+ "logits/rejected": 1.1337332725524902,
1265
+ "logps/chosen": -33.37801742553711,
1266
+ "logps/rejected": -37.59724426269531,
1267
+ "loss": 0.6226,
1268
+ "rewards/accuracies": 0.7421875,
1269
+ "rewards/chosen": -0.23320314288139343,
1270
+ "rewards/margins": 0.17068544030189514,
1271
+ "rewards/rejected": -0.4038885831832886,
1272
+ "step": 162
1273
+ },
1274
+ {
1275
+ "epoch": 0.26371859296482414,
1276
+ "grad_norm": 0.6749808192253113,
1277
+ "learning_rate": 4.60658587189491e-06,
1278
+ "logits/chosen": 1.2708582878112793,
1279
+ "logits/rejected": 1.2609119415283203,
1280
+ "logps/chosen": -34.26423645019531,
1281
+ "logps/rejected": -38.192108154296875,
1282
+ "loss": 0.6222,
1283
+ "rewards/accuracies": 0.6875,
1284
+ "rewards/chosen": -0.19967004656791687,
1285
+ "rewards/margins": 0.1713426113128662,
1286
+ "rewards/rejected": -0.3710126280784607,
1287
+ "step": 164
1288
+ },
1289
+ {
1290
+ "epoch": 0.2669346733668342,
1291
+ "grad_norm": 0.7875650525093079,
1292
+ "learning_rate": 4.5912940049550596e-06,
1293
+ "logits/chosen": 1.5831212997436523,
1294
+ "logits/rejected": 1.5837812423706055,
1295
+ "logps/chosen": -31.933712005615234,
1296
+ "logps/rejected": -40.272857666015625,
1297
+ "loss": 0.6043,
1298
+ "rewards/accuracies": 0.71875,
1299
+ "rewards/chosen": -0.22415216267108917,
1300
+ "rewards/margins": 0.22287428379058838,
1301
+ "rewards/rejected": -0.44702646136283875,
1302
+ "step": 166
1303
+ },
1304
+ {
1305
+ "epoch": 0.27015075376884423,
1306
+ "grad_norm": 0.7695869207382202,
1307
+ "learning_rate": 4.575736981780942e-06,
1308
+ "logits/chosen": 1.2019391059875488,
1309
+ "logits/rejected": 1.1937005519866943,
1310
+ "logps/chosen": -33.98455810546875,
1311
+ "logps/rejected": -36.95637512207031,
1312
+ "loss": 0.6069,
1313
+ "rewards/accuracies": 0.7109375,
1314
+ "rewards/chosen": -0.20682327449321747,
1315
+ "rewards/margins": 0.2069566249847412,
1316
+ "rewards/rejected": -0.4137799143791199,
1317
+ "step": 168
1318
+ },
1319
+ {
1320
+ "epoch": 0.2733668341708543,
1321
+ "grad_norm": 18.707008361816406,
1322
+ "learning_rate": 4.559916774855464e-06,
1323
+ "logits/chosen": 1.1806917190551758,
1324
+ "logits/rejected": 1.175247311592102,
1325
+ "logps/chosen": -31.461313247680664,
1326
+ "logps/rejected": -45.7471923828125,
1327
+ "loss": 0.6132,
1328
+ "rewards/accuracies": 0.8046875,
1329
+ "rewards/chosen": -0.1899334043264389,
1330
+ "rewards/margins": 0.21620748937129974,
1331
+ "rewards/rejected": -0.40614089369773865,
1332
+ "step": 170
1333
+ },
1334
+ {
1335
+ "epoch": 0.2765829145728643,
1336
+ "grad_norm": 0.753237783908844,
1337
+ "learning_rate": 4.543835390030738e-06,
1338
+ "logits/chosen": 1.1212278604507446,
1339
+ "logits/rejected": 1.1133912801742554,
1340
+ "logps/chosen": -33.20658874511719,
1341
+ "logps/rejected": -39.882999420166016,
1342
+ "loss": 0.5982,
1343
+ "rewards/accuracies": 0.7109375,
1344
+ "rewards/chosen": -0.1884865164756775,
1345
+ "rewards/margins": 0.23654288053512573,
1346
+ "rewards/rejected": -0.4250293970108032,
1347
+ "step": 172
1348
+ },
1349
+ {
1350
+ "epoch": 0.27979899497487437,
1351
+ "grad_norm": 0.8276423811912537,
1352
+ "learning_rate": 4.5274948662737535e-06,
1353
+ "logits/chosen": 1.5256576538085938,
1354
+ "logits/rejected": 1.535451054573059,
1355
+ "logps/chosen": -31.11783218383789,
1356
+ "logps/rejected": -39.00643539428711,
1357
+ "loss": 0.629,
1358
+ "rewards/accuracies": 0.640625,
1359
+ "rewards/chosen": -0.23714976012706757,
1360
+ "rewards/margins": 0.16218701004981995,
1361
+ "rewards/rejected": -0.3993367850780487,
1362
+ "step": 174
1363
+ },
1364
+ {
1365
+ "epoch": 0.2830150753768844,
1366
+ "grad_norm": 0.9968907833099365,
1367
+ "learning_rate": 4.510897275407856e-06,
1368
+ "logits/chosen": 1.174997091293335,
1369
+ "logits/rejected": 1.2027724981307983,
1370
+ "logps/chosen": -35.436641693115234,
1371
+ "logps/rejected": -38.52527618408203,
1372
+ "loss": 0.603,
1373
+ "rewards/accuracies": 0.7578125,
1374
+ "rewards/chosen": -0.2507714629173279,
1375
+ "rewards/margins": 0.22110292315483093,
1376
+ "rewards/rejected": -0.4718744158744812,
1377
+ "step": 176
1378
+ },
1379
+ {
1380
+ "epoch": 0.28623115577889446,
1381
+ "grad_norm": 0.8604883551597595,
1382
+ "learning_rate": 4.494044721850066e-06,
1383
+ "logits/chosen": 1.4382469654083252,
1384
+ "logits/rejected": 1.4571839570999146,
1385
+ "logps/chosen": -36.266475677490234,
1386
+ "logps/rejected": -40.861724853515625,
1387
+ "loss": 0.5918,
1388
+ "rewards/accuracies": 0.765625,
1389
+ "rewards/chosen": -0.22194752097129822,
1390
+ "rewards/margins": 0.2503468990325928,
1391
+ "rewards/rejected": -0.472294420003891,
1392
+ "step": 178
1393
+ },
1394
+ {
1395
+ "epoch": 0.2894472361809045,
1396
+ "grad_norm": 0.8287671208381653,
1397
+ "learning_rate": 4.476939342344246e-06,
1398
+ "logits/chosen": 0.9575952291488647,
1399
+ "logits/rejected": 0.9769454598426819,
1400
+ "logps/chosen": -33.822174072265625,
1401
+ "logps/rejected": -40.31230545043945,
1402
+ "loss": 0.6002,
1403
+ "rewards/accuracies": 0.71875,
1404
+ "rewards/chosen": -0.23980066180229187,
1405
+ "rewards/margins": 0.24076348543167114,
1406
+ "rewards/rejected": -0.480564147233963,
1407
+ "step": 180
1408
+ },
1409
+ {
1410
+ "epoch": 0.29266331658291456,
1411
+ "grad_norm": 0.9954908490180969,
1412
+ "learning_rate": 4.459583305690198e-06,
1413
+ "logits/chosen": 1.1955639123916626,
1414
+ "logits/rejected": 1.1979701519012451,
1415
+ "logps/chosen": -35.99625015258789,
1416
+ "logps/rejected": -42.92171096801758,
1417
+ "loss": 0.6081,
1418
+ "rewards/accuracies": 0.703125,
1419
+ "rewards/chosen": -0.2439308762550354,
1420
+ "rewards/margins": 0.21916835010051727,
1421
+ "rewards/rejected": -0.4630991816520691,
1422
+ "step": 182
1423
+ },
1424
+ {
1425
+ "epoch": 0.2958793969849246,
1426
+ "grad_norm": 0.9786974191665649,
1427
+ "learning_rate": 4.4419788124686665e-06,
1428
+ "logits/chosen": 1.3801398277282715,
1429
+ "logits/rejected": 1.347995638847351,
1430
+ "logps/chosen": -34.530696868896484,
1431
+ "logps/rejected": -41.12428283691406,
1432
+ "loss": 0.6282,
1433
+ "rewards/accuracies": 0.6953125,
1434
+ "rewards/chosen": -0.23650634288787842,
1435
+ "rewards/margins": 0.18922509253025055,
1436
+ "rewards/rejected": -0.4257314205169678,
1437
+ "step": 184
1438
+ },
1439
+ {
1440
+ "epoch": 0.29909547738693465,
1441
+ "grad_norm": 10.048331260681152,
1442
+ "learning_rate": 4.424128094762331e-06,
1443
+ "logits/chosen": 1.2418086528778076,
1444
+ "logits/rejected": 1.2690742015838623,
1445
+ "logps/chosen": -34.209205627441406,
1446
+ "logps/rejected": -43.19721221923828,
1447
+ "loss": 0.6351,
1448
+ "rewards/accuracies": 0.6875,
1449
+ "rewards/chosen": -0.2699955105781555,
1450
+ "rewards/margins": 0.17394134402275085,
1451
+ "rewards/rejected": -0.443936824798584,
1452
+ "step": 186
1453
+ },
1454
+ {
1455
+ "epoch": 0.3023115577889447,
1456
+ "grad_norm": 1.1740622520446777,
1457
+ "learning_rate": 4.406033415872801e-06,
1458
+ "logits/chosen": 1.211313247680664,
1459
+ "logits/rejected": 1.2540022134780884,
1460
+ "logps/chosen": -30.689380645751953,
1461
+ "logps/rejected": -41.284637451171875,
1462
+ "loss": 0.5942,
1463
+ "rewards/accuracies": 0.7421875,
1464
+ "rewards/chosen": -0.23609167337417603,
1465
+ "rewards/margins": 0.25958290696144104,
1466
+ "rewards/rejected": -0.49567458033561707,
1467
+ "step": 188
1468
+ },
1469
+ {
1470
+ "epoch": 0.3055276381909548,
1471
+ "grad_norm": 0.7530081868171692,
1472
+ "learning_rate": 4.387697070033649e-06,
1473
+ "logits/chosen": 1.691462516784668,
1474
+ "logits/rejected": 1.7449442148208618,
1475
+ "logps/chosen": -34.56097412109375,
1476
+ "logps/rejected": -40.92518615722656,
1477
+ "loss": 0.5942,
1478
+ "rewards/accuracies": 0.7421875,
1479
+ "rewards/chosen": -0.22781601548194885,
1480
+ "rewards/margins": 0.24525202810764313,
1481
+ "rewards/rejected": -0.4730680286884308,
1482
+ "step": 190
1483
+ },
1484
+ {
1485
+ "epoch": 0.30874371859296484,
1486
+ "grad_norm": 0.9380214810371399,
1487
+ "learning_rate": 4.369121382119523e-06,
1488
+ "logits/chosen": 1.3881587982177734,
1489
+ "logits/rejected": 1.4654074907302856,
1490
+ "logps/chosen": -35.41065979003906,
1491
+ "logps/rejected": -40.2274284362793,
1492
+ "loss": 0.5986,
1493
+ "rewards/accuracies": 0.7109375,
1494
+ "rewards/chosen": -0.28155750036239624,
1495
+ "rewards/margins": 0.2501455545425415,
1496
+ "rewards/rejected": -0.531702995300293,
1497
+ "step": 192
1498
+ },
1499
+ {
1500
+ "epoch": 0.3119597989949749,
1501
+ "grad_norm": 0.977056622505188,
1502
+ "learning_rate": 4.350308707351373e-06,
1503
+ "logits/chosen": 1.2062150239944458,
1504
+ "logits/rejected": 1.2337925434112549,
1505
+ "logps/chosen": -33.4559211730957,
1506
+ "logps/rejected": -39.458404541015625,
1507
+ "loss": 0.5739,
1508
+ "rewards/accuracies": 0.796875,
1509
+ "rewards/chosen": -0.22728925943374634,
1510
+ "rewards/margins": 0.30084502696990967,
1511
+ "rewards/rejected": -0.5281342267990112,
1512
+ "step": 194
1513
+ },
1514
+ {
1515
+ "epoch": 0.31517587939698494,
1516
+ "grad_norm": 0.8578616380691528,
1517
+ "learning_rate": 4.331261430997835e-06,
1518
+ "logits/chosen": 1.1321465969085693,
1519
+ "logits/rejected": 1.1912870407104492,
1520
+ "logps/chosen": -35.213966369628906,
1521
+ "logps/rejected": -40.169456481933594,
1522
+ "loss": 0.5716,
1523
+ "rewards/accuracies": 0.765625,
1524
+ "rewards/chosen": -0.21415600180625916,
1525
+ "rewards/margins": 0.31074848771095276,
1526
+ "rewards/rejected": -0.5249044895172119,
1527
+ "step": 196
1528
+ },
1529
+ {
1530
+ "epoch": 0.318391959798995,
1531
+ "grad_norm": 0.739769458770752,
1532
+ "learning_rate": 4.3119819680728e-06,
1533
+ "logits/chosen": 1.0423147678375244,
1534
+ "logits/rejected": 1.0700112581253052,
1535
+ "logps/chosen": -34.76524353027344,
1536
+ "logps/rejected": -39.74441909790039,
1537
+ "loss": 0.6,
1538
+ "rewards/accuracies": 0.734375,
1539
+ "rewards/chosen": -0.22830314934253693,
1540
+ "rewards/margins": 0.2578757107257843,
1541
+ "rewards/rejected": -0.48617884516716003,
1542
+ "step": 198
1543
+ },
1544
+ {
1545
+ "epoch": 0.32160804020100503,
1546
+ "grad_norm": 1.007567286491394,
1547
+ "learning_rate": 4.292472763029213e-06,
1548
+ "logits/chosen": 1.282730221748352,
1549
+ "logits/rejected": 1.3393166065216064,
1550
+ "logps/chosen": -31.768949508666992,
1551
+ "logps/rejected": -36.599937438964844,
1552
+ "loss": 0.5986,
1553
+ "rewards/accuracies": 0.734375,
1554
+ "rewards/chosen": -0.23056712746620178,
1555
+ "rewards/margins": 0.24101990461349487,
1556
+ "rewards/rejected": -0.47158706188201904,
1557
+ "step": 200
1558
+ },
1559
+ {
1560
+ "epoch": 0.32160804020100503,
1561
+ "eval_logits/chosen": 0.48468613624572754,
1562
+ "eval_logits/rejected": 0.48436060547828674,
1563
+ "eval_logps/chosen": -32.83020782470703,
1564
+ "eval_logps/rejected": -40.13883590698242,
1565
+ "eval_loss": 0.5988396406173706,
1566
+ "eval_rewards/accuracies": 0.7049999833106995,
1567
+ "eval_rewards/chosen": -0.23295056819915771,
1568
+ "eval_rewards/margins": 0.2484888881444931,
1569
+ "eval_rewards/rejected": -0.481439471244812,
1570
+ "eval_runtime": 48.1896,
1571
+ "eval_samples_per_second": 4.15,
1572
+ "eval_steps_per_second": 1.038,
1573
+ "step": 200
1574
+ }
1575
+ ],
1576
+ "logging_steps": 2,
1577
+ "max_steps": 621,
1578
+ "num_input_tokens_seen": 0,
1579
+ "num_train_epochs": 1,
1580
+ "save_steps": 100,
1581
+ "stateful_callbacks": {
1582
+ "TrainerControl": {
1583
+ "args": {
1584
+ "should_epoch_stop": false,
1585
+ "should_evaluate": false,
1586
+ "should_log": false,
1587
+ "should_save": true,
1588
+ "should_training_stop": false
1589
+ },
1590
+ "attributes": {}
1591
+ }
1592
+ },
1593
+ "total_flos": 5.140837960442184e+17,
1594
+ "train_batch_size": 2,
1595
+ "trial_name": null,
1596
+ "trial_params": null
1597
+ }
checkpoint-200/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcf7d186e3568d77f55778a26c607e4da9c0a96dd9d37a4828aaf8e3780e13fb
3
+ size 5752
checkpoint-200/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-300/README.md ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: /p/scratch/taco-vlm/xiao4/models/Qwen2.5-VL-7B-Instruct
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:/p/scratch/taco-vlm/xiao4/models/Qwen2.5-VL-7B-Instruct
7
+ - llama-factory
8
+ - lora
9
+ - transformers
10
+ ---
11
+
12
+ # Model Card for Model ID
13
+
14
+ <!-- Provide a quick summary of what the model is/does. -->
15
+
16
+
17
+
18
+ ## Model Details
19
+
20
+ ### Model Description
21
+
22
+ <!-- Provide a longer summary of what this model is. -->
23
+
24
+
25
+
26
+ - **Developed by:** [More Information Needed]
27
+ - **Funded by [optional]:** [More Information Needed]
28
+ - **Shared by [optional]:** [More Information Needed]
29
+ - **Model type:** [More Information Needed]
30
+ - **Language(s) (NLP):** [More Information Needed]
31
+ - **License:** [More Information Needed]
32
+ - **Finetuned from model [optional]:** [More Information Needed]
33
+
34
+ ### Model Sources [optional]
35
+
36
+ <!-- Provide the basic links for the model. -->
37
+
38
+ - **Repository:** [More Information Needed]
39
+ - **Paper [optional]:** [More Information Needed]
40
+ - **Demo [optional]:** [More Information Needed]
41
+
42
+ ## Uses
43
+
44
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
45
+
46
+ ### Direct Use
47
+
48
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Downstream Use [optional]
53
+
54
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
55
+
56
+ [More Information Needed]
57
+
58
+ ### Out-of-Scope Use
59
+
60
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ## Bias, Risks, and Limitations
65
+
66
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
67
+
68
+ [More Information Needed]
69
+
70
+ ### Recommendations
71
+
72
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
73
+
74
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
75
+
76
+ ## How to Get Started with the Model
77
+
78
+ Use the code below to get started with the model.
79
+
80
+ [More Information Needed]
81
+
82
+ ## Training Details
83
+
84
+ ### Training Data
85
+
86
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
87
+
88
+ [More Information Needed]
89
+
90
+ ### Training Procedure
91
+
92
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
93
+
94
+ #### Preprocessing [optional]
95
+
96
+ [More Information Needed]
97
+
98
+
99
+ #### Training Hyperparameters
100
+
101
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
102
+
103
+ #### Speeds, Sizes, Times [optional]
104
+
105
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
106
+
107
+ [More Information Needed]
108
+
109
+ ## Evaluation
110
+
111
+ <!-- This section describes the evaluation protocols and provides the results. -->
112
+
113
+ ### Testing Data, Factors & Metrics
114
+
115
+ #### Testing Data
116
+
117
+ <!-- This should link to a Dataset Card if possible. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Factors
122
+
123
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
124
+
125
+ [More Information Needed]
126
+
127
+ #### Metrics
128
+
129
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
130
+
131
+ [More Information Needed]
132
+
133
+ ### Results
134
+
135
+ [More Information Needed]
136
+
137
+ #### Summary
138
+
139
+
140
+
141
+ ## Model Examination [optional]
142
+
143
+ <!-- Relevant interpretability work for the model goes here -->
144
+
145
+ [More Information Needed]
146
+
147
+ ## Environmental Impact
148
+
149
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
150
+
151
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
152
+
153
+ - **Hardware Type:** [More Information Needed]
154
+ - **Hours used:** [More Information Needed]
155
+ - **Cloud Provider:** [More Information Needed]
156
+ - **Compute Region:** [More Information Needed]
157
+ - **Carbon Emitted:** [More Information Needed]
158
+
159
+ ## Technical Specifications [optional]
160
+
161
+ ### Model Architecture and Objective
162
+
163
+ [More Information Needed]
164
+
165
+ ### Compute Infrastructure
166
+
167
+ [More Information Needed]
168
+
169
+ #### Hardware
170
+
171
+ [More Information Needed]
172
+
173
+ #### Software
174
+
175
+ [More Information Needed]
176
+
177
+ ## Citation [optional]
178
+
179
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
180
+
181
+ **BibTeX:**
182
+
183
+ [More Information Needed]
184
+
185
+ **APA:**
186
+
187
+ [More Information Needed]
188
+
189
+ ## Glossary [optional]
190
+
191
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
192
+
193
+ [More Information Needed]
194
+
195
+ ## More Information [optional]
196
+
197
+ [More Information Needed]
198
+
199
+ ## Model Card Authors [optional]
200
+
201
+ [More Information Needed]
202
+
203
+ ## Model Card Contact
204
+
205
+ [More Information Needed]
206
+ ### Framework versions
207
+
208
+ - PEFT 0.17.1
checkpoint-300/adapter_config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/p/scratch/taco-vlm/xiao4/models/Qwen2.5-VL-7B-Instruct",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 64,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.0,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 32,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "v_proj",
29
+ "q_proj"
30
+ ],
31
+ "target_parameters": null,
32
+ "task_type": "CAUSAL_LM",
33
+ "trainable_token_indices": null,
34
+ "use_dora": false,
35
+ "use_qalora": false,
36
+ "use_rslora": false
37
+ }
checkpoint-300/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a178154b2410189b0e8d6a87279fdfd036d38473a9ab77c575cafd58b1bd625
3
+ size 40385208
checkpoint-300/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
checkpoint-300/chat_template.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
3
+ }
checkpoint-300/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-300/preprocessor_config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": null,
3
+ "default_to_square": true,
4
+ "do_center_crop": null,
5
+ "do_convert_rgb": true,
6
+ "do_normalize": true,
7
+ "do_rescale": true,
8
+ "do_resize": true,
9
+ "image_mean": [
10
+ 0.48145466,
11
+ 0.4578275,
12
+ 0.40821073
13
+ ],
14
+ "image_processor_type": "Qwen2VLImageProcessorFast",
15
+ "image_std": [
16
+ 0.26862954,
17
+ 0.26130258,
18
+ 0.27577711
19
+ ],
20
+ "max_pixels": 12845056,
21
+ "merge_size": 2,
22
+ "min_pixels": 3136,
23
+ "patch_size": 14,
24
+ "processor_class": "Qwen2_5_VLProcessor",
25
+ "resample": 3,
26
+ "rescale_factor": 0.00392156862745098,
27
+ "size": {
28
+ "longest_edge": 1003520,
29
+ "shortest_edge": 3136
30
+ },
31
+ "temporal_patch_size": 2
32
+ }
checkpoint-300/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef002048764051a71fb00f8f978e9ec32b780dc850bdb059af362cc56494234b
3
+ size 15024