Fix pipeline tag and link to paper
#1
by
nielsr
HF Staff
- opened
README.md
CHANGED
|
@@ -1,28 +1,28 @@
|
|
| 1 |
---
|
| 2 |
-
|
|
|
|
| 3 |
language:
|
| 4 |
- en
|
| 5 |
- zh
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
tags:
|
| 7 |
- llama-factory
|
| 8 |
- easy-r1
|
| 9 |
- full
|
| 10 |
- generated_from_trainer
|
| 11 |
-
metrics:
|
| 12 |
-
- f1
|
| 13 |
-
base_model:
|
| 14 |
-
- Qwen/Qwen2.5-VL-7B-Instruct
|
| 15 |
model-index:
|
| 16 |
-
- name: GuardReasoner-VL-7B
|
| 17 |
results: []
|
| 18 |
-
pipeline_tag: text-classification
|
| 19 |
-
library_name: transformers
|
| 20 |
---
|
| 21 |
|
| 22 |
-
# GuardReasoner-VL-7B
|
| 23 |
|
| 24 |
-
This model is a fine-tuned version of [Qwen/Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) via R-SFT and online RL.
|
| 25 |
-
This model is based on the paper [GuardReasoner-VL: Safeguarding VLMs via Reinforced Reasoning](https://huggingface.co/papers/2505.11049).
|
| 26 |
|
| 27 |
<!-- The training data of R-SFT can be found in [GuardReasonerTrain](https://huggingface.co/datasets/yueliu1999/GuardReasonerTrain). -->
|
| 28 |
|
|
@@ -37,7 +37,7 @@ from transformers import AutoProcessor
|
|
| 37 |
from qwen_vl_utils import process_vision_info
|
| 38 |
|
| 39 |
parser = argparse.ArgumentParser(description="GuardReasoner-VL Inference")
|
| 40 |
-
parser.add_argument("--model_path", type=str, default="yueliu1999/GuardReasoner-VL-7B", help="model path")
|
| 41 |
parser.add_argument("--benchmark_path", type=str, default="./data/benchmark/", help="benchmark path")
|
| 42 |
args = parser.parse_args()
|
| 43 |
|
|
@@ -152,17 +152,23 @@ messages = [
|
|
| 152 |
|
| 153 |
case3_res = generate(messages)
|
| 154 |
|
| 155 |
-
print("case1:")
|
|
|
|
|
|
|
| 156 |
print("-"*30)
|
| 157 |
print(case1_res)
|
| 158 |
print("-"*30)
|
| 159 |
|
| 160 |
-
print("case2:")
|
|
|
|
|
|
|
| 161 |
print("-"*30)
|
| 162 |
print(case2_res)
|
| 163 |
print("-"*30)
|
| 164 |
|
| 165 |
-
print("case3:")
|
|
|
|
|
|
|
| 166 |
print("-"*30)
|
| 167 |
print(case3_res)
|
| 168 |
print("-"*30)
|
|
|
|
| 1 |
---
|
| 2 |
+
base_model:
|
| 3 |
+
- Qwen/Qwen2.5-VL-3B-Instruct
|
| 4 |
language:
|
| 5 |
- en
|
| 6 |
- zh
|
| 7 |
+
library_name: transformers
|
| 8 |
+
license: apache-2.0
|
| 9 |
+
metrics:
|
| 10 |
+
- f1
|
| 11 |
+
pipeline_tag: image-text-to-text
|
| 12 |
tags:
|
| 13 |
- llama-factory
|
| 14 |
- easy-r1
|
| 15 |
- full
|
| 16 |
- generated_from_trainer
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
model-index:
|
| 18 |
+
- name: GuardReasoner-VL-3B
|
| 19 |
results: []
|
|
|
|
|
|
|
| 20 |
---
|
| 21 |
|
| 22 |
+
# GuardReasoner-VL-3B
|
| 23 |
|
| 24 |
+
This model is a fine-tuned version of [Qwen/Qwen2.5-VL-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct) via R-SFT and online RL.
|
| 25 |
+
This model is based on the paper [GuardReasoner-VL: Safeguarding VLMs via Reinforced Reasoning](https://huggingface.co/papers/2505.11049).
|
| 26 |
|
| 27 |
<!-- The training data of R-SFT can be found in [GuardReasonerTrain](https://huggingface.co/datasets/yueliu1999/GuardReasonerTrain). -->
|
| 28 |
|
|
|
|
| 37 |
from qwen_vl_utils import process_vision_info
|
| 38 |
|
| 39 |
parser = argparse.ArgumentParser(description="GuardReasoner-VL Inference")
|
| 40 |
+
parser.add_argument("--model_path", type=str, default="yueliu1999/GuardReasoner-VL-3B", help="model path")
|
| 41 |
parser.add_argument("--benchmark_path", type=str, default="./data/benchmark/", help="benchmark path")
|
| 42 |
args = parser.parse_args()
|
| 43 |
|
|
|
|
| 152 |
|
| 153 |
case3_res = generate(messages)
|
| 154 |
|
| 155 |
+
print("case1:")
|
| 158 |
print("-"*30)
|
| 159 |
print(case1_res)
|
| 160 |
print("-"*30)
|
| 161 |
|
| 162 |
+
print("case2:")
|
| 165 |
print("-"*30)
|
| 166 |
print(case2_res)
|
| 167 |
print("-"*30)
|
| 168 |
|
| 169 |
+
print("case3:")
|
| 172 |
print("-"*30)
|
| 173 |
print(case3_res)
|
| 174 |
print("-"*30)
|