Add files using upload-large-folder tool
This view is limited to 50 files because the commit contains too many changes.
- .dev_scripts/build_docs.sh +8 -0
- .dev_scripts/ci_container_test.sh +41 -0
- .dev_scripts/dockerci.sh +88 -0
- .github/ISSUE_TEMPLATE/bug_report.md +19 -0
- .github/ISSUE_TEMPLATE/custom.md +8 -0
- .github/ISSUE_TEMPLATE/feature_request.md +17 -0
- .github/PULL_REQUEST_TEMPLATE.md +13 -0
- .github/SECURITY.md +3 -0
- .github/workflows/citest.yaml +75 -0
- .github/workflows/lint.yaml +22 -0
- .github/workflows/publish.yaml +29 -0
- .gitignore +151 -0
- .pre-commit-config.yaml +52 -0
- .pre-commit-config_local.yaml +52 -0
- CODE_OF_CONDUCT.md +132 -0
- CONTRIBUTING.md +63 -0
- CONTRIBUTING_CN.md +81 -0
- LICENSE +201 -0
- MANIFEST.in +5 -0
- Makefile +25 -0
- README.md +423 -0
- README_CN.md +413 -0
- asset/discord_qr.jpg +0 -0
- asset/wechat.png +0 -0
- dataset.json +60 -0
- dataset_new.json +0 -0
- docs/Makefile +20 -0
- docs/README.md +37 -0
- docs/make.bat +35 -0
- docs/resources/grpo.png +0 -0
- docs/source/.readthedocs.yaml +29 -0
- docs/source/BestPractices/Embedding训练.md +97 -0
- docs/source/BestPractices/GRPO代码训练.md +134 -0
- docs/source/BestPractices/GRPO多模态训练.md +306 -0
- docs/source/BestPractices/GRPO完整流程.md +179 -0
- docs/source/BestPractices/NPU支持.md +225 -0
- docs/source/BestPractices/快速训练VL模型.md +227 -0
- docs/source/BestPractices/更多最佳实践.md +7 -0
- docs/source/Customization/插件化.md +215 -0
- docs/source/Customization/自定义数据集.md +241 -0
- docs/source/Customization/自定义模型.md +34 -0
- docs/source/GetStarted/SWIFT安装.md +87 -0
- docs/source/GetStarted/Web-UI.md +36 -0
- docs/source/GetStarted/快速开始.md +103 -0
- docs/source/Instruction/Agent支持.md +207 -0
- docs/source/Instruction/GRPO.md +461 -0
- docs/source/Instruction/Megatron-SWIFT训练.md +293 -0
- docs/source/Instruction/ReleaseNote3.0.md +80 -0
- docs/source/Instruction/人类对齐.md +115 -0
- docs/source/Instruction/使用tuners.md +133 -0
.dev_scripts/build_docs.sh
ADDED
@@ -0,0 +1,8 @@
+pip install -r requirements/docs.txt
+cd docs
+rm -rf build
+
+# update api rst
+#rm -rf source/api/
+#sphinx-apidoc --module-first -o source/api/ ../modelscope/
+make html
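The Makefile added later in this diff wires its `docs` target to this script, so the documentation build can be started either directly or through make. A usage sketch, assuming the commands are run from the repository root:

```shell
# Build the Sphinx documentation directly...
bash .dev_scripts/build_docs.sh
# ...or through the Makefile target that wraps the same script
make docs
```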
.dev_scripts/ci_container_test.sh
ADDED
@@ -0,0 +1,41 @@
+if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
+    # pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
+    pip install -r requirements/tests.txt -i https://mirrors.aliyun.com/pypi/simple/
+    git config --global --add safe.directory /ms-swift
+    git config --global user.email tmp
+    git config --global user.name tmp.com
+
+    # linter test
+    # use internal project for pre-commit due to the network problem
+    if [ `git remote -v | grep alibaba | wc -l` -gt 1 ]; then
+        pre-commit run -c .pre-commit-config_local.yaml --all-files
+        if [ $? -ne 0 ]; then
+            echo "linter test failed, please run 'pre-commit run --all-files' to check"
+            echo "From the repository folder"
+            echo "Run 'pip install -r requirements/tests.txt' install test dependencies."
+            echo "Run 'pre-commit install' install pre-commit hooks."
+            echo "Finally run linter with command: 'pre-commit run --all-files' to check."
+            echo "Ensure there is no failure!!!!!!!!"
+            exit -1
+        fi
+    fi
+
+    pip install -r requirements/framework.txt -U -i https://mirrors.aliyun.com/pypi/simple/
+    pip install diffusers decord einops -U -i https://mirrors.aliyun.com/pypi/simple/
+    pip install autoawq -U --no-deps
+
+    # test with install
+    pip install .
+    pip install auto_gptq bitsandbytes deepspeed -U -i https://mirrors.aliyun.com/pypi/simple/
+else
+    echo "Running case in release image, run case directly!"
+fi
+# remove torch_extensions folder to avoid ci hang.
+rm -rf ~/.cache/torch_extensions
+if [ $# -eq 0 ]; then
+    ci_command="python tests/run.py --subprocess"
+else
+    ci_command="$@"
+fi
+echo "Running case with command: $ci_command"
+$ci_command
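The script executes whatever command is passed on its command line and falls back to `python tests/run.py --subprocess` when no arguments are given. A usage sketch, assuming it is invoked inside the CI container from the repository root:

```shell
# Run the default test entry point
bash .dev_scripts/ci_container_test.sh
# Or forward an explicit command, which the script stores in ci_command and runs
bash .dev_scripts/ci_container_test.sh python tests/run.py --parallel 2 --run_config tests/run_config.yaml
```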
.dev_scripts/dockerci.sh
ADDED
@@ -0,0 +1,88 @@
+#!/bin/bash
+MODELSCOPE_CACHE_DIR_IN_CONTAINER=/modelscope_cache
+CODE_DIR=$PWD
+CODE_DIR_IN_CONTAINER=/ms-swift
+echo "$USER"
+gpus='0,1 2,3'
+cpu_sets='0-15 16-31'
+cpu_sets_arr=($cpu_sets)
+is_get_file_lock=false
+CI_COMMAND=${CI_COMMAND:-bash .dev_scripts/ci_container_test.sh python tests/run.py --parallel 2 --run_config tests/run_config.yaml}
+echo "ci command: $CI_COMMAND"
+PR_CHANGED_FILES="${PR_CHANGED_FILES:-}"
+echo "PR modified files: $PR_CHANGED_FILES"
+PR_CHANGED_FILES=${PR_CHANGED_FILES//[ ]/#}
+echo "PR_CHANGED_FILES: $PR_CHANGED_FILES"
+idx=0
+for gpu in $gpus
+do
+    exec {lock_fd}>"/tmp/gpu$gpu" || exit 1
+    flock -n "$lock_fd" || { echo "WARN: gpu $gpu is in use!" >&2; idx=$((idx+1)); continue; }
+    echo "get gpu lock $gpu"
+
+    CONTAINER_NAME="swift-ci-$idx"
+    let is_get_file_lock=true
+
+    # pull image if there are update
+    docker pull ${IMAGE_NAME}:${IMAGE_VERSION}
+    if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
+        echo 'debugging'
+        docker run --rm --name $CONTAINER_NAME --shm-size=16gb \
+            --cpuset-cpus=${cpu_sets_arr[$idx]} \
+            --gpus='"'"device=$gpu"'"' \
+            -v $CODE_DIR:$CODE_DIR_IN_CONTAINER \
+            -v $MODELSCOPE_CACHE:$MODELSCOPE_CACHE_DIR_IN_CONTAINER \
+            -v $MODELSCOPE_HOME_CACHE/$idx:/root \
+            -v /home/admin/pre-commit:/home/admin/pre-commit \
+            -e CI_TEST=True \
+            -e TEST_LEVEL=$TEST_LEVEL \
+            -e MODELSCOPE_CACHE=$MODELSCOPE_CACHE_DIR_IN_CONTAINER \
+            -e MODELSCOPE_DOMAIN=$MODELSCOPE_DOMAIN \
+            -e MODELSCOPE_SDK_DEBUG=True \
+            -e HUB_DATASET_ENDPOINT=$HUB_DATASET_ENDPOINT \
+            -e TEST_ACCESS_TOKEN_CITEST=$TEST_ACCESS_TOKEN_CITEST \
+            -e TEST_ACCESS_TOKEN_SDKDEV=$TEST_ACCESS_TOKEN_SDKDEV \
+            -e TEST_LEVEL=$TEST_LEVEL \
+            -e MODELSCOPE_ENVIRONMENT='ci' \
+            -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \
+            -e MODEL_TAG_URL=$MODEL_TAG_URL \
+            -e MODELSCOPE_API_TOKEN=$MODELSCOPE_API_TOKEN \
+            -e PR_CHANGED_FILES=$PR_CHANGED_FILES \
+            --workdir=$CODE_DIR_IN_CONTAINER \
+            ${IMAGE_NAME}:${IMAGE_VERSION} \
+            $CI_COMMAND
+    else
+        docker run --rm --name $CONTAINER_NAME --shm-size=16gb \
+            --cpuset-cpus=${cpu_sets_arr[$idx]} \
+            --gpus='"'"device=$gpu"'"' \
+            -v $CODE_DIR:$CODE_DIR_IN_CONTAINER \
+            -v $MODELSCOPE_CACHE:$MODELSCOPE_CACHE_DIR_IN_CONTAINER \
+            -v $MODELSCOPE_HOME_CACHE/$idx:/root \
+            -v /home/admin/pre-commit:/home/admin/pre-commit \
+            -e CI_TEST=True \
+            -e TEST_LEVEL=$TEST_LEVEL \
+            -e MODELSCOPE_CACHE=$MODELSCOPE_CACHE_DIR_IN_CONTAINER \
+            -e MODELSCOPE_DOMAIN=$MODELSCOPE_DOMAIN \
+            -e HUB_DATASET_ENDPOINT=$HUB_DATASET_ENDPOINT \
+            -e TEST_ACCESS_TOKEN_CITEST=$TEST_ACCESS_TOKEN_CITEST \
+            -e TEST_ACCESS_TOKEN_SDKDEV=$TEST_ACCESS_TOKEN_SDKDEV \
+            -e TEST_LEVEL=$TEST_LEVEL \
+            -e MODELSCOPE_ENVIRONMENT='ci' \
+            -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \
+            -e MODEL_TAG_URL=$MODEL_TAG_URL \
+            -e MODELSCOPE_API_TOKEN=$MODELSCOPE_API_TOKEN \
+            -e PR_CHANGED_FILES=$PR_CHANGED_FILES \
+            --workdir=$CODE_DIR_IN_CONTAINER \
+            ${IMAGE_NAME}:${IMAGE_VERSION} \
+            $CI_COMMAND
+    fi
+    if [ $? -ne 0 ]; then
+        echo "Running test case failed, please check the log!"
+        exit -1
+    fi
+    break
+done
+if [ "$is_get_file_lock" = false ] ; then
+    echo 'No free GPU!'
+    exit 1
+fi
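The per-GPU scheduling above relies on `flock` over one lock file per GPU group: the script takes the first group it can lock, runs the containerized tests pinned to that group, and breaks out of the loop. A minimal sketch of just that locking pattern, separated from the docker invocation (illustrative only, not part of the diff):

```shell
#!/bin/bash
gpus='0,1 2,3'
for gpu in $gpus; do
    # Open a dedicated lock file for this GPU group and try a non-blocking lock
    exec {lock_fd}>"/tmp/gpu$gpu" || exit 1
    flock -n "$lock_fd" || { echo "WARN: gpu $gpu is in use!" >&2; continue; }
    echo "acquired lock for gpus $gpu"
    # ...run the containerized test command pinned to $gpu here...
    break
done
```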
.github/ISSUE_TEMPLATE/bug_report.md
ADDED
@@ -0,0 +1,19 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Describe the bug**
+What the bug is, and how to reproduce, better with screenshots(描述bug以及复现过程,最好有截图)
+
+
+**Your hardware and system info**
+Write your system info like CUDA version/system/GPU/torch version here(在这里给出硬件信息和系统信息,如CUDA版本,系统,GPU型号和torch版本等)
+
+
+**Additional context**
+Add any other context about the problem here(在这里补充其他信息)
.github/ISSUE_TEMPLATE/custom.md
ADDED
@@ -0,0 +1,8 @@
+---
+name: Custom issue template
+about: Describe this issue template's purpose here.
+title: ''
+labels: ''
+assignees: ''
+
+---
.github/ISSUE_TEMPLATE/feature_request.md
ADDED
@@ -0,0 +1,17 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Describe the feature**
+Please describe the feature requested here(请在这里描述需求)
+
+**Paste any useful information**
+Paste any useful information, including papers, github links, etc.(请在这里描述其他有用的信息,比如相关的论文地址,github链接等)
+
+**Additional context**
+Add any other context or information here(其他信息可以写在这里)
.github/PULL_REQUEST_TEMPLATE.md
ADDED
@@ -0,0 +1,13 @@
+# PR type
+- [ ] Bug Fix
+- [ ] New Feature
+- [ ] Document Updates
+- [ ] More Models or Datasets Support
+
+# PR information
+
+Write the detailed information belonging to this PR.
+
+## Experiment results
+
+Paste your experiment result here (if needed).
.github/SECURITY.md
ADDED
@@ -0,0 +1,3 @@
+# Reporting Security Issues
+
+Security issues in a deep learning project usually come from non-standard third-party packages or continuously running services. If you encounter a security issue in our project, please consider reporting it to us. We appreciate your efforts to responsibly disclose your findings, and we will make every effort to acknowledge your contributions.
.github/workflows/citest.yaml
ADDED
@@ -0,0 +1,75 @@
+name: citest
+
+on:
+  push:
+    branches:
+      - master
+      - "release/**"
+    paths-ignore:
+      - "setup.*"
+      - "requirements.txt"
+      - "requirements/**"
+      - "docs/**"
+      - "tools/**"
+      - ".dev_scripts/**"
+      - "README.md"
+      - "README_*.md"
+      - "NOTICE"
+      - ".github/workflows/lint.yaml"
+      - ".github/workflows/publish.yaml"
+
+  pull_request:
+    paths-ignore:
+      - "setup.*"
+      - "requirements.txt"
+      - "requirements/**"
+      - "docs/**"
+      - "tools/**"
+      - ".dev_scripts/**"
+      - "README.md"
+      - "README_*.md"
+      - "NOTICE"
+      - ".github/workflows/lint.yaml"
+      - ".github/workflows/publish.yaml"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  unittest:
+    # The type of runner that the job will run on
+    runs-on: [self-hosted]
+    timeout-minutes: 240
+    steps:
+      - name: ResetFileMode
+        shell: bash
+        run: |
+          # reset filemode to allow action runner to delete files
+          # generated by root in docker
+          set -e
+          source ~/.bashrc
+          sudo chown -R $USER:$USER $ACTION_RUNNER_DIR
+
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          lfs: 'true'
+          submodules: 'true'
+          fetch-depth: ${{ github.event_name == 'pull_request' && 2 || 0 }}
+      - name: Get changed files
+        id: changed-files
+        run: |
+          if ${{ github.event_name == 'pull_request' }}; then
+            echo "PR_CHANGED_FILES=$(git diff --name-only -r HEAD^1 HEAD | xargs)" >> $GITHUB_ENV
+          else
+            echo "PR_CHANGED_FILES=$(git diff --name-only ${{ github.event.before }} ${{ github.event.after }} | xargs)" >> $GITHUB_ENV
+          fi
+      - name: Checkout LFS objects
+        run: git lfs checkout
+      - name: Run unittest
+        shell: bash
+        run: |
+          set -e
+          source /mnt/modelscope/ci_env.sh
+          bash .dev_scripts/dockerci.sh
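When debugging which files the workflow will treat as changed, the same git command used in the pull-request branch of the "Get changed files" step can be run locally (a sketch of that single command):

```shell
# List files changed by the last commit, space-separated, as the workflow exports them
PR_CHANGED_FILES=$(git diff --name-only -r HEAD^1 HEAD | xargs)
echo "$PR_CHANGED_FILES"
```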
.github/workflows/lint.yaml
ADDED
@@ -0,0 +1,22 @@
+name: Lint test
+
+on: [push, pull_request]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.10'
+      - name: Install pre-commit hook
+        run: |
+          pip install pre-commit
+      - name: Linting
+        run: pre-commit run --all-files
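The lint job only installs pre-commit and runs it over the whole tree, so the same check can be reproduced locally before pushing (a local sketch mirroring the workflow steps):

```shell
pip install pre-commit
pre-commit run --all-files
```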
.github/workflows/publish.yaml
ADDED
@@ -0,0 +1,29 @@
+name: release
+
+on:
+  push:
+    tags:
+      - 'v**'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}-publish
+  cancel-in-progress: true
+
+jobs:
+  build-n-publish:
+    runs-on: ubuntu-22.04
+    #if: startsWith(github.event.ref, 'refs/tags')
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.10'
+      - name: Install wheel
+        run: pip install wheel packaging setuptools==69.5.1
+      - name: Build ModelScope Swift
+        run: python setup.py sdist bdist_wheel
+      - name: Publish package to PyPI
+        run: |
+          pip install twine
+          twine upload dist/* --skip-existing -u __token__ -p ${{ secrets.PYPI_API_TOKEN }}
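A local dry run of the same packaging steps can help verify the artifacts before tagging a release. A sketch of the equivalent commands; `twine check` is an extra validation step that is not part of the workflow:

```shell
pip install wheel packaging setuptools==69.5.1 twine
python setup.py sdist bdist_wheel
twine check dist/*   # validate the metadata locally instead of uploading
```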
.gitignore
ADDED
@@ -0,0 +1,151 @@
+# Byte-compiled / optimized / DLL files
+tmp
+*.ttf
+__pycache__/
+*.py[cod]
+*$py.class
+test.py
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+/package
+/temp
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+.vscode
+.idea
+.run
+
+# custom
+*.pkl
+*.pkl.json
+*.log.json
+*.whl
+*.tar.gz
+*.swp
+*.log
+*.tar.gz
+source.sh
+tensorboard.sh
+.DS_Store
+replace.sh
+result.png
+result.jpg
+result.mp4
+output/
+outputs/
+wandb/
+*.out
+benchmarks/
+eval_output/
+eval_outputs/
+transformers/
+vlmeval/
+my_model/
+/data
+result/
+images
+/custom/
+megatron_output/
+
+# Pytorch
+*.pth
+*.pt
+
+# ast template
+ast_index_file.py
.pre-commit-config.yaml
ADDED
@@ -0,0 +1,52 @@
+repos:
+  - repo: https://github.com/pycqa/flake8.git
+    rev: 4.0.0
+    hooks:
+      - id: flake8
+        exclude: |
+          (?x)^(
+          thirdparty/|
+          examples/|
+          tests/run.py
+          )$
+  - repo: https://github.com/PyCQA/isort.git
+    rev: 4.3.21
+    hooks:
+      - id: isort
+        exclude: |
+          (?x)^(
+          examples/|
+          tests/run.py|
+          swift/cli/sft.py
+          )$
+  - repo: https://github.com/pre-commit/mirrors-yapf.git
+    rev: v0.30.0
+    hooks:
+      - id: yapf
+        exclude: |
+          (?x)^(
+          thirdparty/|
+          examples/|
+          tests/run.py
+          )$
+  - repo: https://github.com/pre-commit/pre-commit-hooks.git
+    rev: v3.1.0
+    hooks:
+      - id: trailing-whitespace
+        exclude: thirdparty/|tests/run.py
+      - id: check-yaml
+        exclude: thirdparty/|tests/run.py
+      - id: end-of-file-fixer
+        exclude: thirdparty/|tests/run.py
+      - id: requirements-txt-fixer
+        exclude: thirdparty/|tests/run.py
+      - id: double-quote-string-fixer
+        exclude: thirdparty/|tests/run.py
+      - id: check-merge-conflict
+        exclude: thirdparty/|tests/run.py
+      - id: fix-encoding-pragma
+        exclude: thirdparty/|tests/run.py
+        args: ["--remove"]
+      - id: mixed-line-ending
+        exclude: thirdparty/|tests/run.py
+        args: ["--fix=lf"]
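Developers typically register these hooks once so they run on every commit, or invoke the configuration explicitly on demand (a usage sketch using standard pre-commit commands):

```shell
pre-commit install                                       # run the hooks automatically on each git commit
pre-commit run -c .pre-commit-config.yaml --all-files    # or run the full suite over the whole tree
```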
.pre-commit-config_local.yaml
ADDED
@@ -0,0 +1,52 @@
+repos:
+  - repo: /home/admin/pre-commit/flake8
+    rev: 4.0.0
+    hooks:
+      - id: flake8
+        exclude: |
+          (?x)^(
+          thirdparty/|
+          examples/|
+          tests/run.py
+          )$
+  - repo: /home/admin/pre-commit/isort
+    rev: 4.3.21
+    hooks:
+      - id: isort
+        exclude: |
+          (?x)^(
+          examples/|
+          tests/run.py|
+          swift/cli/sft.py
+          )$
+  - repo: /home/admin/pre-commit/mirrors-yapf
+    rev: v0.30.0
+    hooks:
+      - id: yapf
+        exclude: |
+          (?x)^(
+          thirdparty/|
+          examples/|
+          tests/run.py
+          )$
+  - repo: /home/admin/pre-commit/pre-commit-hooks
+    rev: v3.1.0
+    hooks:
+      - id: trailing-whitespace
+        exclude: thirdparty/|tests/run.py
+      - id: check-yaml
+        exclude: thirdparty/|tests/run.py
+      - id: end-of-file-fixer
+        exclude: thirdparty/
+      - id: requirements-txt-fixer
+        exclude: thirdparty/|tests/run.py
+      - id: double-quote-string-fixer
+        exclude: thirdparty/|tests/run.py
+      - id: check-merge-conflict
+        exclude: thirdparty/|tests/run.py
+      - id: fix-encoding-pragma
+        exclude: thirdparty/|tests/run.py
+        args: ["--remove"]
+      - id: mixed-line-ending
+        exclude: thirdparty/|tests/run.py
+        args: ["--fix=lf"]
CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,132 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+We as members, contributors, and leaders pledge to make participation in our
+community a harassment-free experience for everyone, regardless of age, body
+size, visible or invisible disability, ethnicity, sex characteristics, gender
+identity and expression, level of experience, education, socio-economic status,
+nationality, personal appearance, race, caste, color, religion, or sexual
+identity and orientation.
+
+We pledge to act and interact in ways that contribute to an open, welcoming,
+diverse, inclusive, and healthy community.
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment for our
+community include:
+
+* Demonstrating empathy and kindness toward other people
+* Being respectful of differing opinions, viewpoints, and experiences
+* Giving and gracefully accepting constructive feedback
+* Accepting responsibility and apologizing to those affected by our mistakes,
+  and learning from the experience
+* Focusing on what is best not just for us as individuals, but for the overall
+  community
+
+Examples of unacceptable behavior include:
+
+* The use of sexualized language or imagery, and sexual attention or advances of
+  any kind
+* Trolling, insulting or derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or email address,
+  without their explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Enforcement Responsibilities
+
+Community leaders are responsible for clarifying and enforcing our standards of
+acceptable behavior and will take appropriate and fair corrective action in
+response to any behavior that they deem inappropriate, threatening, offensive,
+or harmful.
+
+Community leaders have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, and will communicate reasons for moderation
+decisions when appropriate.
+
+## Scope
+
+This Code of Conduct applies within all community spaces, and also applies when
+an individual is officially representing the community in public spaces.
+Examples of representing our community include using an official e-mail address,
+posting via an official social media account, or acting as an appointed
+representative at an online or offline event.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported to the community leaders responsible for enforcement at
+
+All complaints will be reviewed and investigated promptly and fairly.
+
+All community leaders are obligated to respect the privacy and security of the
+reporter of any incident.
+
+## Enforcement Guidelines
+
+Community leaders will follow these Community Impact Guidelines in determining
+the consequences for any action they deem in violation of this Code of Conduct:
+
+### 1. Correction
+
+**Community Impact**: Use of inappropriate language or other behavior deemed
+unprofessional or unwelcome in the community.
+
+**Consequence**: A private, written warning from community leaders, providing
+clarity around the nature of the violation and an explanation of why the
+behavior was inappropriate. A public apology may be requested.
+
+### 2. Warning
+
+**Community Impact**: A violation through a single incident or series of
+actions.
+
+**Consequence**: A warning with consequences for continued behavior. No
+interaction with the people involved, including unsolicited interaction with
+those enforcing the Code of Conduct, for a specified period of time. This
+includes avoiding interactions in community spaces as well as external channels
+like social media. Violating these terms may lead to a temporary or permanent
+ban.
+
+### 3. Temporary Ban
+
+**Community Impact**: A serious violation of community standards, including
+sustained inappropriate behavior.
+
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+
+### 4. Permanent Ban
+
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within the
+community.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.1, available at
+[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
+
+Community Impact Guidelines were inspired by
+[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
+
+For answers to common questions about this code of conduct, see the FAQ at
+[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
+[https://www.contributor-covenant.org/translations][translations].
+
+[homepage]: https://www.contributor-covenant.org
+[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
+[Mozilla CoC]: https://github.com/mozilla/diversity
+[FAQ]: https://www.contributor-covenant.org/faq
+[translations]: https://www.contributor-covenant.org/translations
CONTRIBUTING.md
ADDED
@@ -0,0 +1,63 @@
+# Contributor Guide
+
+_You are welcome to contribute PRs, bug reports, documentation supplements, or other types of contributions to SWIFT!_
+
+## Table of Contents
+- [Code of Conduct](#-code-of-conduct)
+- [Contribution Process](#-contribution-process)
+- [Hardware support](#-Hardware-support)
+
+## 📖 Code of Conduct
+Please refer to our [Code of Conduct documentation](./CODE_OF_CONDUCT.md).
+
+## 🔁 Contribution Process
+### What We Need
+- New Technologies and New Models: SWIFT needs to support more open-source models and datasets, as well as new technologies that we have not yet paid attention to. If you are interested, please submit a PR to us.
+- Technical Propagation: If you are interested in technical propagation, you are welcome to help us write tutorials, documents, or videos on any website, and send us the link.
+- Community Contribution: You can write technical articles related to SWIFT and submit them to us. After review and approval, we will publish them on the official ModelScope accounts (Zhihu, WeChat, etc.), with your name credited.
+
+### Incentives
+- We will issue electronic certificates to contributors on behalf of the ModelScope community, to encourage your selfless contributions.
+- We will offer small souvenirs related to the ModelScope Community.
+- We will provide free A10 computing power during the development period. For more details, please refer to the [Hardware support](#-Hardware-support) section.
+
+### Submitting PR (Pull Requests)
+
+Any feature development is carried out by forking the repository and then submitting a PR on GitHub.
+1. Fork: Go to the [SWIFT](https://github.com/modelscope/swift) page and click the **Fork button**. After completion, a SWIFT code repository will be cloned under your personal organization.
+2. Clone: Clone the code repository generated in the first step to your local machine and **create a new branch** for development. During development, please click the **Sync Fork button** in time to synchronize with the `main` branch to prevent the code from becoming stale and conflicting.
+3. Submit PR: After development and testing, push the code to the remote branch. On GitHub, go to the **Pull Requests page**, create a new PR, select your code branch as the source branch, and the `modelscope/swift:main` branch as the target branch.
+
+4. Write Description: Provide a good feature description in the PR so that the reviewers know the content of your modification.
+5. Review: We hope that the code to be merged is concise and efficient, so we may raise some questions and discuss them. Please note that any issues raised in the review are aimed at the code itself, not at you personally. Once all issues are discussed and resolved, your code will be approved.
+
+### Code Standards and Development Approach
+SWIFT has established variable naming conventions and development approaches. Please follow them as much as possible during development.
+1. Variable names are separated by underscores, and class names are named with the first letter of each word capitalized.
+2. All Python indentation uses four spaces instead of a tab.
+3. Choose well-known open-source libraries, avoid using closed-source libraries or unstable open-source libraries, and avoid reimplementing existing code.
+
+After the PR is submitted, SWIFT will perform two types of tests:
+- Code Lint Test: a static code-compliance check. Please make sure that you have run the linter locally in advance.
+```shell
+pip install pre-commit # In the swift folder
+pre-commit run --all-files # Fix the errors reported by pre-commit until all checks are successful
+```
+- CI Tests: Smoke tests and unit tests; please refer to the next section.
+
+### Running CI Tests
+Before submitting the PR, please ensure that your development code is protected by test cases, such as smoke tests for new features or unit tests for various edge cases. Reviewers will also pay attention to this during code review. In addition, a dedicated service runs the CI tests over all test cases, and the code can only be merged after the test cases pass.
+
+Additionally, some important tests are skipped because of their long running time. To make sure your logic is correct, you can run them locally:
+```shell
+python tests/llm/test_run.py
+```
+Please make sure this test can pass normally.
+
+## ✅ Hardware support
+
+SWIFT will provide hardware support for developers, including free GPUs. If needed, please email us ([[email protected]](mailto:[email protected])) or join our WeChat group:
+
+<p align="left">
+<img src="asset/wechat.png" width="250" style="display: inline-block;">
+</p>
CONTRIBUTING_CN.md
ADDED
@@ -0,0 +1,81 @@
+# 贡献者指引
+
+*欢迎帮SWIFT提供Feature PR、Bug反馈、文档补充或其他类型的贡献!*
+
+## 目录
+
+- [代码规约](#-代码规约)
+- [贡献流程](#-贡献流程)
+- [资源支持](#-资源支持)
+
+## 📖 代码规约
+
+请查看我们的[代码规约文档](./CODE_OF_CONDUCT.md).
+
+## 🔁 贡献流程
+
+### 我们需要什么
+- 新技术和新模型:SWIFT需要支持更多的开源模型和数据集,或我们没有关注到的新技术,如果您对此有兴趣,可以提交PR给我们。
+- 技术布道:如果您对技术布道有兴趣,欢迎在任何网站上帮我们撰写教程文档或视频等,并将链接发给我们。
+- 社区供稿:您可以撰写和SWIFT有关的技术文章,并供稿给我们,我们审核通过后会在魔搭官方账号(知乎、公众号等)上进行发布,并属上您的名字。
+
+### 激励
+
+- 我们会以魔搭社区的身份给贡献者颁发电子证书,以鼓励您的无私贡献。
+- 我们会赠送相关魔搭社区相关周边小礼品。
+- 我们会赠送开发期间的免费A10算力,具体可以查看[资源支持](#-资源支持)章节。
+
+### 提交PR(Pull Requests)
+
+任何feature开发都在github上以先Fork后PR的形式进行。
+
+1. Fork:进入[SWIFT](https://github.com/modelscope/swift)页面后,点击**Fork按钮**执行。完成后会在您的个人组织下克隆出一个SWIFT代码库
+
+2. Clone:将第一步产生的代码库clone到本地并**拉新分支**进行开发,开发中请及时点击**Sync Fork按钮**同步`main`分支,防止代码过期并冲突
+
+3. 提交PR:开发、测试完成后将代码推送到远程分支。在github上点击**Pull Requests页面**,新建一个PR,源分支选择您提交的代码分支,目标分支选择`modelscope/swift:main`分支
+
+4. 撰写描述:在PR中填写良好的feature描述是必要的,让Reviewers知道您的修改内容
+
+5. Review:我们希望合入的代码简洁高效,因此可能会提出一些问题并讨论。请注意,任何review中提出的问题是针对代码本身,而非您个人。在所有问题讨论通过后,您的代码会被通过
+
+### 代码规范和开发方式
+
+SWIFT有约定俗成的变量命名方式和开发方式。在开发中请尽量遵循这些方式。
+
+1. 变量命名以下划线分割,类名以所有单词首字母大写方式命名
+2. 所有的python缩进都是四个空格取代一个tab
+3. 选用知名的开源库,避免使用闭源库或不稳定的开源库,避免重复造轮子
+
+SWIFT在PR提交后会进行两类测试:
+
+- Code Lint测试 对代码进行静态规范走查的测试,为保证改测试通过,请保证本地预先进行了Code lint。方法是:
+
+```shell
+pip install pre-commit
+# 在swift文件夹内
+pre-commit run --all-files
+# 对pre-commit报的错误进行修改,直到所有的检查都是成功状态
+```
+
+- CI Tests 冒烟测试和单元测试,请查看下一章节
+
+### Running CI Tests
+
+在提交PR前,请保证您的开发代码已经受到了测试用例的保护。例如,对新功能的冒烟测试,或者各种边缘case的单元测试等。在代码review时Reviewers也会关注这一点。同时,也会有服务专门运行CI Tests,运行所有的测试用例,测试用例通过后代码才可以合并。
+
+另外,由于运行时间过长,我们跳过了部分重要测试,为保证您的逻辑是正确的,可以在本地执行该测试:
+
+```shell
+python tests/llm/test_run.py
+```
+
+请保证该测试可以正常通过。
+
+## ✅ 资源支持
+
+SWIFT会为开发者提供资源支持,包括免费的GPU算力。如果需要请邮件联系我们([[email protected]](mailto:[email protected]))或加入我们的微信群:
+
+<p align="left">
+<img src="asset/wechat.png" width="250" style="display: inline-block;">
+</p>
LICENSE
ADDED
@@ -0,0 +1,201 @@
+Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction,
+and distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the copyright owner or entity authorized by
+the copyright owner that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all
+other entities that control, are controlled by, or are under common
+control with that entity. For the purposes of this definition,
+"control" means (i) the power, direct or indirect, to cause the
+direction or management of such entity, whether by contract or
+otherwise, or (ii) ownership of fifty percent (50%) or more of the
+outstanding shares, or (iii) beneficial ownership of such entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity
+exercising permissions granted by this License.
+
+"Source" form shall mean the preferred form for making modifications,
+including but not limited to software source code, documentation
+source, and configuration files.
+
+"Object" form shall mean any form resulting from mechanical
+transformation or translation of a Source form, including but
+not limited to compiled object code, generated documentation,
+and conversions to other media types.
+
+"Work" shall mean the work of authorship, whether in Source or
+Object form, made available under the License, as indicated by a
+copyright notice that is included in or attached to the work
+(an example is provided in the Appendix below).
+
+"Derivative Works" shall mean any work, whether in Source or Object
+form, that is based on (or derived from) the Work and for which the
+editorial revisions, annotations, elaborations, or other modifications
+represent, as a whole, an original work of authorship. For the purposes
+of this License, Derivative Works shall not include works that remain
+separable from, or merely link (or bind by name) to the interfaces of,
+the Work and Derivative Works thereof.
+
+"Contribution" shall mean any work of authorship, including
+the original version of the Work and any modifications or additions
+to that Work or Derivative Works thereof, that is intentionally
+submitted to Licensor for inclusion in the Work by the copyright owner
+or by an individual or Legal Entity authorized to submit on behalf of
+the copyright owner. For the purposes of this definition, "submitted"
+means any form of electronic, verbal, or written communication sent
+to the Licensor or its representatives, including but not limited to
+communication on electronic mailing lists, source code control systems,
+and issue tracking systems that are managed by, or on behalf of, the
+Licensor for the purpose of discussing and improving the Work, but
+excluding communication that is conspicuously marked or otherwise
+designated in writing by the copyright owner as "Not a Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity
+on behalf of whom a Contribution has been received by Licensor and
+subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+this License, each Contributor hereby grants to You a perpetual,
+worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+copyright license to reproduce, prepare Derivative Works of,
+publicly display, publicly perform, sublicense, and distribute the
+Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+this License, each Contributor hereby grants to You a perpetual,
+worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+(except as stated in this section) patent license to make, have made,
+use, offer to sell, sell, import, and otherwise transfer the Work,
+where such license applies only to those patent claims licensable
+by such Contributor that are necessarily infringed by their
+Contribution(s) alone or by combination of their Contribution(s)
+with the Work to which such Contribution(s) was submitted. If You
+institute patent litigation against any entity (including a
+cross-claim or counterclaim in a lawsuit) alleging that the Work
+or a Contribution incorporated within the Work constitutes direct
+or contributory patent infringement, then any patent licenses
+granted to You under this License for that Work shall terminate
+as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+Work or Derivative Works thereof in any medium, with or without
+modifications, and in Source or Object form, provided that You
+meet the following conditions:
+
+(a) You must give any other recipients of the Work or
+Derivative Works a copy of this License; and
+
+(b) You must cause any modified files to carry prominent notices
+stating that You changed the files; and
+
+(c) You must retain, in the Source form of any Derivative Works
+that You distribute, all copyright, patent, trademark, and
+attribution notices from the Source form of the Work,
+excluding those notices that do not pertain to any part of
+the Derivative Works; and
+
+(d) If the Work includes a "NOTICE" text file as part of its
+distribution, then any Derivative Works that You distribute must
+include a readable copy of the attribution notices contained
+within such NOTICE file, excluding those notices that do not
+pertain to any part of the Derivative Works, in at least one
+of the following places: within a NOTICE text file distributed
+as part of the Derivative Works; within the Source form or
+documentation, if provided along with the Derivative Works; or,
+within a display generated by the Derivative Works, if and
+wherever such third-party notices normally appear. The contents
+of the NOTICE file are for informational purposes only and
+do not modify the License. You may add Your own attribution
+notices within Derivative Works that You distribute, alongside
+or as an addendum to the NOTICE text from the Work, provided
+that such additional attribution notices cannot be construed
+as modifying the License.
+
+You may add Your own copyright statement to Your modifications and
+may provide additional or different license terms and conditions
+for use, reproduction, or distribution of Your modifications, or
+for any such Derivative Works as a whole, provided Your use,
+reproduction, and distribution of the Work otherwise complies with
+the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+any Contribution intentionally submitted for inclusion in the Work
+by You to the Licensor shall be under the terms and conditions of
+this License, without any additional terms or conditions.
+Notwithstanding the above, nothing herein shall supersede or modify
+the terms of any separate license agreement you may have executed
+with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+names, trademarks, service marks, or product names of the Licensor,
+except as required for reasonable and customary use in describing the
+origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+agreed to in writing, Licensor provides the Work (and each
+Contributor provides its Contributions) on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+implied, including, without limitation, any warranties or conditions
+of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE. You are solely responsible for determining the
+appropriateness of using or redistributing the Work and assume any
+risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+whether in tort (including negligence), contract, or otherwise,
+unless required by applicable law (such as deliberate and grossly
+negligent acts) or agreed to in writing, shall any Contributor be
+liable to You for damages, including any direct, indirect, special,
+incidental, or consequential damages of any character arising as a
+result of this License or out of the use or inability to use the
+Work (including but not limited to damages for loss of goodwill,
+work stoppage, computer failure or malfunction, or any and all
+other commercial damages or losses), even if such Contributor
+has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+the Work or Derivative Works thereof, You may choose to offer,
+and charge a fee for, acceptance of support, warranty, indemnity,
+or other liability obligations and/or rights consistent with this
+License. However, in accepting such obligations, You may act only
+on Your own behalf and on Your sole responsibility, not on behalf
+of any other Contributor, and only if You agree to indemnify,
+defend, and hold each Contributor harmless for any liability
+incurred by, or claims asserted against, such Contributor by reason
+of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+To apply the Apache License to your work, attach the following
+boilerplate notice, with the fields enclosed by brackets "[]"
+replaced with your own identifying information. (Don't include
+the brackets!) The text should be enclosed in the appropriate
+comment syntax for the file format. We also recommend that a
+file or class name and description of purpose be included on the
+same "printed page" as the copyright notice for easier
+identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
MANIFEST.in
ADDED
@@ -0,0 +1,5 @@
+recursive-include swift/utils *.py
+recursive-include swift/llm/dataset/data *.*
+recursive-include swift/llm/ds_config *.json
+recursive-include requirements *.txt
+recursive-include swift/plugin/loss_scale/config *.json
Makefile
ADDED
@@ -0,0 +1,25 @@
+WHL_BUILD_DIR :=package
+DOC_BUILD_DIR :=docs/build/
+
+# default rule
+default: whl docs
+
+.PHONY: docs
+docs:
+	bash .dev_scripts/build_docs.sh
+
+.PHONY: linter
+linter:
+	bash .dev_scripts/linter.sh
+
+.PHONY: test
+test:
+	bash .dev_scripts/citest.sh
+
+.PHONY: whl
+whl:
+	python setup.py sdist bdist_wheel
+
+.PHONY: clean
+clean:
+	rm -rf $(WHL_BUILD_DIR) $(DOC_BUILD_DIR)
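Typical invocations of these targets, as a usage sketch (`make` with no arguments runs the default `whl docs` rule):

```shell
make whl     # build the sdist and wheel with setup.py
make docs    # build the Sphinx documentation via .dev_scripts/build_docs.sh
make clean   # remove the package and docs build directories
```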
README.md
ADDED
@@ -0,0 +1,423 @@
+# SWIFT (Scalable lightWeight Infrastructure for Fine-Tuning)
+
+<p align="center">
+<br>
+<img src="asset/banner.png"/>
+<br>
+<p>
+<p align="center">
+<a href="https://modelscope.cn/home">ModelScope Community Website</a>
+<br>
+<a href="README_CN.md">中文</a>   |   English  
+</p>
+
+<p align="center">
+<img src="https://img.shields.io/badge/python-3.10-5be.svg">
+<img src="https://img.shields.io/badge/pytorch-%E2%89%A52.0-orange.svg">
+<a href="https://github.com/modelscope/modelscope/"><img src="https://img.shields.io/badge/modelscope-%E2%89%A51.19-5D91D4.svg"></a>
+<a href="https://pypi.org/project/ms-swift/"><img src="https://badge.fury.io/py/ms-swift.svg"></a>
+<a href="https://github.com/modelscope/swift/blob/main/LICENSE"><img src="https://img.shields.io/github/license/modelscope/swift"></a>
+<a href="https://pepy.tech/project/ms-swift"><img src="https://pepy.tech/badge/ms-swift"></a>
+<a href="https://github.com/modelscope/swift/pulls"><img src="https://img.shields.io/badge/PR-welcome-55EB99.svg"></a>
+</p>
| 23 |
+
|
| 24 |
+
<p align="center">
|
| 25 |
+
<a href="https://trendshift.io/repositories/6427" target="_blank"><img src="https://trendshift.io/api/badge/repositories/6427" alt="modelscope%2Fswift | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
| 26 |
+
</p>
|
| 27 |
+
|
| 28 |
+
<p align="center">
|
| 29 |
+
<a href="https://arxiv.org/abs/2408.05517">Paper</a>   | <a href="https://swift.readthedocs.io/en/latest/">English Documentation</a>   |   <a href="https://swift.readthedocs.io/zh-cn/latest/">中文文档</a>  
|
| 30 |
+
</p>
|
| 31 |
+
|
| 32 |
+
## 📖 Table of Contents
|
| 33 |
+
- [Groups](#-groups)
|
| 34 |
+
- [Introduction](#-introduction)
|
| 35 |
+
- [News](#-news)
|
| 36 |
+
- [Installation](#%EF%B8%8F-installation)
|
| 37 |
+
- [Quick Start](#-quick-start)
|
| 38 |
+
- [Usage](#-usage)
|
| 39 |
+
- [License](#-license)
|
| 40 |
+
- [Citation](#-citation)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
## ☎ Groups
|
| 44 |
+
|
| 45 |
+
You can contact us and communicate with us by joining one of the groups below:
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
[Discord Group](https://discord.com/invite/D27yfEFVz5) | WeChat Group
|
| 49 |
+
:-------------------------:|:-------------------------:
|
| 50 |
+
<img src="asset/discord_qr.jpg" width="200" height="200"> | <img src="asset/wechat.png" width="200" height="200">
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
## 📝 Introduction
|
| 54 |
+
🍲 ms-swift is an official framework provided by the ModelScope community for fine-tuning and deploying large language models and multi-modal large models. It currently supports the training (pre-training, fine-tuning, human alignment), inference, evaluation, quantization, and deployment of 500+ large models and 200+ multi-modal large models. These large language models (LLMs) include models such as Qwen3, Qwen3-MoE, Qwen2.5, InternLM3, GLM4, Mistral, DeepSeek-R1, Yi1.5, TeleChat2, Baichuan2, and Gemma2. The multi-modal LLMs include models such as Qwen2.5-VL, Qwen2-Audio, Llama4, Llava, InternVL2.5, MiniCPM-V-2.6, GLM4v, Xcomposer2.5, Yi-VL, DeepSeek-VL2, Phi3.5-Vision, and GOT-OCR2.
|
| 55 |
+
|
| 56 |
+
🍔 Additionally, ms-swift incorporates the latest training technologies, including lightweight techniques such as LoRA, QLoRA, Llama-Pro, LongLoRA, GaLore, Q-GaLore, LoRA+, LISA, DoRA, FourierFt, ReFT, UnSloth, and Liger, as well as human alignment training methods like DPO, GRPO, RM, PPO, KTO, CPO, SimPO, and ORPO. ms-swift supports acceleration of inference, evaluation, and deployment modules using vLLM and LMDeploy, and it supports model quantization with technologies like GPTQ, AWQ, and BNB. Furthermore, ms-swift offers a Gradio-based Web UI and a wealth of best practices.
|
| 57 |
+
|
| 58 |
+
**Why choose ms-swift?**
|
| 59 |
+
|
| 60 |
+
- 🍎 **Model Types**: Supports 500+ pure text large models, **200+ multi-modal large models**, as well as All-to-All multi-modal models, sequence classification models, and embedding models, **covering the entire process from training to deployment**.
|
| 61 |
+
- **Dataset Types**: Comes with 150+ pre-training, fine-tuning, human alignment, multi-modal datasets, and supports custom datasets.
|
| 62 |
+
- **Hardware Support**: Compatible with CPU, RTX series, T4/V100, A10/A100/H100, Ascend NPU, MPS, etc.
|
| 63 |
+
- 🍊 **Lightweight Training**: Supports lightweight fine-tuning methods like LoRA, QLoRA, DoRA, LoRA+, ReFT, RS-LoRA, LLaMAPro, Adapter, GaLore, Q-Galore, LISA, UnSloth, Liger-Kernel.
|
| 64 |
+
- **Distributed Training**: Supports distributed data parallel (DDP), device_map simple model parallelism, DeepSpeed ZeRO2/ZeRO3, FSDP, and other distributed training techniques.
|
| 65 |
+
- **Quantization Training**: Supports training quantized models like BNB, AWQ, GPTQ, AQLM, HQQ, EETQ.
|
| 66 |
+
- **RLHF Training**: Supports human alignment training methods such as DPO, GRPO, RM, PPO, KTO, CPO, SimPO, ORPO for both pure text and multi-modal large models.
|
| 67 |
+
- 🍓 **Multi-Modal Training**: Supports training on different modalities like images, videos, and audio, for tasks like VQA, captioning, OCR, and grounding.
|
| 68 |
+
- **Interface Training**: Provides capabilities for training, inference, evaluation, quantization through an interface, completing the whole large model pipeline.
|
| 69 |
+
- **Plugin and Extension**: Supports custom model and dataset extensions, as well as customization of components like loss, metric, trainer, loss-scale, callback, optimizer.
|
| 70 |
+
- 🍉 **Toolbox Capabilities**: Offers not only training support for large models and multi-modal large models but also covers the entire process of inference, evaluation, quantization, and deployment.
|
| 71 |
+
- **Inference Acceleration**: Supports inference acceleration engines like PyTorch, vLLM, LmDeploy, and provides OpenAI API for accelerating inference, deployment, and evaluation modules.
|
| 72 |
+
- **Model Evaluation**: Uses EvalScope as the evaluation backend and supports evaluation on 100+ datasets for both pure text and multi-modal models.
|
| 73 |
+
- **Model Quantization**: Supports AWQ, GPTQ, and BNB quantized exports, with models that can use vLLM/LmDeploy for inference acceleration and continue training.
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
## 🎉 News
|
| 77 |
+
- 🎁 2025.05.11: GRPO now supports custom processing logic for reward models. See the GenRM example [here](./docs/source_en/Instruction/GRPO.md#customized-reward-models).
|
| 78 |
+
- 🎁 2025.04.15: The ms-swift paper has been accepted by AAAI 2025. You can find the paper at [this link](https://ojs.aaai.org/index.php/AAAI/article/view/35383).
|
| 79 |
+
- 🎁 2025.03.23: Multi-round GRPO is now supported for training in multi-turn dialogue scenarios (e.g., agent tool calling). Please refer to the [training script](https://github.com/modelscope/ms-swift/blob/main/examples/train/grpo/internal/train_multi_round.sh).
|
| 80 |
+
- 🎁 2025.03.16: Support for Megatron's parallel training techniques is now available. Please see the [Megatron-SWIFT training documentation](https://swift.readthedocs.io/zh-cn/latest/Instruction/Megatron-SWIFT训练.html).
|
| 81 |
+
- 🎁 2025.03.15: Fine-tuning of embedding models is supported for both plain-text and multimodal models. Please check the [training scripts](https://github.com/modelscope/ms-swift/tree/main/examples/train/embedding).
|
| 82 |
+
- 🎁 2025.03.05: The hybrid mode for GRPO is supported, with a script for training a 72B model on 4 GPUs (4*80G) available [here](https://github.com/modelscope/ms-swift/blob/main/examples/train/grpo/internal/train_72b_4gpu.sh). Tensor parallelism with vLLM is also supported, with the training script available [here](https://github.com/modelscope/ms-swift/blob/main/examples/train/grpo/internal/multi_gpu_mp_colocate.sh).
|
| 83 |
+
- 🎁 2025.02.21: The GRPO algorithm now supports LMDeploy, with the training script available [here](https://github.com/modelscope/ms-swift/blob/main/examples/train/grpo/internal/full_lmdeploy.sh). Additionally, the performance of the GRPO algorithm has been tested, achieving a training speed increase of up to 300% using various tricks. Please check the WandB table [here](https://wandb.ai/tastelikefeet/grpo_perf_test?nw=nwuseryuzezyz).
|
| 84 |
+
- 🎁 2025.02.21: The `swift sample` command is now supported. The reinforcement fine-tuning script can be found [here](https://github.com/modelscope/ms-swift/blob/main/docs/source/Instruction/强化微调.md), and the large model API distillation sampling script is available [here](https://github.com/modelscope/ms-swift/blob/main/examples/sampler/distill/distill.sh).
|
| 85 |
+
- 🔥 2025.02.12: Support for the GRPO (Group Relative Policy Optimization) training algorithm has been added. Documentation is available [here](./docs/source_en/Instruction/GRPO.md).
|
| 86 |
+
- 🎁 2024.12.04: Major update to **ms-swift 3.0**. Please refer to the [release notes and changes](https://swift.readthedocs.io/zh-cn/latest/Instruction/ReleaseNote3.0.html).
|
| 87 |
+
<details><summary>More</summary>
|
| 88 |
+
|
| 89 |
+
- 🎉 2024.08.12: The ms-swift paper has been published on arXiv and can be read [here](https://arxiv.org/abs/2408.05517).
|
| 90 |
+
- 🔥 2024.08.05: Support for using [evalscope](https://github.com/modelscope/evalscope/) as a backend for evaluating large models and multimodal models.
|
| 91 |
+
- 🔥 2024.07.29: Support for using [vllm](https://github.com/vllm-project/vllm) and [lmdeploy](https://github.com/InternLM/lmdeploy) to accelerate inference for large models and multimodal models. When performing infer/deploy/eval, you can specify `--infer_backend vllm/lmdeploy`.
|
| 92 |
+
- 🔥 2024.07.24: Support for human preference alignment training for multimodal large models, including DPO/ORPO/SimPO/CPO/KTO/RM/PPO.
|
| 93 |
+
- 🔥 2024.02.01: Support for Agent training! The training algorithm is derived from [this paper](https://arxiv.org/pdf/2309.00986.pdf).
|
| 94 |
+
</details>
|
| 95 |
+
|
| 96 |
+
## 🛠️ Installation
|
| 97 |
+
To install using pip:
|
| 98 |
+
```shell
|
| 99 |
+
pip install ms-swift -U
|
| 100 |
+
```
|
| 101 |
+
|
| 102 |
+
To install from source:
|
| 103 |
+
```shell
|
| 104 |
+
# pip install git+https://github.com/modelscope/ms-swift.git
|
| 105 |
+
|
| 106 |
+
git clone https://github.com/modelscope/ms-swift.git
|
| 107 |
+
cd ms-swift
|
| 108 |
+
pip install -e .
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
Running Environment:
|
| 112 |
+
|
| 113 |
+
| | Range | Recommended | Notes |
|
| 114 |
+
| ------------ |--------------| ----------- | ----------------------------------------- |
|
| 115 |
+
| python | >=3.9 | 3.10 | |
|
| 116 |
+
| cuda | | cuda12 | No need to install if using CPU, NPU, MPS |
|
| 117 |
+
| torch | >=2.0 | | |
|
| 118 |
+
| transformers | >=4.33 | 4.51 | |
|
| 119 |
+
| modelscope | >=1.23 | | |
|
| 120 |
+
| peft | >=0.11,<0.16 | ||
|
| 121 |
+
| trl | >=0.13,<0.18 | 0.17 |RLHF|
|
| 122 |
+
| deepspeed | >=0.14 | 0.14.5 | Training |
|
| 123 |
+
| vllm | >=0.5.1 | 0.7.3/0.8 | Inference/Deployment/Evaluation |
|
| 124 |
+
| lmdeploy | >=0.5 | 0.8 | Inference/Deployment/Evaluation |
|
| 125 |
+
| evalscope | >=0.11 | | Evaluation |
|
| 126 |
+
|
| 127 |
+
For more optional dependencies, you can refer to [here](https://github.com/modelscope/ms-swift/blob/main/requirements/install_all.sh).
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
## 🚀 Quick Start
|
| 131 |
+
|
| 132 |
+
10 minutes of self-cognition fine-tuning of Qwen2.5-7B-Instruct on a single 3090 GPU:
|
| 133 |
+
|
| 134 |
+
### Command Line Interface
|
| 135 |
+
|
| 136 |
+
```shell
|
| 137 |
+
# 22GB
|
| 138 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 139 |
+
swift sft \
|
| 140 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 141 |
+
--train_type lora \
|
| 142 |
+
--dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
|
| 143 |
+
'AI-ModelScope/alpaca-gpt4-data-en#500' \
|
| 144 |
+
'swift/self-cognition#500' \
|
| 145 |
+
--torch_dtype bfloat16 \
|
| 146 |
+
--num_train_epochs 1 \
|
| 147 |
+
--per_device_train_batch_size 1 \
|
| 148 |
+
--per_device_eval_batch_size 1 \
|
| 149 |
+
--learning_rate 1e-4 \
|
| 150 |
+
--lora_rank 8 \
|
| 151 |
+
--lora_alpha 32 \
|
| 152 |
+
--target_modules all-linear \
|
| 153 |
+
--gradient_accumulation_steps 16 \
|
| 154 |
+
--eval_steps 50 \
|
| 155 |
+
--save_steps 50 \
|
| 156 |
+
--save_total_limit 2 \
|
| 157 |
+
--logging_steps 5 \
|
| 158 |
+
--max_length 2048 \
|
| 159 |
+
--output_dir output \
|
| 160 |
+
--system 'You are a helpful assistant.' \
|
| 161 |
+
--warmup_ratio 0.05 \
|
| 162 |
+
--dataloader_num_workers 4 \
|
| 163 |
+
--model_author swift \
|
| 164 |
+
--model_name swift-robot
|
| 165 |
+
```
|
| 166 |
+
|
| 167 |
+
Tips:
|
| 168 |
+
|
| 169 |
+
- If you want to train with a custom dataset, you can refer to [this guide](https://swift.readthedocs.io/en/latest/Customization/Custom-dataset.html) to organize your dataset format and specify `--dataset <dataset_path>`; a minimal example of the expected format is sketched after these tips.
|
| 170 |
+
- The `--model_author` and `--model_name` parameters are only effective when the dataset includes `swift/self-cognition`.
|
| 171 |
+
- To train with a different model, simply modify `--model <model_id/model_path>`.
|
| 172 |
+
- By default, ModelScope is used for downloading models and datasets. If you want to use HuggingFace, simply specify `--use_hf true`.
|
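For the custom-dataset route, the sketch below shows one minimal JSONL layout using the `messages` format (the same shape as the `dataset.json` sample shipped in this repository). The concrete rows are illustrative only, and extra keys such as `images` or `audios` are needed only for multimodal data.

```jsonl
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "What is the capital of Zhejiang?"}, {"role": "assistant", "content": "Hangzhou."}]}
{"messages": [{"role": "user", "content": "<image>Describe this picture."}, {"role": "assistant", "content": "A cat sitting on a windowsill."}], "images": ["/path/to/cat.jpg"]}
```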
| 173 |
+
|
| 174 |
+
After training is complete, use the following command to infer with the trained weights:
|
| 175 |
+
|
| 176 |
+
- Here, `--adapters` should be replaced with the last checkpoint folder generated during training. Since the adapters folder contains the training parameter file `args.json`, there is no need to specify `--model`, `--system` separately; Swift will automatically read these parameters. To disable this behavior, you can set `--load_args false`.
|
| 177 |
+
|
| 178 |
+
```shell
|
| 179 |
+
# Using an interactive command line for inference.
|
| 180 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 181 |
+
swift infer \
|
| 182 |
+
--adapters output/vx-xxx/checkpoint-xxx \
|
| 183 |
+
--stream true \
|
| 184 |
+
--temperature 0 \
|
| 185 |
+
--max_new_tokens 2048
|
| 186 |
+
|
| 187 |
+
# merge-lora and use vLLM for inference acceleration
|
| 188 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 189 |
+
swift infer \
|
| 190 |
+
--adapters output/vx-xxx/checkpoint-xxx \
|
| 191 |
+
--stream true \
|
| 192 |
+
--merge_lora true \
|
| 193 |
+
--infer_backend vllm \
|
| 194 |
+
--max_model_len 8192 \
|
| 195 |
+
--temperature 0 \
|
| 196 |
+
--max_new_tokens 2048
|
| 197 |
+
```
|
| 198 |
+
|
| 199 |
+
Finally, use the following command to push the model to ModelScope:
|
| 200 |
+
|
| 201 |
+
```shell
|
| 202 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 203 |
+
swift export \
|
| 204 |
+
--adapters output/vx-xxx/checkpoint-xxx \
|
| 205 |
+
--push_to_hub true \
|
| 206 |
+
--hub_model_id '<your-model-id>' \
|
| 207 |
+
--hub_token '<your-sdk-token>' \
|
| 208 |
+
--use_hf false
|
| 209 |
+
```
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
### Web-UI
|
| 213 |
+
The Web-UI is a **zero-threshold** training and deployment interface solution based on Gradio interface technology. For more details, you can check [here](https://swift.readthedocs.io/en/latest/GetStarted/Web-UI.html).
|
| 214 |
+
|
| 215 |
+
```shell
|
| 216 |
+
SWIFT_UI_LANG=en swift web-ui
|
| 217 |
+
```
|
| 218 |
+
|
| 219 |
+

|
| 220 |
+
|
| 221 |
+
### Using Python
|
| 222 |
+
|
| 223 |
+
ms-swift also supports training and inference using Python. Below is pseudocode for training and inference. For more details, you can refer to [here](https://github.com/modelscope/ms-swift/blob/main/examples/notebook/qwen2_5-self-cognition/self-cognition-sft.ipynb).
|
| 224 |
+
|
| 225 |
+
Training:
|
| 226 |
+
|
| 227 |
+
```python
|
| 228 |
+
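# NOTE: the imports below are assumptions based on the linked self-cognition notebook;
# check that notebook for the exact, up-to-date import paths.
from swift.llm import get_model_tokenizer, get_template, load_dataset, EncodePreprocessor
from swift.tuners import Swift, LoraConfig
from swift.trainers import Seq2SeqTrainer, Seq2SeqTrainingArguments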
# Retrieve the model and template, and add a trainable LoRA module
|
| 229 |
+
model, tokenizer = get_model_tokenizer(model_id_or_path, ...)
|
| 230 |
+
template = get_template(model.model_meta.template, tokenizer, ...)
|
| 231 |
+
model = Swift.prepare_model(model, lora_config)
|
| 232 |
+
|
| 233 |
+
# Download and load the dataset, and encode the text into tokens
|
| 234 |
+
train_dataset, val_dataset = load_dataset(dataset_id_or_path, ...)
|
| 235 |
+
train_dataset = EncodePreprocessor(template=template)(train_dataset, num_proc=num_proc)
|
| 236 |
+
val_dataset = EncodePreprocessor(template=template)(val_dataset, num_proc=num_proc)
|
| 237 |
+
|
| 238 |
+
# Train the model
|
| 239 |
+
trainer = Seq2SeqTrainer(
|
| 240 |
+
model=model,
|
| 241 |
+
args=training_args,
|
| 242 |
+
data_collator=template.data_collator,
|
| 243 |
+
train_dataset=train_dataset,
|
| 244 |
+
eval_dataset=val_dataset,
|
| 245 |
+
template=template,
|
| 246 |
+
)
|
| 247 |
+
trainer.train()
|
| 248 |
+
```
|
| 249 |
+
Inference:
|
| 250 |
+
|
| 251 |
+
```python
|
| 252 |
+
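# NOTE: assumed imports (see the linked notebook for the authoritative ones).
from swift.llm import PtEngine, InferRequest, RequestConfig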
# Perform inference using the native PyTorch engine
|
| 253 |
+
engine = PtEngine(model_id_or_path, adapters=[lora_checkpoint])
|
| 254 |
+
infer_request = InferRequest(messages=[{'role': 'user', 'content': 'who are you?'}])
|
| 255 |
+
request_config = RequestConfig(max_tokens=max_new_tokens, temperature=temperature)
|
| 256 |
+
|
| 257 |
+
resp_list = engine.infer([infer_request], request_config)
|
| 258 |
+
print(f'response: {resp_list[0].choices[0].message.content}')
|
| 259 |
+
```
|
| 260 |
+
|
| 261 |
+
## ✨ Usage
|
| 262 |
+
Here is a minimal example of training to deployment using ms-swift. For more details, you can check the [examples](https://github.com/modelscope/ms-swift/tree/main/examples).
|
| 263 |
+
|
| 264 |
+
- If you want to use other models or datasets (including multimodal models and datasets), you only need to modify `--model` to specify the corresponding model's ID or path, and modify `--dataset` to specify the corresponding dataset's ID or path.
|
| 265 |
+
- By default, ModelScope is used for downloading models and datasets. If you want to use HuggingFace, simply specify `--use_hf true`.
|
| 266 |
+
|
| 267 |
+
| Useful Links |
|
| 268 |
+
| ------ |
|
| 269 |
+
| [🔥Command Line Parameters](https://swift.readthedocs.io/en/latest/Instruction/Command-line-parameters.html) |
|
| 270 |
+
| [Supported Models and Datasets](https://swift.readthedocs.io/en/latest/Instruction/Supported-models-and-datasets.html) |
|
| 271 |
+
| [Custom Models](https://swift.readthedocs.io/en/latest/Customization/Custom-model.html), [🔥Custom Datasets](https://swift.readthedocs.io/en/latest/Customization/Custom-dataset.html) |
|
| 272 |
+
| [LLM Tutorial](https://github.com/modelscope/modelscope-classroom/tree/main/LLM-tutorial) |
|
| 273 |
+
|
| 274 |
+
### Training
|
| 275 |
+
|
| 276 |
+
Supported Training Methods:
|
| 277 |
+
|
| 278 |
+
| Method | Full-Parameter | LoRA | QLoRA | Deepspeed | Multi-Node | Multi-Modal |
|
| 279 |
+
|------------------------------------|--------------------------------------------------------------|---------------------------------------------------------------------------------------------|--------------------------------------------------------------|--------------------------------------------------------------|--------------------------------------------------------------|----------------------------------------------------------------------------------------------|
|
| 280 |
+
| Pre-training | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/pretrain/train.sh) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
| 281 |
+
| Instruction Supervised Fine-tuning | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/full/train.sh) | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/lora_sft.sh) | [✅](https://github.com/modelscope/ms-swift/tree/main/examples/train/qlora) | [✅](https://github.com/modelscope/ms-swift/tree/main/examples/train/multi-gpu/deepspeed) | [✅](https://github.com/modelscope/ms-swift/tree/main/examples/train/multi-node) | [✅](https://github.com/modelscope/ms-swift/tree/main/examples/train/multimodal) |
|
| 282 |
+
| DPO Training | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/dpo.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/dpo.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/rlhf/dpo.sh) |
|
| 283 |
+
| GRPO Training | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/grpo/internal/grpo_zero2.sh) | ✅ | ✅ | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/grpo/internal/multi_node) | ✅ |
|
| 284 |
+
| Reward Model Training | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/rm.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/rm.sh) | ✅ | ✅ |
|
| 285 |
+
| PPO Training | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/ppo.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/ppo.sh) | ✅ | ❌ |
|
| 286 |
+
| KTO Training | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/kto.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/kto.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/rlhf/kto.sh) |
|
| 287 |
+
| CPO Training | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/cpo.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/cpo.sh) | ✅ | ✅ |
|
| 288 |
+
| SimPO Training | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/simpo.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/simpo.sh) | ✅ | ✅ |
|
| 289 |
+
| ORPO Training | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/orpo.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/orpo.sh) | ✅ | ✅ |
|
| 290 |
+
| Classification Model Training | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/seq_cls/qwen2_5/sft.sh) | ✅ | ✅ | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/seq_cls/qwen2_vl/sft.sh) |
|
| 291 |
+
| Embedding Model Training | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/embedding/train_gte.sh) | ✅ | ✅ | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/embedding/train_gme.sh) |
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
Pre-training:
|
| 296 |
+
```shell
|
| 297 |
+
# 8*A100
|
| 298 |
+
NPROC_PER_NODE=8 \
|
| 299 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
|
| 300 |
+
swift pt \
|
| 301 |
+
--model Qwen/Qwen2.5-7B \
|
| 302 |
+
--dataset swift/chinese-c4 \
|
| 303 |
+
--streaming true \
|
| 304 |
+
--train_type full \
|
| 305 |
+
--deepspeed zero2 \
|
| 306 |
+
--output_dir output \
|
| 307 |
+
--max_steps 10000 \
|
| 308 |
+
...
|
| 309 |
+
```
|
| 310 |
+
|
| 311 |
+
Fine-tuning:
|
| 312 |
+
```shell
|
| 313 |
+
CUDA_VISIBLE_DEVICES=0 swift sft \
|
| 314 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 315 |
+
--dataset AI-ModelScope/alpaca-gpt4-data-en \
|
| 316 |
+
--train_type lora \
|
| 317 |
+
--output_dir output \
|
| 318 |
+
...
|
| 319 |
+
```
|
| 320 |
+
|
| 321 |
+
RLHF:
|
| 322 |
+
```shell
|
| 323 |
+
CUDA_VISIBLE_DEVICES=0 swift rlhf \
|
| 324 |
+
--rlhf_type dpo \
|
| 325 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 326 |
+
--dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji \
|
| 327 |
+
--train_type lora \
|
| 328 |
+
--output_dir output \
|
| 329 |
+
...
|
| 330 |
+
```
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
### Inference
|
| 334 |
+
```shell
|
| 335 |
+
CUDA_VISIBLE_DEVICES=0 swift infer \
|
| 336 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 337 |
+
--stream true \
|
| 338 |
+
--infer_backend pt \
|
| 339 |
+
--max_new_tokens 2048
|
| 340 |
+
|
| 341 |
+
# LoRA
|
| 342 |
+
CUDA_VISIBLE_DEVICES=0 swift infer \
|
| 343 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 344 |
+
--adapters swift/test_lora \
|
| 345 |
+
--stream true \
|
| 346 |
+
--infer_backend pt \
|
| 347 |
+
--temperature 0 \
|
| 348 |
+
--max_new_tokens 2048
|
| 349 |
+
```
|
| 350 |
+
|
| 351 |
+
### Interface Inference
|
| 352 |
+
```shell
|
| 353 |
+
CUDA_VISIBLE_DEVICES=0 swift app \
|
| 354 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 355 |
+
--stream true \
|
| 356 |
+
--infer_backend pt \
|
| 357 |
+
--max_new_tokens 2048
|
| 358 |
+
```
|
| 359 |
+
|
| 360 |
+
### Deployment
|
| 361 |
+
```shell
|
| 362 |
+
CUDA_VISIBLE_DEVICES=0 swift deploy \
|
| 363 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 364 |
+
--infer_backend vllm
|
| 365 |
+
```
|
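`swift deploy` exposes an OpenAI-compatible endpoint, so any OpenAI client can talk to the deployed model. The sketch below is a minimal client, assuming a local deployment on the default port and the `openai` Python package; adjust `base_url` to wherever your server is actually listening.

```python
# Minimal client sketch for the OpenAI-compatible server started by `swift deploy`.
# The base_url below assumes a local deployment on the default port; adjust as needed.
from openai import OpenAI

client = OpenAI(api_key='EMPTY', base_url='http://127.0.0.1:8000/v1')
model = client.models.list().data[0].id  # query the served model name instead of hard-coding it
resp = client.chat.completions.create(
    model=model,
    messages=[{'role': 'user', 'content': 'Who are you?'}],
    max_tokens=256,
    temperature=0,
)
print(resp.choices[0].message.content)
```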
| 366 |
+
|
| 367 |
+
### Sampling
|
| 368 |
+
```shell
|
| 369 |
+
CUDA_VISIBLE_DEVICES=0 swift sample \
|
| 370 |
+
--model LLM-Research/Meta-Llama-3.1-8B-Instruct \
|
| 371 |
+
--sampler_engine pt \
|
| 372 |
+
--num_return_sequences 5 \
|
| 373 |
+
--dataset AI-ModelScope/alpaca-gpt4-data-zh#5
|
| 374 |
+
```
|
| 375 |
+
|
| 376 |
+
### Evaluation
|
| 377 |
+
```shell
|
| 378 |
+
CUDA_VISIBLE_DEVICES=0 swift eval \
|
| 379 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 380 |
+
--infer_backend lmdeploy \
|
| 381 |
+
--eval_backend OpenCompass \
|
| 382 |
+
--eval_dataset ARC_c
|
| 383 |
+
```
|
| 384 |
+
|
| 385 |
+
### Quantization
|
| 386 |
+
```shell
|
| 387 |
+
CUDA_VISIBLE_DEVICES=0 swift export \
|
| 388 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 389 |
+
--quant_bits 4 --quant_method awq \
|
| 390 |
+
--dataset AI-ModelScope/alpaca-gpt4-data-zh \
|
| 391 |
+
--output_dir Qwen2.5-7B-Instruct-AWQ
|
| 392 |
+
```
|
| 393 |
+
|
| 394 |
+
### Push Model
|
| 395 |
+
```shell
|
| 396 |
+
swift export \
|
| 397 |
+
--model <model-path> \
|
| 398 |
+
--push_to_hub true \
|
| 399 |
+
--hub_model_id '<model-id>' \
|
| 400 |
+
--hub_token '<sdk-token>'
|
| 401 |
+
```
|
| 402 |
+
|
| 403 |
+
## 🏛 License
|
| 404 |
+
|
| 405 |
+
This framework is licensed under the [Apache License (Version 2.0)](https://github.com/modelscope/modelscope/blob/master/LICENSE). For models and datasets, please refer to the original resource page and follow the corresponding License.
|
| 406 |
+
|
| 407 |
+
## 📎 Citation
|
| 408 |
+
|
| 409 |
+
```bibtex
|
| 410 |
+
@misc{zhao2024swiftascalablelightweightinfrastructure,
|
| 411 |
+
title={SWIFT: A Scalable lightWeight Infrastructure for Fine-Tuning},
|
| 412 |
+
author={Yuze Zhao and Jintao Huang and Jinghan Hu and Xingjun Wang and Yunlin Mao and Daoze Zhang and Zeyinzi Jiang and Zhikai Wu and Baole Ai and Ang Wang and Wenmeng Zhou and Yingda Chen},
|
| 413 |
+
year={2024},
|
| 414 |
+
eprint={2408.05517},
|
| 415 |
+
archivePrefix={arXiv},
|
| 416 |
+
primaryClass={cs.CL},
|
| 417 |
+
url={https://arxiv.org/abs/2408.05517},
|
| 418 |
+
}
|
| 419 |
+
```
|
| 420 |
+
|
| 421 |
+
## Star History
|
| 422 |
+
|
| 423 |
+
[](https://star-history.com/#modelscope/ms-swift&Date)
|
README_CN.md
ADDED
|
@@ -0,0 +1,413 @@
|
| 1 |
+
# SWIFT (Scalable lightWeight Infrastructure for Fine-Tuning)
|
| 2 |
+
|
| 3 |
+
<p align="center">
|
| 4 |
+
<br>
|
| 5 |
+
<img src="asset/banner.png"/>
|
| 6 |
+
<br>
|
| 7 |
+
<p>
|
| 8 |
+
<p align="center">
|
| 9 |
+
<a href="https://modelscope.cn/home">魔搭社区官网</a>
|
| 10 |
+
<br>
|
| 11 |
+
中文  |  <a href="README.md">English</a> 
|
| 12 |
+
</p>
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
<p align="center">
|
| 16 |
+
<img src="https://img.shields.io/badge/python-3.10-5be.svg">
|
| 17 |
+
<img src="https://img.shields.io/badge/pytorch-%E2%89%A52.0-orange.svg">
|
| 18 |
+
<a href="https://github.com/modelscope/modelscope/"><img src="https://img.shields.io/badge/modelscope-%E2%89%A51.19-5D91D4.svg"></a>
|
| 19 |
+
<a href="https://pypi.org/project/ms-swift/"><img src="https://badge.fury.io/py/ms-swift.svg"></a>
|
| 20 |
+
<a href="https://github.com/modelscope/swift/blob/main/LICENSE"><img src="https://img.shields.io/github/license/modelscope/swift"></a>
|
| 21 |
+
<a href="https://pepy.tech/project/ms-swift"><img src="https://pepy.tech/badge/ms-swift"></a>
|
| 22 |
+
<a href="https://github.com/modelscope/swift/pulls"><img src="https://img.shields.io/badge/PR-welcome-55EB99.svg"></a>
|
| 23 |
+
</p>
|
| 24 |
+
|
| 25 |
+
<p align="center">
|
| 26 |
+
<a href="https://trendshift.io/repositories/6427" target="_blank"><img src="https://trendshift.io/api/badge/repositories/6427" alt="modelscope%2Fswift | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
| 27 |
+
</p>
|
| 28 |
+
|
| 29 |
+
<p align="center">
|
| 30 |
+
<a href="https://arxiv.org/abs/2408.05517">论文</a>   | <a href="https://swift.readthedocs.io/en/latest/">English Documentation</a>   |   <a href="https://swift.readthedocs.io/zh-cn/latest/">中文文档</a>  
|
| 31 |
+
</p>
|
| 32 |
+
|
| 33 |
+
## 📖 目录
|
| 34 |
+
- [用户群](#-用户群)
|
| 35 |
+
- [简介](#-简介)
|
| 36 |
+
- [新闻](#-新闻)
|
| 37 |
+
- [安装](#%EF%B8%8F-安装)
|
| 38 |
+
- [快速开始](#-快速开始)
|
| 39 |
+
- [如何使用](#-如何使用)
|
| 40 |
+
- [License](#-license)
|
| 41 |
+
- [引用](#-引用)
|
| 42 |
+
|
| 43 |
+
## ☎ 用户群
|
| 44 |
+
|
| 45 |
+
请扫描下面的二维码来加入我们的交流群:
|
| 46 |
+
|
| 47 |
+
[Discord Group](https://discord.com/invite/D27yfEFVz5) | 微信群
|
| 48 |
+
:-------------------------:|:-------------------------:
|
| 49 |
+
<img src="asset/discord_qr.jpg" width="200" height="200"> | <img src="asset/wechat.png" width="200" height="200">
|
| 50 |
+
|
| 51 |
+
## 📝 简介
|
| 52 |
+
🍲 ms-swift是魔搭社区提供的大模型与多模态大模型微调部署框架,现已支持500+大模型与200+多模态大模型的训练(预训练、微调、人类对齐)、推理、评测、量化与部署。其中大模型包括:Qwen3、Qwen3-MoE、Qwen2.5、InternLM3、GLM4、Mistral、DeepSeek-R1、Yi1.5、TeleChat2、Baichuan2、Gemma2等模型,多模态大模型包括:Qwen2.5-VL、Qwen2-Audio、Llama4、Llava、InternVL2.5、MiniCPM-V-2.6、GLM4v、Xcomposer2.5、Yi-VL、DeepSeek-VL2、Phi3.5-Vision、GOT-OCR2等模型。
|
| 53 |
+
|
| 54 |
+
🍔 除此之外,ms-swift汇集了最新的训练技术,包括LoRA、QLoRA、Llama-Pro、LongLoRA、GaLore、Q-GaLore、LoRA+、LISA、DoRA、FourierFt、ReFT、UnSloth、和Liger等轻量化训练技术,以及DPO、GRPO、RM、PPO、KTO、CPO、SimPO、ORPO等人类对齐训练方法。ms-swift支持使用vLLM和LMDeploy对推理、评测和部署模块进行加速,并支持使用GPTQ、AWQ、BNB等技术对大模型进行量化。ms-swift还提供了基于Gradio的Web-UI界面及丰富的最佳实践。
|
| 55 |
+
|
| 56 |
+
**为什么选择ms-swift?**
|
| 57 |
+
- 🍎 **模型类型**:支持500+纯文本大模型、**200+多模态大模型**以及All-to-All全模态模型、序列分类模型、Embedding模型**训练到部署全流程**。
|
| 58 |
+
- **数据集类型**:内置150+预训练、微调、人类对齐、多模态等各种类型的数据集,并支持自定义数据集。
|
| 59 |
+
- **硬件支持**:CPU、RTX系列、T4/V100、A10/A100/H100、Ascend NPU、MPS等。
|
| 60 |
+
- 🍊 **轻量训练**:支持了LoRA、QLoRA、DoRA、LoRA+、ReFT、RS-LoRA、LLaMAPro、Adapter、GaLore、Q-Galore、LISA、UnSloth、Liger-Kernel等轻量微调方式。
|
| 61 |
+
- **分布式训练**:支持分布式数据并行(DDP)、device_map简易模型并行、DeepSpeed ZeRO2 ZeRO3、FSDP等分布式训练技术。
|
| 62 |
+
- **量化训练**:支持对BNB、AWQ、GPTQ、AQLM、HQQ、EETQ量化模型进行训练。
|
| 63 |
+
- **RLHF训练**:支持纯文本大模型和多模态大模型的DPO、GRPO、RM、PPO、KTO、CPO、SimPO、ORPO等人类对齐训练方法。
|
| 64 |
+
- 🍓 **多模态训练**:支持对图像、视频和语音不同模态模型进行训练,支持VQA、Caption、OCR、Grounding任务的训练。
|
| 65 |
+
- **界面训练**:以界面的方式提供训练、推理、评测、量化的能力,完成大模型的全链路。
|
| 66 |
+
- **插件化与拓展**:支持自定义模型和数据集拓展,支持对loss、metric、trainer、loss-scale、callback、optimizer等组件进行自定义。
|
| 67 |
+
- 🍉 **工具箱能力**:不仅提供大模型和多模态大模型的训练支持,还涵盖其推理、评测、量化和部署全流程。
|
| 68 |
+
- **推理加速**:支持PyTorch、vLLM、LmDeploy推理加速引擎,并提供OpenAI接口,为推理、部署和评测模块提供加速。
|
| 69 |
+
- **模型评测**:以EvalScope作为评测后端,支持100+评测数据集对纯文本和多模态模型进行评测。
|
| 70 |
+
- **模型量化**:支持AWQ、GPTQ和BNB的量化导出,导出的模型支持使用vLLM/LmDeploy推理加速,并支持继续训练。
|
| 71 |
+
|
| 72 |
+
## 🎉 新闻
|
| 73 |
+
- 🎁 2025.05.11: GRPO中的奖励模型支持自定义处理逻辑,GenRM的例子参考[这里](./docs/source/Instruction/GRPO.md#自定义奖励模型)
|
| 74 |
+
- 🎁 2025.04.15: ms-swift论文已经被AAAI 2025接收,论文地址在[这里](https://ojs.aaai.org/index.php/AAAI/article/view/35383)。
|
| 75 |
+
- 🎁 2025.03.23: 支持了多轮GRPO,用于构建多轮对话场景的训练(例如agent tool calling),请查看[训练脚本](examples/train/grpo/internal/train_multi_round.sh)。
|
| 76 |
+
- 🎁 2025.03.16: 支持了Megatron的并行技术进行训练,请查看[Megatron-SWIFT训练文档](https://swift.readthedocs.io/zh-cn/latest/Instruction/Megatron-SWIFT训练.html)。
|
| 77 |
+
- 🎁 2025.03.15: 支持纯文本和多模态模型的embedding模型的微调,请查看[训练脚本](examples/train/embedding)。
|
| 78 |
+
- 🎁 2025.03.05: 支持GRPO的hybrid模式,4GPU(4*80G)训练72B模型的脚本参考[这里](examples/train/grpo/internal/train_72b_4gpu.sh)。同时支持vllm的tensor并行,训练脚本参考[这里](examples/train/grpo/internal/multi_gpu_mp_colocate.sh)。
|
| 79 |
+
- 🎁 2025.02.21: GRPO算法支持使用LMDeploy,训练脚本参考[这里](examples/train/grpo/internal/full_lmdeploy.sh)。此外测试了GRPO算法的性能,使用一些tricks使训练速度提高到300%。WanDB表格请查看[这里](https://wandb.ai/tastelikefeet/grpo_perf_test?nw=nwuseryuzezyz)。
|
| 80 |
+
- 🎁 2025.02.21: 支持`swift sample`命令。强化微调脚本参考[这里](docs/source/Instruction/强化微调.md),大模型API蒸馏采样脚本参考[这里](examples/sampler/distill/distill.sh)。
|
| 81 |
+
- 🔥 2025.02.12: 支持GRPO (Group Relative Policy Optimization) 训练算法,文档参考[这里](docs/source/Instruction/GRPO.md)。
|
| 82 |
+
- 🎁 2024.12.04: **ms-swift3.0**大版本更新。请查看[发布说明和更改](https://swift.readthedocs.io/zh-cn/latest/Instruction/ReleaseNote3.0.html)。
|
| 83 |
+
<details><summary>更多</summary>
|
| 84 |
+
|
| 85 |
+
- 🎉 2024.08.12: ms-swift论文已经发布到arXiv上,可以点击[这里](https://arxiv.org/abs/2408.05517)阅读。
|
| 86 |
+
- 🔥 2024.08.05: 支持使用[evalscope](https://github.com/modelscope/evalscope/)作为后端进行大模型和多模态模型的评测。
|
| 87 |
+
- 🔥 2024.07.29: 支持使用[vllm](https://github.com/vllm-project/vllm), [lmdeploy](https://github.com/InternLM/lmdeploy)对大模型和多模态大模型进行推理加速,在infer/deploy/eval时额外指定`--infer_backend vllm/lmdeploy`即可。
|
| 88 |
+
- 🔥 2024.07.24: 支持对多模态大模型进行人类偏好对齐训练,包括DPO/ORPO/SimPO/CPO/KTO/RM/PPO。
|
| 89 |
+
- 🔥 2024.02.01: 支持Agent训练!训练算法源自这篇[论文](https://arxiv.org/pdf/2309.00986.pdf)。
|
| 90 |
+
</details>
|
| 91 |
+
|
| 92 |
+
## 🛠️ 安装
|
| 93 |
+
使用pip进行安装:
|
| 94 |
+
```shell
|
| 95 |
+
pip install ms-swift -U
|
| 96 |
+
```
|
| 97 |
+
|
| 98 |
+
从源代码安装:
|
| 99 |
+
```shell
|
| 100 |
+
# pip install git+https://github.com/modelscope/ms-swift.git
|
| 101 |
+
|
| 102 |
+
git clone https://github.com/modelscope/ms-swift.git
|
| 103 |
+
cd ms-swift
|
| 104 |
+
pip install -e .
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
运行环境:
|
| 108 |
+
|
| 109 |
+
| | 范围 | 推荐 | 备注 |
|
| 110 |
+
| ------ |--------------| ---- | --|
|
| 111 |
+
| python | >=3.9 | 3.10 ||
|
| 112 |
+
| cuda | | cuda12 |使用cpu、npu、mps则无需安装|
|
| 113 |
+
| torch | >=2.0 | ||
|
| 114 |
+
| transformers | >=4.33 | 4.51 ||
|
| 115 |
+
| modelscope | >=1.23 | ||
|
| 116 |
+
| peft | >=0.11,<0.16 | ||
|
| 117 |
+
| trl | >=0.13,<0.18 | 0.17 |RLHF|
|
| 118 |
+
| deepspeed | >=0.14 | 0.14.5 |训练|
|
| 119 |
+
| vllm | >=0.5.1 | 0.7.3/0.8 |推理/部署/评测|
|
| 120 |
+
| lmdeploy | >=0.5 | 0.8 |推理/部署/评测|
|
| 121 |
+
| evalscope | >=0.11 | |评测|
|
| 122 |
+
|
| 123 |
+
更多可选依赖可以参考[这里](https://github.com/modelscope/ms-swift/blob/main/requirements/install_all.sh)。
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
## 🚀 快速开始
|
| 127 |
+
|
| 128 |
+
**10分钟**在单卡3090上对Qwen2.5-7B-Instruct进行自我认知微调:
|
| 129 |
+
|
| 130 |
+
### 命令行
|
| 131 |
+
```shell
|
| 132 |
+
# 22GB
|
| 133 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 134 |
+
swift sft \
|
| 135 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 136 |
+
--train_type lora \
|
| 137 |
+
--dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
|
| 138 |
+
'AI-ModelScope/alpaca-gpt4-data-en#500' \
|
| 139 |
+
'swift/self-cognition#500' \
|
| 140 |
+
--torch_dtype bfloat16 \
|
| 141 |
+
--num_train_epochs 1 \
|
| 142 |
+
--per_device_train_batch_size 1 \
|
| 143 |
+
--per_device_eval_batch_size 1 \
|
| 144 |
+
--learning_rate 1e-4 \
|
| 145 |
+
--lora_rank 8 \
|
| 146 |
+
--lora_alpha 32 \
|
| 147 |
+
--target_modules all-linear \
|
| 148 |
+
--gradient_accumulation_steps 16 \
|
| 149 |
+
--eval_steps 50 \
|
| 150 |
+
--save_steps 50 \
|
| 151 |
+
--save_total_limit 2 \
|
| 152 |
+
--logging_steps 5 \
|
| 153 |
+
--max_length 2048 \
|
| 154 |
+
--output_dir output \
|
| 155 |
+
--system 'You are a helpful assistant.' \
|
| 156 |
+
--warmup_ratio 0.05 \
|
| 157 |
+
--dataloader_num_workers 4 \
|
| 158 |
+
--model_author swift \
|
| 159 |
+
--model_name swift-robot
|
| 160 |
+
```
|
| 161 |
+
|
| 162 |
+
小贴士:
|
| 163 |
+
- 如果要使用自定义数据集进行训练,你可以参考[这里](https://swift.readthedocs.io/zh-cn/latest/Customization/%E8%87%AA%E5%AE%9A%E4%B9%89%E6%95%B0%E6%8D%AE%E9%9B%86.html)组织数据集格式,并指定`--dataset <dataset_path>`。
|
| 164 |
+
- `--model_author`和`--model_name`参数只有当数据集中包含`swift/self-cognition`时才生效。
|
| 165 |
+
- 如果要使用其他模型进行训练,你只需要修改`--model <model_id/model_path>`即可。
|
| 166 |
+
- 默认使用ModelScope进行模型和数据集的下载。如果要使用HuggingFace,指定`--use_hf true`即可。
|
| 167 |
+
|
| 168 |
+
训练完成后,使用以下命令对训练后的权重进行推理:
|
| 169 |
+
- 这里的`--adapters`需要替换成训练生成的last checkpoint文件夹。由于adapters文件夹中包含了训练的参数文件`args.json`,因此不需要额外指定`--model`,`--system`,swift会自动读取这些参数。如果要关闭此行为,可以设置`--load_args false`。
|
| 170 |
+
|
| 171 |
+
```shell
|
| 172 |
+
# 使用交互式命令行进行推理
|
| 173 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 174 |
+
swift infer \
|
| 175 |
+
--adapters output/vx-xxx/checkpoint-xxx \
|
| 176 |
+
--stream true \
|
| 177 |
+
--temperature 0 \
|
| 178 |
+
--max_new_tokens 2048
|
| 179 |
+
|
| 180 |
+
# merge-lora并使用vLLM进行推理加速
|
| 181 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 182 |
+
swift infer \
|
| 183 |
+
--adapters output/vx-xxx/checkpoint-xxx \
|
| 184 |
+
--stream true \
|
| 185 |
+
--merge_lora true \
|
| 186 |
+
--infer_backend vllm \
|
| 187 |
+
--max_model_len 8192 \
|
| 188 |
+
--temperature 0 \
|
| 189 |
+
--max_new_tokens 2048
|
| 190 |
+
```
|
| 191 |
+
|
| 192 |
+
最后,使用以下命令将模型推送到ModelScope:
|
| 193 |
+
```shell
|
| 194 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 195 |
+
swift export \
|
| 196 |
+
--adapters output/vx-xxx/checkpoint-xxx \
|
| 197 |
+
--push_to_hub true \
|
| 198 |
+
--hub_model_id '<your-model-id>' \
|
| 199 |
+
--hub_token '<your-sdk-token>' \
|
| 200 |
+
--use_hf false
|
| 201 |
+
```
|
| 202 |
+
|
| 203 |
+
### Web-UI
|
| 204 |
+
|
| 205 |
+
Web-UI是基于gradio界面技术的**零门槛**训练、部署界面方案,具体可以查看[这里](https://swift.readthedocs.io/zh-cn/latest/GetStarted/Web-UI.html)。
|
| 206 |
+
|
| 207 |
+
```shell
|
| 208 |
+
swift web-ui
|
| 209 |
+
```
|
| 210 |
+

|
| 211 |
+
|
| 212 |
+
### 使用Python
|
| 213 |
+
ms-swift也支持使用python的方式进行训练和推理。下面给出训练和推理的**伪代码**,具体可以查看[这里](https://github.com/modelscope/ms-swift/blob/main/examples/notebook/qwen2_5-self-cognition/self-cognition-sft.ipynb)。
|
| 214 |
+
|
| 215 |
+
训练:
|
| 216 |
+
```python
|
| 217 |
+
# 获取模型和template,并加入可训练的LoRA模块
|
| 218 |
+
model, tokenizer = get_model_tokenizer(model_id_or_path, ...)
|
| 219 |
+
template = get_template(model.model_meta.template, tokenizer, ...)
|
| 220 |
+
model = Swift.prepare_model(model, lora_config)
|
| 221 |
+
|
| 222 |
+
# 下载并载入数据集,并将文本encode成tokens
|
| 223 |
+
train_dataset, val_dataset = load_dataset(dataset_id_or_path, ...)
|
| 224 |
+
train_dataset = EncodePreprocessor(template=template)(train_dataset, num_proc=num_proc)
|
| 225 |
+
val_dataset = EncodePreprocessor(template=template)(val_dataset, num_proc=num_proc)
|
| 226 |
+
|
| 227 |
+
# 进行训练
|
| 228 |
+
trainer = Seq2SeqTrainer(
|
| 229 |
+
model=model,
|
| 230 |
+
args=training_args,
|
| 231 |
+
data_collator=template.data_collator,
|
| 232 |
+
train_dataset=train_dataset,
|
| 233 |
+
eval_dataset=val_dataset,
|
| 234 |
+
template=template,
|
| 235 |
+
)
|
| 236 |
+
trainer.train()
|
| 237 |
+
```
|
| 238 |
+
|
| 239 |
+
推理:
|
| 240 |
+
```python
|
| 241 |
+
# 使用原生pytorch引擎进行推理
|
| 242 |
+
engine = PtEngine(model_id_or_path, adapters=[lora_checkpoint])
|
| 243 |
+
infer_request = InferRequest(messages=[{'role': 'user', 'content': 'who are you?'}])
|
| 244 |
+
request_config = RequestConfig(max_tokens=max_new_tokens, temperature=temperature)
|
| 245 |
+
|
| 246 |
+
resp_list = engine.infer([infer_request], request_config)
|
| 247 |
+
print(f'response: {resp_list[0].choices[0].message.content}')
|
| 248 |
+
```
|
| 249 |
+
|
| 250 |
+
## ✨ 如何使用
|
| 251 |
+
|
| 252 |
+
这里给出使用ms-swift进行训练到部署的最简示例,具体可以查看[examples](https://github.com/modelscope/ms-swift/tree/main/examples)。
|
| 253 |
+
|
| 254 |
+
- 若想使用其他模型或者数据集(含多模态模型和数据集),你只需要修改`--model`指定对应模型的id或者path,修改`--dataset`指定对应数据集的id或者path即可。
|
| 255 |
+
- 默认使用ModelScope进行模型和数据集的下载。如果要使用HuggingFace,指定`--use_hf true`即可。
|
| 256 |
+
|
| 257 |
+
| 常用链接 |
|
| 258 |
+
| ------ |
|
| 259 |
+
| [🔥命令行参数](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html) |
|
| 260 |
+
| [支持的模型和数据集](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.html) |
|
| 261 |
+
| [自定义模型](https://swift.readthedocs.io/zh-cn/latest/Customization/%E8%87%AA%E5%AE%9A%E4%B9%89%E6%A8%A1%E5%9E%8B.html), [🔥自定义数据集](https://swift.readthedocs.io/zh-cn/latest/Customization/%E8%87%AA%E5%AE%9A%E4%B9%89%E6%95%B0%E6%8D%AE%E9%9B%86.html) |
|
| 262 |
+
| [大模型教程](https://github.com/modelscope/modelscope-classroom/tree/main/LLM-tutorial) |
|
| 263 |
+
|
| 264 |
+
### 训练
|
| 265 |
+
支持的训练方法:
|
| 266 |
+
|
| 267 |
+
| 方法 | 全参数 | LoRA | QLoRA | Deepspeed | 多机 | 多模态 |
|
| 268 |
+
| ------ | ------ |---------------------------------------------------------------------------------------------| ----- | ------ | ------ |----------------------------------------------------------------------------------------------|
|
| 269 |
+
| 预训练 | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/pretrain/train.sh) | ✅ | ✅ | ✅ | ✅ | ✅ |
|
| 270 |
+
| 指令监督微调 | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/full/train.sh) | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/lora_sft.sh) | [✅](https://github.com/modelscope/ms-swift/tree/main/examples/train/qlora) | [✅](https://github.com/modelscope/ms-swift/tree/main/examples/train/multi-gpu/deepspeed) | [✅](https://github.com/modelscope/ms-swift/tree/main/examples/train/multi-node) | [✅](https://github.com/modelscope/ms-swift/tree/main/examples/train/multimodal) |
|
| 271 |
+
| DPO训练 | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/dpo.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/dpo.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/rlhf/dpo.sh) |
|
| 272 |
+
| GRPO训练 | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/grpo/internal/grpo_zero2.sh) | ✅ | ✅ | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/grpo/internal/multi_node) | ✅ |
|
| 273 |
+
| 奖励模型训练 | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/rm.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/rm.sh) | ✅ | ✅ |
|
| 274 |
+
| PPO训练 | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/ppo.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/ppo.sh) | ✅ | ❌ |
|
| 275 |
+
| KTO训练 | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/kto.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/kto.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/rlhf/kto.sh) |
|
| 276 |
+
| CPO训练 | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/cpo.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/cpo.sh) | ✅ | ✅ |
|
| 277 |
+
| SimPO训练 | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/simpo.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/simpo.sh) | ✅ | ✅ |
|
| 278 |
+
| ORPO训练 | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/orpo.sh) | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/rlhf/orpo.sh) | ✅ | ✅ |
|
| 279 |
+
| 分类模型训练 | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/seq_cls/qwen2_5/sft.sh) | ✅ | ✅ | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/seq_cls/qwen2_vl/sft.sh) |
|
| 280 |
+
| Embedding模型训练 | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/embedding/train_gte.sh) | ✅ | ✅ | ✅ | [✅](https://github.com/modelscope/ms-swift/blob/main/examples/train/embedding/train_gme.sh) |
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
预训练:
|
| 284 |
+
```shell
|
| 285 |
+
# 8*A100
|
| 286 |
+
NPROC_PER_NODE=8 \
|
| 287 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
|
| 288 |
+
swift pt \
|
| 289 |
+
--model Qwen/Qwen2.5-7B \
|
| 290 |
+
--dataset swift/chinese-c4 \
|
| 291 |
+
--streaming true \
|
| 292 |
+
--train_type full \
|
| 293 |
+
--deepspeed zero2 \
|
| 294 |
+
--output_dir output \
|
| 295 |
+
--max_steps 10000 \
|
| 296 |
+
...
|
| 297 |
+
```
|
| 298 |
+
|
| 299 |
+
微调:
|
| 300 |
+
```shell
|
| 301 |
+
CUDA_VISIBLE_DEVICES=0 swift sft \
|
| 302 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 303 |
+
--dataset AI-ModelScope/alpaca-gpt4-data-zh \
|
| 304 |
+
--train_type lora \
|
| 305 |
+
--output_dir output \
|
| 306 |
+
...
|
| 307 |
+
```
|
| 308 |
+
|
| 309 |
+
RLHF:
|
| 310 |
+
```shell
|
| 311 |
+
CUDA_VISIBLE_DEVICES=0 swift rlhf \
|
| 312 |
+
--rlhf_type dpo \
|
| 313 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 314 |
+
--dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji \
|
| 315 |
+
--train_type lora \
|
| 316 |
+
--output_dir output \
|
| 317 |
+
...
|
| 318 |
+
```
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
### 推理
|
| 322 |
+
```shell
|
| 323 |
+
CUDA_VISIBLE_DEVICES=0 swift infer \
|
| 324 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 325 |
+
--stream true \
|
| 326 |
+
--infer_backend pt \
|
| 327 |
+
--max_new_tokens 2048
|
| 328 |
+
|
| 329 |
+
# LoRA
|
| 330 |
+
CUDA_VISIBLE_DEVICES=0 swift infer \
|
| 331 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 332 |
+
--adapters swift/test_lora \
|
| 333 |
+
--stream true \
|
| 334 |
+
--infer_backend pt \
|
| 335 |
+
--temperature 0 \
|
| 336 |
+
--max_new_tokens 2048
|
| 337 |
+
```
|
| 338 |
+
|
| 339 |
+
### 界面推理
|
| 340 |
+
```shell
|
| 341 |
+
CUDA_VISIBLE_DEVICES=0 swift app \
|
| 342 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 343 |
+
--stream true \
|
| 344 |
+
--infer_backend pt \
|
| 345 |
+
--max_new_tokens 2048 \
|
| 346 |
+
--lang zh
|
| 347 |
+
```
|
| 348 |
+
|
| 349 |
+
### 部署
|
| 350 |
+
```shell
|
| 351 |
+
CUDA_VISIBLE_DEVICES=0 swift deploy \
|
| 352 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 353 |
+
--infer_backend vllm
|
| 354 |
+
```
|
| 355 |
+
|
| 356 |
+
### 采样
|
| 357 |
+
```shell
|
| 358 |
+
CUDA_VISIBLE_DEVICES=0 swift sample \
|
| 359 |
+
--model LLM-Research/Meta-Llama-3.1-8B-Instruct \
|
| 360 |
+
--sampler_engine pt \
|
| 361 |
+
--num_return_sequences 5 \
|
| 362 |
+
--dataset AI-ModelScope/alpaca-gpt4-data-zh#5
|
| 363 |
+
```
|
| 364 |
+
|
| 365 |
+
### 评测
|
| 366 |
+
```shell
|
| 367 |
+
CUDA_VISIBLE_DEVICES=0 swift eval \
|
| 368 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 369 |
+
--infer_backend lmdeploy \
|
| 370 |
+
--eval_backend OpenCompass \
|
| 371 |
+
--eval_dataset ARC_c
|
| 372 |
+
```
|
| 373 |
+
|
| 374 |
+
### 量化
|
| 375 |
+
```shell
|
| 376 |
+
CUDA_VISIBLE_DEVICES=0 swift export \
|
| 377 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 378 |
+
--quant_bits 4 --quant_method awq \
|
| 379 |
+
--dataset AI-ModelScope/alpaca-gpt4-data-zh \
|
| 380 |
+
--output_dir Qwen2.5-7B-Instruct-AWQ
|
| 381 |
+
```
|
| 382 |
+
|
| 383 |
+
### 推送模型
|
| 384 |
+
```shell
|
| 385 |
+
swift export \
|
| 386 |
+
--model <model-path> \
|
| 387 |
+
--push_to_hub true \
|
| 388 |
+
--hub_model_id '<model-id>' \
|
| 389 |
+
--hub_token '<sdk-token>'
|
| 390 |
+
```
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
## 🏛 License
|
| 394 |
+
|
| 395 |
+
本框架使用[Apache License (Version 2.0)](https://github.com/modelscope/modelscope/blob/master/LICENSE)进行许可。模型和数据集请查看原资源页面并遵守对应License。
|
| 396 |
+
|
| 397 |
+
## 📎 引用
|
| 398 |
+
|
| 399 |
+
```bibtex
|
| 400 |
+
@misc{zhao2024swiftascalablelightweightinfrastructure,
|
| 401 |
+
title={SWIFT: A Scalable lightWeight Infrastructure for Fine-Tuning},
|
| 402 |
+
author={Yuze Zhao and Jintao Huang and Jinghan Hu and Xingjun Wang and Yunlin Mao and Daoze Zhang and Zeyinzi Jiang and Zhikai Wu and Baole Ai and Ang Wang and Wenmeng Zhou and Yingda Chen},
|
| 403 |
+
year={2024},
|
| 404 |
+
eprint={2408.05517},
|
| 405 |
+
archivePrefix={arXiv},
|
| 406 |
+
primaryClass={cs.CL},
|
| 407 |
+
url={https://arxiv.org/abs/2408.05517},
|
| 408 |
+
}
|
| 409 |
+
```
|
| 410 |
+
|
| 411 |
+
## Star History
|
| 412 |
+
|
| 413 |
+
[](https://star-history.com/#modelscope/ms-swift&Date)
|
asset/discord_qr.jpg
ADDED
|
asset/wechat.png
ADDED
|
dataset.json
ADDED
|
@@ -0,0 +1,60 @@
|
| 1 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 2 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 3 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 4 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 5 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 6 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 7 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 8 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 9 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 10 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 11 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 12 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 13 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 14 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 15 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 16 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 17 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 18 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 19 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 20 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 21 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 22 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 23 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 24 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 25 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 26 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 27 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 28 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 29 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 30 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 31 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 32 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 33 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 34 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 35 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 36 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 37 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 38 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 39 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 40 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 41 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 42 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 43 |
+
{"messages": [{"role": "user", "content": "<audio>语音��了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 44 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 45 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 46 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 47 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 48 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 49 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 50 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 51 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 52 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 53 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 54 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 55 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 56 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 57 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 58 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 59 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
| 60 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/home/xj_data/jishengpeng/InteractSpeech/Train600/tmp/matched_audio/PLACES3.5--train--413.wav"]}
|
dataset_new.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
docs/Makefile
ADDED
|
@@ -0,0 +1,20 @@
| 1 |
+
# Minimal makefile for Sphinx documentation
|
| 2 |
+
#
|
| 3 |
+
|
| 4 |
+
# You can set these variables from the command line, and also
|
| 5 |
+
# from the environment for the first two.
|
| 6 |
+
SPHINXOPTS ?=
|
| 7 |
+
SPHINXBUILD ?= sphinx-build
|
| 8 |
+
SOURCEDIR = source
|
| 9 |
+
BUILDDIR = build
|
| 10 |
+
|
| 11 |
+
# Put it first so that "make" without argument is like "make help".
|
| 12 |
+
help:
|
| 13 |
+
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
| 14 |
+
|
| 15 |
+
.PHONY: help Makefile
|
| 16 |
+
|
| 17 |
+
# Catch-all target: route all unknown targets to Sphinx using the new
|
| 18 |
+
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
| 19 |
+
%: Makefile
|
| 20 |
+
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
docs/README.md
ADDED
|
@@ -0,0 +1,37 @@
| 1 |
+
## maintain docs
|
| 2 |
+
1. build docs
|
| 3 |
+
```shell
|
| 4 |
+
# in root directory:
|
| 5 |
+
make docs
|
| 6 |
+
```
|
| 7 |
+
|
| 8 |
+
2. doc string format
|
| 9 |
+
|
| 10 |
+
We adopt the google style docstring format as the standard, please refer to the following documents.
|
| 11 |
+
1. Google Python style guide docstring [link](http://google.github.io/styleguide/pyguide.html#381-docstrings)
|
| 12 |
+
2. Google docstring example [link](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html)
|
| 13 |
+
3. sample:torch.nn.modules.conv [link](https://pytorch.org/docs/stable/_modules/torch/nn/modules/conv.html#Conv1d)
|
| 14 |
+
4. load function as an example:
|
| 15 |
+
|
| 16 |
+
```python
|
| 17 |
+
def load(file, file_format=None, **kwargs):
|
| 18 |
+
"""Load data from json/yaml/pickle files.
|
| 19 |
+
|
| 20 |
+
This method provides a unified api for loading data from serialized files.
|
| 21 |
+
|
| 22 |
+
Args:
|
| 23 |
+
file (str or :obj:`Path` or file-like object): Filename or a file-like
|
| 24 |
+
object.
|
| 25 |
+
file_format (str, optional): If not specified, the file format will be
|
| 26 |
+
inferred from the file extension, otherwise use the specified one.
|
| 27 |
+
Currently supported formats include "json", "yaml/yml".
|
| 28 |
+
|
| 29 |
+
Examples:
|
| 30 |
+
>>> load('/path/of/your/file') # file is stored in disk
|
| 31 |
+
>>> load('https://path/of/your/file') # file is stored on internet
|
| 32 |
+
>>> load('oss://path/of/your/file') # file is stored in petrel
|
| 33 |
+
|
| 34 |
+
Returns:
|
| 35 |
+
The content from the file.
|
| 36 |
+
"""
|
| 37 |
+
```
|
docs/make.bat
ADDED
|
@@ -0,0 +1,35 @@
| 1 |
+
@ECHO OFF
|
| 2 |
+
|
| 3 |
+
pushd %~dp0
|
| 4 |
+
|
| 5 |
+
REM Command file for Sphinx documentation
|
| 6 |
+
|
| 7 |
+
if "%SPHINXBUILD%" == "" (
|
| 8 |
+
set SPHINXBUILD=sphinx-build
|
| 9 |
+
)
|
| 10 |
+
set SOURCEDIR=source
|
| 11 |
+
set BUILDDIR=build
|
| 12 |
+
|
| 13 |
+
if "%1" == "" goto help
|
| 14 |
+
|
| 15 |
+
%SPHINXBUILD% >NUL 2>NUL
|
| 16 |
+
if errorlevel 9009 (
|
| 17 |
+
echo.
|
| 18 |
+
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
| 19 |
+
echo.installed, then set the SPHINXBUILD environment variable to point
|
| 20 |
+
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
| 21 |
+
echo.may add the Sphinx directory to PATH.
|
| 22 |
+
echo.
|
| 23 |
+
echo.If you don't have Sphinx installed, grab it from
|
| 24 |
+
echo.http://sphinx-doc.org/
|
| 25 |
+
exit /b 1
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
| 29 |
+
goto end
|
| 30 |
+
|
| 31 |
+
:help
|
| 32 |
+
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
| 33 |
+
|
| 34 |
+
:end
|
| 35 |
+
popd
|
docs/resources/grpo.png
ADDED
|
docs/source/.readthedocs.yaml
ADDED
|
@@ -0,0 +1,29 @@
| 1 |
+
# .readthedocs.yaml
|
| 2 |
+
# Read the Docs configuration file
|
| 3 |
+
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
|
| 4 |
+
|
| 5 |
+
# Required
|
| 6 |
+
version: 2
|
| 7 |
+
|
| 8 |
+
# Set the OS, Python version and other tools you might need
|
| 9 |
+
build:
|
| 10 |
+
os: ubuntu-22.04
|
| 11 |
+
tools:
|
| 12 |
+
python: "3.10"
|
| 13 |
+
|
| 14 |
+
# Build documentation in the "docs/" directory with Sphinx
|
| 15 |
+
sphinx:
|
| 16 |
+
configuration: docs/source/conf.py
|
| 17 |
+
|
| 18 |
+
# Optionally build your docs in additional formats such as PDF and ePub
|
| 19 |
+
# formats:
|
| 20 |
+
# - pdf
|
| 21 |
+
# - epub
|
| 22 |
+
|
| 23 |
+
# Optional but recommended, declare the Python requirements required
|
| 24 |
+
# to build your documentation
|
| 25 |
+
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
|
| 26 |
+
python:
|
| 27 |
+
install:
|
| 28 |
+
- requirements: requirements/docs.txt
|
| 29 |
+
- requirements: requirements/framework.txt
|
docs/source/BestPractices/Embedding训练.md
ADDED
|
@@ -0,0 +1,97 @@
| 1 |
+
# Embedding训练
|
| 2 |
+
|
| 3 |
+
SWIFT已经支持Embedding模型的训练,包括纯文本和多模态两个类型。目前已经支持的模型有:
|
| 4 |
+
|
| 5 |
+
1. modernbert embedding模型
|
| 6 |
+
- [ModelScope](https://modelscope.cn/models/iic/gte-modernbert-base) [Hugging Face](https://huggingface.co/Alibaba-NLP/gte-modernbert-base)
|
| 7 |
+
2. gte embedding模型
|
| 8 |
+
- 1.5B: [ModelScope](https://www.modelscope.cn/models/iic/gte_Qwen2-1.5B-instruct) [Hugging Face](https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct)
|
| 9 |
+
- 7B: [ModelScope](https://www.modelscope.cn/models/iic/gte_Qwen2-7B-instruct) [Hugging Face](https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct)
|
| 10 |
+
3. gme embedding模型
|
| 11 |
+
- 2B: [ModelScope](https://www.modelscope.cn/models/iic/gme-Qwen2-VL-2B-Instruct) [Hugging Face](https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-2B-Instruct)
|
| 12 |
+
- 7B: [ModelScope](https://www.modelscope.cn/models/iic/gme-Qwen2-VL-7B-Instruct) [Hugging Face](https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-7B-Instruct)
|
| 13 |
+
|
| 14 |
+
开发者可以自行集成自己的模型,模型forward输出值需要满足:
|
| 15 |
+
|
| 16 |
+
```json
|
| 17 |
+
{"last_hidden_state": some-embedding-tensor}
|
| 18 |
+
```
|
| 19 |
+
|
| 20 |
+
返回值是一个json,具有`last_hidden_state` key,value是embedding tensor即可,输入部分可以使用我们已经支持的template。
|
| 21 |
+
|
| 22 |
+
需要注意的是,SWIFT目前支持的embedding模型均为纯文本或多模态LLM,目前并不支持CLIP类型的模型训练。
|
| 23 |
+
|
| 24 |
+
此外,SWIFT支持的所有embedding模型在模型forward最后都增加了normalize,如自行增加新模型请注意增加normalize层。
|
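下面给出一个最小示意(非SWIFT源码,backbone接口与池化方式均为假设),展示forward末尾增加normalize并按约定返回`last_hidden_state`:

```python
import torch
import torch.nn.functional as F


class MyEmbeddingWrapper(torch.nn.Module):
    # 示意:包装一个HF风格的backbone,使其输出满足SWIFT的embedding约定
    def __init__(self, backbone):
        super().__init__()
        self.backbone = backbone

    def forward(self, **inputs):
        hidden = self.backbone(**inputs).last_hidden_state  # [batch, seq, dim]
        pooled = hidden[:, -1]                               # 假设取最后一个token做池化
        pooled = F.normalize(pooled, p=2, dim=-1)            # 末尾的normalize层
        return {'last_hidden_state': pooled}
```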
| 25 |
+
|
| 26 |
+
## loss
|
| 27 |
+
|
| 28 |
+
目前SWIFT支持的Embedding模型可以使用的loss有:
|
| 29 |
+
|
| 30 |
+
- cosine_similarity: cosine相似度loss,计算两个embedding的相似度,并根据label的值拟合,实际为MSE loss
|
| 31 |
+
- contrastive: 可调margin的对比学习loss,label仅支持0和1两个值
|
| 32 |
+
- online_contrastive: 考虑hard negative和hard positive部分的contrastive loss,label仅支持0和1两个值
|
| 33 |
+
- infonce: 在同一个batch中不同row两两计算cosine相似度,并使row内部相似度最大,不同row相似度最小,不需要label
|
| 34 |
+
|
| 35 |
+
loss的源代码可以在[这里](https://github.com/modelscope/ms-swift/blob/main/swift/plugin/loss.py)找到。
|
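以cosine_similarity为例,其大致行为可以用下面的示意代码理解(仅为原理说明,并非loss.py中的实际实现):

```python
import torch
import torch.nn.functional as F


def cosine_similarity_loss(emb1: torch.Tensor, emb2: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
    """用MSE拟合两个embedding的cosine相似度与label之间的差距(示意)。"""
    cos_sim = F.cosine_similarity(emb1, emb2, dim=-1)  # [batch]
    return F.mse_loss(cos_sim, labels.float())


# 用法示意
# loss = cosine_similarity_loss(anchor_emb, positive_emb, torch.tensor([0.8, 0.3]))
```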
| 36 |
+
|
| 37 |
+
## 数据集格式
|
| 38 |
+
|
| 39 |
+
> 注:
|
| 40 |
+
> 1. 下面的多模态部分<image>标签可以出现在query/response/rejected_response的任意位置,只需要标签数量和images的值数量相等即可
|
| 41 |
+
> 2. 标签和images的对应顺序为先对应query中的<image>标签,然后是response中的,之后按顺序解析rejected_response中的
|
| 42 |
+
> 3. query代表anchor sample,response代表positive sample或对比sample,rejected_response是hard negative samples
|
| 43 |
+
> 4. 也支持<video>, <audio>标签,即天然支持video和audio的embedding
|
| 44 |
+
|
| 45 |
+
### cosine_similarity loss对应的格式
|
| 46 |
+
|
| 47 |
+
```json lines
|
| 48 |
+
# LLM
|
| 49 |
+
{"query": "sentence1", "response": "sentence2", "label": 0.8}
|
| 50 |
+
# MLLM
|
| 51 |
+
{"query": "<image>", "response": "<image>sentence", "images": ["/some/images1.jpg", "/some/images2.jpg"], "label": 0.7}
|
| 52 |
+
{"query": "sentence1", "response": "<image>sentence2", "images": ["/some/images1.jpg"], "label": 0.7}
|
| 53 |
+
```
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
### contrastive/online_contrastive loss对应的格式
|
| 57 |
+
|
| 58 |
+
```json lines
|
| 59 |
+
# LLM
|
| 60 |
+
{"query": "sentence1", "response": "sentence2", "label": 1}
|
| 61 |
+
# MLLM
|
| 62 |
+
{"query": "<image>", "response": "sentence", "images": "/some/images.jpg", "label": 1}
|
| 63 |
+
{"query": "<image>sentence1", "response": "sentence2", "images": "/some/images.jpg", "label": 0}
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
评测的指标分别是两个embedding的欧式距离、点积等的pearson系数以及spearman系数,共八个指标。
|
| 67 |
+
|
| 68 |
+
### infonce 格式
|
| 69 |
+
|
| 70 |
+
```json lines
|
| 71 |
+
# LLM
|
| 72 |
+
{"query": "sentence1", "response": "sentence2"}
|
| 73 |
+
# MLLM
|
| 74 |
+
{"query": "<image>", "response": "sentence", "images": "/some/images.jpg"}
|
| 75 |
+
{"query": "<image>sentence1", "response": "<image>sentence2", "rejected_response": ["<image>sentence1", "<image>sentence2"], "images": ["/some/images.jpg", "/some/images.jpg", "/some/images.jpg", "/some/images.jpg"]}
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
infonce loss支持几个环境变量:
|
| 79 |
+
1. INFONCE_TEMPERATURE temperature参数,不设置的话默认值是0.01
|
| 80 |
+
2. INFONCE_USE_BATCH 使用sample内部的rejected_response(hard negative样例)还是使用一个batch的所有responses,默认为True代表使用batch内部的responses
|
| 81 |
+
3. INFONCE_HARD_NEGATIVES hard negatives的数量,如果不设置会使用rejected_response的所有samples,由于长度未必一致,因此会采用for循环计算loss(计算会慢),如果设置为某个数值,则如果不够会对缺失数量进行随机采样,超长会选用前`INFONCE_HARD_NEGATIVES`个
|
| 82 |
+
4. INFONCE_MASK_FAKE_NEGATIVE mask掉假negative。默认为False,开启时会判断positive sample的similarity+0.1,比该值大的sample的similarity会被设置为-inf,防止positive sample泄露问题
|
| 83 |
+
|
| 84 |
+
> 也可以在数据集中将各条样本的hard negatives数量设置为相等,这样即使不设置该环境变量,也不会使用for循环方式计算,从而加快计算速度
|
| 85 |
+
> rejected_response也可以没有,这种情况下INFONCE_USE_BATCH保持为True,会使用一个batch内部的其他samples作为rejected responses
|
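以INFONCE_MASK_FAKE_NEGATIVE为例,其掩码逻辑大致如下(示意代码,0.1的阈值与实际实现细节以loss.py为准):

```python
import torch


def mask_fake_negatives(sim: torch.Tensor, pos_sim: torch.Tensor) -> torch.Tensor:
    """sim: [batch, num_candidates] 相似度矩阵;pos_sim: [batch] 正样本相似度(示意)。"""
    threshold = pos_sim.unsqueeze(1) + 0.1
    # 相似度超过 positive + 0.1 的候选被视为假negative,置为-inf后不参与softmax
    return sim.masked_fill(sim > threshold, float('-inf'))
```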
| 86 |
+
|
| 87 |
+
infonce loss的评测会有下面几个指标:
|
| 88 |
+
- mean_neg 所有hard_negative的平均值
|
| 89 |
+
- mean_pos 所有positive的平均值
|
| 90 |
+
- margin positive-max_hard_negative的平均值
|
| 91 |
+
|
| 92 |
+
## 脚手架
|
| 93 |
+
|
| 94 |
+
SWIFT提供了两个脚手架训练脚本:
|
| 95 |
+
|
| 96 |
+
- [gte模型](https://github.com/tastelikefeet/swift/blob/main/examples/train/embedding/train_gte.sh)
|
| 97 |
+
- [gme模型](https://github.com/tastelikefeet/swift/blob/main/examples/train/embedding/train_gme.sh)
|
docs/source/BestPractices/GRPO代码训练.md
ADDED
|
@@ -0,0 +1,134 @@
| 1 |
+
# GRPO代码训练
|
| 2 |
+
本文档介绍如何使用GRPO对模型进行代码训练
|
| 3 |
+
|
| 4 |
+
模型:[Qwen/Qwen2.5-7B-Instruct](https://www.modelscope.cn/models/Qwen/Qwen2.5-7B-Instruct)
|
| 5 |
+
|
| 6 |
+
数据集:[open-r1/verifiable-coding-problems-python-10k](https://www.modelscope.cn/datasets/open-r1/verifiable-coding-problems-python-10k/dataPeview)
|
| 7 |
+
|
| 8 |
+
数据集样例
|
| 9 |
+
```json
|
| 10 |
+
{
|
| 11 |
+
"problem": "Solve the following coding problem using the programming language python: Polycarp has $n$ different binary words. A word called binary if it contains only characters '0' and '1'. For example, these words are binary: \"0001\", \"11\", \"0\" and \"0011100\". Polycarp wants to offer his set of $n$ binary words to play a game \"words\". In this game, players name words and each next word (starting from the second) must start with the last character of the previous word. The first word can be any. For example, these sequence of words can be named during the game: \"0101\", \"1\", \"10\", \"00\", \"00001\". Word reversal is the operation of reversing the order of the characters. For example, the word \"0111\" after the reversal becomes \"1110\", the word \"11010\" after the reversal becomes \"01011\". Probably, Polycarp has such a set of words that there is no way to put them in the order correspondent to the game rules. In this situation, he wants to reverse some words from his set so that: the final set of $n$ words still contains different words (i.e. all words are unique); there is a way to put all words of the final set of words in the order so that the final sequence of $n$ words is consistent with the game rules. Polycarp wants to reverse minimal number of words. Please, help him. -----Input----- The first line of the input contains one integer $t$ ($1 \\le t \\le 10^4$) — the number of test cases in the input. Then $t$ test cases follow. The first line of a test case contains one integer $n$ ($1 \\le n \\le 2\\cdot10^5$) — the number of words in the Polycarp's set. Next $n$ lines contain these words. All of $n$ words aren't empty and contains only characters '0' and '1'. The sum of word lengths doesn't exceed $4\\cdot10^6$. All words are different. Guaranteed, that the sum of $n$ for all test cases in the input doesn't exceed $2\\cdot10^5$. Also, guaranteed that the sum of word lengths for all test cases in the input doesn't exceed $4\\cdot10^6$. -----Output----- Print answer for all of $t$ test cases in the order they appear. If there is no answer for the test case, print -1. Otherwise, the first line of the output should contain $k$ ($0 \\le k \\le n$) — the minimal number of words in the set which should be reversed. The second line of the output should contain $k$ distinct integers — the indexes of the words in the set which should be reversed. Words are numerated from $1$ to $n$ in the order they appear. If $k=0$ you can skip this line (or you can print an empty line). If there are many answers you can print any of them. -----Example----- Input 4 4 0001 1000 0011 0111 3 010 101 0 2 00000 00001 4 01 001 0001 00001 Output 1 3 -1 0 2 1 2 The input will be stdin and you should print your solution to stdout Now solve the problem and return the code.",
|
| 12 |
+
"verification_info": {
|
| 13 |
+
"language": "python",
|
| 14 |
+
"test_cases": [
|
| 15 |
+
{
|
| 16 |
+
"input": "4\n4\n0001\n1000\n0011\n0111\n3\n010\n101\n0\n2\n00000\n00001\n4\n01\n001\n0001\n00001\n",
|
| 17 |
+
"output": "1\n3 \n-1\n0\n\n2\n1 2 \n",
|
| 18 |
+
"type": "stdin_stdout"
|
| 19 |
+
}
|
| 20 |
+
]
|
| 21 |
+
}
|
| 22 |
+
}
|
| 23 |
+
```
|
| 24 |
+
|
| 25 |
+
`verification_info` 提供了程序语言以及测试用例,其中包含输入和预期的输出。
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
## 奖励函数
|
| 29 |
+
使用`code_reward`和`code_format`奖励进行训练,实现细节见[代码](../../../examples/train/grpo/plugin/plugin.py)
|
| 30 |
+
|
| 31 |
+
- `code_reward`通过[e2b](https://e2b.dev/)或[judge0](https://judge0.com/)执行生成的代码,根据数据集中的测试用例对代码进行验证给出奖励值。
|
| 32 |
+
- `code_format`要求模型输出包含代码块的格式化回答。
|
| 33 |
+
|
| 34 |
+
注:当前通过e2b执行代码仅支持python语言,如需执行其他语言,可以使用judge0执行([judge0支持语言列表](https://github.com/judge0/judge0?tab=readme-ov-file#supported-languages))。
|
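`code_reward`的核心思路可以用下面的简化示意代码理解(仅作原理说明:本地用subprocess跑stdin/stdout测试用例,并以通过比例作为奖励;实际plugin.py通过e2b/judge0沙箱执行,请勿直接在本机执行不可信代码):

```python
import re
import subprocess
from typing import List


def simple_code_reward(completions: List[str], verification_info: List[dict], **kwargs) -> List[float]:
    """示意:提取```python代码块,逐个测试用例验证,返回通过比例作为奖励。"""
    rewards = []
    for completion, info in zip(completions, verification_info):
        match = re.search(r'```python\n(.*?)```', completion, re.DOTALL)
        if match is None:
            rewards.append(0.0)
            continue
        code, passed, cases = match.group(1), 0, info['test_cases']
        for case in cases:
            try:
                proc = subprocess.run(['python', '-c', code], input=case['input'],
                                      capture_output=True, text=True, timeout=10)
                passed += int(proc.stdout.strip() == case['output'].strip())
            except Exception:
                pass
        rewards.append(passed / len(cases) if cases else 0.0)
    return rewards
```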
| 35 |
+
|
| 36 |
+
## 训练脚本
|
| 37 |
+
### e2b
|
| 38 |
+
- 在[e2b](https://e2b.dev/dashboard)注册获取E2B_API_KEY,并设置为环境变量。
|
| 39 |
+
- `--reward_funcs`添加`external_code_reward`作为奖励函数。
|
| 40 |
+
- `--external_plugins`设置为plugin.py的路径。
|
| 41 |
+
```bash
|
| 42 |
+
E2B_API_KEY=xxx \
|
| 43 |
+
WANDB_API_KEY=xxx \
|
| 44 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
|
| 45 |
+
NPROC_PER_NODE=7 \
|
| 46 |
+
swift rlhf \
|
| 47 |
+
--rlhf_type grpo \
|
| 48 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 49 |
+
--external_plugins examples/train/grpo/plugin/plugin.py \
|
| 50 |
+
--reward_funcs external_code_reward external_code_format \
|
| 51 |
+
--reward_weights 1.0 0.1 \
|
| 52 |
+
--use_vllm true \
|
| 53 |
+
--vllm_device auto \
|
| 54 |
+
--vllm_gpu_memory_utilization 0.7 \
|
| 55 |
+
--vllm_max_model_len 8192 \
|
| 56 |
+
--train_type lora \
|
| 57 |
+
--torch_dtype bfloat16 \
|
| 58 |
+
--dataset 'open-r1/verifiable-coding-problems-python-10k' \
|
| 59 |
+
--max_completion_length 2048 \
|
| 60 |
+
--num_train_epochs 1 \
|
| 61 |
+
--per_device_train_batch_size 2 \
|
| 62 |
+
--per_device_eval_batch_size 2 \
|
| 63 |
+
--learning_rate 1e-6 \
|
| 64 |
+
--gradient_accumulation_steps 1 \
|
| 65 |
+
--eval_steps 200 \
|
| 66 |
+
--save_steps 200 \
|
| 67 |
+
--save_total_limit 2 \
|
| 68 |
+
--logging_steps 5 \
|
| 69 |
+
--max_length 4096 \
|
| 70 |
+
--output_dir output \
|
| 71 |
+
--warmup_ratio 0.05 \
|
| 72 |
+
--dataloader_num_workers 4 \
|
| 73 |
+
--dataset_num_proc 4 \
|
| 74 |
+
--num_generations 14 \
|
| 75 |
+
--temperature 0.9 \
|
| 76 |
+
--num_infer_workers 1 \
|
| 77 |
+
--system 'examples/train/grpo/prompt.txt' \
|
| 78 |
+
--deepspeed zero2 \
|
| 79 |
+
--log_completions true \
|
| 80 |
+
--report_to wandb
|
| 81 |
+
```
|
| 82 |
+
|
| 83 |
+
### judge0
|
| 84 |
+
- 设置环境变量:
|
| 85 |
+
- (必需)JUDGE0_ENDPOINT: judge0访问地址。
|
| 86 |
+
- (可选)JUDGE0_X_AUTH_TOKEN: judge0访问Token。
|
| 87 |
+
- `--reward_funcs`添加`external_code_reward_by_judge0`作为奖励函数。
|
| 88 |
+
- `--external_plugins`设置为plugin.py的路径。
|
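judge0的单次调用大致如下(示意代码,字段名与language_id以judge0官方文档和实际部署为准;JUDGE0_ENDPOINT与JUDGE0_X_AUTH_TOKEN即上面两个环境变量):

```python
import os

import requests


def judge0_run(source_code: str, stdin: str, expected_output: str) -> bool:
    """示意:向judge0提交一段Python代码并检查测试用例是否通过。"""
    endpoint = os.environ['JUDGE0_ENDPOINT'].rstrip('/')
    headers = {'Content-Type': 'application/json'}
    token = os.environ.get('JUDGE0_X_AUTH_TOKEN')
    if token:
        headers['X-Auth-Token'] = token
    payload = {
        'source_code': source_code,
        'language_id': 71,  # 71通常对应judge0中的Python 3,具体id以实际部署为准
        'stdin': stdin,
        'expected_output': expected_output,
    }
    resp = requests.post(f'{endpoint}/submissions?base64_encoded=false&wait=true',
                         json=payload, headers=headers, timeout=30)
    resp.raise_for_status()
    return resp.json().get('status', {}).get('id') == 3  # 3表示Accepted
```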
| 89 |
+
|
| 90 |
+
```bash
|
| 91 |
+
JUDGE0_ENDPOINT=xxx \
|
| 92 |
+
JUDGE0_X_AUTH_TOKEN=xxx \
|
| 93 |
+
WANDB_API_KEY=xxx \
|
| 94 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
|
| 95 |
+
NPROC_PER_NODE=7 \
|
| 96 |
+
swift rlhf \
|
| 97 |
+
--rlhf_type grpo \
|
| 98 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 99 |
+
--external_plugins examples/train/grpo/plugin/plugin.py \
|
| 100 |
+
--reward_funcs external_code_reward_by_judge0 external_code_format \
|
| 101 |
+
--reward_weights 1.0 0.1 \
|
| 102 |
+
--use_vllm true \
|
| 103 |
+
--vllm_device auto \
|
| 104 |
+
--vllm_gpu_memory_utilization 0.7 \
|
| 105 |
+
--vllm_max_model_len 8192 \
|
| 106 |
+
--train_type lora \
|
| 107 |
+
--torch_dtype bfloat16 \
|
| 108 |
+
--dataset 'open-r1/verifiable-coding-problems-python-10k' \
|
| 109 |
+
--max_completion_length 2048 \
|
| 110 |
+
--num_train_epochs 1 \
|
| 111 |
+
--per_device_train_batch_size 2 \
|
| 112 |
+
--per_device_eval_batch_size 2 \
|
| 113 |
+
--learning_rate 1e-6 \
|
| 114 |
+
--gradient_accumulation_steps 1 \
|
| 115 |
+
--eval_steps 200 \
|
| 116 |
+
--save_steps 200 \
|
| 117 |
+
--save_total_limit 2 \
|
| 118 |
+
--logging_steps 5 \
|
| 119 |
+
--max_length 4096 \
|
| 120 |
+
--output_dir output \
|
| 121 |
+
--warmup_ratio 0.05 \
|
| 122 |
+
--dataloader_num_workers 4 \
|
| 123 |
+
--dataset_num_proc 4 \
|
| 124 |
+
--num_generations 14 \
|
| 125 |
+
--temperature 0.9 \
|
| 126 |
+
--num_infer_workers 1 \
|
| 127 |
+
--system 'examples/train/grpo/prompt.txt' \
|
| 128 |
+
--deepspeed zero2 \
|
| 129 |
+
--log_completions true \
|
| 130 |
+
--report_to wandb
|
| 131 |
+
```
|
| 132 |
+
|
| 133 |
+
训练奖励曲线图
|
| 134 |
+

|
docs/source/BestPractices/GRPO多模态训练.md
ADDED
|
@@ -0,0 +1,306 @@
| 1 |
+
# 多模态GRPO完整实验流程
|
| 2 |
+
本文介绍如何使用SWIFT GRPO进行多模态模型和任务的训练。目标是对多个多模态任务进行训练,提升任务精度,任务定义和训练参数等参考了 [R1-V](https://github.com/Deep-Agent/R1-V.git) 和 [open-r1-multimodal](https://github.com/EvolvingLMMs-Lab/open-r1-multimodal.git)
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
## ClevrCount 任务
|
| 7 |
+
### 任务与数据集定义
|
| 8 |
+
本任务从clevr_cogen_a_train数据集出发,模型的目标是输出图像中包含的物体数量,因此,我们定义数据集如下:
|
| 9 |
+
|
| 10 |
+
```python
|
| 11 |
+
class ClevrPreprocessor(ResponsePreprocessor):
|
| 12 |
+
|
| 13 |
+
def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
|
| 14 |
+
query = row.get('query', '')
|
| 15 |
+
query = f"""{query} Output the thinking process in <think> </think> and
|
| 16 |
+
final answer (number) in <answer> </answer> tags."""
|
| 17 |
+
row.update({'query': query})
|
| 18 |
+
return super().preprocess(row)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
register_dataset(
|
| 22 |
+
DatasetMeta(
|
| 23 |
+
ms_dataset_id='okwinds/clevr_cogen_a_train',
|
| 24 |
+
subsets=[
|
| 25 |
+
SubsetDataset(
|
| 26 |
+
name='default',
|
| 27 |
+
subset='default',
|
| 28 |
+
split=['train'],
|
| 29 |
+
),
|
| 30 |
+
],
|
| 31 |
+
preprocess_func=ClevrPreprocessor(),
|
| 32 |
+
tags=['qa', 'math']))
|
| 33 |
+
|
| 34 |
+
```
|
| 35 |
+
这里重新定义dataset preprocessor的目的是修改query。数据集示例样本如下,包含messages,images和solution字段,solution会送入后续的奖励函数中,而messages和images则会作为模型输入。
|
| 36 |
+
- 注意:`{'role': 'assistant', 'content': '<answer> 3 </answer>'}`将会在GRPOTrainer中被移除,可以忽略。'solution'字段将会透传入ORM中。在自定义数据集时,'images'字段组织成`["image_path1", "image_path2"]`即可。
|
| 37 |
+
|
| 38 |
+
```json
|
| 39 |
+
{
|
| 40 |
+
"images": ["image_path1", "image_path2"],
|
| 41 |
+
"messages": [
|
| 42 |
+
{
|
| 43 |
+
"role": "user",
|
| 44 |
+
"content": "How many items are there in the image? Output the thinking process in <think> </think> and \n final answer (number) in <answer> </answer> tags."
|
| 45 |
+
}
|
| 46 |
+
],
|
| 47 |
+
"solution": "<answer> 3 </answer>"
|
| 48 |
+
}
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
## 奖励函数定义:
|
| 53 |
+
本任务使用的奖励函数有两个,一个是 Deepseek-R1 中提到的格式奖励函数,另一是 ClevrCount 的准确性奖励函数。前者已经在swift中内置,通过 `--reward_funcs format` 可以直接使用,而后者需要我们自己定义,在这里我们使用 external_plugin 的方式定义准确性奖励函数,将代码放在`swift/examples/train/grpo/plugin/plugin.py`中。
|
| 54 |
+
|
| 55 |
+
在这里,奖励函数的输入包括completions和solution两个字段,分别表示模型生成的文本和真值。每个都是list,支持多个completion同时计算。注意,在这里,solution字段是数据集中定义的字段透传而来,如果有任务上的变动,可以分别对数据集和奖励函数做对应的改变即可。
|
| 56 |
+
```python
|
| 57 |
+
|
| 58 |
+
class MultiModalAccuracyORM(ORM):
|
| 59 |
+
|
| 60 |
+
def __call__(self, completions, solution, **kwargs) -> List[float]:
|
| 61 |
+
"""
|
| 62 |
+
Reward function that checks if the completion is correct.
|
| 63 |
+
Args:
|
| 64 |
+
completions (list[str]): Generated outputs
|
| 65 |
+
solution (list[str]): Ground Truths.
|
| 66 |
+
|
| 67 |
+
Returns:
|
| 68 |
+
list[float]: Reward scores
|
| 69 |
+
"""
|
| 70 |
+
rewards = []
|
| 71 |
+
from math_verify import parse, verify
|
| 72 |
+
for content, sol in zip(completions, solution):
|
| 73 |
+
reward = 0.0
|
| 74 |
+
# Try symbolic verification first
|
| 75 |
+
try:
|
| 76 |
+
answer = parse(content)
|
| 77 |
+
if float(verify(answer, parse(sol))) > 0:
|
| 78 |
+
reward = 1.0
|
| 79 |
+
except Exception:
|
| 80 |
+
pass # Continue to next verification method if this fails
|
| 81 |
+
|
| 82 |
+
# If symbolic verification failed, try string matching
|
| 83 |
+
if reward == 0.0:
|
| 84 |
+
try:
|
| 85 |
+
# Extract answer from solution if it has think/answer tags
|
| 86 |
+
sol_match = re.search(r'<answer>(.*?)</answer>', sol)
|
| 87 |
+
ground_truth = sol_match.group(1).strip() if sol_match else sol.strip()
|
| 88 |
+
|
| 89 |
+
# Extract answer from content if it has think/answer tags
|
| 90 |
+
content_match = re.search(r'<answer>(.*?)</answer>', content)
|
| 91 |
+
student_answer = content_match.group(1).strip() if content_match else content.strip()
|
| 92 |
+
|
| 93 |
+
# Compare the extracted answers
|
| 94 |
+
if student_answer == ground_truth:
|
| 95 |
+
reward = 1.0
|
| 96 |
+
except Exception:
|
| 97 |
+
pass # Keep reward as 0.0 if both methods fail
|
| 98 |
+
rewards.append(reward)
|
| 99 |
+
return rewards
|
| 100 |
+
orms['external_r1v_acc'] = MultiModalAccuracyORM
|
| 101 |
+
```
|
| 102 |
+
|
| 103 |
+
### GRPO训练实验记录
|
| 104 |
+
#### 训练参数:
|
| 105 |
+
我们选取 Qwen2.5-VL-3B-Instruct 作为基础模型进行训练,选取 Instruct 而不是基模的主要原因是可以更快地获取 format reward。我们在八卡 GPU 上进行实验。SWIFT GRPO训练已支持多卡部署模型以加速rollout,因此我们设置num_infer_workers为2,进程数为6,即2卡部署,6卡训练。如果遇到vllm部署qwen2.5-vl报错,可以参考[issue](https://github.com/vllm-project/vllm/issues/13285)
|
| 106 |
+
|
| 107 |
+
由于任务简单,我们设置max_completion_length为1024,奖励函数选择external_r1v_acc和format,学习率和beta分别设置为1e-6和0.001。其他设置如下所示,batch_size和num_generations的设置原则可以参考[GRPO完整流程](./GRPO完整流程.md)。
|
| 108 |
+
|
| 109 |
+
```shell
|
| 110 |
+
WANDB_API_KEY=your_wandb_api_key \
|
| 111 |
+
NPROC_PER_NODE=6 \
|
| 112 |
+
swift rlhf \
|
| 113 |
+
--rlhf_type grpo \
|
| 114 |
+
--model Qwen/Qwen2.5-VL-3B-Instruct \
|
| 115 |
+
--external_plugins examples/train/grpo/plugin/plugin.py \
|
| 116 |
+
--reward_funcs external_r1v_acc format \
|
| 117 |
+
--use_vllm true \
|
| 118 |
+
--vllm_device auto \
|
| 119 |
+
--vllm_gpu_memory_utilization 0.6 \
|
| 120 |
+
--train_type full \
|
| 121 |
+
--torch_dtype bfloat16 \
|
| 122 |
+
--dataset 'okwinds/clevr_cogen_a_train' \
|
| 123 |
+
--vllm_max_model_len 8192 \
|
| 124 |
+
--max_completion_length 1024 \
|
| 125 |
+
--num_train_epochs 1 \
|
| 126 |
+
--per_device_train_batch_size 8 \
|
| 127 |
+
--per_device_eval_batch_size 8 \
|
| 128 |
+
--learning_rate 1e-6 \
|
| 129 |
+
--gradient_accumulation_steps 2 \
|
| 130 |
+
--save_strategy 'steps' \
|
| 131 |
+
--eval_strategy 'steps' \
|
| 132 |
+
--eval_steps 1000 \
|
| 133 |
+
--save_steps 1000 \
|
| 134 |
+
--save_total_limit 10 \
|
| 135 |
+
--logging_steps 1 \
|
| 136 |
+
--output_dir output/GRPO_CLEVR_COUNTDOWN \
|
| 137 |
+
--warmup_ratio 0.01 \
|
| 138 |
+
--dataloader_num_workers 4 \
|
| 139 |
+
--num_generations 24 \
|
| 140 |
+
--temperature 1.0 \
|
| 141 |
+
--system 'examples/train/grpo/prompt.txt' \
|
| 142 |
+
--deepspeed zero3 \
|
| 143 |
+
--log_completions true \
|
| 144 |
+
--report_to wandb \
|
| 145 |
+
--num_iterations 1 \
|
| 146 |
+
--num_infer_workers 2 \
|
| 147 |
+
--async_generate false \
|
| 148 |
+
--beta 0.001 \
|
| 149 |
+
|
| 150 |
+
```
|
| 151 |
+
#### 实验现象
|
| 152 |
+

|
| 153 |
+
由于数据集和任务比较简单,模型训练了500个step已经基本收敛,有以下观察。
|
| 154 |
+
1. 自定义的ClevrORM在不断增加,证明模型学习到了如何完成这一任务,最终任务成功率从初始的0.4攀升到1左右。
|
| 155 |
+
2. 由于是Instruct模型,Format Reward基本稳定在1,这可能也与数据集所有样本都是一样的query有一定关系。
|
| 156 |
+
3. reward_std 稳定到0.1以下。
|
| 157 |
+
4. completion length最终稳定在60-80,模型学习到的固定输出范式是逐个物体列举数数。
|
| 158 |
+
|
| 159 |
+
step 1:
|
| 160 |
+
```
|
| 161 |
+
"<think> The image shows 5 distinct items: a cyan metallic cylinder, a red metallic cylinder, a small purple metallic cylinder, a large gray metallic sphere, and a small yellow metallic cube. To count the number of items, I add the counts of each type of object: 1 cyan cylinder, 1 red cylinder, 1 purple cylinder, 1 gray sphere, and 1 yellow cube. Therefore, the total number of items is 5. </think>\n<answer>5</answer>"
|
| 162 |
+
"Let's count the number of items in the image:\n1. A large cyan metallic cylinder.\n2. A small purple metallic cylinder.\n3. A large red metallic cylinder.\n4. A large gray metallic sphere.\n5. A small yellow metallic cube.\n\n<answer>5</answer>"
|
| 163 |
+
"<think> The image shows four distinct objects: a large cyan metallic cylinder, a smaller purple metallic cylinder, a large red metallic cylinder, and a small yellow metallic cube. Counting each of these items individually, there are five items in total. </think>\n<answer>5</answer>"
|
| 164 |
+
```
|
| 165 |
+
step 400:
|
| 166 |
+
```
|
| 167 |
+
"<think> Counting the number of items in the image:\n1. Green matte sphere\n2. Large metallic yellow sphere\n3. Small metallic brown cube\n\nThere are three distinct objects in total.\n</think>\n<answer>3</answer>"
|
| 168 |
+
"<think> Counting the objects in the image:\n1. Large yellow metallic sphere\n2. Small green rubber sphere\n3. Small brown metallic cube\n\nThere are three distinct items in the image.\n</think>\n<answer>3</answer>"
|
| 169 |
+
```
|
| 170 |
+
总体来讲,这一任务比较简单,reward的收敛也比较典型。
|
| 171 |
+
|
| 172 |
+
## Geometric QA任务
|
| 173 |
+
### 任务与数据集定义
|
| 174 |
+
本任务为Geometric QA任务,任务描述为:给定一个几何图形,回答有关几何图形的数学问题。原始数据来自于[论文](https://arxiv.org/pdf/2312.11370),[R1-V](https://github.com/Deep-Agent/R1-V.git)对数据进行了预处理,将所有数据全部处理成了problem-solution的格式,而图像则保留在image字段中,因此,我们不需要额外定义数据集,直接使用`--dataset AI-ModelScope/GEOQA_R1V_Train_8K`即可。
|
| 175 |
+
### 奖励函数
|
| 176 |
+
由于也是数学题,同时,答案也处理成了最终结果,因此,我们直接使用以上定义过的`MultiModalAccuracyORM`奖励函数。
|
| 177 |
+
### GRPO训练实验记录
|
| 178 |
+
#### 训练参数:
|
| 179 |
+
选取的模型和大部分超参数与上一个实验相似,主要有两点不同:
|
| 180 |
+
1. SWIFT 已支持`--num_iterations`参数,单次rollout可以进行多次更新,这里设置为2。
|
| 181 |
+
2. 在实验时发现,在数学问题中,训练可能会出现不稳定现象,导致模型训崩,具体表现为所有reward迅速降低,loss、grad_norm和kl都迅速增大,后续也难以恢复正常状态。因此,这里设置`--max_grad_norm 0.5`,保证稳定训练,当然,这种现象的出现也有一定的随机性。
|
| 182 |
+
|
| 183 |
+
```shell
|
| 184 |
+
WANDB_API_KEY=your_wandb_api_key \
|
| 185 |
+
MAX_PIXELS=401408 \
|
| 186 |
+
NPROC_PER_NODE=6 \
|
| 187 |
+
swift rlhf \
|
| 188 |
+
--rlhf_type grpo \
|
| 189 |
+
--model Qwen/Qwen2.5-VL-3B-Instruct \
|
| 190 |
+
--external_plugins examples/train/grpo/plugin/plugin.py \
|
| 191 |
+
--reward_funcs external_r1v_acc format \
|
| 192 |
+
--use_vllm true \
|
| 193 |
+
--vllm_device auto \
|
| 194 |
+
--vllm_gpu_memory_utilization 0.6 \
|
| 195 |
+
--train_type full \
|
| 196 |
+
--torch_dtype bfloat16 \
|
| 197 |
+
--dataset 'AI-ModelScope/GEOQA_R1V_Train_8K' \
|
| 198 |
+
--vllm_max_model_len 8192 \
|
| 199 |
+
--max_completion_length 1024 \
|
| 200 |
+
--num_train_epochs 1 \
|
| 201 |
+
--per_device_train_batch_size 8 \
|
| 202 |
+
--per_device_eval_batch_size 8 \
|
| 203 |
+
--learning_rate 1e-6 \
|
| 204 |
+
--gradient_accumulation_steps 2 \
|
| 205 |
+
--save_strategy 'steps' \
|
| 206 |
+
--eval_strategy 'steps' \
|
| 207 |
+
--eval_steps 400 \
|
| 208 |
+
--save_steps 400 \
|
| 209 |
+
--save_total_limit 10 \
|
| 210 |
+
--logging_steps 1 \
|
| 211 |
+
--output_dir output/GRPO_GEOQA \
|
| 212 |
+
--warmup_ratio 0.05 \
|
| 213 |
+
--dataloader_num_workers 4 \
|
| 214 |
+
--num_generations 8 \
|
| 215 |
+
--temperature 1.0 \
|
| 216 |
+
--repetition_penalty 1.1 \
|
| 217 |
+
--system 'examples/train/grpo/prompt.txt' \
|
| 218 |
+
--deepspeed zero3 \
|
| 219 |
+
--log_completions true \
|
| 220 |
+
--report_to wandb \
|
| 221 |
+
--num_iterations 2 \
|
| 222 |
+
--num_infer_workers 2 \
|
| 223 |
+
--async_generate false \
|
| 224 |
+
--beta 0.001 \
|
| 225 |
+
--max_grad_norm 0.5 \
|
| 226 |
+
```
|
| 227 |
+
|
| 228 |
+
#### 实验现象
|
| 229 |
+

|
| 230 |
+
训练曲线如上图所示。
|
| 231 |
+
1. 相比于Count任务,Geometric QA任务的难度明显更大,因此,Accuracy Reward收敛明显更慢,同时震荡幅度也明显更大。
|
| 232 |
+
2. completion_length的变化趋势比较明显,最终收敛到250 token左右。
|
| 233 |
+
3. reward基本收敛,标准差在一共1300个step时稳定在0.2左右。
|
| 234 |
+
以下是训练后的模型completion示例:
|
| 235 |
+
```
|
| 236 |
+
User:
|
| 237 |
+
"If AB is the tangent to circle O with the tangent point A, and BO intersects circle O at point C, and point D is located on circle O, what is the measure of angle ADC if the degree of angle ABO is 32.0?"
|
| 238 |
+
Assistant:
|
| 239 |
+
"<think>\nTo find the measure of angle ADC, we first need to understand the properties of the tangent and the circle. Since AB is a tangent to the circle at point A, the line segment OA (which is perpendicular to the tangent AB) is perpendicular to AB. This means that angle OAB is 90 degrees.\n\nGiven that angle ABO is 32 degrees, we can find angle BAO by subtracting angle ABO from 90 degrees:\n∠BAO = 90° - ∠ABO = 90° - 32° = 58°\n\nSince angle BAO is an angle in the minor arc AD, the angle ADC, which is an angle in the minor arc AD, is half the angle BAO. Therefore, angle ADC is:\n∠ADC = 1/2 × ∠BAO = 1/2 × 58° = 29°\n\nSo, the measure of angle ADC is 29 degrees.\n</think>\n<answer>\nThe measure of angle ADC is 29 degrees.\n</answer>"
|
| 240 |
+
```
|
| 241 |
+
## Multimodal Open R1 数据集实验
|
| 242 |
+
### 任务与数据集定义
|
| 243 |
+
本任务为参考[open-r1-multimodal](https://github.com/EvolvingLMMs-Lab/open-r1-multimodal.git)的实验,使用数据集:[lmms-lab/multimodal-open-r1-8k-verified](https://www.modelscope.cn/datasets/lmms-lab/multimodal-open-r1-8k-verified),该数据集专注于多模态的数学推理任务,数据由GPT4o基于`Math360K`和`Geo170K`数据集生成,包含推理路径和可验证答案。数据集中已包含了image, problem和solution字段,我们也不需要针对prompt进行修改,因此无需额外定义数据集。
|
| 244 |
+
### 奖励函数
|
| 245 |
+
我们直接使用以上定义过的`MultiModalAccuracyORM`奖励函数。
|
| 246 |
+
### GRPO训练实验记录
|
| 247 |
+
#### 训练参数:
|
| 248 |
+
选取的模型和大部分超参数与上一个实验相似,由于训练的时候出现了OOM,我们设置`MAX_PIXELS=262144`以降低显存占用。
|
| 249 |
+
```shell
|
| 250 |
+
WANDB_API_KEY=your_wandb_api_key \
|
| 251 |
+
MAX_PIXELS=262144 \
|
| 252 |
+
MASTER_PORT=29600 \
|
| 253 |
+
NPROC_PER_NODE=6 \
|
| 254 |
+
swift rlhf \
|
| 255 |
+
--rlhf_type grpo \
|
| 256 |
+
--model Qwen/Qwen2.5-VL-3B-Instruct \
|
| 257 |
+
--external_plugins examples/train/grpo/plugin/plugin.py \
|
| 258 |
+
--reward_funcs external_r1v_acc format \
|
| 259 |
+
--use_vllm true \
|
| 260 |
+
--vllm_device auto \
|
| 261 |
+
--vllm_gpu_memory_utilization 0.6 \
|
| 262 |
+
--train_type full \
|
| 263 |
+
--torch_dtype bfloat16 \
|
| 264 |
+
--dataset 'lmms-lab/multimodal-open-r1-8k-verified' \
|
| 265 |
+
--vllm_max_model_len 8192 \
|
| 266 |
+
--max_completion_length 1024 \
|
| 267 |
+
--num_train_epochs 1 \
|
| 268 |
+
--per_device_train_batch_size 8 \
|
| 269 |
+
--per_device_eval_batch_size 8 \
|
| 270 |
+
--learning_rate 1e-6 \
|
| 271 |
+
--gradient_accumulation_steps 2 \
|
| 272 |
+
--save_strategy 'steps' \
|
| 273 |
+
--eval_strategy 'steps' \
|
| 274 |
+
--eval_steps 400 \
|
| 275 |
+
--save_steps 400 \
|
| 276 |
+
--save_total_limit 10 \
|
| 277 |
+
--logging_steps 1 \
|
| 278 |
+
--output_dir output/GRPO_GEOQA \
|
| 279 |
+
--warmup_ratio 0.05 \
|
| 280 |
+
--dataloader_num_workers 4 \
|
| 281 |
+
--num_generations 8 \
|
| 282 |
+
--temperature 1.0 \
|
| 283 |
+
--repetition_penalty 1.1 \
|
| 284 |
+
--system 'examples/train/grpo/prompt.txt' \
|
| 285 |
+
--deepspeed zero3 \
|
| 286 |
+
--log_completions true \
|
| 287 |
+
--report_to wandb \
|
| 288 |
+
--num_iterations 2 \
|
| 289 |
+
--num_infer_workers 2 \
|
| 290 |
+
--async_generate false \
|
| 291 |
+
--beta 0.001 \
|
| 292 |
+
--max_grad_norm 0.5 \
|
| 293 |
+
|
| 294 |
+
```
|
| 295 |
+
|
| 296 |
+
#### 实验现象
|
| 297 |
+

|
| 298 |
+
训练曲线如上图所示。
|
| 299 |
+
一共8k数据,训练了1268个step,Accuracy Reward收敛到0.5左右,completion_length基本收敛到200token左右,reward_std最终在0.2左右震荡。
|
| 300 |
+
以下是训练后的模型completion示例:
|
| 301 |
+
```
|
| 302 |
+
User:
|
| 303 |
+
"Based on the map, which state falls into the lowest percentage range of lots, and what is that percentage range?"
|
| 304 |
+
Assistant:
|
| 305 |
+
"<think>\nThe image provided shows a color-coded map of the USA indicating different ranges of lot percentages across various states. According to the legend at the bottom, the lowest percentage range (6.0% - 6.5%) is represented by white. In the image, Alabama (AL) is shaded in white, which corresponds to the 6.0% - 6.5% category. Therefore, based on the map, the state that falls into the lowest percentage range of lots is Alabama, with the percentage range of 6.0% - 6.5%.\nTherefore, the answer is 6.0% - 6.5%.\n</think>\n<answer>Alabama</answer>"
|
| 306 |
+
```
|
docs/source/BestPractices/GRPO完整流程.md
ADDED
|
@@ -0,0 +1,179 @@
| 1 |
+
# GRPO完整实验流程
|
| 2 |
+
|
| 3 |
+
本文从较为简单的数学任务 Countdown Game 出发,从数据集定义、奖励函数定义和GRPO训练几个步骤介绍完整的GRPO训练流程。任务定义和训练参数等参考了 [mini-deepseek-r1](https://github.com/philschmid/deep-learning-pytorch-huggingface/blob/main/training/mini-deepseek-r1-aha-grpo.ipynb)。
|
| 4 |
+
|
| 5 |
+
## 任务与数据集定义
|
| 6 |
+
|
| 7 |
+
Countdown Game 的任务目标是根据给定的几个数字和加减乘除四种运算,得到目标数字,因此,我们定义数据集如下:
|
| 8 |
+
```python
|
| 9 |
+
class CoundownTaskPreprocessor(ResponsePreprocessor):
|
| 10 |
+
|
| 11 |
+
def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
|
| 12 |
+
numbers = row['nums']
|
| 13 |
+
target = row.pop('response', None)
|
| 14 |
+
query = f"""
|
| 15 |
+
Using the numbers {numbers}, create an equation that equals {target}.
|
| 16 |
+
You can use basic arithmetic operations (+, -, *, /) and each number can only be used once.
|
| 17 |
+
Show your work in <think> </think> tags. And return the final equation and answer in <answer> </answer> tags,
|
| 18 |
+
for example <answer> (1 + 2) / 3 * 4 = 4 </answer>.
|
| 19 |
+
"""
|
| 20 |
+
row.update({'target': target, 'query': query})
|
| 21 |
+
return super().preprocess(row)
|
| 22 |
+
|
| 23 |
+
register_dataset(
|
| 24 |
+
DatasetMeta(
|
| 25 |
+
ms_dataset_id='zouxuhong/Countdown-Tasks-3to4',
|
| 26 |
+
subsets=['default'],
|
| 27 |
+
preprocess_func=CoundownTaskPreprocessor(),
|
| 28 |
+
tags=['math']))
|
| 29 |
+
```
|
| 30 |
+
通过 template, 使用 numbers 和 target 完成任务定义,并给到 query 字段供模型采样使用。同时,我们需要保留 nums 和 target 两个字段,用于后续的奖励函数计算。
|
| 31 |
+
|
| 32 |
+
## 奖励函数定义:
|
| 33 |
+
本任务使用的奖励函数有两个,一个是 Deepseek-R1 中提到的格式奖励函数,另一个是 Countdown Game 的准确性奖励函数。前者已经在swift中内置,通过 `--reward_funcs format` 可以直接使用,而后者需要我们自己定义,在这里我们使用 external_plugin 的方式定义准确性奖励函数,将代码放在`swift/examples/train/grpo/plugin/plugin.py`中。
|
| 34 |
+
|
| 35 |
+
在这里,奖励函数的输入包括 completions、target 和 nums 三个字段,分别表示模型生成的文本、目标答案和可用的数字。每个都是list,支持多个 completion 同时计算。注意,在这里,除了 completions 之外的参数都是数据集中定义的字段透传而来,如果有任务上的变动,可以分别对数据集和奖励函数做对应的改变即可。
|
| 36 |
+
```python
|
| 37 |
+
class CountdownORM(ORM):
|
| 38 |
+
def __call__(self, completions, target, nums, **kwargs) -> List[float]:
|
| 39 |
+
"""
|
| 40 |
+
Evaluates completions based on Mathematical correctness of the answer
|
| 41 |
+
Args:
|
| 42 |
+
completions (list[str]): Generated outputs
|
| 43 |
+
target (list[str]): Expected answers
|
| 44 |
+
nums (list[str]): Available numbers
|
| 45 |
+
Returns:
|
| 46 |
+
list[float]: Reward scores
|
| 47 |
+
"""
|
| 48 |
+
rewards = []
|
| 49 |
+
for completion, gt, numbers in zip(completions, target, nums):
|
| 50 |
+
try:
|
| 51 |
+
# Check if the format is correct
|
| 52 |
+
match = re.search(r"<answer>(.*?)<\/answer>", completion)
|
| 53 |
+
if match is None:
|
| 54 |
+
rewards.append(0.0)
|
| 55 |
+
continue
|
| 56 |
+
# Extract the "answer" part from the completion
|
| 57 |
+
equation = match.group(1).strip()
|
| 58 |
+
if '=' in equation:
|
| 59 |
+
equation = equation.split('=')[0]
|
| 60 |
+
# Extract all numbers from the equation
|
| 61 |
+
used_numbers = [int(n) for n in re.findall(r'\d+', equation)]
|
| 62 |
+
# Check if all numbers are used exactly once
|
| 63 |
+
if sorted(used_numbers) != sorted(numbers):
|
| 64 |
+
rewards.append(0.0)
|
| 65 |
+
continue
|
| 66 |
+
# Define a regex pattern that only allows numbers, operators, parentheses, and whitespace
|
| 67 |
+
allowed_pattern = r'^[\d+\-*/().\s]+$'
|
| 68 |
+
if not re.match(allowed_pattern, equation):
|
| 69 |
+
rewards.append(0.0)
|
| 70 |
+
continue
|
| 71 |
+
# Evaluate the equation with restricted globals and locals
|
| 72 |
+
result = eval(equation, {"__builtins__": None}, {})
|
| 73 |
+
# Check if the equation is correct and matches the ground truth
|
| 74 |
+
if abs(float(result) - float(gt)) < 1e-5:
|
| 75 |
+
rewards.append(1.0)
|
| 76 |
+
else:
|
| 77 |
+
rewards.append(0.0)
|
| 78 |
+
except Exception as e:
|
| 79 |
+
# If evaluation fails, reward is 0
|
| 80 |
+
rewards.append(0.0)
|
| 81 |
+
return rewards
|
| 82 |
+
orms['external_countdown'] = CountdownORM
|
| 83 |
+
```
|
| 84 |
+
|
| 85 |
+
## GRPO训练实验记录
|
| 86 |
+
首先贴上GRPO的公式:
|
| 87 |
+
|
| 88 |
+
$$
|
| 89 |
+
\begin{aligned}
|
| 90 |
+
\mathcal{J}_{G R P O}(\theta) & =\mathbb{E}\left[q \sim P(Q),\left\{o_i\right\}_{i=1}^G \sim \pi_{\theta_{o l d}}(O \mid q)\right] \\
|
| 91 |
+
& \frac{1}{G} \sum_{i=1}^G \frac{1}{\left|o_i\right|} \sum_{t=1}^{\left|o_i\right|}\left\{\min \left[\frac{\pi_\theta\left(o_{i, t} \mid q, o_{i,<t}\right)}{\pi_{\theta_{o l d}}\left(o_{i, t} \mid q, o_{i,<t}\right)} \hat{A}_{i, t}, \operatorname{clip}\left(\frac{\pi_\theta\left(o_{i, t} \mid q, o_{i,<t}\right)}{\pi_{\theta_{o l d}}\left(o_{i, t} \mid q, o_{i,<t}\right)}, 1-\varepsilon, 1+\varepsilon\right) \hat{A}_{i, t}\right]-\beta \mathbb{D}_{K L}\left[\pi_\theta| | \pi_{r e f}\right]\right\}
|
| 92 |
+
\end{aligned}
|
| 93 |
+
$$
|
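补充说明:公式中同一条输出内各token共享的优势 $\hat{A}_{i,t}$,按GRPO论文由组内奖励归一化得到,这也是下文 $G$(即`num_generations`)的来源:

$$
\hat{A}_{i, t}=\frac{r_i-\operatorname{mean}\left(\left\{r_1, \ldots, r_G\right\}\right)}{\operatorname{std}\left(\left\{r_1, \ldots, r_G\right\}\right)}
$$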
| 94 |
+
### 训练参数:
|
| 95 |
+
我们选取 Qwen2.5-3B-Instruct 作为基础模型进行训练,选取 Instruct 而不是基模的主要原因是可以更快地获取 format reward。我们在三卡 GPU 上进行实验,因此vllm的推理部署在最后一张卡上,而进程数设置为2,在剩下两张卡上进行梯度更新。
|
| 96 |
+
|
| 97 |
+
由于任务较为简单,我们设置 max_completion_length 和 vllm_max_model_len 为1024,如果有更复杂的任务,可以适当加大模型输出长度,但请注意,**这两个参数越大,模型训练需要的显存越多,训练速度越慢,单个step的训练时间与max_completion_length呈现线性关系**。
|
| 98 |
+
|
| 99 |
+
在我们的实验中,总batch_size为 $num\_processes \times per\_device\_train\_batch\_size \times gradient\_accumulation\_steps = 2 \times 8 \times 8 = 128$ 而参数设置有一个限制,即:$num\_processes \times per\_device\_train\_batch\_size$ 必须整除 $num\_generations$,其中,$num\_generations$就是GRPO公式中的 $G$,故我们设置为8。 注意,这里单卡batch_size设置也与显存息息相关,请根据显存上限设置一个合适的值。 同时,还有一个公式,即总的steps数量 :$num\_steps = epochs \times len(datasets) \times num\_generations \div batch\_size $,需要根据这个来合理规划训练的学习率和warmup设置。
|
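按上面的设置,可以用一小段脚本核对这些数值(仅为辅助计算的示意,数据量对应下文脚本中的`#50000`采样):

```python
num_processes = 2                  # NPROC_PER_NODE
per_device_train_batch_size = 8
gradient_accumulation_steps = 8
num_generations = 8                # GRPO公式中的G
epochs, dataset_len = 1, 50000

total_batch_size = num_processes * per_device_train_batch_size * gradient_accumulation_steps
assert (num_processes * per_device_train_batch_size) % num_generations == 0
num_steps = epochs * dataset_len * num_generations // total_batch_size
print(total_batch_size, num_steps)  # 128 3125
```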
| 100 |
+
|
| 101 |
+
最后比较重要的设置是学习率和 beta,学习率比较好理解,而beta则是是以上公式的 $\beta$,即KL散度的梯度的权重。这两个参数设置的越大,模型收敛原则上更快,但训练往往会不稳定。经过实验,我们分别设置为 `5e-7` 和 `0.001`。在实际训练中,请根据是否出现不稳定的震荡情况适当调整这两个参数。
|
| 102 |
+
|
| 103 |
+
对于KL散度,社区有很多的讨论,可以参考[为什么GRPO坚持用KL散度](https://zhuanlan.zhihu.com/p/25862547100)。
|
| 104 |
+
|
| 105 |
+
其他参数的设置,没有做太多探讨,所以这里不进行详细说明。
|
| 106 |
+
```bash
|
| 107 |
+
CUDA_VISIBLE_DEVICES=0,1,2 \
|
| 108 |
+
WANDB_API_KEY=your_wandb_key \
|
| 109 |
+
NPROC_PER_NODE=2 \
|
| 110 |
+
swift rlhf \
|
| 111 |
+
--rlhf_type grpo \
|
| 112 |
+
--model Qwen/Qwen2.5-3B-Instruct \
|
| 113 |
+
--external_plugins examples/train/grpo/plugin/plugin.py \
|
| 114 |
+
--reward_funcs external_countdown format \
|
| 115 |
+
--use_vllm true \
|
| 116 |
+
--vllm_device auto \
|
| 117 |
+
--vllm_gpu_memory_utilization 0.6 \
|
| 118 |
+
--train_type full \
|
| 119 |
+
--torch_dtype bfloat16 \
|
| 120 |
+
--dataset 'zouxuhong/Countdown-Tasks-3to4#50000' \
|
| 121 |
+
--max_length 2048 \
|
| 122 |
+
--max_completion_length 1024 \
|
| 123 |
+
--num_train_epochs 1 \
|
| 124 |
+
--per_device_train_batch_size 8 \
|
| 125 |
+
--per_device_eval_batch_size 8 \
|
| 126 |
+
--learning_rate 5e-7 \
|
| 127 |
+
--gradient_accumulation_steps 8 \
|
| 128 |
+
--eval_steps 500 \
|
| 129 |
+
--save_steps 100 \
|
| 130 |
+
--save_total_limit 20 \
|
| 131 |
+
--logging_steps 1 \
|
| 132 |
+
--output_dir output/GRPO_COUNTDOWN \
|
| 133 |
+
--warmup_ratio 0.01 \
|
| 134 |
+
--dataloader_num_workers 4 \
|
| 135 |
+
--num_generations 8 \
|
| 136 |
+
--temperature 1.0 \
|
| 137 |
+
--system 'You are a helpful assistant. You first thinks about the reasoning process in the mind and then provides the user with the answer.' \
|
| 138 |
+
--deepspeed zero3 \
|
| 139 |
+
--log_completions true \
|
| 140 |
+
--vllm_max_model_len 1024 \
|
| 141 |
+
--report_to wandb \
|
| 142 |
+
--beta 0.001 \
|
| 143 |
+
--num_iterations 1
|
| 144 |
+
```
|
| 145 |
+
### 实验现象
|
| 146 |
+

|
| 147 |
+
从训练了2000 step的实验结果来看,有几点观察:
|
| 148 |
+
1. 自定义的CountdownORM在不断增加,证明模型学习到了如何完成这一任务,最终任务成功率在0.5左右。
|
| 149 |
+
2. Format Reward整体处在较高的水平,在0.9-1左右震荡。
|
| 150 |
+
3. reward_std 最终在0.2-0.3左右,仍未完全收敛,模型仍然有继续提升reward的空间。
|
| 151 |
+
4. 观察模型输出可以发现,相当一部分失败案例是由于模型简单加减乘除做不对,因此模型最终的收敛方向也变成了逐步运算上,如果换成更大的模型,可能有不同的收敛方向。
|
| 152 |
+
5. completion length呈现先下降再增加,在200 step左右最低,然后逐渐上升到350左右。证明模型经历了思考方式的转变。
|
| 153 |
+
6. 模型的思考方式在不断变化,最终模型发现做排列组合并逐步运算能更好地完成这一任务,最终的采样输出也大多是这一格式,以下是不同step的completion变化:
|
| 154 |
+
|
| 155 |
+
step 1:
|
| 156 |
+
```
|
| 157 |
+
"<think>\nWe need to create an equation using the given numbers [70, 5, 42, 30] exactly once and make it equal to 77. \n\nFirst, let's consider the possibility of multiplication or division to get closer to 77. Notice that 77 is close to 70 and 42. If we can use these numbers together, it might work.\n\nMultiplying 70 by 1.1 would give us approximately 77, which is very close. So, let's try to make 1.1 using the remaining numbers 5 and 30. \nIf we take 30/5 = 6, and then 5/6 = 0.8333..., this isn't useful here, so let's look at other possibilities.\n\nNext, let's consider if there's another way to achieve 77 using addition, subtraction, or another combination of our numbers. Given the numbers provided, it seems that multiplication or division will be key. \n\nWe can try subtracting 5 from 30 to get 25. Then, if we divide 42 by 25, we don't get a whole number, but we can round it to the nearest whole number, 1. Now, we need to make the equation fit 77. Let's multiply 25 by 3 and add 7 (since 77 = 25*3 + 7). However, we only have one 7 available and we need to find a way to use the remaining numbers to get 7.\n\nLet's try: \n70 - 42 = 28 \nThen, 28 + 5 = 33 \nNext, 33 + 30 = 63 \nFinally, 77 - 63 = 14, which doesn't help.\n\nConsidering different combinations, one viable way is:\n(70 - 42) * (30 / 5) = 28 * 6 = 168 / 2 = 84 / 2 = 42 / 2 = 21 / 3 = 7\n\nSo, (70 - 42) * (30 / 5) = 77.\n</think>\n<answer>(70 - 42) * (30 / 5) = 77</answer>"
|
| 158 |
+
```
|
| 159 |
+
step 200:
|
| 160 |
+
```
|
| 161 |
+
"<think>\nWe need to use all the given numbers in a way that the resultant value of the calculation is 99. Given these restrictions, it's clear we should look into how the numbers relate and the mathematical operations that will bring them together to meet our condition.\n\nFirstly, let's evaluate the sums, differences, and multiplication/division among the numbers and their relations to see how this could lead us towards an outcome of 99.\n- Summing the three numbers: 96 + 70 + 73 = 239, which is far from 99. This shows addition isn't practical.\n- Subtracting numbers: For instance, subtracting the lowest two (i.e., 73 - 70 = 3) and combining with the highest one (96) does not work. Also, there is no other combination of subtraction that would result in the number needing to add up to 99.\n- Multiplying or dividing the numbers alone wouldn’t reach to any near solution since none of the products or division results of the numbers themselves is very close to 99. We might think more carefully about the arrangement of them using a combination of the operations provided.\nTherefore, considering the possible arrangement, a combination like taking (73 - 70 + 96) may produce an effective solution as this will sum the values in such a manner that it aligns with the goal of 99.\n\nSo we conclude that the operation: 73 - 70 + 96 gives the right sequence to equal 99.\n</think>\n\n<answer>73 - 70 + 96 = 99</answer>"
|
| 162 |
+
```
|
| 163 |
+
step 500:
|
| 164 |
+
```
|
| 165 |
+
"<think>\nLet's begin by exploring various possible combinations of the given numbers using the arithmetic operators we have access to.\n\n- If I add the numbers, I get: 56 + 73 + 51 = 180 which is not equal to our target number (68)\n- If I subtract one number from another I will still exceed our target value as well since the maximum difference is: |(difference between max-min):| | (73 - 51) or | (73 - 56), etc.|\n- Let's multiply the numbers individually and see if the division operation can bring us closer to the goal:\n - (73 * 51) - 56 = 3723 - 56 = 3667 which isn't the goal either.\n - The only operator left is addition and subtraction; I'll try the combination: \n ((73 - 56) + 51) gives me (17 + 51) = 68.\n \nThus, our solution fits the provided problem constraints and meets the required outcome.\n</think>\n<answer>(73 - 56) + 51 = 68</answer>"
|
| 166 |
+
```
|
| 167 |
+
|
| 168 |
+
step 1000:
|
| 169 |
+
```
|
| 170 |
+
"<think>\nWe need to form an expression using the given numbers (50, 86, and 49) such that it equals 85. Let's try different combinations:\n\n(86 - 50) + 49 = 76 + 49 = 125 (too big)\n(86 - 49) + 50 = 37 + 50 = 87 (too big)\n(86 - 50) + 49 = 37 + 49 = 86 (too big)\n(86 + 50) - 49 = 136 - 49 = 87 (too big)\n(86 - 50) + (49 - x)? Not working!\n(50 + 86) - 49: this is 136 which isn't our aim.\n50 + (86 - 49) = 50 + 37 = 87 which wasn't correct.\n50 - 86 + 49 = -16 + 49: This doesn't match up as well.\n86 - (50 + 49) = 86 - 99 which doesn't work too.\n\nWe try one more:\n(86 + 49) - 50 = 135 - 50 = 85!\n\nThat's it!\n</think>\n\n<answer>(86 + 49) - 50 = 85</answer>"
|
| 171 |
+
```
|
| 172 |
+
|
| 173 |
+
step 2000:
|
| 174 |
+
```
|
| 175 |
+
"<think>\nWe need to form an expression using the numbers 58, 9, 78, and 44 which evaluates to 73. Let's try different combinations:\n(78 - 58) + (9 + 44) = 10 + 53 = 63 (too low)\n(78 - 58) + (9 - 44) = 20 - 35 = -15 (too low)\n(78 - 58) + (44 - 9) = 20 + 35 = 55 (too low)\n(78 + 58) - (9 + 44) = 136 - 53 = 83 (too high)\n(78 + 58) - (9 - 44) = 136 + 35 = 171 (too high)\n(78 + 58) - (44 + 9) = 136 + 53 = 189 (too high)\n(78 + 9) - (58 + 44) = 87 - 102 = -15 (too low)\n(78 + 9) - (58 - 44) = 87 - 14 = 73\n\nSo our solution is: (78 + 9) - (58 - 44) = 73</think>\n\n<answer>(78 + 9) - (58 - 44) = 73</answer>"
|
| 176 |
+
```
|
| 177 |
+
|
| 178 |
+
附learning_rate和beta分别取值1e-6和0.04的不稳定实验记录,模型在step 200左右出现了震荡,format和CountdownORM瞬间走低:
|
| 179 |
+

|
docs/source/BestPractices/NPU支持.md
ADDED
|
@@ -0,0 +1,225 @@
|
| 1 |
+
# NPU支持
|
| 2 |
+
作者: [chuanzhubin](https://github.com/chuanzhubin)
|
| 3 |
+
|
| 4 |
+
## 环境准备
|
| 5 |
+
|
| 6 |
+
实验环境:8 * 昇腾910B3 64G (设备由[@chuanzhubin](https://github.com/chuanzhubin)提供, 感谢对modelscope和swift的支持~)
|
| 7 |
+
|
| 8 |
+
```shell
|
| 9 |
+
# 创建新的conda虚拟环境(可选)
|
| 10 |
+
conda create -n swift-npu python=3.10 -y
|
| 11 |
+
conda activate swift-npu
|
| 12 |
+
|
| 13 |
+
# 设置pip全局镜像 (可选,加速下载)
|
| 14 |
+
pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/
|
| 15 |
+
pip install ms-swift -U
|
| 16 |
+
|
| 17 |
+
# 安装torch-npu
|
| 18 |
+
pip install torch-npu decorator
|
| 19 |
+
# 如果你想要使用deepspeed (控制显存占用,训练速度会有一定下降)
|
| 20 |
+
pip install deepspeed
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
测试环境是否安装正确,NPU能否被正常加载:
|
| 24 |
+
```python
|
| 25 |
+
from transformers.utils import is_torch_npu_available
|
| 26 |
+
import torch
|
| 27 |
+
|
| 28 |
+
print(is_torch_npu_available()) # True
|
| 29 |
+
print(torch.npu.device_count()) # 8
|
| 30 |
+
print(torch.randn(10, device='npu:0'))
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
查看NPU的P2P连接,这里看到每个NPU都通过7条HCCS与其他NPU互联
|
| 34 |
+
```shell
|
| 35 |
+
(valle) root@valle:~/src# npu-smi info -t topo
|
| 36 |
+
NPU0 NPU1 NPU2 NPU3 NPU4 NPU5 NPU6 NPU7 CPU Affinity
|
| 37 |
+
NPU0 X HCCS HCCS HCCS HCCS HCCS HCCS HCCS 144-167
|
| 38 |
+
NPU1 HCCS X HCCS HCCS HCCS HCCS HCCS HCCS 144-167
|
| 39 |
+
NPU2 HCCS HCCS X HCCS HCCS HCCS HCCS HCCS 96-119
|
| 40 |
+
NPU3 HCCS HCCS HCCS X HCCS HCCS HCCS HCCS 96-119
|
| 41 |
+
NPU4 HCCS HCCS HCCS HCCS X HCCS HCCS HCCS 0-23
|
| 42 |
+
NPU5 HCCS HCCS HCCS HCCS HCCS X HCCS HCCS 0-23
|
| 43 |
+
NPU6 HCCS HCCS HCCS HCCS HCCS HCCS X HCCS 48-71
|
| 44 |
+
NPU7 HCCS HCCS HCCS HCCS HCCS HCCS HCCS X 48-71
|
| 45 |
+
|
| 46 |
+
Legend:
|
| 47 |
+
|
| 48 |
+
X = Self
|
| 49 |
+
SYS = Path traversing PCIe and NUMA nodes. Nodes are connected through SMP, such as QPI, UPI.
|
| 50 |
+
PHB = Path traversing PCIe and the PCIe host bridge of a CPU.
|
| 51 |
+
PIX = Path traversing a single PCIe switch
|
| 52 |
+
PXB = Path traversing multiple PCIe switches
|
| 53 |
+
HCCS = Connection traversing HCCS.
|
| 54 |
+
NA = Unknown relationship.
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
查看NPU状态, npu-smi命令详解可以查看[官方文档](https://support.huawei.com/enterprise/zh/doc/EDOC1100079287/10dcd668)
|
| 58 |
+
```shell
|
| 59 |
+
(valle) root@valle:~/src# npu-smi info
|
| 60 |
+
+------------------------------------------------------------------------------------------------+
|
| 61 |
+
| npu-smi 24.1.rc1.b030 Version: 24.1.rc1.b030 |
|
| 62 |
+
+---------------------------+---------------+----------------------------------------------------+
|
| 63 |
+
| NPU Name | Health | Power(W) Temp(C) Hugepages-Usage(page)|
|
| 64 |
+
| Chip | Bus-Id | AICore(%) Memory-Usage(MB) HBM-Usage(MB) |
|
| 65 |
+
+===========================+===============+====================================================+
|
| 66 |
+
| 0 910B3 | OK | 101.8 43 0 / 0 |
|
| 67 |
+
| 0 | 0000:C1:00.0 | 0 0 / 0 3318 / 65536 |
|
| 68 |
+
+===========================+===============+====================================================+
|
| 69 |
+
| 1 910B3 | OK | 92.0 39 0 / 0 |
|
| 70 |
+
| 0 | 0000:C2:00.0 | 0 0 / 0 3314 / 65536 |
|
| 71 |
+
+===========================+===============+====================================================+
|
| 72 |
+
| 2 910B3 | OK | 102.0 40 0 / 0 |
|
| 73 |
+
| 0 | 0000:81:00.0 | 0 0 / 0 3314 / 65536 |
|
| 74 |
+
+===========================+===============+====================================================+
|
| 75 |
+
| 3 910B3 | OK | 99.8 40 0 / 0 |
|
| 76 |
+
| 0 | 0000:82:00.0 | 0 0 / 0 3314 / 65536 |
|
| 77 |
+
+===========================+===============+====================================================+
|
| 78 |
+
| 4 910B3 | OK | 98.6 45 0 / 0 |
|
| 79 |
+
| 0 | 0000:01:00.0 | 0 0 / 0 3314 / 65536 |
|
| 80 |
+
+===========================+===============+====================================================+
|
| 81 |
+
| 5 910B3 | OK | 99.7 44 0 / 0 |
|
| 82 |
+
| 0 | 0000:02:00.0 | 0 0 / 0 3314 / 65536 |
|
| 83 |
+
+===========================+===============+====================================================+
|
| 84 |
+
| 6 910B3 | OK | 103.8 45 0 / 0 |
|
| 85 |
+
| 0 | 0000:41:00.0 | 0 0 / 0 3314 / 65536 |
|
| 86 |
+
+===========================+===============+====================================================+
|
| 87 |
+
| 7 910B3 | OK | 98.2 44 0 / 0 |
|
| 88 |
+
| 0 | 0000:42:00.0 | 0 0 / 0 3315 / 65536 |
|
| 89 |
+
+===========================+===============+====================================================+
|
| 90 |
+
```
|
| 91 |
+
|
| 92 |
+
## 微调
|
| 93 |
+
以下介绍LoRA的微调, 全参数微调设置参数`--train_type full`即可.
|
| 94 |
+
|
| 95 |
+
| 模型大小 | NPU数量 | deepspeed类型 | 最大显存占用量 |
|
| 96 |
+
|------|-------|-------------|-----------|
|
| 97 |
+
| 7B | 1 | None | 1 * 28 GB |
|
| 98 |
+
| 7B | 4 | None | 4 * 22 GB |
|
| 99 |
+
| 7B | 4 | zero2 | 4 * 28 GB |
|
| 100 |
+
| 7B | 4 | zero3 | 4 * 22 GB |
|
| 101 |
+
| 7B | 8 | None | 8 * 22 GB |
|
| 102 |
+
| 14B | 1 | None | 1 * 45 GB |
|
| 103 |
+
| 14B | 8 | None | 8 * 51 GB |
|
| 104 |
+
| 14B | 8 | zero2 | 8 * 49 GB |
|
| 105 |
+
| 14B | 8 | zero3 | 8 * 31 GB |
|
| 106 |
+
|
| 107 |
+
### 单卡训练
|
| 108 |
+
|
| 109 |
+
通过如下命令启动单卡微调: (注意: 如果微调期间出现nan的情况, 请设置`--torch_dtype float32`.)
|
| 110 |
+
|
| 111 |
+
```shell
|
| 112 |
+
# 实验环境: 昇腾910B3
|
| 113 |
+
# 显存需求: 28 GB
|
| 114 |
+
# 运行时长: 8小时
|
| 115 |
+
ASCEND_RT_VISIBLE_DEVICES=0 \
|
| 116 |
+
swift sft \
|
| 117 |
+
--model Qwen/Qwen2-7B-Instruct \
|
| 118 |
+
--dataset AI-ModelScope/blossom-math-v2 \
|
| 119 |
+
--num_train_epochs 5 \
|
| 120 |
+
--train_type lora \
|
| 121 |
+
--output_dir output \
|
| 122 |
+
--learning_rate 1e-4 \
|
| 123 |
+
--gradient_accumulation_steps 16 \
|
| 124 |
+
--save_steps 100 \
|
| 125 |
+
--eval_steps 100
|
| 126 |
+
|
| 127 |
+
```
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
### 数据并行训练
|
| 131 |
+
我们使用其中的4卡进行ddp训练
|
| 132 |
+
|
| 133 |
+
```shell
|
| 134 |
+
# 实验环境: 4 * 昇腾910B3
|
| 135 |
+
# 显存需求: 4 * 22 GB
|
| 136 |
+
# 运行时长: 2小时
|
| 137 |
+
NPROC_PER_NODE=4 \
|
| 138 |
+
ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 \
|
| 139 |
+
swift sft \
|
| 140 |
+
--model Qwen/Qwen2-7B-Instruct \
|
| 141 |
+
--dataset AI-ModelScope/blossom-math-v2 \
|
| 142 |
+
--num_train_epochs 5 \
|
| 143 |
+
--train_type lora \
|
| 144 |
+
--output_dir output \
|
| 145 |
+
...
|
| 146 |
+
```
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
### Deepspeed训练
|
| 150 |
+
|
| 151 |
+
ZeRO2:
|
| 152 |
+
```shell
|
| 153 |
+
# 实验环境: 4 * 昇腾910B3
|
| 154 |
+
# 显存需求: 4 * 28GB
|
| 155 |
+
# 运行时长: 3.5小时
|
| 156 |
+
NPROC_PER_NODE=4 \
|
| 157 |
+
ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 \
|
| 158 |
+
swift sft \
|
| 159 |
+
--model Qwen/Qwen2-7B-Instruct \
|
| 160 |
+
--dataset AI-ModelScope/blossom-math-v2 \
|
| 161 |
+
--num_train_epochs 5 \
|
| 162 |
+
--train_type lora \
|
| 163 |
+
--output_dir output \
|
| 164 |
+
--deepspeed zero2 \
|
| 165 |
+
...
|
| 166 |
+
```
|
| 167 |
+
|
| 168 |
+
ZeRO3:
|
| 169 |
+
```shell
|
| 170 |
+
# 实验环境: 4 * 昇腾910B3
|
| 171 |
+
# 显存需求: 4 * 22 GB
|
| 172 |
+
# 运行时长: 8.5小时
|
| 173 |
+
NPROC_PER_NODE=4 \
|
| 174 |
+
ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 \
|
| 175 |
+
swift sft \
|
| 176 |
+
--model Qwen/Qwen2-7B-Instruct \
|
| 177 |
+
--dataset AI-ModelScope/blossom-math-v2 \
|
| 178 |
+
--num_train_epochs 5 \
|
| 179 |
+
--train_type lora \
|
| 180 |
+
--output_dir output \
|
| 181 |
+
--deepspeed zero3 \
|
| 182 |
+
...
|
| 183 |
+
```
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
## 推理
|
| 187 |
+
|
| 188 |
+
原始模型:
|
| 189 |
+
```shell
|
| 190 |
+
ASCEND_RT_VISIBLE_DEVICES=0 swift infer \
|
| 191 |
+
--model Qwen/Qwen2-7B-Instruct \
|
| 192 |
+
--stream true --max_new_tokens 2048
|
| 193 |
+
```
|
| 194 |
+
|
| 195 |
+
LoRA微调后:
|
| 196 |
+
```shell
|
| 197 |
+
ASCEND_RT_VISIBLE_DEVICES=0 swift infer \
|
| 198 |
+
--adapters xxx/checkpoint-xxx --load_data_args true \
|
| 199 |
+
--stream true --max_new_tokens 2048
|
| 200 |
+
|
| 201 |
+
# merge-lora并推理
|
| 202 |
+
ASCEND_RT_VISIBLE_DEVICES=0 swift export --adapters xx/checkpoint-xxx --merge_lora true
|
| 203 |
+
|
| 204 |
+
ASCEND_RT_VISIBLE_DEVICES=0 swift infer \
|
| 205 |
+
--model xxx/checkpoint-xxx-merged --load_data_args true \
|
| 206 |
+
--stream true --max_new_tokens 2048
|
| 207 |
+
```
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
## 部署
|
| 211 |
+
NPU不支持使用vllm进行推理/部署加速, 但是可以使用原生pytorch进行部署.
|
| 212 |
+
|
| 213 |
+
原始模型:
|
| 214 |
+
```shell
|
| 215 |
+
ASCEND_RT_VISIBLE_DEVICES=0 swift deploy --model Qwen/Qwen2-7B-Instruct --max_new_tokens 2048
|
| 216 |
+
```
|
| 217 |
+
|
| 218 |
+
LoRA微调后:
|
| 219 |
+
```shell
|
| 220 |
+
ASCEND_RT_VISIBLE_DEVICES=0 swift deploy --adapters xxx/checkpoint-xxx --max_new_tokens 2048
|
| 221 |
+
|
| 222 |
+
# merge-lora并推理
|
| 223 |
+
ASCEND_RT_VISIBLE_DEVICES=0 swift export --adapters xx/checkpoint-xxx --merge_lora true
|
| 224 |
+
ASCEND_RT_VISIBLE_DEVICES=0 swift deploy --model xxx/checkpoint-xxx-merged --max_new_tokens 2048
|
| 225 |
+
```
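
部署完成后,可以用 OpenAI 客户端做一次简单的连通性验证。以下为示意代码(假设服务运行在默认的8000端口,模型名直接从服务端获取):

```python
from openai import OpenAI

client = OpenAI(api_key='EMPTY', base_url='http://127.0.0.1:8000/v1')
model = client.models.list().data[0].id  # 获取服务端注册的模型名
resp = client.chat.completions.create(
    model=model,
    messages=[{'role': 'user', 'content': '你好,介绍一下你自己'}],
    max_tokens=256,
)
print(resp.choices[0].message.content)
```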
|
docs/source/BestPractices/快速训练VL模型.md
ADDED
|
@@ -0,0 +1,227 @@
|
| 1 |
+
# 快速训练视觉语言(Vision-Language, VL)模型的最佳实践
|
| 2 |
+
|
| 3 |
+
本文档提供从零开始快速训练视觉语言(Vision-Language, VL)模型的最佳实践。
|
| 4 |
+
|
| 5 |
+
涉及的模型链接:
|
| 6 |
+
- [Qwen2.5-VL-7B-Instruct](https://www.modelscope.cn/models/Qwen/Qwen2.5-VL-7B-Instruct)
|
| 7 |
+
- [Qwen3-8B](https://www.modelscope.cn/models/Qwen/Qwen3-8B)
|
| 8 |
+
|
| 9 |
+
训练的模型链接:
|
| 10 |
+
- [Simple-VL-8B](https://www.modelscope.cn/models/swift/Simple-VL-8B/summary)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
本训练流程基于 Qwen2.5-VL-7B-Instruct 模型架构,将其内部的语言模型(LLM)部分替换为 Qwen3-8B 的权重,训练模型的视觉理解能力。具体步骤如下:
|
| 14 |
+
|
| 15 |
+
1. 修改原始模型的配置文件 config.json,使其适配 Qwen3-8B 的模型结构。
|
| 16 |
+
2. 初始化并加载新的模型权重,保存为新模型。
|
| 17 |
+
3. 对新模型进行两阶段微调:
|
| 18 |
+
1. 第一阶段:仅训练视觉到语言的对齐模块(aligner),冻结 ViT 和 LLM 部分。
|
| 19 |
+
2. 第二阶段:解冻所有模块,联合训练提升整体性能。
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
## 模型修改
|
| 23 |
+
|
| 24 |
+
### 修改配置文件 config.json
|
| 25 |
+
因为 Qwen2.5-VL-7B-Instruct 模型的底模 Qwen2.5-7B-Instruct 与 Qwen3-8B 在模型结构上存在部分差异(比如层数、hidden_size),我们首先需要基于 Qwen2.5-VL-7B-Instruct 的 config.json 文件创建一个新的 config.json 文件,并修改以下参数以对齐 Qwen3-8B:
|
| 26 |
+
|
| 27 |
+
```
|
| 28 |
+
修改
|
| 29 |
+
1. hidden_size 3584->4096
|
| 30 |
+
2. intermediate_size: 18944->12288
|
| 31 |
+
3. num_attention_heads: 28->32
|
| 32 |
+
4. num_key_value_heads: 4->8
|
| 33 |
+
5. num_hidden_layers: 28->32
|
| 34 |
+
6. vocab_size:152064->151936
|
| 35 |
+
7. max_window_layers:28->36
|
| 36 |
+
|
| 37 |
+
新增
|
| 38 |
+
1. head_dim: 128
|
| 39 |
+
```
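
如果不想手动编辑 config.json,也可以用脚本完成上述修改。以下为示意代码,字段取值直接照搬上面的列表,保存路径为占位符:

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained('Qwen/Qwen2.5-VL-7B-Instruct')
# 修改
config.hidden_size = 4096
config.intermediate_size = 12288
config.num_attention_heads = 32
config.num_key_value_heads = 8
config.num_hidden_layers = 32
config.vocab_size = 151936
config.max_window_layers = 36
# 新增
config.head_dim = 128
config.save_pretrained('/path/to/new_config_dir')  # 即下文加载新 config 的目录
```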
|
| 40 |
+
|
| 41 |
+
### 模型权重初始化与替换
|
| 42 |
+
使用以下 Python 脚本完成模型权重的初始化、替换与保存:
|
| 43 |
+
|
| 44 |
+
```python
|
| 45 |
+
import torch
|
| 46 |
+
from modelscope import Qwen2_5_VLForConditionalGeneration, AutoModelForCausalLM, AutoConfig
|
| 47 |
+
from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import Qwen2_5_VLPatchMerger, Qwen2_5_VLModel
|
| 48 |
+
from accelerate import Accelerator
|
| 49 |
+
|
| 50 |
+
# 加载原始 VL 模型和 Qwen3-8B 模型
|
| 51 |
+
qwen2_5_vl_7b_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 52 |
+
"Qwen/Qwen2.5-VL-7B-Instruct",
|
| 53 |
+
device_map="cuda",
|
| 54 |
+
torch_dtype=torch.bfloat16
|
| 55 |
+
)
|
| 56 |
+
device = qwen2_5_vl_7b_model.device
|
| 57 |
+
qwen3_8b_model = AutoModelForCausalLM.from_pretrained(
|
| 58 |
+
"Qwen/Qwen3-8B",
|
| 59 |
+
device_map=device,
|
| 60 |
+
torch_dtype=torch.bfloat16
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
+
# 加载配置
|
| 64 |
+
old_config = AutoConfig.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
|
| 65 |
+
new_config = AutoConfig.from_pretrained("/path/to/new_config_dir") # 新 config 的文件夹路径
new_visual_config = new_config.vision_config # 视觉部分的配置,供下方初始化 merger 使用
|
| 66 |
+
|
| 67 |
+
# 1. 替换 ViT 到 LLM 的 merger(aligner) 层
|
| 68 |
+
new_merger = Qwen2_5_VLPatchMerger(
|
| 69 |
+
dim=new_visual_config.out_hidden_size,
|
| 70 |
+
context_dim=new_visual_config.hidden_size,
|
| 71 |
+
spatial_merge_size=new_visual_config.spatial_merge_size,
|
| 72 |
+
).to(device).to(torch.bfloat16)
|
| 73 |
+
qwen2_5_vl_7b_model.visual.merger = new_merger
|
| 74 |
+
|
| 75 |
+
# 2. 替换 VL 模型的 LLM 部分
|
| 76 |
+
new_llm_model = Qwen2_5_VLModel(new_config).to(device).to(torch.bfloat16)
|
| 77 |
+
|
| 78 |
+
for name, param in qwen3_8b_model.model.named_parameters():
|
| 79 |
+
if name in new_llm_model.state_dict():
|
| 80 |
+
new_llm_model.state_dict()[name].copy_(param)
|
| 81 |
+
|
| 82 |
+
qwen2_5_vl_7b_model.model = new_llm_model
|
| 83 |
+
qwen2_5_vl_7b_model.lm_head = qwen3_8b_model.lm_head
|
| 84 |
+
|
| 85 |
+
# 3. 保存修改后的模型
|
| 86 |
+
accelerator = Accelerator()
|
| 87 |
+
accelerator.save_model(
|
| 88 |
+
model=qwen2_5_vl_7b_model,
|
| 89 |
+
save_directory="/path/to/save/Qwen3-VL-Model",
|
| 90 |
+
max_shard_size="4GB",
|
| 91 |
+
safe_serialization=True
|
| 92 |
+
)
|
| 93 |
+
```
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
## 训练
|
| 97 |
+
|
| 98 |
+
为简化流程,我们跳过预训练(pretrain),直接进入监督微调(SFT)。训练分为两个阶段:
|
| 99 |
+
|
| 100 |
+
### stage1 训练 Aligner 层
|
| 101 |
+
仅训练视觉到语言的对齐层(Aligner),冻结 ViT 和 LLM 部分:
|
| 102 |
+
|
| 103 |
+
```bash
|
| 104 |
+
NNODES=$WORLD_SIZE \
|
| 105 |
+
NODE_RANK=$RANK \
|
| 106 |
+
NPROC_PER_NODE=8 \
|
| 107 |
+
MAX_PIXELS=1003520 \
|
| 108 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
|
| 109 |
+
swift sft \
|
| 110 |
+
--model /path/to/new_vl_model \
|
| 111 |
+
--model_type qwen2_5_vl \
|
| 112 |
+
--train_type full \
|
| 113 |
+
--dataset xxx \
|
| 114 |
+
--torch_dtype bfloat16 \
|
| 115 |
+
--attn_impl flash_attn \
|
| 116 |
+
--freeze_vit true \
|
| 117 |
+
--freeze_llm true \
|
| 118 |
+
--freeze_aligner false \
|
| 119 |
+
--num_train_epochs 3 \
|
| 120 |
+
--per_device_train_batch_size 2 \
|
| 121 |
+
--learning_rate 5e-6 \
|
| 122 |
+
--gradient_accumulation_steps 8 \
|
| 123 |
+
--eval_steps -1 \
|
| 124 |
+
--save_steps 1000 \
|
| 125 |
+
--save_total_limit 10 \
|
| 126 |
+
--logging_steps 5 \
|
| 127 |
+
--max_length 8192 \
|
| 128 |
+
--output_dir output \
|
| 129 |
+
--warmup_ratio 0.05 \
|
| 130 |
+
--dataloader_num_workers 4 \
|
| 131 |
+
--dataset_num_proc 8 \
|
| 132 |
+
--deepspeed zero2
|
| 133 |
+
```
|
| 134 |
+
|
| 135 |
+
### stage2 训练整个模型
|
| 136 |
+
解冻所有模块,联合训练以增强模型的整体视觉理解能力:
|
| 137 |
+
|
| 138 |
+
```bash
|
| 139 |
+
NNODES=$WORLD_SIZE \
|
| 140 |
+
NODE_RANK=$RANK \
|
| 141 |
+
NPROC_PER_NODE=8 \
|
| 142 |
+
MAX_PIXELS=1003520 \
|
| 143 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
|
| 144 |
+
swift sft \
|
| 145 |
+
--model /path/to/stage1_checkpoint \
|
| 146 |
+
--model_type qwen2_5_vl \
|
| 147 |
+
--train_type full \
|
| 148 |
+
--dataset xxx \
|
| 149 |
+
--torch_dtype bfloat16 \
|
| 150 |
+
--attn_impl flash_attn \
|
| 151 |
+
--freeze_vit false \
|
| 152 |
+
--freeze_llm false \
|
| 153 |
+
--freeze_aligner false \
|
| 154 |
+
--num_train_epochs 3 \
|
| 155 |
+
--per_device_train_batch_size 2 \
|
| 156 |
+
--learning_rate 5e-6 \
|
| 157 |
+
--gradient_accumulation_steps 8 \
|
| 158 |
+
--eval_steps -1 \
|
| 159 |
+
--save_steps 1000 \
|
| 160 |
+
--save_total_limit 10 \
|
| 161 |
+
--logging_steps 5 \
|
| 162 |
+
--max_length 8192 \
|
| 163 |
+
--output_dir output \
|
| 164 |
+
--warmup_ratio 0.05 \
|
| 165 |
+
--dataloader_num_workers 4 \
|
| 166 |
+
--dataset_num_proc 8 \
|
| 167 |
+
--deepspeed zero2
|
| 168 |
+
```
|
| 169 |
+
|
| 170 |
+
## 推理/部署/评测
|
| 171 |
+
|
| 172 |
+
### 推理
|
| 173 |
+
通过`swift infer`来推理训练得到的模型
|
| 174 |
+
```bash
|
| 175 |
+
swift infer \
|
| 176 |
+
--model /path/to/stage2_checkpoint
|
| 177 |
+
|
| 178 |
+
```
|
| 179 |
+
|
| 180 |
+
### 部署
|
| 181 |
+
使用 vLLM 加速模型服务部署:
|
| 182 |
+
|
| 183 |
+
```
|
| 184 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 185 |
+
MAX_PIXELS=1003520 \
|
| 186 |
+
VIDEO_MAX_PIXELS=50176 \
|
| 187 |
+
FPS_MAX_FRAMES=12 \
|
| 188 |
+
swift deploy \
|
| 189 |
+
--model /path/to/stage2_checkpoint \
|
| 190 |
+
--infer_backend vllm \
|
| 191 |
+
--gpu_memory_utilization 0.9 \
|
| 192 |
+
--max_model_len 8192 \
|
| 193 |
+
--max_new_tokens 2048 \
|
| 194 |
+
--limit_mm_per_prompt '{"image": 5, "video": 2}' \
|
| 195 |
+
--served_model_name Qwen3-VL
|
| 196 |
+
```
|
| 197 |
+
|
| 198 |
+
### 评测
|
| 199 |
+
通过 [EvalScope](https://github.com/modelscope/evalscope/) 对训练得到的 VL 模型进行评测
|
| 200 |
+
|
| 201 |
+
以下是以 MMMU benchmark 为例的评测代码:
|
| 202 |
+
```python
|
| 203 |
+
from evalscope import TaskConfig, run_task
|
| 204 |
+
|
| 205 |
+
task_cfg_dict = TaskConfig(
|
| 206 |
+
work_dir='outputs',
|
| 207 |
+
eval_backend='VLMEvalKit',
|
| 208 |
+
eval_config={
|
| 209 |
+
'data': ['MMMU_DEV_VAL'],
|
| 210 |
+
'mode': 'all',
|
| 211 |
+
'model': [
|
| 212 |
+
{'api_base': 'http://localhost:8000/v1/chat/completions',
|
| 213 |
+
'key': 'EMPTY',
|
| 214 |
+
'name': 'CustomAPIModel',
|
| 215 |
+
'temperature': 0.6,
|
| 216 |
+
'type': 'Qwen3-VL',
|
| 217 |
+
'img_size': -1,
|
| 218 |
+
'video_llm': False,
|
| 219 |
+
'max_tokens': 512,}
|
| 220 |
+
],
|
| 221 |
+
'reuse': False,
|
| 222 |
+
'nproc': 64,
|
| 223 |
+
'judge': 'exact_matching'},
|
| 224 |
+
)
|
| 225 |
+
|
| 226 |
+
run_task(task_cfg=task_cfg_dict)
|
| 227 |
+
```
|
docs/source/BestPractices/更多最佳实践.md
ADDED
|
@@ -0,0 +1,7 @@
|
| 1 |
+
|
| 2 |
+
# 更多最佳实践
|
| 3 |
+
|
| 4 |
+
- [Qwen2.5自我认知微调](https://github.com/modelscope/ms-swift/tree/main/examples/notebook/qwen2_5-self-cognition)
|
| 5 |
+
- [Qwen2-VL Latex-OCR微调](https://github.com/modelscope/ms-swift/tree/main/examples/notebook/qwen2vl-ocr)
|
| 6 |
+
- [Qwen2.5-VL Grounding任务微调](https://github.com/modelscope/ms-swift/tree/main/examples/notebook/qwen2_5-vl-grounding)
|
| 7 |
+
- [Qwen3全流程最佳实践](https://github.com/modelscope/ms-swift/issues/4030)
|
docs/source/Customization/插件化.md
ADDED
|
@@ -0,0 +1,215 @@
|
| 1 |
+
# 插件化
|
| 2 |
+
|
| 3 |
+
插件化是SWIFT3.0中新增的重要能力。我们希望通过插件化的方式,让开发者对开发流程的定制更加自然。
|
| 4 |
+
|
| 5 |
+
## callback回调
|
| 6 |
+
|
| 7 |
+
example在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/callback.py).
|
| 8 |
+
|
| 9 |
+
`callback`机制是transformers Trainer中的一种训练定制化机制。开发者可以在callback中控制训练流程。通常来说,callback的定制化类似下面的样子:
|
| 10 |
+
```python
|
| 11 |
+
class CustomCallback(TrainerCallback):
|
| 12 |
+
|
| 13 |
+
def on_train_begin(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
|
| 14 |
+
# Doing something when the training begins.
|
| 15 |
+
pass
|
| 16 |
+
|
| 17 |
+
def on_save(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
|
| 18 |
+
# Doing something when save checkpoint
|
| 19 |
+
pass
|
| 20 |
+
```
|
| 21 |
+
callback会在trainer构造前注册进trainer中,example中给出了一个简单版本的EarlyStop方案。注册你自己的callback的方式比较简单:
|
| 22 |
+
```python
|
| 23 |
+
extra_callbacks = [CustomCallback()]
|
| 24 |
+
```
|
| 25 |
+
开发者可以在plugin/callback.py中增加新的callback,并定制自己的训练流程。callback的具体参数可以查看[这里](https://huggingface.co/docs/transformers/main_classes/callback)。
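
下面给出一个示意性的callback(并非example中的实现,指标与阈值仅为假设),当loss连续若干次未创新低时提前停止训练:

```python
from transformers import TrainerCallback, TrainerControl, TrainerState, TrainingArguments


class LossEarlyStopCallback(TrainerCallback):
    """当 logs 中的 loss 连续 patience 次没有创新低时,停止训练。"""

    def __init__(self, patience: int = 5):
        self.patience = patience
        self.best_loss = float('inf')
        self.counter = 0

    def on_log(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, logs=None, **kwargs):
        loss = (logs or {}).get('loss')
        if loss is None:
            return
        if loss < self.best_loss:
            self.best_loss = loss
            self.counter = 0
        else:
            self.counter += 1
        if self.counter >= self.patience:
            control.should_training_stop = True


extra_callbacks = [LossEarlyStopCallback()]
```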
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
## 定制化loss
|
| 29 |
+
|
| 30 |
+
example在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/loss.py).
|
| 31 |
+
|
| 32 |
+
SWIFT支持在plugin中定制loss。如果不使用这个能力,默认会使用交叉熵Loss(CE Loss)。开发者可以在这个文件中编写代码,注册后trainer会自动使用你定制的loss方法。
|
| 33 |
+
例如在plugin/loss.py中添加下面的代码:
|
| 34 |
+
```python
|
| 35 |
+
@register_loss_func("custom_loss")
|
| 36 |
+
def loss_scale_func(outputs, labels, loss_scale=None, num_items_in_batch=None) -> torch.Tensor:
|
| 37 |
+
# Write your own loss calculating here
|
| 38 |
+
return loss
|
| 39 |
+
```
|
| 40 |
+
需要注意的是,loss和trainer训练的任务是强相关的,目前的loss定制针对pt和sft任务,如果是人类对齐任务(例如DPO、PPO等)或分类任务(seq_cls)任务在插件中是无法定制的。
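
下面给出一个示意性的实现(与上面的示例一样写在plugin/loss.py中;假设outputs带有logits、labels中-100表示不参与训练的token,并按因果语言模型的惯例做错位对齐),实现带label smoothing的交叉熵:

```python
import torch
from torch.nn import functional as F


@register_loss_func('smoothed_ce')
def smoothed_ce_loss(outputs, labels, loss_scale=None, num_items_in_batch=None) -> torch.Tensor:
    logits = outputs.logits
    # 错位对齐:第i个位置的logits预测第i+1个token
    shift_logits = logits[..., :-1, :].contiguous().view(-1, logits.size(-1))
    shift_labels = labels[..., 1:].contiguous().view(-1)
    return F.cross_entropy(shift_logits, shift_labels, ignore_index=-100, label_smoothing=0.1)
```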
|
| 41 |
+
|
| 42 |
+
## 定制化loss_scale
|
| 43 |
+
|
| 44 |
+
example在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/loss_scale/loss_scale.py).
|
| 45 |
+
|
| 46 |
+
loss_scale机制在SWIFT中是非常重要的机制之一。在pt和sft任务中,可训练token的loss是均匀的,即每个token平等地进行bp。但在某些情况下,某些token更加重要,需要被额外关注,
|
| 47 |
+
在这种情况下就需要更高的权重。loss_scale可以让开发者自由地定义自己的token权重。
|
| 48 |
+
```python
|
| 49 |
+
class LastRoundLossScale(LossScale):
|
| 50 |
+
|
| 51 |
+
def get_loss_scale(self, context: str, context_type: ContextType, is_last_round: bool, **kwargs):
|
| 52 |
+
if context_type == ContextType.RESPONSE:
|
| 53 |
+
return [context], [float(is_last_round)]
|
| 54 |
+
return super().get_loss_scale(context, context_type, is_last_round)
|
| 55 |
+
```
|
| 56 |
+
在上面的代码中,返回了一个Tuple:第一个返回值是context列表(或拆解后的context列表),第二个返回值是与之一一对应的loss_scale列表,其中的float值代表权重。例如下面的权重设置:
|
| 57 |
+
```text
|
| 58 |
+
["学习", "好", "数学", "是", "重要", "的"]
|
| 59 |
+
[1.0, 0.5, 2.0, 0.5, 2.0, 0.1]
|
| 60 |
+
```
|
| 61 |
+
我们更看重数学和重要两个词,因此我们把它们的权重提升到2.0。
|
| 62 |
+
回到上面的代码,我们判断了传入的context是否是response:如果是response且属于多轮对话的最后一轮,则返回loss_scale为1;其他情况下使用基类的实现(在本场景下loss_scale为0)。使用这种方案,
|
| 63 |
+
我们做到了只有最后一轮的response参与训练,其他response不参与训练。使用这种方式,可以让所有token(prompt、response)参与训练,或针对agent某些特殊字符重点训练等。
|
| 64 |
+
在pt和sft中,loss_scale是整体支持(是否参与训练,以及权重大小)的,而人类对齐中只能支持某些token是否参与训练,无法支持权重大小。
|
| 65 |
+
|
| 66 |
+
## 定制化metric
|
| 67 |
+
|
| 68 |
+
example在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/metric.py).
|
| 69 |
+
|
| 70 |
+
metric可以定制训练时使用的评测参数:
|
| 71 |
+
```python
|
| 72 |
+
METRIC_MAPPING = {
|
| 73 |
+
'acc': (compute_acc_metrics, preprocess_logits_for_acc),
|
| 74 |
+
'nlg': (compute_nlg_metrics, None),
|
| 75 |
+
'custom': (custom_metric, custom_preprocess),
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def get_metric(metric: str):
|
| 80 |
+
return METRIC_MAPPING[metric]
|
| 81 |
+
```
|
| 82 |
+
在上面的定义中,我们添加了新的custom metric,它对应的value是一个二元组:第一个元素是计算metric的函数,返回一个包含metric key-value对的dict;第二个元素是对logits做前处理的函数,返回实际的predictions。
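
例如,可以按如下方式补全上面示例中引用的两个函数(示意代码,这里实现按token的accuracy,假设labels中-100表示忽略的位置):

```python
import numpy as np


def custom_preprocess(logits, labels):
    # 前处理:只保留argmax后的token id,避免评测时缓存完整的logits
    return logits.argmax(dim=-1)


def custom_metric(eval_prediction):
    # 因果语言模型:第i个位置的prediction对应第i+1个label
    preds = eval_prediction.predictions[..., :-1]
    labels = eval_prediction.label_ids[..., 1:]
    mask = labels != -100
    acc = (preds[mask] == labels[mask]).mean()
    return {'token_acc': round(float(acc), 6)}
```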
|
| 83 |
+
|
| 84 |
+
## 定制化optimizer
|
| 85 |
+
|
| 86 |
+
example在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/optimizer.py).
|
| 87 |
+
- 对模型不同部分采用不同的学习率,例如:ViT和LLM分别使用不同的学习率,参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/lora_llm_full_vit/custom_plugin.py)。
|
| 88 |
+
|
| 89 |
+
用户可以在这里增加自己的optimizer和lr_scheduler实现:
|
| 90 |
+
```python
|
| 91 |
+
def create_custom_optimizers(args, model, dataset):
|
| 92 |
+
# 创建自己的optimizer
|
| 93 |
+
return CustomOptimizer(optimizer_grouped_parameters, **optimizer_kwargs), CustomScheduler(...)
|
| 94 |
+
|
| 95 |
+
optimizers_map = {
|
| 96 |
+
'custom': create_custom_optimizers,
|
| 97 |
+
...
|
| 98 |
+
}
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
当开发者需要使用其他optimizer,例如某些新论文中定义的optimizer时,可以在这里定义其创建过程,并在参数中使用:
|
| 102 |
+
```shell
|
| 103 |
+
--optimizer custom
|
| 104 |
+
```
|
| 105 |
+
就可以实际调用了。
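
下面是一个示意实现:对ViT部分与其余参数使用不同学习率的AdamW(参数名前缀`visual`仅为假设,实际应根据模型结构调整;总步数为粗略估计,未考虑梯度累加与多卡):

```python
import math

import torch
from transformers import get_cosine_schedule_with_warmup


def create_vit_lower_lr_optimizers(args, model, dataset):
    vit_params = [p for n, p in model.named_parameters() if 'visual' in n and p.requires_grad]
    other_params = [p for n, p in model.named_parameters() if 'visual' not in n and p.requires_grad]
    optimizer = torch.optim.AdamW([
        {'params': vit_params, 'lr': args.learning_rate * 0.1},
        {'params': other_params, 'lr': args.learning_rate},
    ], weight_decay=args.weight_decay)
    total_steps = int(math.ceil(len(dataset) / args.per_device_train_batch_size) * args.num_train_epochs)
    lr_scheduler = get_cosine_schedule_with_warmup(
        optimizer, num_warmup_steps=int(args.warmup_ratio * total_steps), num_training_steps=total_steps)
    return optimizer, lr_scheduler


optimizers_map['vit_lower_lr'] = create_vit_lower_lr_optimizers
```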
|
| 106 |
+
|
| 107 |
+
## 定制化agent template
|
| 108 |
+
|
| 109 |
+
example在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/agent_template).
|
| 110 |
+
|
| 111 |
+
## 定制化tuner
|
| 112 |
+
|
| 113 |
+
example在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/tuner.py).
|
| 114 |
+
- 多模态模型对ViT部分使用全参数训练,LLM部分使用LoRA训练,参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/multimodal/lora_llm_full_vit)。
|
| 115 |
+
- Phi4-multimodal,直接对其已有LoRA进行训练而不额外附加LoRA,参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/train/plugins/tuner_phi4_mm.sh)。
|
| 116 |
+
|
| 117 |
+
tuner定制也是swift中有特色的能力之一,开发者可以无视复杂的tuner初始化流程和代码整合成本,将新的tuner注册在这里:
|
| 118 |
+
```python
|
| 119 |
+
class IA3(Tuner):
|
| 120 |
+
|
| 121 |
+
@staticmethod
|
| 122 |
+
def prepare_model(args: 'TrainArguments', model: torch.nn.Module) -> torch.nn.Module:
|
| 123 |
+
model_arch: ModelKeys = MODEL_ARCH_MAPPING[model.model_meta.model_arch]
|
| 124 |
+
ia3_config = IA3Config(
|
| 125 |
+
target_modules=find_all_linears(model), feedforward_modules='.*' + model_arch.mlp.split('{}.')[1] + '.*')
|
| 126 |
+
return get_peft_model(model, ia3_config)
|
| 127 |
+
|
| 128 |
+
@staticmethod
|
| 129 |
+
def save_pretrained(
|
| 130 |
+
model: torch.nn.Module,
|
| 131 |
+
save_directory: str,
|
| 132 |
+
state_dict: Optional[dict] = None,
|
| 133 |
+
safe_serialization: bool = True,
|
| 134 |
+
**kwargs,
|
| 135 |
+
) -> None:
|
| 136 |
+
model: PeftModel
|
| 137 |
+
model.save_pretrained(save_directory, state_dict=state_dict, safe_serialization=safe_serialization, **kwargs)
|
| 138 |
+
|
| 139 |
+
@staticmethod
|
| 140 |
+
def from_pretrained(model: torch.nn.Module, model_id: str, **kwargs) -> torch.nn.Module:
|
| 141 |
+
return PeftModel.from_pretrained(model, model_id, **kwargs)
|
| 142 |
+
```
|
| 143 |
+
|
| 144 |
+
上面的例子中,我们将peft的IA3应用于模型训练中,在这个类中包含了三个方法:
|
| 145 |
+
- prepare_model: 如何将原始模型使用tuner进行封装,并设置好可训练参数
|
| 146 |
+
- save_pretrained: 如何在训练中保存模型
|
| 147 |
+
- from_pretrained: 如何在后续训练和推理中将之前存下来的checkpoint重新拉起
|
| 148 |
+
|
| 149 |
+
上面的三个方法会在swift训练流程中被调用,这样就做到了开发者可以不阅读复杂的训练代码而使用自己的tuner。
|
| 150 |
+
|
| 151 |
+
## PRM
|
| 152 |
+
|
| 153 |
+
example在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/prm.py)。
|
| 154 |
+
|
| 155 |
+
PRM是过程奖励模型,PRM会在`swift sample`命令中使用。PRM需要支持的接口比较简单:
|
| 156 |
+
```python
|
| 157 |
+
class PRM:
|
| 158 |
+
|
| 159 |
+
def __init__(self):
|
| 160 |
+
# init here
|
| 161 |
+
pass
|
| 162 |
+
|
| 163 |
+
def __call__(self, infer_requests: List[InferRequest], **kwargs) -> List[Union[float, List[float]]]:
|
| 164 |
+
raise NotImplementedError
|
| 165 |
+
```
|
| 166 |
+
|
| 167 |
+
其中的InferRequest来自于`swift.llm`,返回值为`List[Union[float, List[float]]]`,列表中每个元素可能是单个reward,也可能是若干reward(例如每个step一个)。开发者可以在infer_requests中拿到queries和responses,并按照自己的方式进行切分,例如:
|
| 168 |
+
```text
|
| 169 |
+
Let's think step by step.
|
| 170 |
+
|
| 171 |
+
Step1: xxx
|
| 172 |
+
|
| 173 |
+
Step2: xxx
|
| 174 |
+
|
| 175 |
+
So, the answer is ...
|
| 176 |
+
```
|
| 177 |
+
开发者可以在这里对过程进行切分,并按batch传入PRM中进行推理并返回rewards。更通用来说,开发者可以在这里调用一个远端URL,例如一个闭源PRM大模型并返回rewards。
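
下面是一个示意性的PRM实现(与上面的基类写在同一个plugin/prm.py中;按"Step"切分只是一个假设的切分方式,打分逻辑为占位实现,实际应替换为PRM模型推理或远端服务调用):

```python
from typing import List, Union

from swift.llm import InferRequest


class StepSplitPRM(PRM):

    def __call__(self, infer_requests: List[InferRequest], **kwargs) -> List[Union[float, List[float]]]:
        rewards = []
        for request in infer_requests:
            response = request.messages[-1]['content']
            steps = [s for s in response.split('Step') if s.strip()]
            # 占位打分:每个step给一个0~1之间的分数,真实场景应调用PRM模型
            rewards.append([min(len(s) / 100, 1.0) for s in steps] or [0.0])
        return rewards
```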
|
| 178 |
+
|
| 179 |
+
## ORM
|
| 180 |
+
|
| 181 |
+
example在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/orm.py)。
|
| 182 |
+
|
| 183 |
+
ORM是结果奖励模型。ORM一般使用正则表达式来实现,用于判断response是否正确。例如:
|
| 184 |
+
|
| 185 |
+
```python
|
| 186 |
+
class MathORM(ORM):
|
| 187 |
+
|
| 188 |
+
@staticmethod
|
| 189 |
+
def extract_boxed_result(text):
|
| 190 |
+
pattern = r'\\boxed{([^}]*)}'
|
| 191 |
+
match = re.search(pattern, text)
|
| 192 |
+
if match:
|
| 193 |
+
return match.group(1).strip()
|
| 194 |
+
else:
|
| 195 |
+
return None
|
| 196 |
+
|
| 197 |
+
def __call__(self, infer_requests: List[InferRequest], ground_truths: List[str],
|
| 198 |
+
**kwargs) -> List[float]:
|
| 199 |
+
rewards = []
|
| 200 |
+
predictions = [request.messages[-1]['content'] for request in infer_requests]
|
| 201 |
+
for prediction, ground_truth in zip(predictions, ground_truths):
|
| 202 |
+
res1 = MathORM.extract_boxed_result(prediction) or ''
|
| 203 |
+
res2 = MathORM.extract_boxed_result(ground_truth) or ''
|
| 204 |
+
rewards.append(float(res1.strip() == res2.strip()))
|
| 205 |
+
|
| 206 |
+
return rewards
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
orms = {
|
| 210 |
+
'math': MathORM,
|
| 211 |
+
}
|
| 212 |
+
```
|
| 213 |
+
|
| 214 |
+
在上面的代码中,我们定义了一个对数学response进行解析的过程,如果结果相同则返回score为1.0,否则为0.0。和PRM不同,这个类的infer中有一个额外参数`ground_truths`,
|
| 215 |
+
该参数是对应的infer_requests的实际label(数据集中定义的标准response)。
|
docs/source/Customization/自定义数据集.md
ADDED
|
@@ -0,0 +1,241 @@
|
| 1 |
+
# 自定义数据集
|
| 2 |
+
|
| 3 |
+
自定义数据集的接入方法有三种,对预处理函数的控制能力逐渐加强,但接入难度逐步增加。例如,方案一最为方便,但对预处理函数的控制能力最弱,需要预先对数据集进行转换,传入特定格式的数据集:
|
| 4 |
+
1. 【推荐】直接使用命令行传参的方式接入,即`--dataset <dataset_path1> <dataset_path2>`。这将使用AutoPreprocessor将数据集转换为标准格式(支持4种数据集格式,具体查看下面对AutoPreprocessor的介绍)。你可以使用`--columns`进行列名转换。支持传入csv、json、jsonl、txt、文件夹(例如git clone开源数据集)。该方案不需要修改dataset_info.json,适合刚接触ms-swift的用户,下面两种方案适合对ms-swift进行拓展的开发者。
|
| 5 |
+
2. 添加数据集到`dataset_info.json`中,可以参考ms-swift内置的[dataset_info.json](https://github.com/modelscope/ms-swift/blob/main/swift/llm/dataset/data/dataset_info.json)。该方案也将使用AutoPreprocessor将数据集转换为标准格式。dataset_info.json为数据集元信息的list,每一项元信息必填ms_dataset_id/hf_dataset_id/dataset_path中的一项,通过`columns`字段进行列名转换。添加到`dataset_info.json`或者注册的数据集在运行[run_dataset_info.py](https://github.com/modelscope/ms-swift/blob/main/scripts/utils/run_dataset_info.py)时将自动产生[支持的数据集文档](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.html)。此外,你可以采用外接`dataset_info.json`的方式,使用`--custom_dataset_info xxx.json`解析json文件(方便pip install而非git clone的用户),然后指定`--dataset <dataset_id/dataset_dir/dataset_path>`。
|
| 6 |
+
3. 手动注册数据集,具有最灵活的预处理函数定制能力,支持使用函数对数据集进行预处理,但难度较高。可以参考[内置数据集](https://github.com/modelscope/ms-swift/blob/main/swift/llm/dataset/dataset/llm.py)或者[examples](https://github.com/modelscope/swift/blob/main/examples/custom)中的样例。你可以通过指定`--custom_register_path xxx.py`解析外置注册内容(方便pip install而非git clone的用户)。
|
| 7 |
+
- 方案一和二在实现中借助了方案三,只是注册的过程为自动发生。
|
| 8 |
+
|
| 9 |
+
以下将对`AutoPreprocessor`可以处理的数据集格式进行介绍:
|
| 10 |
+
|
| 11 |
+
ms-swift的标准数据集格式可接受的keys包括: 'messages'、'rejected_response'、'label'、'images'、'videos'、'audios'、'tools'和'objects'。其中'messages'是必需的key,'rejected_response'用于DPO等RLHF训练,'label'用于KTO训练和分类模型训练,'images'、'videos'、'audios'用于存储多模态数据的路径或者url,'tools'用于Agent任务,'objects'用于grounding任务。
|
| 12 |
+
|
| 13 |
+
ms-swift中存在三种核心预处理器:`MessagesPreprocessor`、`AlpacaPreprocessor`、`ResponsePreprocessor`。MessagesPreprocessor用于将类messages和sharegpt格式的数据集转换为标准格式,AlpacaPreprocessor则转换alpaca格式的数据集,ResponsePreprocessor则转换类query/response格式的数据集。`AutoPreprocessor`则自动选择合适的预处理进行处理。
|
| 14 |
+
|
| 15 |
+
以下四种格式在`AutoPreprocessor`处理下都会转换成ms-swift标准格式中的messages字段,即都可以直接使用`--dataset <dataset-path>`接入:
|
| 16 |
+
|
| 17 |
+
messages格式(标准格式):
|
| 18 |
+
```jsonl
|
| 19 |
+
{"messages": [{"role": "system", "content": "<system>"}, {"role": "user", "content": "<query1>"}, {"role": "assistant", "content": "<response1>"}, {"role": "user", "content": "<query2>"}, {"role": "assistant", "content": "<response2>"}]}
|
| 20 |
+
```
|
| 21 |
+
- 注意:system部分是可选的。数据集中的system优先级高于命令行传入的`--system`,最后是定义在template中的`default_system`。
|
| 22 |
+
|
| 23 |
+
sharegpt格式:
|
| 24 |
+
```jsonl
|
| 25 |
+
{"system": "<system>", "conversation": [{"human": "<query1>", "assistant": "<resonse1>"}, {"human": "<query2>", "assistant": "<resonse2>"}]}
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
alpaca格式:
|
| 29 |
+
```jsonl
|
| 30 |
+
{"system": "<system>", "instruction": "<query-inst>", "input": "<query-input>", "output": "<response>"}
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
query-response格式:
|
| 34 |
+
```jsonl
|
| 35 |
+
{"system": "<system>", "query": "<query2>", "response": "<response2>", "history": [["<query1>", "<response1>"]]}
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
## 标准数据集格式
|
| 39 |
+
|
| 40 |
+
以下给出ms-swift的标准数据集格式,其中system字段是可选的,默认使用template中定义的`default_system`。之前介绍的4种数据集格式也可以被AutoPreprocessor处理成标准数据集格式。
|
| 41 |
+
|
| 42 |
+
### 预训练
|
| 43 |
+
|
| 44 |
+
```jsonl
|
| 45 |
+
{"messages": [{"role": "assistant", "content": "I love music"}]}
|
| 46 |
+
{"messages": [{"role": "assistant", "content": "教练我要打篮球"}]}
|
| 47 |
+
{"messages": [{"role": "assistant", "content": "西红柿鸡蛋盖饭和地三鲜盖饭哪个更权威"}]}
|
| 48 |
+
```
|
| 49 |
+
|
| 50 |
+
### 监督微调
|
| 51 |
+
|
| 52 |
+
```jsonl
|
| 53 |
+
{"messages": [{"role": "system", "content": "你是个有用无害的助手"}, {"role": "user", "content": "告诉我明天��天气"}, {"role": "assistant", "content": "明天天气晴朗"}]}
|
| 54 |
+
{"messages": [{"role": "system", "content": "你是个有用无害的数学计算器"}, {"role": "user", "content": "1+1等于几"}, {"role": "assistant", "content": "等于2"}, {"role": "user", "content": "再加1呢"}, {"role": "assistant", "content": "等于3"}]}
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
### RLHF
|
| 58 |
+
|
| 59 |
+
#### DPO/ORPO/CPO/SimPO/RM
|
| 60 |
+
|
| 61 |
+
```jsonl
|
| 62 |
+
{"messages": [{"role": "system", "content": "你是个有用无害的助手"}, {"role": "user", "content": "告诉我明天的天气"}, {"role": "assistant", "content": "明天天气晴朗"}], "rejected_response": "我不知道"}
|
| 63 |
+
{"messages": [{"role": "system", "content": "你是个有用无害的数学计算器"}, {"role": "user", "content": "1+1等于几"}, {"role": "assistant", "content": "等于2"}, {"role": "user", "content": "再加1呢"}, {"role": "assistant", "content": "等于3"}], "rejected_response": "我不知道"}
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
#### KTO
|
| 67 |
+
|
| 68 |
+
```jsonl
|
| 69 |
+
{"messages": [{"role": "system", "content": "你是个有用无害的助手"}, {"role": "user", "content": "告诉我明天的天气"}, {"role": "assistant", "content": "我不知道"}], "label": false}
|
| 70 |
+
{"messages": [{"role": "system", "content": "你是个有用无害的数学计算器"}, {"role": "user", "content": "1+1等于几"}, {"role": "assistant", "content": "等于2"}, {"role": "user", "content": "再加1呢"}, {"role": "assistant", "content": "等于3"}], "label": true}
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
#### PPO/GRPO
|
| 74 |
+
|
| 75 |
+
```jsonl
|
| 76 |
+
{"messages": [{"role": "system", "content": "你是个有用无害的助手"}, {"role": "user", "content": "告诉我明天的天气"}]}
|
| 77 |
+
{"messages": [{"role": "system", "content": "你是个有用无害的数学计算器"}, {"role": "user", "content": "1+1等于几"}, {"role": "assistant", "content": "等于2"}, {"role": "user", "content": "再加1呢"}]}
|
| 78 |
+
{"messages": [{"role": "user", "content": "你的名字是什么"}]}
|
| 79 |
+
```
|
| 80 |
+
- 注意:GRPO会透传所有额外的字段内容给ORM,而不像其他训练方法,默认将额外的字段删除。例如: 你可以额外传入'solution'。自定义的ORM需要包含一个位置参数completions,其他为关键词参数,由数据集额外字段透传。
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
### 序列分类
|
| 84 |
+
|
| 85 |
+
**单标签任务**:
|
| 86 |
+
```jsonl
|
| 87 |
+
{"messages": [{"role": "user", "content": "今天天气真好呀"}], "label": 1}
|
| 88 |
+
{"messages": [{"role": "user", "content": "今天真倒霉"}], "label": 0}
|
| 89 |
+
{"messages": [{"role": "user", "content": "好开心"}], "label": 1}
|
| 90 |
+
```
|
| 91 |
+
**多标签任务**:
|
| 92 |
+
```jsonl
|
| 93 |
+
{"messages": [{"role": "user", "content": "<sentence>"}], "label": [1, 3, 5]}
|
| 94 |
+
```
|
| 95 |
+
|
| 96 |
+
**单回归任务**:
|
| 97 |
+
```jsonl
|
| 98 |
+
{"messages": [{"role": "user", "content": "求两句话的相似度,范围为0-1。\nsentence1: <sentence1>\nsentence2: <sentence2>"}], "label": 0.8}
|
| 99 |
+
```
|
| 100 |
+
**多回归任务**:
|
| 101 |
+
|
| 102 |
+
```jsonl
|
| 103 |
+
{"messages": [{"role": "user", "content": "<sentence>"}], "label": [1.2, -0.6, 0.8]}
|
| 104 |
+
```
|
| 105 |
+
|
| 106 |
+
### Embedding
|
| 107 |
+
|
| 108 |
+
请参考[embedding训练文档](../BestPractices/Embedding训练.md#数据集格式)
|
| 109 |
+
|
| 110 |
+
### 多模态
|
| 111 |
+
|
| 112 |
+
对于多模态数据集,和上述任务的格式相同。区别在于增加了`images`, `videos`, `audios`几个key,分别代表多模态资源的url或者path(推荐使用绝对路径),`<image>` `<video>` `<audio>`标签代表了插入图片/视频/音频的位置,ms-swift支持多图片/视频/音频的情况。这些特殊tokens将在预处理的时候进行替换,参考[这里](https://github.com/modelscope/ms-swift/blob/main/swift/llm/template/template/qwen.py#L198)。下面给出的四条示例分别展示了纯文本,以及包含图像、视频和音频数据的数据格式。
|
| 113 |
+
|
| 114 |
+
预训练:
|
| 115 |
+
```
|
| 116 |
+
{"messages": [{"role": "assistant", "content": "预训练的文本在这里"}]}
|
| 117 |
+
{"messages": [{"role": "assistant", "content": "<image>是一只小狗,<image>是一只小猫"}], "images": ["/xxx/x.jpg", "/xxx/x.png"]}
|
| 118 |
+
{"messages": [{"role": "assistant", "content": "<audio>描述了今天天气真不错"}], "audios": ["/xxx/x.wav"]}
|
| 119 |
+
{"messages": [{"role": "assistant", "content": "<image>是一个大象,<video>是一只狮子在跑步"}], "images": ["/xxx/x.jpg"], "videos": ["/xxx/x.mp4"]}
|
| 120 |
+
```
|
| 121 |
+
|
| 122 |
+
微调:
|
| 123 |
+
```jsonl
|
| 124 |
+
{"messages": [{"role": "user", "content": "浙江的省会在哪?"}, {"role": "assistant", "content": "浙江的省会在杭州。"}]}
|
| 125 |
+
{"messages": [{"role": "user", "content": "<image><image>两张图片有什么区别"}, {"role": "assistant", "content": "前一张是小猫,后一张是小狗"}], "images": ["/xxx/x.jpg", "/xxx/x.png"]}
|
| 126 |
+
{"messages": [{"role": "user", "content": "<audio>语音说了什么"}, {"role": "assistant", "content": "今天天气真好呀"}], "audios": ["/xxx/x.mp3"]}
|
| 127 |
+
{"messages": [{"role": "system", "content": "你是个有用无害的助手"}, {"role": "user", "content": "<image>图片中是什么,<video>视频中是什么"}, {"role": "assistant", "content": "图片中是一个大象,视频中是一只小狗在草地上奔跑"}], "images": ["/xxx/x.jpg"], "videos": ["/xxx/x.mp4"]}
|
| 128 |
+
```
|
| 129 |
+
|
| 130 |
+
多模态模型的RLHF和序列分类的数据格式可以参考纯文本大模型的格式,并在此基础上增加`images`等字段。
|
| 131 |
+
|
| 132 |
+
#### grounding
|
| 133 |
+
|
| 134 |
+
如果是grounding(物体检测)任务,SWIFT支持两种方式:
|
| 135 |
+
1. 直接使用对应模型grounding任务的数据集格式,例如qwen2-vl的格式如下:
|
| 136 |
+
|
| 137 |
+
```jsonl
|
| 138 |
+
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "<image>描述图像"}, {"role": "assistant", "content": "<|object_ref_start|>一只狗<|object_ref_end|><|box_start|>(221,423),(569,886)<|box_end|>和<|object_ref_start|>一个女人<|object_ref_end|><|box_start|>(451,381),(733,793)<|box_end|>正在沙滩上玩耍"}], "images": ["/xxx/x.jpg"]}
|
| 139 |
+
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "<image>找到图像中的<|object_ref_start|>羊<|object_ref_end|>"}, {"role": "assistant", "content": "<|box_start|>(101,201),(150,266)<|box_end|><|box_start|>(401,601),(550,666)<|box_end|>"}], "images": ["/xxx/x.jpg"]}
|
| 140 |
+
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "<image>帮我打开谷歌浏览器"}, {"role": "assistant", "content": "Action: click(start_box='<|box_start|>(246,113)<|box_end|>')"}], "images": ["/xxx/x.jpg"]}
|
| 141 |
+
```
|
| 142 |
+
使用这种类型的数据需要注意:
|
| 143 |
+
- 不同模型grounding任务的特殊字符和数据集格式不同。
|
| 144 |
+
- 不同模型对bbox是否归一化的处理不同。例如:qwen2.5-vl使用绝对坐标,而qwen2-vl、internvl2.5需要对bbox的坐标进行千分位坐标归一化。
|
| 145 |
+
|
| 146 |
+
2. 使用SWIFT的grounding数据格式:
|
| 147 |
+
|
| 148 |
+
```jsonl
|
| 149 |
+
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "<image>描述图像"}, {"role": "assistant", "content": "<ref-object><bbox>和<ref-object><bbox>正在沙滩上玩耍"}], "images": ["/xxx/x.jpg"], "objects": {"ref": ["一只狗", "一个女人"], "bbox": [[331.5, 761.4, 853.5, 1594.8], [676.5, 685.8, 1099.5, 1427.4]]}}
|
| 150 |
+
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "<image>找到图像中的<ref-object>"}, {"role": "assistant", "content": "<bbox><bbox>"}], "images": ["/xxx/x.jpg"], "objects": {"ref": ["羊"], "bbox": [[90.9, 160.8, 135, 212.8], [360.9, 480.8, 495, 532.8]]}}
|
| 151 |
+
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "<image>帮我打开谷歌浏览器"}, {"role": "assistant", "content": "Action: click(start_box='<bbox>')"}], "images": ["/xxx/x.jpg"], "objects": {"ref": [], "bbox": [[615, 226]]}}
|
| 152 |
+
```
|
| 153 |
+
该格式将自动转换数据集格式为对应模型的grounding任务格式,且选择对应模型的bbox归一化方式。该格式比通用格式多了objects字段,该字段包含的字段有:
|
| 154 |
+
- ref: 用于替换`<ref-object>`。
|
| 155 |
+
- bbox: 用于替换`<bbox>`。若bbox中每个box长度为2,则代表x和y坐标,若box长度为4,则代表2个点的x和y坐标。
|
| 156 |
+
- bbox_type: 可选项为'real','norm1'。默认为'real',即bbox为真实bbox值。若是'norm1',则bbox已经归一化为0~1。
|
| 157 |
+
- image_id: 该参数只有当bbox_type为'real'时生效。代表bbox对应的图片是第几张,用于缩放bbox。索引从0开始,默认全为第0张。
|
| 158 |
+
|
| 159 |
+
### 文生图格式
|
| 160 |
+
|
| 161 |
+
```jsonl
|
| 162 |
+
{"messages": [{"role": "system", "content": "你是个有用无害的助手"}, {"role": "user", "content": "给我画出一个苹果"}, {"role": "assistant", "content": "<image>"}], "images": ["/xxx/x.jpg"]}
|
| 163 |
+
```
|
| 164 |
+
|
| 165 |
+
### Agent格式
|
| 166 |
+
这里分别提供了纯文本Agent和多模态Agent的示例数据样本:
|
| 167 |
+
```jsonl
|
| 168 |
+
{"tools": ["{\"type\": \"function\", \"function\": {\"name\": \"realtime_aqi\", \"description\": \"天气预报。获取实时空气质量。当前空气质量,PM2.5,PM10信息\", \"parameters\": {\"type\": \"object\", \"properties\": {\"city\": {\"type\": \"string\", \"description\": \"城市名,例如:上海\"}}, \"required\": [\"city\"]}}}"], "messages": [{"role": "user", "content": "北京和上海今天的天气情况"}, {"role": "tool_call", "content": "{\"name\": \"realtime_aqi\", \"arguments\": {\"city\": \"北京\"}}"}, {"role": "tool_call", "content": "{\"name\": \"realtime_aqi\", \"arguments\": {\"city\": \"上海\"}}"}, {"role": "tool_response", "content": "{\"city\": \"北京\", \"aqi\": \"10\", \"unit\": \"celsius\"}"}, {"role": "tool_response", "content": "{\"city\": \"上海\", \"aqi\": \"72\", \"unit\": \"fahrenheit\"}"}, {"role": "assistant", "content": "根据天气预报工具,北京今天的空气质量指数为10,属于良好水平;上海今天的空气质量指数为72,属于轻度污染水平。"}]}
|
| 169 |
+
{"tools": ["{\"type\": \"function\", \"function\": {\"name\": \"click\", \"description\": \"点击屏幕中的某个位置\", \"parameters\": {\"type\": \"object\", \"properties\": {\"x\": {\"type\": \"integer\", \"description\": \"横坐标,表示屏幕上的水平位置\"}, \"y\": {\"type\": \"integer\", \"description\": \"纵坐标,表示屏幕上的垂直位置\"}}, \"required\": [\"x\", \"y\"]}}}"], "messages": [{"role": "user", "content": "<image>现在几点了?"}, {"role": "assistant", "content": "<think>\n我可以通过打开日历App来获取当前时间。\n</think>\n"}, {"role": "tool_call", "content": "{\"name\": \"click\", \"arguments\": {\"x\": 105, \"y\": 132}}"}, {"role": "tool_response", "content": "{\"images\": \"<image>\", \"status\": \"success\"}"}, {"role": "assistant", "content": "成功打开日历App,现在的时间为中午11点"}], "images": ["desktop.png", "calendar.png"]}
|
| 170 |
+
```
|
| 171 |
+
- agent_template为"react_en", "hermes"等情况下,该格式适配所有模型Agent训练,可以轻松在不同模型间切换。
|
| 172 |
+
- 其中tools是一个`List[str]`,其中每一个tool需要是json字符串,messages中role为'tool_call'和'tool_response/tool'的content部分都需要是json字符串。
|
| 173 |
+
- tools字段将在训练/推理时和`{"role": "system", ...}"`部分组合,根据agent_template组成完整的system部分。
|
| 174 |
+
- `{"role": "tool_call", ...}`部分将根据agent_template自动转成对应格式的`{"role": "assistant", ...}`,多条连续的`{"role": "assistant", ...}`将拼接在一起组成完整的assistant_content。
|
| 175 |
+
- `{"role": "tool_response", ...}`也可以写成`{"role": "tool", ...}`,这两种写法是等价的。该部分也将根据`agent_template`自动转换格式。该部分在训练时将不进行损失的计算,角色类似于`{"role": "user", ...}`。
|
| 176 |
+
- 该格式支持并行调用工具,例子参考第一条数据样本。多模态Agent数据样本中`<image>`标签数量应与"images"长度相同,其标签位置代表图像特征的插入位置。当然也支持其他模态,例如audios, videos。
|
| 177 |
+
- 更多请参考[Agent文档](../Instruction/Agent支持.md)。
|
| 178 |
+
|
| 179 |
+
## dataset_info.json
|
| 180 |
+
|
| 181 |
+
可以参考ms-swift内置的[dataset_info.json](https://github.com/modelscope/ms-swift/blob/main/swift/llm/dataset/data/dataset_info.json)。该方案使用AutoPreprocessor预处理函数将数据集转换为标准格式。dataset_info.json文件中包含了数据集元信息的list,以下为一些例子:
|
| 182 |
+
|
| 183 |
+
```json
|
| 184 |
+
[
|
| 185 |
+
{
|
| 186 |
+
"ms_dataset_id": "xxx/xxx"
|
| 187 |
+
},
|
| 188 |
+
{
|
| 189 |
+
"dataset_path": "<dataset_dir/dataset_path>"
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"ms_dataset_id": "<dataset_id>",
|
| 193 |
+
"subsets": ["v1"],
|
| 194 |
+
"split": ["train", "validation"],
|
| 195 |
+
"columns": {
|
| 196 |
+
"input": "query",
|
| 197 |
+
"output": "response"
|
| 198 |
+
}
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"ms_dataset_id": "<dataset_id>",
|
| 202 |
+
"hf_dataset_id": "<hf_dataset_id>",
|
| 203 |
+
"subsets": [{
|
| 204 |
+
"subset": "subset1",
|
| 205 |
+
"columns": {
|
| 206 |
+
"problem": "query",
|
| 207 |
+
"content": "response"
|
| 208 |
+
}
|
| 209 |
+
},
|
| 210 |
+
{
|
| 211 |
+
"subset": "subset2",
|
| 212 |
+
"columns": {
|
| 213 |
+
"messages": "_",
|
| 214 |
+
"new_messages": "messages"
|
| 215 |
+
}
|
| 216 |
+
}]
|
| 217 |
+
}
|
| 218 |
+
]
|
| 219 |
+
```
|
| 220 |
+
|
| 221 |
+
支持以下参数:
|
| 222 |
+
- ms_dataset_id: 参考DatasetMeta参数。
|
| 223 |
+
- hf_dataset_id: 参考DatasetMeta参数。
|
| 224 |
+
- dataset_path: 参考DatasetMeta参数。
|
| 225 |
+
- dataset_name: 参考DatasetMeta参数。
|
| 226 |
+
- subsets: 参考DatasetMeta参数。
|
| 227 |
+
- split: 参考DatasetMeta参数。
|
| 228 |
+
- columns: 在数据集进行预处理前,对数据集进行列名转换。
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
## 数据集注册
|
| 232 |
+
|
| 233 |
+
register_dataset会在`DATASET_MAPPING`中注册数据集,调用函数`register_dataset(dataset_meta)`即可完成数据集注册,其中dataset_meta将存储模型的元信息。DatasetMeta的参数列表如下:
|
| 234 |
+
- ms_dataset_id: ModelScope的dataset_id,默认为None。
|
| 235 |
+
- hf_dataset_id: HuggingFace的dataset_id,默认为None。
|
| 236 |
+
- dataset_path: dataset的本地路径(推荐使用绝对路径)。默认为None。
|
| 237 |
+
- dataset_name: 数据集别名,可以通过`--dataset <dataset_name>`指定数据集,这在dataset_path很长时很方便。默认为None。
|
| 238 |
+
- subsets: 子数据集的名字列表或者`SubsetDataset`对象的列表,默认为`['default']`。(只有dataset_id或者dataset_dir(git clone开源数据集)有子数据集和split的概念)。
|
| 239 |
+
- split: 默认为`['train']`。
|
| 240 |
+
- preprocess_func: 预处理函数或可调用对象,默认为`AutoPreprocessor()`。该预处理函数接口为传入`HfDataset`,并返回满足标准格式的`HfDataset`。
|
| 241 |
+
- load_function: 默认为`DatasetLoader.load`。若需要自定义载入函数,则该载入函数需返回满足标准格式的`HfDataset`,这将抛弃ms-swift的数据集载入机制,提供给用户最大的自由度。通常该参数不需要进行修改。
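
下面给出一个示意性的注册片段(数据集id与前缀内容仅作演示,完整示例可参考examples/custom):

```python
from swift.llm import DatasetMeta, ResponsePreprocessor, register_dataset


class CustomPreprocessor(ResponsePreprocessor):
    prompt = '请用一句话回答下面的问题:\n{query}'

    def preprocess(self, row):
        row['query'] = self.prompt.format(query=row['query'])
        return super().preprocess(row)


register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/blossom-math-v2',
        dataset_name='my-blossom',
        preprocess_func=CustomPreprocessor(),
    ))
```

注册后即可通过`--dataset my-blossom`使用该数据集。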
|
docs/source/Customization/自定义模型.md
ADDED
|
@@ -0,0 +1,34 @@
|
| 1 |
+
# 自定义模型
|
| 2 |
+
|
| 3 |
+
ms-swift内置的模型,你可以直接通过指定model_id或者model_path来使用:`--model <model_id_or_path>`。ms-swift会根据model_id/model_path的后缀和`config.json`文件来判断model_type。每种model_type都有唯一的模型结构、template和加载方式。当然,你也可以手动传入`--model_type`、`--template`来进行覆盖。ms-swift已支持的model_type和template可以查看[支持的模型与数据集](../Instruction/支持的模型和数据集.md)。
|
| 4 |
+
|
| 5 |
+
## 模型注册
|
| 6 |
+
|
| 7 |
+
自定义模型通常使用模型注册的方式进行,可以参考[内置模型](https://github.com/modelscope/ms-swift/blob/main/swift/llm/model/model/qwen.py)、[内置对话模板](https://github.com/modelscope/ms-swift/blob/main/swift/llm/template/template/qwen.py)或者[examples](https://github.com/modelscope/swift/blob/main/examples/custom)的示例代码。你可以通过指定`--custom_register_path xxx.py`解析外置注册的内容(方便pip install而非git clone的用户)。
|
| 8 |
+
|
| 9 |
+
register_model会在`MODEL_MAPPING`中注册模型,调用函数`register_model(model_meta)`即可完成模型注册,其中model_meta将存储模型的元信息。ModelMeta的参数列表如下:
|
| 10 |
+
- model_type: 必填项。模型类型,也是唯一ID。
|
| 11 |
+
- model_groups: 必填项。罗列ModelScope/HuggingFace的模型id和模型本地路径。运行[run_model_info.py](https://github.com/modelscope/ms-swift/blob/main/scripts/utils/run_model_info.py)文件将自动产生[支持的模型文档](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.html)以及自动根据`--model`后缀匹配model_type。
|
| 12 |
+
- template: 必填项。不指定`--template`时的默认template类型。
|
| 13 |
+
- get_function: 必填项。模型和tokenizer/processor(多模态模型)的加载函数。LLM通常设置为`get_model_tokenizer_with_flash_attn`即可。
|
| 14 |
+
- model_arch: 模型架构。默认为None。多模态模型训练需要设置该参数来确定llm/vit/aligner的前缀。
|
| 15 |
+
- architectures: config.json中的architectures项,用于自动匹配模型对应的model_type。默认为`[]`。
|
| 16 |
+
- additional_saved_files: 全参数训练和merge-lora时需要额外保存的文件。默认为`[]`。
|
| 17 |
+
- torch_dtype: 模型加载时未传入`torch_dtype`时的默认dtype。默认为None,从config.json中读取。
|
| 18 |
+
- is_multimodal: 是否是多模态模型,默认为False。
|
| 19 |
+
- ignore_patterns: 从hub端下载文件需要忽略的文件patterns,默认为`[]`。
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
register_template会在`TEMPLATE_MAPPING`中注册对话模板,调用函数`register_template(template_meta)`即可完成对话模板注册,其中template_meta将存储template的元信息。TemplateMeta的参数列表如下:
|
| 23 |
+
- template_type: 必填项。对话模板类型,也是唯一ID。
|
| 24 |
+
- prefix: 必填项。对话模板的前缀,通常包含system、bos_token等部分,独立于多轮对话而产生的对话模板循环。例如qwen的prefix为`[]`。
|
| 25 |
+
- prompt: 必填项。表示对话模板中的`{{RESPONSE}}`之前的对话部分。我们使用`{{QUERY}}`代表user询问部分的填充符。例如qwen的prompt为`['<|im_start|>user\n{{QUERY}}<|im_end|>\n<|im_start|>assistant\n']`。
|
| 26 |
+
- chat_sep: 必填项。多轮对话中每轮的分隔符。若设置为None,则该template不支持多轮对话。例如qwen的chat_sep为`['<|im_end|>\n']`。
|
| 27 |
+
- suffix: 默认为`[['eos_token_id']]`。对话模板的后缀部分,独立于多轮对话而产生的对话模板循环,通常为eos_token。例如qwen的suffix为`['<|im_end|>']`。
|
| 28 |
+
- template_cls: 默认为`Template`。通常在定义多模态模型的template时需要进行自定义,自定义`_encode`、`_post_encode`、`_data_collator`函数。
|
| 29 |
+
- system_prefix: 默认为None。含system的对话模板前缀。我们使用`{{SYSTEM}}`作为system的填充符。例如qwen的system_prefix为`['<|im_start|>system\n{{SYSTEM}}<|im_end|>\n']`。
|
| 30 |
+
- 注意:若system为空时,`prefix`可以被`system_prefix`替代,则可以将`prefix`写为含system的前缀,而无需设置`system_prefix`。
|
| 31 |
+
- 若prefix不含`{{SYSTEM}}`且未设置system_prefix,则该template不支持system。
|
| 32 |
+
- default_system: 默认为None。不传入`--system`时使用的默认system。例如qwen的default_system为`'You are a helpful assistant.'`。
|
| 33 |
+
- stop_words: 默认为`[]`。除了eos_token和`suffix[-1]`的额外停止符。例如qwen的stop_words为`['<|endoftext|>']`。
|
| 34 |
+
- 注意:推理时,输出的response将会过滤eos_token和`suffix[-1]`,但是会保留额外的stop_words。
|
docs/source/GetStarted/SWIFT安装.md
ADDED
|
@@ -0,0 +1,87 @@
|
| 1 |
+
# SWIFT安装
|
| 2 |
+
|
| 3 |
+
## Wheel包安装
|
| 4 |
+
|
| 5 |
+
可以使用pip进行安装:
|
| 6 |
+
|
| 7 |
+
```shell
|
| 8 |
+
pip install 'ms-swift'
|
| 9 |
+
# 使用评测
|
| 10 |
+
pip install 'ms-swift[eval]' -U
|
| 11 |
+
# 全能力
|
| 12 |
+
pip install 'ms-swift[all]' -U
|
| 13 |
+
```
|
| 14 |
+
|
| 15 |
+
## 源代码安装
|
| 16 |
+
|
| 17 |
+
```shell
|
| 18 |
+
# pip install git+https://github.com/modelscope/ms-swift.git
|
| 19 |
+
|
| 20 |
+
# 全能力
|
| 21 |
+
# pip install "git+https://github.com/modelscope/ms-swift.git#egg=ms-swift[all]"
|
| 22 |
+
|
| 23 |
+
git clone https://github.com/modelscope/ms-swift.git
|
| 24 |
+
cd ms-swift
|
| 25 |
+
pip install -e .
|
| 26 |
+
|
| 27 |
+
# 全能力
|
| 28 |
+
# pip install -e '.[all]'
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
## 旧版本
|
| 32 |
+
|
| 33 |
+
SWIFT在3.0版本开始进行了不兼容式重构,如果需要使用2.x旧版本,请执行如下命令进行安装:
|
| 34 |
+
```shell
|
| 35 |
+
pip install ms-swift==2.*
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
## 镜像
|
| 39 |
+
|
| 40 |
+
```
|
| 41 |
+
# vllm0.8.3 (该版本vllm可能导致部分GRPO训练卡住,GRPO建议优先使用vllm0.7.3)
|
| 42 |
+
modelscope-registry.cn-hangzhou.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.4.0-py311-torch2.6.0-vllm0.8.3-modelscope1.25.0-swift3.3.0.post1
|
| 43 |
+
modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.4.0-py311-torch2.6.0-vllm0.8.3-modelscope1.25.0-swift3.3.0.post1
|
| 44 |
+
|
| 45 |
+
# vllm0.7.3
|
| 46 |
+
modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.4.0-py311-torch2.5.1-modelscope1.25.0-swift3.2.2
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
更多镜像可以查看[这里](https://modelscope.cn/docs/intro/environment-setup#%E6%9C%80%E6%96%B0%E9%95%9C%E5%83%8F)
|
| 50 |
+
|
| 51 |
+
## 支持的硬件
|
| 52 |
+
|
| 53 |
+
| 硬件环境 | 备注 |
|
| 54 |
+
| --------------- | --------------------------- |
|
| 55 |
+
| A10/A100/H100 | |
|
| 56 |
+
| RTX20/30/40系列 | |
|
| 57 |
+
| T4/V100 | 部分模型出现NAN |
|
| 58 |
+
| Ascend NPU | 部分模型出现NAN或算子不支持 |
|
| 59 |
+
| MPS | |
|
| 60 |
+
| CPU | |
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
## 运行环境
|
| 64 |
+
|
| 65 |
+
| | 范围 | 推荐 | 备注 |
|
| 66 |
+
| ------ |--------------| ---- | --|
|
| 67 |
+
| python | >=3.9 | 3.10 ||
|
| 68 |
+
| cuda | | cuda12 |使用cpu、npu、mps则无需安装|
|
| 69 |
+
| torch | >=2.0 | ||
|
| 70 |
+
| transformers | >=4.33 | 4.51 ||
|
| 71 |
+
| modelscope | >=1.23 | ||
|
| 72 |
+
| peft | >=0.11,<0.16 | ||
|
| 73 |
+
| trl | >=0.13,<0.18 | 0.17 |RLHF|
|
| 74 |
+
| deepspeed | >=0.14 | 0.14.5 |训练|
|
| 75 |
+
| vllm | >=0.5.1 | 0.7.3/0.8 |推理/部署/评测|
|
| 76 |
+
| lmdeploy | >=0.5 | 0.8 |推理/部署/评测|
|
| 77 |
+
| evalscope | >=0.11 | |评测|
|
| 78 |
+
|
| 79 |
+
更多可选依赖可以参考[这里](https://github.com/modelscope/ms-swift/blob/main/requirements/install_all.sh)。
|
| 80 |
+
|
| 81 |
+
## Notebook环境
|
| 82 |
+
|
| 83 |
+
Swift支持训练的绝大多数模型都可以在`A10`显卡上使用,用户可以使用ModelScope官方提供的免费显卡资源:
|
| 84 |
+
|
| 85 |
+
1. 进入[ModelScope](https://www.modelscope.cn)官方网站并登录
|
| 86 |
+
2. 点击左侧的`我的Notebook`并开启一个免费GPU实例
|
| 87 |
+
3. 愉快地薅A10显卡羊毛
|
docs/source/GetStarted/Web-UI.md
ADDED
|
@@ -0,0 +1,36 @@
|
| 1 |
+
# Web-UI
|
| 2 |
+
|
| 3 |
+
目前SWIFT已经支持了界面化的训练和推理,参数支持和脚本训练相同。在安装SWIFT后,使用如下命令:
|
| 4 |
+
|
| 5 |
+
```shell
|
| 6 |
+
swift web-ui --lang zh
|
| 7 |
+
# or en
|
| 8 |
+
swift web-ui --lang en
|
| 9 |
+
```
|
| 10 |
+
|
| 11 |
+
开启界面训练和推理。
|
| 12 |
+
|
| 13 |
+
SWIFT web-ui是命令行的高级封装,即在界面上启动的训练、部署等任务,会在系统中以命令行方式启动一个独立的进程,伪代码类似:
|
| 14 |
+
```python
|
| 15 |
+
import os
|
| 16 |
+
os.system('swift sft --model xxx --dataset xxx')
|
| 17 |
+
```
|
| 18 |
+
|
| 19 |
+
这给web-ui带来了几个特性:
|
| 20 |
+
1. web-ui的每个超参数描述都带有`--xxx`的标记,这与[命令行参数](../Instruction/命令行参数.md)的内容是一致的
|
| 21 |
+
2. web-ui可以在一台多卡机器上并行启动多个训练/部署任务
|
| 22 |
+
3. web-ui服务关闭后,后台任务仍会继续运行,这避免了web-ui被关闭后影响训练进程;如果需要关闭后台任务,只需要**选择对应的任务**后,在界面上的`运行时`tab点击杀死服务
|
| 23 |
+
4. 重新启动web-ui后,如果需要显示正在运行的服务,在`运行时`tab点击`找回运行时任务`即可
|
| 24 |
+
5. 训练界面支持显示运行日志,请在选择某个任务后手动点击`展示运行状态`;训练时运行状态支持展示训练图表,图表包括训练loss、训练acc、学习率等基本指标,在人类对齐任务中界面图表为margin、logps等关键指标
|
| 25 |
+
6. web-ui的训练不支持PPO,该过程比较复杂,建议使用examples的[shell脚本](../../../examples/train/rlhf/ppo.sh)直接运行
|
| 26 |
+
|
| 27 |
+
如果需要使用share模式,请添加`--share true`参数。注意:请不要在dsw、notebook等环境中使用该参数。
|
| 28 |
+
|
| 29 |
+
目前ms-swift额外支持了界面推理模式(即Space部署):
|
| 30 |
+
|
| 31 |
+
```shell
|
| 32 |
+
swift app --model '<model>' --studio_title My-Awesome-Space --stream true
|
| 33 |
+
# 或者
|
| 34 |
+
swift app --model '<model>' --adapters '<adapter>' --stream true
|
| 35 |
+
```
|
| 36 |
+
即可启动一个只有推理页面的应用,该应用会在启动时对模型进行部署,供后续推理使用。
|
docs/source/GetStarted/快速开始.md
ADDED
|
@@ -0,0 +1,103 @@
|
| 1 |
+
# 快速开始
|
| 2 |
+
|
| 3 |
+
ms-swift是魔搭社区提供的大模型与多模态大模型训练部署框架,现已支持500+大模型与200+多模态大模型的训练(预训练、微调、人类对齐)、推理、评测、量化与部署。模型开发者可以在ms-swift框架中一站式完成围绕大模型的各类需求。目前ms-swift的主要能力包含:
|
| 4 |
+
|
| 5 |
+
- 🍎 模型类型:支持500+纯文本大模型、200+多模态大模型以及All-to-All全模态模型、序列分类模型、Embedding模型训练到部署全流程。
|
| 6 |
+
- 数据集类型:内置150+预训练、微调、人类对齐、多模态等各种类型的数据集,并支持自定义数据集。
|
| 7 |
+
- 硬件支持:CPU、RTX系列、T4/V100、A10/A100/H100、Ascend NPU、MPS等。
|
| 8 |
+
- 🍊 轻量训练:支持了LoRA、QLoRA、DoRA、LoRA+、ReFT、RS-LoRA、LLaMAPro、Adapter、GaLore、Q-Galore、LISA、UnSloth、Liger-Kernel等轻量微调方式。
|
| 9 |
+
- 分布式训练:支持分布式数据并行(DDP)、device_map简易模型并行、DeepSpeed ZeRO2 ZeRO3、FSDP等分布式训练技术。
|
| 10 |
+
- 量化训练:支持对BNB、AWQ、GPTQ、AQLM、HQQ、EETQ量化模型进行训练。
|
| 11 |
+
- RLHF训练:支持纯文本大模型和多模态大模型的DPO、GRPO、RM、PPO、KTO、CPO、SimPO、ORPO等人类对齐训练方法。
|
| 12 |
+
- 🍓 多模态训练:支持对图像、视频和语音不同模态模型进行训练,支持VQA、Caption、OCR、Grounding任务的训练。
|
| 13 |
+
- 界面训练:以界面的方式提供训练、推理、评测、量化的能力,完成大模型的全链路。
|
| 14 |
+
- 插件化与拓展:支持自定义模型和数据集拓展,支持对loss、metric、trainer、loss-scale、callback、optimizer等组件进行自定义。
|
| 15 |
+
- 🍉 工具箱能力:除了对大模型和多模态大模型的训练支持外,还支持其推理、评测、量化和部署全流程。
|
| 16 |
+
- 推理加速:支持PyTorch、vLLM、LmDeploy推理加速引擎,并提供OpenAI接口,为推理、部署和评测模块提供加速。
|
| 17 |
+
- 模型评测:以EvalScope作为评测后端,支持100+评测数据集对纯文本和多模态模型进行评测。
|
| 18 |
+
- 模型量化:支持AWQ、GPTQ和BNB的量化导出,导出的模型支持使用vLLM/LmDeploy推理加速,并支持继续训练。
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
## 安装
|
| 22 |
+
|
| 23 |
+
ms-swift的安装请参考[安装文档](./SWIFT安装.md)。
|
| 24 |
+
|
| 25 |
+
## 使用样例
|
| 26 |
+
|
| 27 |
+
10分钟在单卡3090上对Qwen2.5-7B-Instruct进行自我认知微调:
|
| 28 |
+
```shell
|
| 29 |
+
# 22GB
|
| 30 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 31 |
+
swift sft \
|
| 32 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 33 |
+
--train_type lora \
|
| 34 |
+
--dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
|
| 35 |
+
'AI-ModelScope/alpaca-gpt4-data-en#500' \
|
| 36 |
+
'swift/self-cognition#500' \
|
| 37 |
+
--torch_dtype bfloat16 \
|
| 38 |
+
--num_train_epochs 1 \
|
| 39 |
+
--per_device_train_batch_size 1 \
|
| 40 |
+
--per_device_eval_batch_size 1 \
|
| 41 |
+
--learning_rate 1e-4 \
|
| 42 |
+
--lora_rank 8 \
|
| 43 |
+
--lora_alpha 32 \
|
| 44 |
+
--target_modules all-linear \
|
| 45 |
+
--gradient_accumulation_steps 16 \
|
| 46 |
+
--eval_steps 50 \
|
| 47 |
+
--save_steps 50 \
|
| 48 |
+
--save_total_limit 2 \
|
| 49 |
+
--logging_steps 5 \
|
| 50 |
+
--max_length 2048 \
|
| 51 |
+
--output_dir output \
|
| 52 |
+
--system 'You are a helpful assistant.' \
|
| 53 |
+
--warmup_ratio 0.05 \
|
| 54 |
+
--dataloader_num_workers 4 \
|
| 55 |
+
--model_author swift \
|
| 56 |
+
--model_name swift-robot
|
| 57 |
+
```
|
| 58 |
+
|
| 59 |
+
小贴士:
|
| 60 |
+
- 如果要使用自定义数据集进行训练,你可以参考[这里](../Customization/自定义数据集.md)组织数据集格式,并指定`--dataset <dataset_path>`。
|
| 61 |
+
- `--model_author`和`--model_name`参数只有当数据集中包含`swift/self-cognition`时才生效。
|
| 62 |
+
- 如果要使用其他模型进行训练,你只需要修改`--model <model_id/model_path>`即可。
|
| 63 |
+
- 默认使用ModelScope进行模型和数据集的下载。如果要使用HuggingFace,指定`--use_hf true`即可。
|
| 64 |
+
|
| 65 |
+
训练完成后,使用以下命令对训练后的权重进行推理:
|
| 66 |
+
- 这里的`--adapters`需要替换成训练生成的last checkpoint文件夹。由于adapters文件夹中包含了训练的参数文件`args.json`,因此不需要额外指定`--model`,`--system`,swift会自动读取这些参数。如果要关闭此行为,可以设置`--load_args false`。
|
| 67 |
+
|
| 68 |
+
```shell
|
| 69 |
+
# 使用交互式命令行进行推理
|
| 70 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 71 |
+
swift infer \
|
| 72 |
+
--adapters output/vx-xxx/checkpoint-xxx \
|
| 73 |
+
--stream true \
|
| 74 |
+
--temperature 0 \
|
| 75 |
+
--max_new_tokens 2048
|
| 76 |
+
|
| 77 |
+
# merge-lora并使用vLLM进行推理加速
|
| 78 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 79 |
+
swift infer \
|
| 80 |
+
--adapters output/vx-xxx/checkpoint-xxx \
|
| 81 |
+
--stream true \
|
| 82 |
+
--merge_lora true \
|
| 83 |
+
--infer_backend vllm \
|
| 84 |
+
--max_model_len 8192 \
|
| 85 |
+
--temperature 0 \
|
| 86 |
+
--max_new_tokens 2048
|
| 87 |
+
```
|
| 88 |
+
|
| 89 |
+
最后,使用以下命令将模型推送到ModelScope:
|
| 90 |
+
```shell
|
| 91 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 92 |
+
swift export \
|
| 93 |
+
--adapters output/vx-xxx/checkpoint-xxx \
|
| 94 |
+
--push_to_hub true \
|
| 95 |
+
--hub_model_id '<your-model-id>' \
|
| 96 |
+
--hub_token '<your-sdk-token>' \
|
| 97 |
+
--use_hf false
|
| 98 |
+
```
|
| 99 |
+
|
| 100 |
+
## 了解更多
|
| 101 |
+
|
| 102 |
+
- 更多Shell脚本:[https://github.com/modelscope/ms-swift/tree/main/examples](https://github.com/modelscope/ms-swift/tree/main/examples)
|
| 103 |
+
- 使用Python:[https://github.com/modelscope/ms-swift/blob/main/examples/notebook/qwen2_5-self-cognition/self-cognition-sft.ipynb](https://github.com/modelscope/ms-swift/blob/main/examples/notebook/qwen2_5-self-cognition/self-cognition-sft.ipynb)
|
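如果更习惯在Python中直接调用,下面是一个基于`PtEngine`的简化推理示意(用法参考当前examples中的demo,若版本不同请以官方示例为准):

```python
from swift.llm import PtEngine, RequestConfig, InferRequest

# 使用PyTorch后端加载模型,也可以换成vLLM/LmDeploy对应的引擎
engine = PtEngine('Qwen/Qwen2.5-7B-Instruct')
infer_request = InferRequest(messages=[{'role': 'user', 'content': '你是谁?'}])
resp_list = engine.infer([infer_request], RequestConfig(max_tokens=512, temperature=0))
print(resp_list[0].choices[0].message.content)
```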
docs/source/Instruction/Agent支持.md
ADDED
|
@@ -0,0 +1,207 @@
|
| 1 |
+
# Agent支持
|
| 2 |
+
|
| 3 |
+
## 数据集格式
|
| 4 |
+
|
| 5 |
+
纯文本Agent和多模态Agent的示例数据样本如下:
|
| 6 |
+
```jsonl
|
| 7 |
+
{"tools": ["{\"type\": \"function\", \"function\": {\"name\": \"realtime_aqi\", \"description\": \"天气预报。获取实时空气质量。当前空气质量,PM2.5,PM10信息\", \"parameters\": {\"type\": \"object\", \"properties\": {\"city\": {\"type\": \"string\", \"description\": \"城市名,例如:上海\"}}, \"required\": [\"city\"]}}}"], "messages": [{"role": "user", "content": "北京和上海今天的天气情况"}, {"role": "tool_call", "content": "{\"name\": \"realtime_aqi\", \"arguments\": {\"city\": \"北京\"}}"}, {"role": "tool_call", "content": "{\"name\": \"realtime_aqi\", \"arguments\": {\"city\": \"上海\"}}"}, {"role": "tool_response", "content": "{\"city\": \"北京\", \"aqi\": \"10\", \"unit\": \"celsius\"}"}, {"role": "tool_response", "content": "{\"city\": \"上海\", \"aqi\": \"72\", \"unit\": \"fahrenheit\"}"}, {"role": "assistant", "content": "根据天气预报工具,北京今天的空气质量指数为10,属于良好水平;上海今天的空气质量指数为72,属于轻度污染水平。"}]}
|
| 8 |
+
{"tools": ["{\"type\": \"function\", \"function\": {\"name\": \"click\", \"description\": \"点击屏幕中的某个位置\", \"parameters\": {\"type\": \"object\", \"properties\": {\"x\": {\"type\": \"integer\", \"description\": \"横坐标,表示屏幕上的水平位置\"}, \"y\": {\"type\": \"integer\", \"description\": \"纵坐标,表示屏幕上的垂直位置\"}}, \"required\": [\"x\", \"y\"]}}}"], "messages": [{"role": "user", "content": "<image>现在几点了?"}, {"role": "assistant", "content": "<think>\n我可以通过打开日历App来获取当前时间。\n</think>\n"}, {"role": "tool_call", "content": "{\"name\": \"click\", \"arguments\": {\"x\": 105, \"y\": 132}}"}, {"role": "tool_response", "content": "{\"images\": \"<image>\", \"status\": \"success\"}"}, {"role": "assistant", "content": "成功打开日历App,现在的时间为中午11点"}], "images": ["desktop.png", "calendar.png"]}
|
| 9 |
+
```
|
| 10 |
+
- agent_template为"react_en", "hermes"等情况下,该格式适配所有模型Agent训练,可以轻松在不同模型间切换。
|
| 11 |
+
- 其中tools是一个`List[str]`,每一个tool需要是json字符串;messages中role为'tool_call'和'tool_response/tool'的content部分也都需要是json字符串。
|
| 12 |
+
- tools字段将在训练/推理时和`{"role": "system", ...}`部分组合,根据agent_template组成完整的system部分。
|
| 13 |
+
- `{"role": "tool_call", ...}`部分将根据agent_template自动转成对应格式的`{"role": "assistant", ...}`,多条连续的`{"role": "assistant", ...}`将拼接在一起组成完整的assistant_content。
|
| 14 |
+
- `{"role": "tool_response", ...}`也可以写成`{"role": "tool", ...}`,这两种写法是等价的。该部分也将根据`agent_template`自动转换格式。该部分在训练时将不进行损失的计算,角色类似于`{"role": "user", ...}`。
|
| 15 |
+
- 该格式支持并行调用工具,例子参考第一条数据样本。多模态Agent数据样本中`<image>`标签数量应与"images"长度相同,其标签位置代表图像特征的插入位置。当然也支持其他模态,例如audios, videos。
|
| 16 |
+
|
| 17 |
+
以下为上述两条数据样本由qwen2_5和qwen2_5_vl的template进行encode后的input_ids和labels,选择的agent_template为**hermes**:
|
| 18 |
+
|
| 19 |
+
样本一(并行工具调用):
|
| 20 |
+
```text
|
| 21 |
+
[INPUT_IDS] <|im_start|>system
|
| 22 |
+
You are Qwen, created by Alibaba Cloud. You are a helpful assistant.
|
| 23 |
+
|
| 24 |
+
# Tools
|
| 25 |
+
|
| 26 |
+
You may call one or more functions to assist with the user query.
|
| 27 |
+
|
| 28 |
+
You are provided with function signatures within <tools></tools> XML tags:
|
| 29 |
+
<tools>
|
| 30 |
+
{"type": "function", "function": {"name": "realtime_aqi", "description": "天气预报。获取实时空气质量。当前空气质量,PM2.5,PM10信息", "parameters": {"type": "object", "properties": {"city": {"type": "string", "description": "城市名,例如:上海"}}, "required": ["city"]}}}
|
| 31 |
+
</tools>
|
| 32 |
+
|
| 33 |
+
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
|
| 34 |
+
<tool_call>
|
| 35 |
+
{"name": <function-name>, "arguments": <args-json-object>}
|
| 36 |
+
</tool_call><|im_end|>
|
| 37 |
+
<|im_start|>user
|
| 38 |
+
北京和上海今天的天气情况<|im_end|>
|
| 39 |
+
<|im_start|>assistant
|
| 40 |
+
<tool_call>
|
| 41 |
+
{"name": "realtime_aqi", "arguments": {"city": "北京"}}
|
| 42 |
+
</tool_call>
|
| 43 |
+
<tool_call>
|
| 44 |
+
{"name": "realtime_aqi", "arguments": {"city": "上海"}}
|
| 45 |
+
</tool_call><|im_end|>
|
| 46 |
+
<|im_start|>user
|
| 47 |
+
<tool_response>
|
| 48 |
+
{"city": "北京", "aqi": "10", "unit": "celsius"}
|
| 49 |
+
</tool_response>
|
| 50 |
+
<tool_response>
|
| 51 |
+
{"city": "上海", "aqi": "72", "unit": "fahrenheit"}
|
| 52 |
+
</tool_response><|im_end|>
|
| 53 |
+
<|im_start|>assistant
|
| 54 |
+
根据天气预报工具,北京今天的空气质量指数为10,属于良好水平;上海今天的空气质量指数为72,属于轻度污染水平。<|im_end|>
|
| 55 |
+
|
| 56 |
+
[LABELS] [-100 * 195]<tool_call>
|
| 57 |
+
{"name": "realtime_aqi", "arguments": {"city": "北京"}}
|
| 58 |
+
</tool_call>
|
| 59 |
+
<tool_call>
|
| 60 |
+
{"name": "realtime_aqi", "arguments": {"city": "上海"}}
|
| 61 |
+
</tool_call><|im_end|>[-100 * 67]根据天气预报工具,北京今天的空气质量指数为10,属于良好水平;上海今天的空气质量指数为72,属于轻度污染水平。<|im_end|>
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
样本二(多模态,混合assistant和tool_call):
|
| 65 |
+
```text
|
| 66 |
+
[INPUT_IDS] <|im_start|>system
|
| 67 |
+
You are a helpful assistant.
|
| 68 |
+
|
| 69 |
+
# Tools
|
| 70 |
+
|
| 71 |
+
You may call one or more functions to assist with the user query.
|
| 72 |
+
|
| 73 |
+
You are provided with function signatures within <tools></tools> XML tags:
|
| 74 |
+
<tools>
|
| 75 |
+
{"type": "function", "function": {"name": "click", "description": "点击屏幕中的某个位置", "parameters": {"type": "object", "properties": {"x": {"type": "integer", "description": "横坐标,表示屏幕上的水平位置"}, "y": {"type": "integer", "description": "纵坐标,表示屏幕上的垂直位置"}}, "required": ["x", "y"]}}}
|
| 76 |
+
</tools>
|
| 77 |
+
|
| 78 |
+
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
|
| 79 |
+
<tool_call>
|
| 80 |
+
{"name": <function-name>, "arguments": <args-json-object>}
|
| 81 |
+
</tool_call><|im_end|>
|
| 82 |
+
<|im_start|>user
|
| 83 |
+
<|vision_start|>[151655 * 729]<|vision_end|>现在几点了?<|im_end|>
|
| 84 |
+
<|im_start|>assistant
|
| 85 |
+
<think>
|
| 86 |
+
我可以通过打开日历App来获取当前时间。
|
| 87 |
+
</think>
|
| 88 |
+
<tool_call>
|
| 89 |
+
{"name": "click", "arguments": {"x": 105, "y": 132}}
|
| 90 |
+
</tool_call><|im_end|>
|
| 91 |
+
<|im_start|>user
|
| 92 |
+
<tool_response>
|
| 93 |
+
{"images": "<|vision_start|>[151655 * 729]<|vision_end|>", "status": "success"}
|
| 94 |
+
</tool_response><|im_end|>
|
| 95 |
+
<|im_start|>assistant
|
| 96 |
+
成功打开日历App,现在的时间为中午11点<|im_end|>
|
| 97 |
+
|
| 98 |
+
[LABELS] [-100 * 924]<think>
|
| 99 |
+
我可以通过打开日历App来获取当前时间。
|
| 100 |
+
</think>
|
| 101 |
+
<tool_call>
|
| 102 |
+
{"name": "click", "arguments": {"x": 105, "y": 132}}
|
| 103 |
+
</tool_call><|im_end|>[-100 * 759]成功打开日历App,现在的时间为中午11点<|im_end|>
|
| 104 |
+
```
|
| 105 |
+
|
| 106 |
+
**react_en**是常用的agent template格式之一,以下为样本一由qwen2_5使用`agent_template='react_en'`进行encode后的input_ids和labels:
|
| 107 |
+
|
| 108 |
+
```text
|
| 109 |
+
[INPUT_IDS] <|im_start|>system
|
| 110 |
+
Answer the following questions as best you can. You have access to the following tools:
|
| 111 |
+
|
| 112 |
+
realtime_aqi: Call this tool to interact with the realtime_aqi API. What is the realtime_aqi API useful for? 天气预报。获取实时空气质量。当前空气质量,PM2.5,PM10信息 Parameters: {"type": "object", "properties": {"city": {"type": "string", "description": "城市名,例如:上海"}}, "required": ["city"]} Format the arguments as a JSON object.
|
| 113 |
+
|
| 114 |
+
Use the following format:
|
| 115 |
+
|
| 116 |
+
Question: the input question you must answer
|
| 117 |
+
Thought: you should always think about what to do
|
| 118 |
+
Action: the action to take, should be one of [realtime_aqi]
|
| 119 |
+
Action Input: the input to the action
|
| 120 |
+
Observation: the result of the action
|
| 121 |
+
... (this Thought/Action/Action Input/Observation can be repeated zero or more times)
|
| 122 |
+
Thought: I now know the final answer
|
| 123 |
+
Final Answer: the final answer to the original input question
|
| 124 |
+
|
| 125 |
+
Begin!
|
| 126 |
+
<|im_end|>
|
| 127 |
+
<|im_start|>user
|
| 128 |
+
北京和上海今天的天气情况<|im_end|>
|
| 129 |
+
<|im_start|>assistant
|
| 130 |
+
Action: realtime_aqi
|
| 131 |
+
Action Input: {'city': '北京'}
|
| 132 |
+
Action: realtime_aqi
|
| 133 |
+
Action Input: {'city': '上海'}
|
| 134 |
+
Observation:{"city": "北京", "aqi": "10", "unit": "celsius"}
|
| 135 |
+
Observation:{"city": "上海", "aqi": "72", "unit": "fahrenheit"}
|
| 136 |
+
根据天气预报工具,北京今天的空气质量指数为10,属于良好水平;上海今天的空气质量指数为72,属于轻度污染水平。<|im_end|>
|
| 137 |
+
|
| 138 |
+
[LABELS] [-100 * 233]Action: realtime_aqi
|
| 139 |
+
Action Input: {'city': '北京'}
|
| 140 |
+
Action: realtime_aqi
|
| 141 |
+
Action Input: {'city': '上海'}
|
| 142 |
+
Observation:[-100 * 45]根据天气预报工具,北京今天的空气质量指数为10,属于良好水平;上海今天的空气质量指数为72,属于轻度污染水平。<|im_end|>
|
| 143 |
+
```
|
| 144 |
+
|
| 145 |
+
更多模型和agent_template的尝试可以使用以下代码,更多的agent template可选值参考[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/agent_template/__init__.py)。
|
| 146 |
+
```python
|
| 147 |
+
from swift.llm import get_model_tokenizer, get_template
|
| 148 |
+
|
| 149 |
+
_, tokenizer = get_model_tokenizer('ZhipuAI/GLM-4-9B-0414', load_model=False)
|
| 150 |
+
template = get_template(tokenizer.model_meta.template, tokenizer, agent_template='hermes')
|
| 151 |
+
data = {...}  # 此处替换为上文的某条数据样本(dict格式)
|
| 152 |
+
template.set_mode('train')
|
| 153 |
+
encoded = template.encode(data)
|
| 154 |
+
print(f'[INPUT_IDS] {template.safe_decode(encoded["input_ids"])}\n')
|
| 155 |
+
print(f'[LABELS] {template.safe_decode(encoded["labels"])}')
|
| 156 |
+
```
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
## tools格式
|
| 160 |
+
tools字段提供了模型可以调用的API信息。你需要提供tools的名字,描述和参数,示例如下:
|
| 161 |
+
|
| 162 |
+
```python
|
| 163 |
+
tools = [{
|
| 164 |
+
'type': 'function',
|
| 165 |
+
'function': {
|
| 166 |
+
'name': 'get_current_weather',
|
| 167 |
+
'description': 'Get the current weather in a given location',
|
| 168 |
+
'parameters': {
|
| 169 |
+
'type': 'object',
|
| 170 |
+
'properties': {
|
| 171 |
+
'location': {
|
| 172 |
+
'type': 'string',
|
| 173 |
+
'description': 'The city and state, e.g. San Francisco, CA'
|
| 174 |
+
},
|
| 175 |
+
'unit': {
|
| 176 |
+
'type': 'string',
|
| 177 |
+
'enum': ['celsius', 'fahrenheit']
|
| 178 |
+
}
|
| 179 |
+
},
|
| 180 |
+
'required': ['location']
|
| 181 |
+
}
|
| 182 |
+
}
|
| 183 |
+
}]
|
| 184 |
+
```
|
| 185 |
+
|
| 186 |
+
## loss_scale的使用
|
| 187 |
+
|
| 188 |
+
loss_scale可以对模型输出部分的训练损失权重进行调节。例如在ReACT格式中,可以设置`--loss_scale react`(loss_scale配置文件书写在[这里](https://github.com/modelscope/swift/blob/main/swift/plugin/loss_scale/config/react.json)),该参数起到的作用是:
|
| 189 |
+
|
| 190 |
+
'Thought:'和'Final Answer:'部分权重为1,'Action:'和'Action Input:'部分权重为2,'Observation:'字段本身权重为2,'Observation:'后面的工具调用结果权重为0。
|
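下面用一小段代码示意这类token级权重是如何作用到交叉熵损失上的(仅为原理示意,并非ms-swift内部实现,函数与变量名均为假设):

```python
import torch
import torch.nn.functional as F

def weighted_ce_loss(logits: torch.Tensor, labels: torch.Tensor, loss_scale: torch.Tensor) -> torch.Tensor:
    # logits: [seq, vocab];labels: [seq],-100表示不计损失;loss_scale: [seq],每个token的损失权重
    mask = labels != -100
    ce = F.cross_entropy(logits[mask], labels[mask], reduction='none')
    weight = loss_scale[mask]
    return (ce * weight).sum() / weight.sum().clamp(min=1.0)
```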
| 191 |
+
|
| 192 |
+
具体的loss_scale插件设计,请参考[插件化](../Customization/插件化.md)文档。
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
## 训练
|
| 196 |
+
- 训练Base模型的Agent能力,通过修改`--model`切换不同模型,参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/train/agent/qwen2_5.sh)。
|
| 197 |
+
- 训练GLM4的agent_template为hermes,参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/train/agent/glm4.sh)。
|
| 198 |
+
- 使用`--loss_scale`对模型输出部分的损失权重进行调整,参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/agent/loss_scale)。
|
| 199 |
+
|
| 200 |
+
## 推理
|
| 201 |
+
|
| 202 |
+
- 🚀原始模型或者全参数训练后模型的推理,参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/infer/demo_agent.py)。
|
| 203 |
+
- LoRA训练后推理,参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/agent/loss_scale/infer.md)。
|
| 204 |
+
|
| 205 |
+
## 部署
|
| 206 |
+
|
| 207 |
+
服务端和客户端代码,参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/deploy/agent)。
|
docs/source/Instruction/GRPO.md
ADDED
|
@@ -0,0 +1,461 @@
|
| 1 |
+
# GRPO
|
| 2 |
+
|
| 3 |
+
论文地址
|
| 4 |
+
|
| 5 |
+
[DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://arxiv.org/abs/2402.03300)
|
| 6 |
+
|
| 7 |
+
环境安装
|
| 8 |
+
```bash
|
| 9 |
+
pip install math_verify==0.5.2 # reward function
|
| 10 |
+
pip install -U trl
|
| 11 |
+
```
|
| 12 |
+
|
| 13 |
+
**更新日志**
|
| 14 |
+
|
| 15 |
+
- **2025-05-11** — 支持生成式奖励模型,通过 reward_model_plugin 自定义奖励模型逻辑。有关更多详细信息,请参阅[自定义奖励模型](#自定义奖励模型)部分。
|
| 16 |
+
- **2025-04-30** — external vllm server 的启动命令改为 `swift rollout`
|
| 17 |
+
|
| 18 |
+
**FAQ**
|
| 19 |
+
1. 训练过程中 loss 接近0 是正常情况, 参考[issue](https://github.com/huggingface/open-r1/issues/239#issuecomment-2646297851)
|
| 20 |
+
2. 训练的steps怎么计算? 参考[issue](https://github.com/modelscope/ms-swift/issues/3912)
|
| 21 |
+
3. clip_ratio为什么总是1? 参考[issue](https://github.com/huggingface/open-r1/issues/239#issuecomment-2646297851)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
## 集群支持
|
| 25 |
+
|
| 26 |
+

|
| 27 |
+
|
| 28 |
+
GRPO 训练框架支持集成高性能推理引擎(如 vLLM)来加速采样过程,提供以下两种部署模式:
|
| 29 |
+
|
| 30 |
+
### 1. 内部集成模式 (Internal)
|
| 31 |
+
|
| 32 |
+
- 在Trainer内部直接启动推理服务
|
| 33 |
+
- 提供两种资源分配策略:
|
| 34 |
+
- **协同模式 (Colocate)**: 训练与推理共享GPU资源
|
| 35 |
+
- **异步模式 (Async)**: 训练与推理使用独立GPU资源
|
| 36 |
+
|
| 37 |
+
### GRPO训练资源配置方案
|
| 38 |
+
| 配置场景 | NPROC_PER_NODE | num_infer_workers | 资源分配说明 |
|
| 39 |
+
|--------------------------|----------------|------------------|------------------------|
|
| 40 |
+
| **Colocate** | =总GPU数 | =总GPU数 | 训练和推理共享全部GPU资源 |
|
| 41 |
+
| **Async** | =训练卡数 | =推理卡数 | 必须满足:训练卡数 + 推理卡数 = 总GPU数 |
|
| 42 |
+
|
| 43 |
+
**注:**
|
| 44 |
+
1. 在Colocate模式下推荐设置`sleep_level=1`, 在模型训练时释放vLLM占用显存
|
| 45 |
+
2. 总GPU数指可见的GPU设备总数
|
| 46 |
+
|
| 47 |
+
### 2. 外部服务模式 (External)
|
| 48 |
+
连接外部的 vLLM 推理服务器
|
| 49 |
+
使用时,通过以下参数配置外部 vLLM 服务器:
|
| 50 |
+
```bash
|
| 51 |
+
--vllm_server_host <服务器IP> \
|
| 52 |
+
--vllm_server_port <服务端口> \
|
| 53 |
+
--vllm_server_timeout <超时时间> \
|
| 54 |
+
```
|
| 55 |
+
使用`swift rollout`命令部署vLLM 服务器, 现仅支持vLLM backend
|
| 56 |
+
```bash
|
| 57 |
+
CUDA_VISIBLE_DEVICES=2 \
|
| 58 |
+
swift rollout \
|
| 59 |
+
--model Qwen/Qwen2.5-VL-7B-Instruct \
|
| 60 |
+
--tensor_parallel_size 2 \
|
| 61 |
+
```
|
| 62 |
+
完整脚本可以参考[这里](../../../examples/train/grpo/multi_node/Qwen2_5_32B_full.sh)
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
## 奖励函数
|
| 66 |
+
### 自定义奖励函数
|
| 67 |
+
奖励函数接受模型生成的文本 completions 以及其他数据集中的列作为参数(kwargs),并对模型生成的文本进行打分。以下是一个示例,展示了如何实现一个简单的长度奖励函数。该函数会在模型生成的文本长度超过 1024 时,给予 1.0 的奖励信号;否则,奖励信号为 0.0。
|
| 68 |
+
|
| 69 |
+
```python
|
| 70 |
+
from swift.plugin import ORM, orms
|
| 71 |
+
class DummyLengthRewardFunction(ORM):
|
| 72 |
+
    def __call__(self, completions, **kwargs):
|
| 73 |
+
        return [1.0 if len(completion) > 1024 else 0.0 for completion in completions]
|
| 74 |
+
|
| 75 |
+
orms['dummy'] = DummyLengthRewardFunction
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
可以在`swift/examples/train/grpo/plugin/plugin.py`中加入该奖励函数,使用参数`--external_plugins examples/train/grpo/plugin/plugin.py`进行注册,并通过 reward_funcs 参数进行指定
|
| 79 |
+
|
| 80 |
+
执行脚本参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/grpo/plugin/run_external_rm.sh)
|
| 81 |
+
|
| 82 |
+
### 内置奖励函数
|
| 83 |
+
swift内置了五种基于规则的奖励函数(代码见swift/plugin/orm.py)
|
| 84 |
+
|
| 85 |
+
| 奖励函数 | 论文 |
|
| 86 |
+
|----------------|----------------------------------------------------------------------------|
|
| 87 |
+
| accuracy | [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via RL](https://arxiv.org/abs/2501.12948) |
|
| 88 |
+
| format | 同上 |
|
| 89 |
+
| cosine | [Demystifying Long Chain-of-Thought Reasoning in LLMs](https://arxiv.org/abs/2502.03373) |
|
| 90 |
+
| repetition | 同上 |
|
| 91 |
+
| soft_overlong | [Decoupled Clip and Dynamic sAmpling Policy Optimization (DAPO)](https://arxiv.org/abs/2503.14476) |
|
| 92 |
+
|
| 93 |
+
#### 1. **accuracy**
|
| 94 |
+
|
| 95 |
+
该函数将模型的生成结果与数据集中的 solution 列进行比较,计算准确率分数。如果生成结果与标准答案一致,则得分为 1.0;否则为 0.0。
|
| 96 |
+
|
| 97 |
+
注意:该奖励函数使用`math_verify`库解析生成结果和solution中的答案,可能只适用于特定的数学数据集。
|
| 98 |
+
|
| 99 |
+
#### 2. **format**
|
| 100 |
+
|
| 101 |
+
论文中使用以下system prompt要求模型按照固定格式进行返回
|
| 102 |
+
```
|
| 103 |
+
A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>
|
| 104 |
+
```
|
| 105 |
+
|
| 106 |
+
该函数检查模型是否按照 `<think>think content</think><answer>answer content</answer>` 的格式进行生成。如果生成文本符合格式要求,则得分为 1.0;否则为 0.0。
|
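其判定逻辑大致相当于下面这段正则校验(仅为原理示意,实际实现以 swift/plugin/orm.py 为准):

```python
import re

def format_reward(completion: str) -> float:
    # 要求整段输出形如 <think>...</think><answer>...</answer>
    pattern = r'^<think>.*?</think>\s*<answer>.*?</answer>$'
    return 1.0 if re.match(pattern, completion.strip(), re.DOTALL) else 0.0
```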
| 107 |
+
|
| 108 |
+
#### 3. **cosine**
|
| 109 |
+
|
| 110 |
+
论文发现,仅使用 accuracy 奖励函数进行训练会导致模型的生成长度趋于超长,从而影响训练效果。cosine 奖励函数通过控制模型的生成长度来优化训练过程:
|
| 111 |
+
|
| 112 |
+
- 对于生成正确答案的文本,奖励值随长度增加而递减,鼓励模型生成简洁的回答。
|
| 113 |
+
- 对于生成错误答案的文本,奖励值随长度增加而递增,鼓励模型进行更深入的思考。
|
| 114 |
+
|
| 115 |
+
使用余弦函数平滑地调整奖励值,确保奖励变化在合理范围内。余弦函数的参数包括生成文本的长度、最大长度限制以及奖励的最小值和最大值。
|
| 116 |
+
|
| 117 |
+
参数
|
| 118 |
+
- cosine_min_len_value_wrong(默认值:-0.5):生成错误答案时,最小长度对应的奖励值。
|
| 119 |
+
- cosine_max_len_value_wrong(默认值:0.0):生成错误答案时,最大长度对应的奖励值。
|
| 120 |
+
- cosine_min_len_value_correct(默认值:1.0):生成正确答案时,最小长度对应的奖励值。
|
| 121 |
+
- cosine_max_len_value_correct(默认值:0.5):生成正确答案时,最大长度对应的奖励值。
|
| 122 |
+
- cosine_max_len(默认值等于模型生成的最大长度):生成文本的最大长度限制。
|
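其插值方式大致如下(仅为原理示意,默认值对应上面的参数,实际实现以 swift/plugin/orm.py 为准):

```python
import math

def cosine_reward(is_correct: bool, gen_len: int, max_len: int,
                  min_len_value_wrong: float = -0.5, max_len_value_wrong: float = 0.0,
                  min_len_value_correct: float = 1.0, max_len_value_correct: float = 0.5) -> float:
    # min_len_value_*表示长度趋近0时的奖励,max_len_value_*表示长度达到max_len时的奖励
    if is_correct:
        min_v, max_v = min_len_value_correct, max_len_value_correct
    else:
        min_v, max_v = min_len_value_wrong, max_len_value_wrong
    progress = min(gen_len / max_len, 1.0)
    cos = math.cos(progress * math.pi)          # 随长度从1平滑变化到-1
    return max_v + 0.5 * (min_v - max_v) * (1.0 + cos)
```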
| 123 |
+
|
| 124 |
+
|
| 125 |
+
#### 4. **repetition**
|
| 126 |
+
|
| 127 |
+
惩罚模型生成文本中的重复内容,通过检测生成文本中的重复 n-gram 模式来评估重复程度,并给予相应的惩罚。
|
| 128 |
+
|
| 129 |
+
函数将生成文本分割为单词,并提取指定大小的 n-gram(默认为 3-gram)。通过统计不同 n-gram 的数量与总 n-gram 数量的比例,计算重复比例。如果生成文本中重复的 n-gram 比例较高,则给予较大的负奖励(惩罚)。惩罚值通过重复比例和最大惩罚值(默认为 -1.0)计算得出。
|
| 130 |
+
|
| 131 |
+
参数
|
| 132 |
+
- repetition_n_grams(默认值:3):用于检测重复的 n-gram 大小。
|
| 133 |
+
- repetition_max_penalty(默认值:-1.0):最大惩罚值,用于控制惩罚的强度。
|
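其计算方式大致如下(仅为原理示意,实际实现以 swift/plugin/orm.py 为准):

```python
def repetition_penalty(completion: str, n_grams: int = 3, max_penalty: float = -1.0) -> float:
    words = completion.split()
    if len(words) < n_grams:
        return 0.0
    ngrams = [tuple(words[i:i + n_grams]) for i in range(len(words) - n_grams + 1)]
    # 不同n-gram的占比越低,说明重复越多,惩罚越接近max_penalty
    unique_ratio = len(set(ngrams)) / len(ngrams)
    return (1.0 - unique_ratio) * max_penalty
```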
| 134 |
+
|
| 135 |
+
#### 5. **soft overlong punishment**
|
| 136 |
+
定义长度惩罚区间。在这个区间内,给予[-1,0]的线性惩罚。
|
| 137 |
+
|
| 138 |
+
参数
|
| 139 |
+
- soft_max_length: 论文中的L_max,模型的最大生成长度,默认等于max_completion_length
|
| 140 |
+
- soft_cache_length: 论文中的L_cache,控制长度惩罚区间,区间为[soft_max_length-soft_cache_length, soft_max_length]
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
论文原文
|
| 144 |
+
> a length-aware penalty mechanism designed to shape the reward for truncated samples. Specifically, when the response length exceeds the predefined maximum value, we define a punishment interval. Within this interval, the longer the response, the greater the punishment it receives. This penalty is added to the original rule-based correctness reward, thereby signaling to the model to avoid excessively long responses.
|
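该惩罚的计算方式大致如下(仅为原理示意,变量名对应上面的参数):

```python
def soft_overlong_punishment(gen_len: int, soft_max_length: int, soft_cache_length: int) -> float:
    # 长度低于惩罚区间时不惩罚;在[soft_max_length - soft_cache_length, soft_max_length]内线性给予[0, -1]的惩罚
    start = soft_max_length - soft_cache_length
    if gen_len <= start:
        return 0.0
    if gen_len >= soft_max_length:
        return -1.0
    return (start - gen_len) / soft_cache_length
```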
| 145 |
+
|
| 146 |
+
#### 6. **奖励模型**
|
| 147 |
+
|
| 148 |
+
除了基于规则的奖励函数外,本框架还支持使用奖励模型作为奖励函数。在使用奖励模型时,需要指定 reward_model 参数,该参数与 model 参数类似,用于指定奖励模型的路径或名称。需要注意的是,reward_model 和 reward_funcs 至少需要指定一个。
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
## 参数与运行脚本
|
| 152 |
+
参数
|
| 153 |
+
- per_device_train_batch_size: 每个设备训练批量大小,在GRPO中,指 completion 的批次大小。
|
| 154 |
+
- per_device_eval_batch_size: 每个设备评估批量大小,在GRPO中,指 completion 的批次大小。
|
| 155 |
+
- num_generations: 每个prompt采样的数量,即论文中的G值。per_device_train_batch_size * gradient_accumulation_steps * nproc_per_node 需要能被 num_generations 整除,默认为8
|
| 156 |
+
- max_completion_length: 采样生成的最大长度,默认为512
|
| 157 |
+
- ds3_gather_for_generation: 该参数适用于DeepSpeed ZeRO-3。如果启用,策略模型权重将被收集用于生成,从而提高生成速度。然而,禁用此选项允许训练超出单个GPU VRAM的模型,尽管生成速度会变慢。禁用此选项与vLLM生成不兼容。默认为True
|
| 158 |
+
- reward_funcs: 奖励函数,根据模型生成结果进行打分,内置accuracy、format、cosine、repetition和soft_overlong五个rule-based函数,详细见 swift/plugin/orm.py 文件
|
| 159 |
+
- reward_weights: 每个奖励函数的权重。必须与奖励函数和奖励模型的总数量匹配。如果为 None,则所有奖励的权重都相等,为`1.0`
|
| 160 |
+
- 提示:如果GRPO训练中包含`--reward_model`,则其加在奖励函数的最后位置
|
| 161 |
+
- reward_model: 同model, 使用奖励模型作为奖励函数,与reward_funcs至少需要指定一个。
|
| 162 |
+
- reward_model_plugin: 奖励模型逻辑,默认为orm逻辑, 详细见[自定义奖励模型](#自定义奖励模型)。
|
| 163 |
+
- dataset_shuffle: 是否对dataset进行随机打乱,默认为True
|
| 164 |
+
- loss_type: loss 归一化的类型,可选项为['grpo', 'bnpo', 'dr_grpo'], 默认为'grpo', 具体查看该[pr](https://github.com/huggingface/trl/pull/3256#discussion_r2033213348)
|
| 165 |
+
- log_completions: 是否记录训练中的模型生成内容,搭配 `--report_to wandb` 使用。默认为False
|
| 166 |
+
- 提示:若没有设置`--report_to wandb`,则会在checkpoint中创建`completions.jsonl`来存储生成内容
|
| 167 |
+
- use_vllm: 是否使用vLLM作为采样的生成后端,默认为False,建议使用加快训练速度
|
| 168 |
+
- vllm_device: 设置vLLM部署的设备,默认为`auto`, 即未被使用的第一张显卡,使用`cuda:x`来设置特定的卡。
|
| 169 |
+
- vllm_gpu_memory_utilization: vllm透传参数,默认为0.9
|
| 170 |
+
- vllm_max_model_len: vllm透传参数,默认为None
|
| 171 |
+
- vllm_max_num_seqs: vllm透传参数,默认为256
|
| 172 |
+
- vllm_enforce_eager: vllm透传参数,默认为False
|
| 173 |
+
- vllm_limit_mm_per_prompt: vllm透传参数,默认为None
|
| 174 |
+
- vllm_enable_prefix_caching: vllm透传参数,默认为True
|
| 175 |
+
- vllm_server_host:vLLM server host地址,默认为None,使用外部vLLM server时使用
|
| 176 |
+
- vllm_server_port vLLM server 服务端口,默认为8000
|
| 177 |
+
- vllm_server_timeout 连接vLLM server的超时时间,默认为120s
|
| 178 |
+
- num_iterations: 每个批次的更新次数,默认为1。
|
| 179 |
+
- epsilon: clip 系数,默认为0.2.
|
| 180 |
+
- epsilon_high: upper clip 系数,默认为None,设置后与epsilon共同构成[epsilon, epsilon_high]裁剪范围.
|
| 181 |
+
- async_generate: 异步rollout以提高训练速度,默认`false`.
|
| 182 |
+
- sleep_level: vllm特有参数,在训练和rollout复用卡的时候,可以选择vllm进行offload.
|
| 183 |
+
- move_model_batches: 在模型向vLLM/LMDeploy等快速推理框架移动参数时,将layers分为多少个batch. 默认为None, 代表整个模型不进行拆分,否则拆分为move_model_batches+1(非layer参数)+1(多模态部分参数)个
|
| 184 |
+
- offload_optimizer: 是否在vLLM/LMDeploy推理时offload optimizer参数,默认为False
|
| 185 |
+
- offload_model: 是否在vLLM/LMDeploy推理时offload 模型本身,默认为False
|
| 186 |
+
- 注意:若该参数设置为True后训练时grad_norm一直为0,请安装`vllm==0.7.3`
|
| 187 |
+
- gc_collect_after_offload: 是否在offload结束时进行gc(python gc和GPU gc),默认为False
|
| 188 |
+
- multi_turn_func: 多轮GRPO参数, 传入对应的plugin名称, 同时在plugin/multi_turn.py中添加好对应的实现
|
| 189 |
+
- dynamic_sample:筛除group内奖励标准差为0的数据,额外采样新数据,默认为False。
|
| 190 |
+
- max_resample_times:dynamic_sample设置下限制重采样次数,默认3次。
|
| 191 |
+
- overlong_filter:跳过超长截断的样本,不参与loss计算,默认为False。
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
奖励函数参数,见[内置奖励函数](#内置奖励函数)
|
| 198 |
+
|
| 199 |
+
可以使用vLLM、LMDeploy作为采样后端加速训练
|
| 200 |
+
多卡vLLM
|
| 201 |
+
```bash
|
| 202 |
+
# async mode
|
| 203 |
+
# 要求 num_infer_workers(部署) + NPROC_PER_NODE(训练) = device_count
|
| 204 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
|
| 205 |
+
NPROC_PER_NODE=7 \
|
| 206 |
+
swift rlhf \
|
| 207 |
+
--rlhf_type grpo \
|
| 208 |
+
--model Qwen/Qwen2.5-7B \
|
| 209 |
+
--reward_funcs accuracy format \
|
| 210 |
+
--use_vllm true \
|
| 211 |
+
--vllm_device auto \
|
| 212 |
+
--vllm_gpu_memory_utilization 0.7 \
|
| 213 |
+
--vllm_max_model_len 8192 \
|
| 214 |
+
--num_infer_workers 1 \
|
| 215 |
+
--train_type full \
|
| 216 |
+
--torch_dtype bfloat16 \
|
| 217 |
+
--dataset 'AI-MO/NuminaMath-TIR#5000' \
|
| 218 |
+
--max_completion_length 2048 \
|
| 219 |
+
--num_train_epochs 1 \
|
| 220 |
+
--per_device_train_batch_size 1 \
|
| 221 |
+
--per_device_eval_batch_size 1 \
|
| 222 |
+
--learning_rate 1e-6 \
|
| 223 |
+
--gradient_accumulation_steps 2 \
|
| 224 |
+
--eval_steps 200 \
|
| 225 |
+
--save_steps 200 \
|
| 226 |
+
--save_total_limit 2 \
|
| 227 |
+
--logging_steps 5 \
|
| 228 |
+
--max_length 4096 \
|
| 229 |
+
--output_dir output \
|
| 230 |
+
--warmup_ratio 0.05 \
|
| 231 |
+
--dataloader_num_workers 4 \
|
| 232 |
+
--dataset_num_proc 4 \
|
| 233 |
+
--num_generations 7 \
|
| 234 |
+
--temperature 0.9 \
|
| 235 |
+
--system 'examples/train/grpo/prompt.txt' \
|
| 236 |
+
--deepspeed zero2 \
|
| 237 |
+
--log_completions true
|
| 238 |
+
|
| 239 |
+
# colocate mode
|
| 240 |
+
# 要求 num_infer_workers(部署) = NPROC_PER_NODE(训练) = device_count
|
| 241 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
|
| 242 |
+
NPROC_PER_NODE=8 \
|
| 243 |
+
swift rlhf \
|
| 244 |
+
--rlhf_type grpo \
|
| 245 |
+
--model Qwen/Qwen2.5-1.5B \
|
| 246 |
+
--reward_funcs accuracy format \
|
| 247 |
+
--use_vllm true \
|
| 248 |
+
--vllm_device auto \
|
| 249 |
+
--vllm_gpu_memory_utilization 0.5 \
|
| 250 |
+
--vllm_max_model_len 8192 \
|
| 251 |
+
--num_infer_workers 8 \
|
| 252 |
+
--train_type full \
|
| 253 |
+
--torch_dtype bfloat16 \
|
| 254 |
+
--dataset 'AI-MO/NuminaMath-TIR#5000' \
|
| 255 |
+
--max_completion_length 2048 \
|
| 256 |
+
--num_train_epochs 1 \
|
| 257 |
+
--per_device_train_batch_size 1 \
|
| 258 |
+
--per_device_eval_batch_size 1 \
|
| 259 |
+
--learning_rate 1e-6 \
|
| 260 |
+
--gradient_accumulation_steps 2 \
|
| 261 |
+
--eval_steps 200 \
|
| 262 |
+
--save_steps 200 \
|
| 263 |
+
--save_total_limit 2 \
|
| 264 |
+
--logging_steps 5 \
|
| 265 |
+
--max_length 4096 \
|
| 266 |
+
--output_dir output \
|
| 267 |
+
--warmup_ratio 0.05 \
|
| 268 |
+
--dataloader_num_workers 4 \
|
| 269 |
+
--dataset_num_proc 4 \
|
| 270 |
+
--num_generations 8 \
|
| 271 |
+
--temperature 0.9 \
|
| 272 |
+
--system 'examples/train/grpo/prompt.txt' \
|
| 273 |
+
--deepspeed zero2 \
|
| 274 |
+
--log_completions true \
|
| 275 |
+
--sleep_level 1 \
|
| 276 |
+
--offload_model true \
|
| 277 |
+
--offload_optimizer true \
|
| 278 |
+
--gc_collect_after_offload true \
|
| 279 |
+
--log_completions true
|
| 280 |
+
```
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
单卡
|
| 284 |
+
```bash
|
| 285 |
+
# PT backend
|
| 286 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 287 |
+
swift rlhf \
|
| 288 |
+
--rlhf_type grpo \
|
| 289 |
+
--model Qwen/Qwen2.5-7B \
|
| 290 |
+
--reward_funcs accuracy format \
|
| 291 |
+
--train_type lora \
|
| 292 |
+
--lora_rank 8 \
|
| 293 |
+
--lora_alpha 32 \
|
| 294 |
+
--target_modules all-linear \
|
| 295 |
+
--torch_dtype bfloat16 \
|
| 296 |
+
--dataset 'AI-MO/NuminaMath-TIR#1000' \
|
| 297 |
+
--max_completion_length 1024 \
|
| 298 |
+
--num_train_epochs 1 \
|
| 299 |
+
--per_device_train_batch_size 4 \
|
| 300 |
+
--per_device_eval_batch_size 4 \
|
| 301 |
+
--learning_rate 1e-5 \
|
| 302 |
+
--gradient_accumulation_steps 1 \
|
| 303 |
+
--eval_steps 100 \
|
| 304 |
+
--save_steps 100 \
|
| 305 |
+
--save_total_limit 2 \
|
| 306 |
+
--logging_steps 5 \
|
| 307 |
+
--max_length 2048 \
|
| 308 |
+
--output_dir output \
|
| 309 |
+
--warmup_ratio 0.05 \
|
| 310 |
+
--dataloader_num_workers 4 \
|
| 311 |
+
--dataset_num_proc 4 \
|
| 312 |
+
--num_generations 4 \
|
| 313 |
+
--temperature 0.9 \
|
| 314 |
+
--system 'examples/train/grpo/prompt.txt' \
|
| 315 |
+
--log_completions true
|
| 316 |
+
|
| 317 |
+
# vLLM backend
|
| 318 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 319 |
+
swift rlhf \
|
| 320 |
+
--rlhf_type grpo \
|
| 321 |
+
--model Qwen/Qwen2.5-7B \
|
| 322 |
+
--vllm_gpu_memory_utilization 0.5 \
|
| 323 |
+
--use_vllm true \
|
| 324 |
+
--sleep_level 1 \
|
| 325 |
+
--offload_model true \
|
| 326 |
+
--offload_optimizer true \
|
| 327 |
+
--gc_collect_after_offload true \
|
| 328 |
+
--reward_funcs accuracy format \
|
| 329 |
+
--train_type lora \
|
| 330 |
+
--lora_rank 8 \
|
| 331 |
+
--lora_alpha 32 \
|
| 332 |
+
--target_modules all-linear \
|
| 333 |
+
--torch_dtype bfloat16 \
|
| 334 |
+
--dataset 'AI-MO/NuminaMath-TIR#1000' \
|
| 335 |
+
--max_completion_length 1024 \
|
| 336 |
+
--num_train_epochs 1 \
|
| 337 |
+
--per_device_train_batch_size 4 \
|
| 338 |
+
--per_device_eval_batch_size 4 \
|
| 339 |
+
--learning_rate 1e-5 \
|
| 340 |
+
--gradient_accumulation_steps 1 \
|
| 341 |
+
--eval_steps 100 \
|
| 342 |
+
--save_steps 100 \
|
| 343 |
+
--save_total_limit 2 \
|
| 344 |
+
--logging_steps 5 \
|
| 345 |
+
--max_length 2048 \
|
| 346 |
+
--output_dir output \
|
| 347 |
+
--warmup_ratio 0.05 \
|
| 348 |
+
--dataloader_num_workers 4 \
|
| 349 |
+
--dataset_num_proc 4 \
|
| 350 |
+
--num_generations 4 \
|
| 351 |
+
--temperature 0.9 \
|
| 352 |
+
--system 'examples/train/grpo/prompt.txt' \
|
| 353 |
+
--log_completions true
|
| 354 |
+
```
|
| 355 |
+
多机训练参考[这里](../../../examples/train/grpo/multi_node/)
|
| 356 |
+
|
| 357 |
+
注:内部集成模式下,需要不同节点的GPU配置以及训练参数相同
|
| 358 |
+
|
| 359 |
+
## 自定义奖励模型
|
| 360 |
+
默认情况下,奖励模型指的是包含数值头的分类模型(通常称为输出奖励模型(ORM))。这些模型对其他模型的输出进行评分,产生一个标量值,表示模型响应的质量。
|
| 361 |
+
|
| 362 |
+
目前,我们可以利用reward_model_plugin灵活地自定义奖励模型的处理逻辑。这使得实现诸如生成式奖励模型等技术成为可能,包括:
|
| 363 |
+
- 自定义模型的系统提示:定义特定的指令和上下文以指导评估过程。
|
| 364 |
+
- 处理模型交互历史:管理对话上下文,以提供有意义且具有上下文感知的评估。
|
| 365 |
+
- 定义自定义评估标准:设置独特的标准和度量,用于评估模型的响应,超越默认的准确性和相关性衡量标准。
|
| 366 |
+
|
| 367 |
+
通过reward_model_plugin,开发者可以针对其应用的特定需求定制奖励评估过程。这种灵活性允许更细致和有效的基于奖励的训练策略。
|
| 368 |
+
|
| 369 |
+
我们在 [rm_plugin.py](../../../swift/plugin/rm_plugin.py) 中提供了一个简单的生成式奖励模型示例(GenRMPlugin)。
|
| 370 |
+
|
| 371 |
+
您还可以在 [plugin.py](../../../examples/train/grpo/plugin/plugin.py) 中自定义您的奖励模型插件,并使用 `external_plugins` 参数进行注册。
|
| 372 |
+
|
| 373 |
+
以下是一个训练脚本示例,用于使用两个奖励模型,包括一个 ORM 和一个 Gen-RM(此处使用 qwen2.5-3B-Instruct)进行 GRPO 训练:
|
| 374 |
+
|
| 375 |
+
```
|
| 376 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
|
| 377 |
+
NPROC_PER_NODE=8 \
|
| 378 |
+
swift rlhf \
|
| 379 |
+
--rlhf_type grpo \
|
| 380 |
+
--model Qwen/Qwen2.5-7B \
|
| 381 |
+
--dataset AI-MO/NuminaMath-TIR#5000 \
|
| 382 |
+
--external_plugins examples/train/grpo/plugin/plugin.py \
|
| 383 |
+
--reward_funcs format \
|
| 384 |
+
--reward_model Qwen/Qwen2.5-3B-Instruct Shanghai_AI_Laboratory/internlm2-7b-reward \
|
| 385 |
+
--reward_model_plugin genrm my_rmplugin \
|
| 386 |
+
--reward_weights 0.1 1 1 \
|
| 387 |
+
--num_infer_workers 8 \
|
| 388 |
+
--vllm_gpu_memory_utilization 0.5 \
|
| 389 |
+
--sleep_level 1 \
|
| 390 |
+
--offload_model true \
|
| 391 |
+
--offload_optimizer true \
|
| 392 |
+
--gc_collect_after_offload true \
|
| 393 |
+
--log_completions true \
|
| 394 |
+
--deepspeed zero2
|
| 395 |
+
```
|
| 396 |
+
|
| 397 |
+
注意:
|
| 398 |
+
1. 在 GRPOTrainer 中,reward_model 会依次append到 reward_funcs 中。因此,reward_weights 的顺序对应 [reward_funcs, reward_model]。
|
| 399 |
+
2. reward_model_plugin 默认为 default,即使用 ORM 处理逻辑。
|
| 400 |
+
|
| 401 |
+
|
| 402 |
+
## DAPO
|
| 403 |
+
[Decoupled Clip and Dynamic sAmpling Policy Optimization (DAPO)](https://arxiv.org/abs/2503.14476)在GRPO的基础上设置了几种trick,分别是
|
| 404 |
+
- Clip Higher
|
| 405 |
+
- Dynamic Sampling
|
| 406 |
+
- Overlong Filtering
|
| 407 |
+
- Token level Loss
|
| 408 |
+
- Soft Overlong Punishment
|
| 409 |
+
|
| 410 |
+
其中Token level Loss是默认实现,不用额外设置。对于其余trick,我们可以基于GRPOTrainer,设置以下参数实现。
|
| 411 |
+
|
| 412 |
+
| 参数 | 类型 | 值 |
|
| 413 |
+
|----------------------|-----------|-------------|
|
| 414 |
+
| `--epsilon_high` | `float` | `0.28` |
|
| 415 |
+
| `--dynamic_sample` | `bool` | `true` |
|
| 416 |
+
| `--overlong_filter` | `bool` | `true` |
|
| 417 |
+
| `--reward_funcs` | `str` | `soft_overlong`|
|
| 418 |
+
| `--max_resample_times` | `int` | `3` |
|
| 419 |
+
|
| 420 |
+
参考训练脚本(八卡colocate mode)
|
| 421 |
+
```bash
|
| 422 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
|
| 423 |
+
NPROC_PER_NODE=8 \
|
| 424 |
+
WANDB_API_KEY=xxx \
|
| 425 |
+
swift rlhf \
|
| 426 |
+
--rlhf_type grpo \
|
| 427 |
+
--model Qwen/Qwen2.5-1.5B \
|
| 428 |
+
--reward_funcs accuracy soft_overlong \
|
| 429 |
+
--max_completion_length 4096 \
|
| 430 |
+
--soft_cache_length 819 \
|
| 431 |
+
--epsilon 0.2 \
|
| 432 |
+
--epsilon_high 0.28 \
|
| 433 |
+
--dynamic_sample true \
|
| 434 |
+
--overlong_filter true \
|
| 435 |
+
--max_resample_times 3 \
|
| 436 |
+
--use_vllm true \
|
| 437 |
+
--vllm_gpu_memory_utilization 0.6 \
|
| 438 |
+
--num_infer_workers 8 \
|
| 439 |
+
--train_type full \
|
| 440 |
+
--torch_dtype bfloat16 \
|
| 441 |
+
--dataset AI-MO/NuminaMath-TIR#5000 \
|
| 442 |
+
--num_train_epochs 1 \
|
| 443 |
+
--per_device_train_batch_size 4 \
|
| 444 |
+
--per_device_eval_batch_size 4 \
|
| 445 |
+
--learning_rate 1e-6 \
|
| 446 |
+
--eval_steps 1000 \
|
| 447 |
+
--save_steps 1000 \
|
| 448 |
+
--save_total_limit 2 \
|
| 449 |
+
--logging_steps 5 \
|
| 450 |
+
--warmup_ratio 0.05 \
|
| 451 |
+
--dataloader_num_workers 4 \
|
| 452 |
+
--dataset_num_proc 4 \
|
| 453 |
+
--num_generations 8 \
|
| 454 |
+
--temperature 1.0 \
|
| 455 |
+
--top_p 1.0 \
|
| 456 |
+
--deepspeed zero2 \
|
| 457 |
+
--log_completions true \
|
| 458 |
+
--num_iterations 1 \
|
| 459 |
+
--report_to tensorboard wandb \
|
| 460 |
+
    --beta 0.0
|
| 461 |
+
```
|
docs/source/Instruction/Megatron-SWIFT训练.md
ADDED
|
@@ -0,0 +1,293 @@
|
| 1 |
+
|
| 2 |
+
# Megatron-SWIFT训练
|
| 3 |
+
|
| 4 |
+
SWIFT引入了Megatron的并行技术来加速大模型的训练,包括数据并行、张量并行、流水线并行、序列并行、上下文并行、专家并行。支持Qwen3、[Qwen3-MoE](https://github.com/modelscope/ms-swift/blob/main/examples/train/megatron/qwen3_moe.sh)、Qwen2.5、Llama3、Deepseek-R1蒸馏系列等模型的预训练和微调。完整支持的模型可以参考[支持的模型与数据集文档](./支持的模型和数据集.md)。
|
| 5 |
+
|
| 6 |
+
## 环境准备
|
| 7 |
+
使用Megatron-SWIFT,除了安装swift依赖外,还需要安装以下内容:
|
| 8 |
+
|
| 9 |
+
```shell
|
| 10 |
+
# 推荐torch版本:2.5 / 2.6
|
| 11 |
+
pip install pybind11
|
| 12 |
+
# transformer_engine
|
| 13 |
+
# 若出现安装错误,可以参考该issue解决: https://github.com/modelscope/ms-swift/issues/3793
|
| 14 |
+
pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable
|
| 15 |
+
|
| 16 |
+
# apex
|
| 17 |
+
git clone https://github.com/NVIDIA/apex
|
| 18 |
+
cd apex
|
| 19 |
+
pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./
|
| 20 |
+
|
| 21 |
+
# megatron-core
|
| 22 |
+
pip install git+https://github.com/NVIDIA/Megatron-LM.git@core_r0.12.0
|
| 23 |
+
```
|
| 24 |
+
|
| 25 |
+
或者你也可以使用镜像:
|
| 26 |
+
```
|
| 27 |
+
modelscope-registry.cn-hangzhou.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.4.0-py311-torch2.6.0-vllm0.8.3-modelscope1.25.0-swift3.3.0.post1
|
| 28 |
+
modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.4.0-py311-torch2.6.0-vllm0.8.3-modelscope1.25.0-swift3.3.0.post1
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
依赖库Megatron-LM中的训练模块将由swift进行git clone并安装。你也可以通过环境变量`MEGATRON_LM_PATH`指向已经下载好的repo路径(适用于断网环境,使用[core_r0.12.0分支](https://github.com/NVIDIA/Megatron-LM/tree/core_r0.12.0))。
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
## 快速入门案例
|
| 35 |
+
|
| 36 |
+
这里介绍使用2卡80GiB A100对Qwen2.5-7B-Instruct模型进行自我认知微调的快速入门案例,以下最佳实践可以在10分钟内完成。
|
| 37 |
+
|
| 38 |
+
首先,我们需要将HF格式的权重转为Megatron格式:
|
| 39 |
+
```shell
|
| 40 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 41 |
+
swift export \
|
| 42 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 43 |
+
--to_mcore true \
|
| 44 |
+
--torch_dtype bfloat16 \
|
| 45 |
+
--output_dir Qwen2.5-7B-Instruct-mcore
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
然后,使用以下脚本进行训练,训练所需显存资源为2*80GiB:
|
| 49 |
+
```shell
|
| 50 |
+
NPROC_PER_NODE=2 \
|
| 51 |
+
CUDA_VISIBLE_DEVICES=0,1 \
|
| 52 |
+
megatron sft \
|
| 53 |
+
--load Qwen2.5-7B-Instruct-mcore \
|
| 54 |
+
--dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
|
| 55 |
+
'AI-ModelScope/alpaca-gpt4-data-en#500' \
|
| 56 |
+
'swift/self-cognition#500' \
|
| 57 |
+
--tensor_model_parallel_size 2 \
|
| 58 |
+
--micro_batch_size 4 \
|
| 59 |
+
--global_batch_size 16 \
|
| 60 |
+
--recompute_granularity selective \
|
| 61 |
+
--train_iters 100 \
|
| 62 |
+
--eval_iters 5 \
|
| 63 |
+
--finetune true \
|
| 64 |
+
--cross_entropy_loss_fusion true \
|
| 65 |
+
--lr 1e-5 \
|
| 66 |
+
--lr_warmup_iters 10 \
|
| 67 |
+
--min_lr 1e-6 \
|
| 68 |
+
--save megatron_output/Qwen2.5-7B-Instruct \
|
| 69 |
+
--save_interval 100 \
|
| 70 |
+
--max_length 2048 \
|
| 71 |
+
--system 'You are a helpful assistant.' \
|
| 72 |
+
--num_workers 4 \
|
| 73 |
+
--no_save_optim true \
|
| 74 |
+
--no_save_rng true \
|
| 75 |
+
--dataset_num_proc 4 \
|
| 76 |
+
--model_author swift \
|
| 77 |
+
--model_name swift-robot
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
最后,将Megatron格式权重转为HF格式:
|
| 81 |
+
```shell
|
| 82 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 83 |
+
swift export \
|
| 84 |
+
--mcore_model megatron_output/Qwen2.5-7B-Instruct/vx-xxx \
|
| 85 |
+
--to_hf true \
|
| 86 |
+
--torch_dtype bfloat16 \
|
| 87 |
+
--output_dir megatron_output/Qwen2.5-7B-Instruct/vx-xxx-hf
|
| 88 |
+
```
|
| 89 |
+
|
| 90 |
+
我们对生成的HF格式权重进行推理:
|
| 91 |
+
```shell
|
| 92 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 93 |
+
swift infer \
|
| 94 |
+
--model megatron_output/Qwen2.5-7B-Instruct/vx-xxx-hf \
|
| 95 |
+
--stream true \
|
| 96 |
+
--temperature 0 \
|
| 97 |
+
--max_new_tokens 2048
|
| 98 |
+
```
|
| 99 |
+
|
| 100 |
+
推理结果如下:
|
| 101 |
+
```
|
| 102 |
+
<<< who are you?
|
| 103 |
+
I am a language model developed by swift, you can call me swift-robot. How can I assist you?
|
| 104 |
+
```
|
| 105 |
+
|
| 106 |
+
- 若要进行预训练,你可以使用`megatron pt`替代`megatron sft`,这将会使用生成式的template进行训练。
|
| 107 |
+
- **更多案例**:包括packing、多机、32K上下文、MoE模型、预训练,可以查看[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/megatron)。
|
| 108 |
+
|
| 109 |
+
## Benchmark
|
| 110 |
+
|
| 111 |
+
使用`megatron sft`和`swift sft`在单机八卡A800环境下进行Dense/MoE模型全参数训练的速度对比如下,对应脚本参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/megatron/benchmark)。
|
| 112 |
+
|
| 113 |
+
**Dense** Qwen2.5-14B:
|
| 114 |
+
|
| 115 |
+
| | Megatron-LM | Deepspeed-ZeRO2 | Deepspeed-ZeRO3 |
|
| 116 |
+
| -------- | ----------- | ---------- | ---------- |
|
| 117 |
+
| 训练速度 | 9.04s/it | 10.32s/it | 10.56s/it |
|
| 118 |
+
| 显存占用 | 8\*64GB | 8\*80GB | 8\*58GB |
|
| 119 |
+
|
| 120 |
+
**MoE** Qwen1.5-MoE-A2.7B:
|
| 121 |
+
|
| 122 |
+
| | Megatron-LM | Deepspeed-ZeRO2 | Deepspeed-ZeRO3 |
|
| 123 |
+
| -------- | ----------- | ---------- | ---------- |
|
| 124 |
+
| 训练速度 | 2.93s/it | 6.02s/it | 24.30s/it |
|
| 125 |
+
| 显存占用 | 8\*66GB | 8\*72GB | 8\*50GB |
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
## 命令行参数
|
| 129 |
+
|
| 130 |
+
### Megatron参数
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
**训练参数**:
|
| 134 |
+
- 🔥micro_batch_size: 每个device的批次大小,默认为1。
|
| 135 |
+
- 🔥global_batch_size: 总批次大小,等价于`micro_batch_size*数据并行大小*梯度累加步数`。默认为16。
|
| 136 |
+
- 🔥recompute_granularity: 重新计算激活的粒度,可选项为'full', 'selective'。其中full代表重新计算整个transformer layer,selective代表只重新计算transformer layer中的核心注意力部分。通常推荐使用'selective'。默认为'selective'。
|
| 137 |
+
- 🔥recompute_method: 该参数需将recompute_granularity设置为'full'才生效,可选项为'uniform', 'block'。默认为None。
|
| 138 |
+
- 🔥recompute_num_layers: 该参数需将recompute_granularity设置为'full'才生效,默认为None。若`recompute_method`设置为uniform,该参数含义为每个均匀划分的重新计算单元的transformer layers数量。例如你可以指定为`--recompute_granularity full --recompute_method uniform --recompute_num_layers 4`。recompute_num_layers越大,显存占用越小,计算成本越大。
|
| 139 |
+
- recompute_modules: 选项包括"core_attn", "moe_act", "layernorm", "mla_up_proj", "mlp", "moe",默认值为["core_attn"]。例如在MoE训练时,你可以通过指定`--recompute_granularity selective --recompute_modules core_attn moe`降低显存。其中"core_attn"、"mlp" 和 "moe" 使用常规检查点,"moe_act"、"layernorm" 和 "mla_up_proj" 使用输出丢弃检查点。
|
| 140 |
+
- "core_attn":重新计算 Transformer 层中的核心注意力部分。
|
| 141 |
+
- "mlp":重新计算密集的 MLP 层。
|
| 142 |
+
- "moe":重新计算 MoE 层。
|
| 143 |
+
- "moe_act":重新计算 MoE 中的 MLP 激活函数部分。
|
| 144 |
+
- "layernorm":重新计算 input_layernorm 和 pre_mlp_layernorm。
|
| 145 |
+
- "mla_up_proj":重新计算 MLA 上投影和 RoPE 应用部分。
|
| 146 |
+
- deterministic_mode: 确定性模式,这会导致训练速度下降,默认为False。
|
| 147 |
+
- 🔥train_iters: 训练的总迭代次数,默认为None。
|
| 148 |
+
- 🔥log_interval: log的时间间隔(单位:iters),默认为5。
|
| 149 |
+
- tensorboard_dir: tensorboard日志写入的目录。默认None,即存储在`f'{save}/runs'`目录下。
|
| 150 |
+
- no_masked_softmax_fusion: 默认为False。用于禁用query_key_value的scaling、masking和softmax融合。
|
| 151 |
+
- no_bias_dropout_fusion: 默认为False。用于禁用bias和dropout的融合。
|
| 152 |
+
- no_bias_swiglu_fusion: 默认为False。指定`--no_bias_swiglu_fusion true`,用于禁止bias和swiglu融合。
|
| 153 |
+
- no_rope_fusion: 默认为False。指定`--no_rope_fusion true`用于禁止rope融合。
|
| 154 |
+
- no_gradient_accumulation_fusion: 默认为False。指定`--no_gradient_accumulation_fusion true`用于禁用梯度累加融合。
|
| 155 |
+
- 🔥cross_entropy_loss_fusion: 启用交叉熵损失计算融合。默认为False。
|
| 156 |
+
- calculate_per_token_loss: 根据全局批次中的非填充token数量来对交叉熵损失进行缩放。默认为True。
|
| 157 |
+
- 🔥attention_backend: 使用的注意力后端 (flash、fused、unfused、local、auto)。默认为 auto。
|
| 158 |
+
- optimizer: 优化器类型,可选为'adam'、'sgd'。默认为adam。
|
| 159 |
+
- dataloader_type: 默认为'cyclic',可选为'single', 'cyclic', 'external'。若开启`--streaming`,则设置为`external`。
|
| 160 |
+
- manual_gc: 禁用默认垃圾回收器,手动触发垃圾回收。默认为False。
|
| 161 |
+
- manual_gc_interval: 触发垃圾回收的间隔。默认为0。
|
| 162 |
+
- seed: python、numpy、pytorch和cuda的随机种子,默认为42。
|
| 163 |
+
- 🔥num_workers: dataloader的workers数量,默认为4。
|
| 164 |
+
- 注意:若设置`--streaming true`,则设置为1。
|
| 165 |
+
- seq_length: 默认为None,即设置为`max_length`。对数据集长度进行限制请使用基本参数中的`--max_length`控制,无需设置此参数。
|
| 166 |
+
- use_cpu_initialization: 在cpu上初始化权重,默认为False。在进行HF和MCore权重转换时会被使用。
|
| 167 |
+
- no_create_attention_mask_in_dataloader: 在dataloader中不创建attention mask,默认为True。
|
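上面`global_batch_size`与各并行维度之间的换算关系,可以用下面的小段Python示意来验证(仅帮助理解,并非Megatron内部实现),这里以前文快速入门案例中的2卡、tp=2配置为例:

```python
# 数据并行大小 dp = 总卡数 / (tp * pp * cp)
world_size, tp, pp, cp = 2, 2, 1, 1
dp = world_size // (tp * pp * cp)                               # = 1

micro_batch_size, global_batch_size = 4, 16
grad_acc_steps = global_batch_size // (micro_batch_size * dp)   # 梯度累加步数 = 4
assert micro_batch_size * dp * grad_acc_steps == global_batch_size
```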
| 168 |
+
|
| 169 |
+
|
| 170 |
+
**学习率参数**:
|
| 171 |
+
- 🔥lr: 初始学习率,最终会根据学习率预热策略和衰减策略决定每个迭代的学习率,默认为1e-5。
|
| 172 |
+
- lr_decay_style: 学习率衰减策略,默认为'cosine'。通常设置为'cosine', 'linear', 'constant'。
|
| 173 |
+
- 🔥lr_decay_iters: 学习率衰减的迭代次数。默认为None,则设置为`--train_iters`。
|
| 174 |
+
- 🔥lr_warmup_iters: 线性学习率预热的迭代次数,默认为0。
|
| 175 |
+
- 🔥min_lr: 学习率的最小值,将低于该阈值的学习率裁剪为该值,默认为0。
|
| 176 |
+
|
| 177 |
+
**正则化参数**:
|
| 178 |
+
- 🔥weight_decay: 默认为0.1。
|
| 179 |
+
- 🔥clip_grad: l2梯度裁剪,默认为1.0。
|
| 180 |
+
- adam_beta1: 默认0.9。
|
| 181 |
+
- adam_beta2: 默认0.95。
|
| 182 |
+
- adam_eps: 默认1e-8。
|
| 183 |
+
- sgd_momentum: 默认为0.9。
|
| 184 |
+
|
| 185 |
+
**checkpoint参数**:
|
| 186 |
+
- 🔥save: checkpoint的输出目录,默认None。在训练中,若未设置该参数,则默认为`f'megatron_output/{model_suffix}'`,例如`'megatron_output/Qwen2.5-7B-Instruct'`。
|
| 187 |
+
- 注意:若在多机训练时,请确保每个节点的保存路径指向相同位置。否则你需要在训练后手动集中这些权重。
|
| 188 |
+
- 🔥save_interval: checkpoint保存的间隔(steps),默认为500。
|
| 189 |
+
- 注意:训练结束时一定会保存权重。
|
| 190 |
+
- 🔥no_save_optim: 不保存optimizer,默认为False。
|
| 191 |
+
- 🔥no_save_rng: 不保存rng,默认为False。
|
| 192 |
+
- 🔥load: 加载的checkpoint目录,默认None。
|
| 193 |
+
- 🔥no_load_optim: 不载入optimizer,默认为False。
|
| 194 |
+
- 🔥no_load_rng: 不载入rng,默认为False。
|
| 195 |
+
- 🔥finetune: 将模型加载并微调。不加载检查点的优化器和随机种子状态,并将迭代数设置为0。默认为False。
|
| 196 |
+
- ckpt_format: checkpoint的格式。可选为'torch', 'torch_dist', 'zarr'。默认为'torch_dist'。
|
| 197 |
+
- no_initialization: 不对权重进行初始化,默认为True。
|
| 198 |
+
- auto_detect_ckpt_format: 自动检测ckpt format为legacy还是distributed格式。默认为True。
|
| 199 |
+
- exit_on_missing_checkpoint: 如果设置了`--load`,但找不到检查点,则直接退出,而不是初始化。默认为True。
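以下为断点续训的参数片段示例(仅作示意,路径均为占位):默认会同时载入优化器与随机数状态;若只想以该权重为起点重新微调(迭代数清零、不载入优化器状态),可改用`--finetune true`:

```bash
--load megatron_output/Qwen2.5-7B-Instruct/vx-xxx \
--save megatron_output/Qwen2.5-7B-Instruct \
--save_interval 500
```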
|
| 200 |
+
|
| 201 |
+
**分布式参数**:
|
| 202 |
+
- distributed_backend: 分布式后端,可选为'nccl', 'gloo'。默认为nccl。
|
| 203 |
+
- 🔥use_distributed_optimizer: 使用分布式优化器。默认为True。
|
| 204 |
+
- 🔥tensor_model_parallel_size: tp数,默认为1。
|
| 205 |
+
- 🔥pipeline_model_parallel_size: pp数,默认为1。
|
| 206 |
+
- decoder_first_pipeline_num_layers: decoder第一个流水线阶段所包含的Transformer层数。默认为 None,表示将Transformer层数平均分配到所有流水线阶段。
|
| 207 |
+
- decoder_last_pipeline_num_layers: decoder最后一个流水线阶段所包含的Transformer层数。默认为 None,表示将Transformer层数平均分配到所有流水线阶段。
|
| 208 |
+
- 🔥sequence_parallel: 启用序列并行优化(需配合tensor并行使用)。默认为False。
|
| 209 |
+
- 🔥context_parallel_size: cp数,默认为1。
|
| 210 |
+
- tp_comm_overlap: 启用张量并行通信与GEMM(通用矩阵乘法)内核的重叠(降低通信耗时)。默认为False。
|
| 211 |
+
- overlap_grad_reduce: 启用DDP中grad reduce操作的重叠(降低DP通信耗时)。默认为False。
|
| 212 |
+
- overlap_param_gather: 启用分布式优化器中参数all-gather的重叠(降低DP通信耗时)。默认为False。
|
| 213 |
+
- distributed_timeout_minutes: torch.distributed的timeout时间(单位为分钟),默认为60分钟。
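以下为一个单机8卡、TP=2、PP=2的并行配置片段示例(仅作示意;数据并行大小由 总卡数/(tp×pp×cp) 得到,此处为 8/(2×2×1)=2):

```bash
--tensor_model_parallel_size 2 \
--pipeline_model_parallel_size 2 \
--sequence_parallel true \
--use_distributed_optimizer true
```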
|
| 214 |
+
|
| 215 |
+
**日志参数**:
|
| 216 |
+
- log_params_norm: 记录参数的norm。默认为False。
|
| 217 |
+
- log_throughput: 记录每个GPU的吞吐量。默认为True。
|
| 218 |
+
- 注意:在非packing情况下,log_throughput并不准确,因为`seq_length`并不等于真实序列长度。
|
| 219 |
+
- tensorboard_log_interval: 记录到tensorboard的间隔(steps),默认为1。
|
| 220 |
+
- tensorboard_queue_size: 队列长度(与磁盘IO相关),类似于写入的间隔。默认为50。
|
| 221 |
+
- log_timers_to_tensorboard: 记录timers到tensorboard。默认为True。
|
| 222 |
+
- no_log_learning_rate_to_tensorboard: 不记录学习率到tensorboard。默认为False。
|
| 223 |
+
- log_validation_ppl_to_tensorboard: 将验证困惑度写入tensorboard。默认为True。
|
| 224 |
+
- log_memory_to_tensorboard: 将内存日志写入tensorboard。默认为True。
|
| 225 |
+
- logging_level: 日志级别。默认为None。
|
| 226 |
+
- wandb_project: wandb 项目名称。默认为'',即忽略wandb。
|
| 227 |
+
- wandb_exp_name: wandb 实验名称。默认为''。
|
| 228 |
+
- wandb_save_dir: 本地保存 wandb 结果的路径。默认为''。
|
| 229 |
+
|
| 230 |
+
**评估参数**:
|
| 231 |
+
- 🔥eval_iters: 评估的迭代次数,默认为100。
|
| 232 |
+
- 🔥eval_interval: 评估的间隔(steps),默认为None,即设置为save_interval。
|
| 233 |
+
|
| 234 |
+
**混合精度参数**:
|
| 235 |
+
- fp16: fp16模式。默认为None,会根据模型的torch_dtype进行设置。torch_dtype默认读取config.json。
|
| 236 |
+
- bf16: bf16模式。默认为None,会根据模型的torch_dtype进行设置。
|
| 237 |
+
- apply_query_key_layer_scaling: 对`Q * K^T`按`1/层号`进行缩放(例如:第layer_num层则除以layer_num)。这对fp16训练很有帮助。默认为None,即若使用`--fp16`,则设置为True。
|
| 238 |
+
- attention_softmax_in_fp32: 在attention_mask和softmax中使用fp32进行计算。默认为True。
|
| 239 |
+
|
| 240 |
+
**模型参数**: (以下参数通常不需要进行设置,会根据HF模型的config.json进行配置,用户无需关心)
|
| 241 |
+
- num_layers: transformer layers的层数,默认为None。
|
| 242 |
+
- hidden_size: transformer hidden size,默认为None。
|
| 243 |
+
- ffn_hidden_size: transformer FFN层的hidden size。默认为None,设置为`4*hidden_size`。
|
| 244 |
+
- num_attention_heads: transformer attention heads的个数,默认为None。
|
| 245 |
+
- group_query_attention: 默认为None。若`num_query_groups>1`,group_query_attention设置为True,否则为False。
|
| 246 |
+
- num_query_groups: 默认为1。
|
| 247 |
+
- max_position_embeddings: 位置编码的最大长度,默认为None。
|
| 248 |
+
- position_embedding_type: 位置编码的类型,可选为'learned_absolute'、'rope'、'relative'和'none',默认为'rope'。
|
| 249 |
+
- rotary_base: 默认为10000。
|
| 250 |
+
- rotary_percent: 默认为1.0。
|
| 251 |
+
- normalization: 可选为'LayerNorm', 'RMSNorm',默认为RMSNorm。
|
| 252 |
+
- norm_epsilon: 默认为1e-5。
|
| 253 |
+
- swiglu: 使用swiglu替代默认的gelu。默认为True。
|
| 254 |
+
- untie_embeddings_and_output_weights: 解开embedding和输出权重的绑定,默认为True。
|
| 255 |
+
- disable_bias_linear: 禁用linear层的bias。默认为True。
|
| 256 |
+
- add_qkv_bias: 仅在QKV的linear中增加bias,默认为True。
|
| 257 |
+
- attention_dropout: 默认为0.0。
|
| 258 |
+
- hidden_dropout: 默认为0.0。
|
| 259 |
+
- kv_channels: 默认为None,设置为`args.hidden_size // args.num_attention_heads`。
|
| 260 |
+
- qk_layernorm: 是否对Q和K进行层归一化。
|
| 261 |
+
- transformer_impl: 使用哪种transformer实现,可选项为'local'和'transformer_engine'。默认为transformer_engine。
|
| 262 |
+
- padded_vocab_size: 完整词表大小,默认为None。
|
| 263 |
+
- rope_scaling: rope_scaling相关参数,默认为None。格式参考[llama3.1 config.json](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B-Instruct/file/view/master?fileName=config.json&status=1),传入json字符串。
|
| 264 |
+
- model_type: Huggingface模型权重中config.json中的model_type。
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
**MoE参数**:
|
| 268 |
+
- num_experts: MoE的专家数,默认为None。自动从config.json读取。
|
| 269 |
+
- moe_ffn_hidden_size: 每个专家的前馈网络(ffn)的隐藏层大小。默认为None,即设置为ffn_hidden_size。自动从config.json读取。
|
| 270 |
+
- moe_shared_expert_intermediate_size: 共享专家的总FFN隐藏层大小。如果有多个共享专家,它应等于 `num_shared_experts * ffn_size_of_each_shared_expert`。 默认为None。自动从config.json读取。
|
| 271 |
+
- moe_router_topk: 每个token路由到的专家数量。默认为None。自动从config.json读取。
|
| 272 |
+
- moe_router_pre_softmax: 为MoE启用预softmax路由,这意味着softmax会在top-k选择之前进行。默认为None。自动从config.json读取。
|
| 273 |
+
- 🔥moe_aux_loss_coeff: 辅助损失的缩放系数,建议的初始值为1e-2。默认为None,自动从config.json读取。
|
| 274 |
+
- 🔥expert_model_parallel_size: 专家并行数,默认为1。
|
| 275 |
+
- moe_token_dispatcher_type: 要使用的token分发器类型。可选选项包括 'allgather'、'alltoall' 和 'alltoall_seq'。默认值为 'alltoall'。
|
| 276 |
+
- moe_grouped_gemm: 当每个rank包含多个专家时,通过在多个流中启动多个本地 GEMM 内核,利用 TransformerEngine中的GroupedLinear提高利用率和性能。默认为False。
|
| 277 |
+
- moe_router_load_balancing_type: 确定路由器的负载均衡策略。可选项为"aux_loss"、"seq_aux_loss"、"sinkhorn"、"none"。默认值为 "aux_loss"。
|
| 278 |
+
- moe_z_loss_coeff: z-loss 的缩放系数。默认为None。
|
| 279 |
+
- moe_expert_capacity_factor: 每个专家的容量因子,None表示不会丢弃任何token。默认为None。
|
| 280 |
+
- moe_shared_expert_overlap: 启用共享专家计算与调度器通信之间的重叠。如果不启用此选项,共享专家将在路由专家之后执行。仅在设置了`moe_shared_expert_intermediate_size`时有效。默认为False。
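以下为MoE模型训练常用的参数片段示例(仅作示意;专家数等结构参数通常会自动从config.json读取,无需手动指定):

```bash
--expert_model_parallel_size 4 \
--moe_grouped_gemm true \
--moe_aux_loss_coeff 0.01 \
--recompute_granularity selective \
--recompute_modules core_attn moe
```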
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
### Megatron训练参数
|
| 284 |
+
|
| 285 |
+
Megatron训练参数继承自Megatron参数和基本参数。基本参数的内容可以参考[这里](./命令行参数.md#基本参数)。此外还包括以下参数:
|
| 286 |
+
|
| 287 |
+
- add_version: 在`save`上额外增加目录`'<版本号>-<时间戳>'`防止权重覆盖,默认为True。
|
| 288 |
+
- 🔥packing: 是否使用序列packing,默认为False。
|
| 289 |
+
- 🔥streaming: 流式读取并处理数据集,默认False。通常在处理大型数据集时,设置为True。更多流式的参数查看命令行参数文档。
|
| 290 |
+
- lazy_tokenize: 默认为False。若该参数设置为False,则在训练之前对所有的数据集样本进行tokenize(这可以避免在训练中出现报错);设置为True,则在训练中对数据集进行tokenize(这可以节约内存)。
|
| 291 |
+
- dataloader_persistent_workers: 透传入dataloader的参数,默认为True。
|
| 292 |
+
- dataloader_prefetch_factor: 透传入dataloader的参数,默认为10。
|
| 293 |
+
- max_epochs: 训练到`max_epochs`时强制退出训练,并对权重进行验证和保存。该参数在使用流式数据集时很有用。默认为None。
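下面给出一个将上述参数组合起来的完整命令示例(仅作示意:假设单机8卡,且HF权重已通过`swift export --to_mcore`转换为mcore格式,模型与数据集路径均为占位;更多可运行脚本参见examples目录与Megatron-SWIFT训练文档):

```bash
NPROC_PER_NODE=8 \
megatron sft \
    --load Qwen2.5-7B-Instruct-mcore \
    --dataset <dataset_path> \
    --tensor_model_parallel_size 4 \
    --micro_batch_size 1 \
    --global_batch_size 16 \
    --packing true \
    --train_iters 2000 \
    --lr 1e-5 \
    --min_lr 1e-6 \
    --save megatron_output/Qwen2.5-7B-Instruct
```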
|
docs/source/Instruction/ReleaseNote3.0.md
ADDED
|
@@ -0,0 +1,80 @@
|
| 1 |
+
# ReleaseNote 3.0
|
| 2 |
+
|
| 3 |
+
> 如果您在3.x版本使用上遇到任何问题,请提交issue给我们。如存在2.x可用而3.x不可用的情况请暂时使用2.x版本等待我们修复完成。
|
| 4 |
+
|
| 5 |
+
## 新功能
|
| 6 |
+
|
| 7 |
+
1. 数据集模块重构。数据集加载速度提升2-20倍,encode速度提升2-4倍,支持streaming模式
|
| 8 |
+
- 移除了dataset_name机制,采用dataset_id、dataset_dir、dataset_path方式指定数据集
|
| 9 |
+
- 使用`--dataset_num_proc`支持多进程加速处理
|
| 10 |
+
- 使用`--streaming`支持流式加载hub端和本地数据集
|
| 11 |
+
- 支持`--packing`命令以获得更稳定的训练效率
|
| 12 |
+
- 指定`--dataset <dataset_dir>`支持本地加载开源数据集
|
| 13 |
+
2. 对模型进行了重构:
|
| 14 |
+
- 移除了model_type机制,使用`--model <model_id>/<model_path>`来训练和推理
|
| 15 |
+
- 若是新模型,直接使用`--model <model_id>/<model_path> --template xxx --model_type xxx`,无需书写python脚本进行模型注册
|
| 16 |
+
3. template模块重构:
|
| 17 |
+
- 使用`--template_backend jinja`采用jinja模式推理
|
| 18 |
+
- 采用messages格式作为入参接口
|
| 19 |
+
4. 支持了plugin机制,用于定制训练过程,目前支持的plugin有:
|
| 20 |
+
- callback 定制训练回调方法
|
| 21 |
+
- loss 定制loss方法
|
| 22 |
+
- loss_scale 定制每个token的权重
|
| 23 |
+
- metric 定制交叉验证的指标
|
| 24 |
+
- optimizer 定制训练使用的optimizer和lr_scheduler
|
| 25 |
+
- tools 定制agent训练的system格式
|
| 26 |
+
- tuner 定制新的tuner
|
| 27 |
+
5. 训练模块重构:
|
| 28 |
+
- 支持了一行命令启动多机训练,详情查看[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/multi-node/deepspeed/README.md)
|
| 29 |
+
- 支持所有多模态LLM的PreTrain
|
| 30 |
+
- 训练中的predict_with_generate采用infer模块,支持多模态LLM和多卡
|
| 31 |
+
- 人类对齐KTO算法支持多模态LLM
|
| 32 |
+
6. 推理与部署模块重构:
|
| 33 |
+
- 支持pt backend下的batch推理,支持多卡推理
|
| 34 |
+
- 推理和部署模块统一采用openai格式接口
|
| 35 |
+
- 支持了异步推理接口
|
| 36 |
+
7. app-ui合并入web-ui,app-ui支持多模态推理
|
| 37 |
+
8. 支持All-to-All模型,即Emu3-Gen或Janus等文生图或全模态模型的训练和部署等
|
| 38 |
+
9. 对examples进行了功能提升,目前examples可以全面反映SWIFT的能力,易用性更强
|
| 39 |
+
10. 使用`--use_hf true/false`来切换HuggingFace社区和ModelScope社区的数据集模型的下载上传
|
| 40 |
+
11. 更好地支持了以代码形式进行训练、推理,代码结构更清晰,并补充了大量的代码注释
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
## Breaking Changes
|
| 44 |
+
|
| 45 |
+
本文档列举3.x版本相对于2.x版本的不兼容变更(Breaking Changes)。开发者在使用时应当注意这些差异。
|
| 46 |
+
|
| 47 |
+
### 参数差异
|
| 48 |
+
|
| 49 |
+
- model_type的含义发生了变化。3.0版本只需要指定--model,model_type仅当模型为SWIFT不支持模型时才需要额外指定
|
| 50 |
+
- sft_type更名为train_type
|
| 51 |
+
- model_id_or_path更名为model
|
| 52 |
+
- template_type更名为template
|
| 53 |
+
- quantization_bit更名为quant_bits
|
| 54 |
+
- check_model_is_latest更名为check_model
|
| 55 |
+
- batch_size更名为per_device_train_batch_size,沿用了transformers的命名规则
|
| 56 |
+
- eval_batch_size更名为per_device_eval_batch_size,沿用了transformers的命名规则
|
| 57 |
+
- tuner_backend移除了swift选项
|
| 58 |
+
- use_flash_attn更名为attn_impl
|
| 59 |
+
- bnb_4bit_comp_dtype更名为bnb_4bit_compute_dtype
|
| 60 |
+
- 移除了train_dataset_sample和val_dataset_sample
|
| 61 |
+
- dtype更名为torch_dtype,同时选项名称从bf16变更为标准的bfloat16,fp16变更为float16,fp32变更为float32
|
| 62 |
+
- 移除了eval_human选项
|
| 63 |
+
- dataset选项移除了HF::使用方式,使用新增的--use_hf控制下载和上传
|
| 64 |
+
- 移除了do_sample选项,使用temperature进行控制
|
| 65 |
+
- add_output_dir_suffix更名为add_version
|
| 66 |
+
- 移除了eval_token,使用api_key支持
|
| 67 |
+
- target_modules(lora_target_modules)的ALL改为了all-linear,含义相同
|
| 68 |
+
- deepspeed的配置更改为`default-zero2`->`zero2`, `default-zero3`->`zero3`
|
| 69 |
+
- infer/deploy/export移除了--ckpt_dir参数,使用--model, --adapters进行控制
|
| 70 |
+
|
| 71 |
+
2.0中标记为compatible的参数已整体移除。
|
| 72 |
+
|
| 73 |
+
### 功能
|
| 74 |
+
|
| 75 |
+
1. 预训练请使用swift pt命令。该命令会默认使用generation template,而swift sft命令默认使用model_type预置的template
|
| 76 |
+
2. 整体移除了2.x版本的examples目录,并添加了按功能类型划分的新examples
|
| 77 |
+
3. 数据集格式完全向messages格式兼容,不再支持query/response/history格式
|
| 78 |
+
4. merge_lora的存储目录可以通过`--output_dir`指定了,且merge_lora和量化不能在一个命令中执行,需要至少两个命令
|
| 79 |
+
5. 使用`swift app --model xxx`开启app-ui界面,支持了多模态界面推理
|
| 80 |
+
6. 移除了AIGC的依赖以及对应的examples和训练代码
|
docs/source/Instruction/人类对齐.md
ADDED
|
@@ -0,0 +1,115 @@
|
| 1 |
+
# 人类对齐
|
| 2 |
+
|
| 3 |
+
本文档提供了各种人类偏好对齐算法的训练脚本。若您希望深入了解更详尽的算法信息及其选择方法,请参考[文档](https://github.com/modelscope/modelscope-classroom/blob/main/LLM-tutorial/M.%E4%BA%BA%E7%B1%BB%E5%81%8F%E5%A5%BD%E5%AF%B9%E9%BD%90%E8%AE%AD%E7%BB%83.md)
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
## 数据集
|
| 7 |
+
PPO与GRPO算法所需的数据仅为模型输入,也就是system prompt(可选)加上query。其中GRPO中的奖励函数可能需要额外的数据列,比如计算准确率需要`solution`列作为参考答案。
|
| 8 |
+
|
| 9 |
+
RM和DPO类算法(如ORPO、CPO、SimPO)则需要 $(x,y_w,y_l)$ 格式的数据,其中 $x$ 表示模型输入,$y_w,y_l$ 分别表示符合人类偏好的偏好回答和不符合人类偏好的拒绝回答,比如:
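(以下为一条示意数据,字段与格式细节请以[自定义数据集文档](../Customization/自定义数据集.md#rlhf)为准)

```jsonl
{"messages": [{"role": "user", "content": "告诉我明天的天气"}, {"role": "assistant", "content": "明天天气晴朗"}], "rejected_response": "我不知道"}
```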
|
| 10 |
+
|
| 11 |
+
而KTO算法的数据比较特殊,只需要 $(x,y,\text{label})$ 格式的数据,其中 $x$ 表示模型输入,$y$ 表示模型输出,label表示回答是否符合人类偏好
|
| 12 |
+
比如:
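(以下为一条示意数据,其中label为布尔值,字段与格式细节请以[自定义数据集文档](../Customization/自定义数据集.md#rlhf)为准)

```jsonl
{"messages": [{"role": "user", "content": "告诉我明天的天气"}, {"role": "assistant", "content": "明天天气晴朗"}], "label": true}
```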
|
| 13 |
+
|
| 14 |
+
使用自定义数据集对文本模型或者多模态大模型进行RLHF训练可以参考[自定义数据集文档](../Customization/自定义数据集.md#rlhf)。
|
| 15 |
+
|
| 16 |
+
## GRPO
|
| 17 |
+
[论文arXiv](https://arxiv.org/abs/2402.03300)
|
| 18 |
+
|
| 19 |
+
训练脚本参考[这里](./GRPO.md).
|
| 20 |
+
|
| 21 |
+
## DPO
|
| 22 |
+
[论文arXiv](https://arxiv.org/abs/2305.18290)
|
| 23 |
+
|
| 24 |
+
超参
|
| 25 |
+
|
| 26 |
+
- beta:KL正则系数,值越大表示对偏离参考模型的惩罚越大。默认为0.1
|
| 27 |
+
|
| 28 |
+
建议在开始DPO训练之前,使用偏好数据集中的偏好回答部分进行SFT训练,以确保数据符合DPO算法的分布要求。
|
| 29 |
+
我们也在DPO loss中混合了sft loss来稳定训练,你可以通过设置超参`rpo_alpha`来调整sft loss的系数,默认为`1.0`。
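下面给出一个DPO训练命令的最小示例(仅作示意,模型与数据集为占位,完整可运行脚本参见下方链接):

```bash
swift rlhf \
    --rlhf_type dpo \
    --model Qwen/Qwen2.5-7B-Instruct \
    --dataset <preference_dataset> \
    --train_type lora \
    --beta 0.1 \
    --rpo_alpha 1.0
```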
|
| 30 |
+
|
| 31 |
+
训练脚本参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/rlhf/dpo.sh).
|
| 32 |
+
|
| 33 |
+
## RM
|
| 34 |
+
[论文arXiv](https://arxiv.org/abs/2203.02155)
|
| 35 |
+
|
| 36 |
+
RLHF中的Reward Modeling阶段
|
| 37 |
+
|
| 38 |
+
使用sft训练后的base model或者instruct model作为基底模型, 增加value head, 使用偏好数据集训练为reward model
|
| 39 |
+
|
| 40 |
+
增加的value head权重会保存在`value_head.safetensors` 或 `value_head.bin`文件中
|
| 41 |
+
|
| 42 |
+
训练脚本参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/rlhf/rm.sh).
|
| 43 |
+
|
| 44 |
+
## PPO
|
| 45 |
+
[论文arXiv](https://arxiv.org/abs/2203.02155)
|
| 46 |
+
|
| 47 |
+
RLHF中的PPO(proximal policy optimization)阶段, 涉及到四个模型
|
| 48 |
+
- model: 训练模型, sft训练后的base model或者instruct model
|
| 49 |
+
- ref_model: 参考模型, 默认为 model
|
| 50 |
+
- reward_model: 奖励模型, 由RM阶段训练得到
|
| 51 |
+
- value_model: 价值模型, 由reward_model初始化, 在训练中同步更新
|
| 52 |
+
|
| 53 |
+
超参
|
| 54 |
+
|
| 55 |
+
- local_rollout_forward_batch_size: 每次数据采样的批量大小, 默认为64
|
| 56 |
+
- whiten_rewards: 对奖励进行归一化处理, 默认为False
|
| 57 |
+
- kl_coef: KL散度项的系数, 默认为0.05
|
| 58 |
+
- cliprange: PPO策略损失函数中的clip范围, 默认为0.2
|
| 59 |
+
- vf_coef: 价值损失函数系数, 默认为0.1
|
| 60 |
+
- cliprange_value: PPO价值损失函数中的clip范围, 默认为0.2
|
| 61 |
+
- gamma: 累计奖励的折扣因子, 默认为1.0
|
| 62 |
+
- lam: [GAE](https://arxiv.org/abs/1506.02438)中的lambda系数, 默认为0.95
|
| 63 |
+
- num_sample_generations: 训练过程中生成的调试样本数量, 默认为10
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
注意:训练base model时,需要先进行SFT再进行RLHF,并指定chat template,sft_type建议使用full。
|
| 67 |
+
|
| 68 |
+
训练中的指标解释参考[文档](https://huggingface.co/docs/trl/ppov2_trainer#explanation-of-the-logged-metrics)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
## KTO
|
| 72 |
+
[论文arXiv](https://arxiv.org/abs/2402.01306)
|
| 73 |
+
|
| 74 |
+
超参
|
| 75 |
+
|
| 76 |
+
- beta: KL正则系数,值越大表示对偏离参考模型的惩罚越大。默认为0.1
|
| 77 |
+
- desirable_weight:损失函数中的$\lambda_D$项,偏好回答样本的损失权重,默认为1.0
|
| 78 |
+
- undesirable_weight:损失函数中的$\lambda_U$项,拒绝回答样本的损失权重,默认为1.0
|
| 79 |
+
|
| 80 |
+
用 $n_D$ 和 $n_U$ 分别表示数据集中偏好回答和拒绝回答的样本数量,对于超参 $\lambda_D$ 和 $\lambda_U$ ,作者推荐设置 $\frac{\lambda_Dn_D}{\lambda_Un_U}\in[1,\frac{4}{3}]$
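例如(仅作说明):若数据集中偏好样本 $n_D=8000$、拒绝样本 $n_U=6000$,取 $\lambda_U=1.0$,则将 $\lambda_D$ 设置在 $[0.75, 1.0]$ 区间内即可满足上述建议($0.75\times 8000/6000=1$,$1.0\times 8000/6000\approx 1.33$)。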
|
| 81 |
+
|
| 82 |
+
训练脚本
|
| 83 |
+
使用 $(x,y,\text{label})$ 格式数据训练
|
| 84 |
+
|
| 85 |
+
训练脚本参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/rlhf/kto.sh).
|
| 86 |
+
|
| 87 |
+
## CPO
|
| 88 |
+
[论文arXiv](https://arxiv.org/abs/2401.08417)
|
| 89 |
+
超参
|
| 90 |
+
|
| 91 |
+
- beta:隐含奖励前的系数,默认为0.1
|
| 92 |
+
- cpo_alpha: nll loss系数, 默认为1.0
|
| 93 |
+
|
| 94 |
+
训练脚本参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/rlhf/cpo.sh).
|
| 95 |
+
|
| 96 |
+
## ORPO
|
| 97 |
+
[论文arXiv](https://arxiv.org/abs/2403.07691)
|
| 98 |
+
|
| 99 |
+
超参
|
| 100 |
+
|
| 101 |
+
- lambda: Odds Ratio loss系数
|
| 102 |
+
|
| 103 |
+
注意:ORPO使用参数`--beta`传入超参`lambda`
|
| 104 |
+
|
| 105 |
+
训练脚本参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/rlhf/orpo.sh).
|
| 106 |
+
|
| 107 |
+
## SimPO
|
| 108 |
+
[论文arXiv](https://arxiv.org/abs/2405.14734)
|
| 109 |
+
超参
|
| 110 |
+
|
| 111 |
+
- beta:隐含奖励前的系数,默认为2.0
|
| 112 |
+
- simpo_gamma:reward margin项,默认为1.0
|
| 113 |
+
- cpo_alpha: 混合CPO nll loss提高训练稳定性, 默认为1.0, 设置0.0使用原始SimPO算法
|
| 114 |
+
|
| 115 |
+
训练脚本参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/rlhf/simpo.sh).
|
docs/source/Instruction/使用tuners.md
ADDED
|
@@ -0,0 +1,133 @@
|
| 1 |
+
# 使用Tuners
|
| 2 |
+
|
| 3 |
+
tuner是指附加在模型上的额外结构部分,用于减少训练参数量或者提高训练精度。目前SWIFT支持的tuners有:
|
| 4 |
+
|
| 5 |
+
- LoRA: [LORA: LOW-RANK ADAPTATION OF LARGE LANGUAGE MODELS](https://arxiv.org/abs/2106.09685)
|
| 6 |
+
- LoRA+: [LoRA+: Efficient Low Rank Adaptation of Large Models](https://arxiv.org/pdf/2402.12354.pdf)
|
| 7 |
+
- LLaMA PRO: [LLAMA PRO: Progressive LLaMA with Block Expansion](https://arxiv.org/pdf/2401.02415.pdf)
|
| 8 |
+
- GaLore/Q-GaLore: [GaLore: Memory-Efficient LLM Training by Gradient Low-Rank Projection](https://arxiv.org/abs/2403.03507)
|
| 9 |
+
- Liger Kernel: [Liger Kernel: Efficient Triton Kernels for LLM Training](https://arxiv.org/abs/2410.10989)
|
| 10 |
+
- LISA: [LISA: Layerwise Importance Sampling for Memory-Efficient Large Language Model Fine-Tuning](https://arxiv.org/abs/2403.17919)
|
| 11 |
+
- UnSloth: https://github.com/unslothai/unsloth
|
| 12 |
+
- SCEdit: [SCEdit: Efficient and Controllable Image Diffusion Generation via Skip Connection Editing](https://arxiv.org/abs/2312.11392) < [arXiv](https://arxiv.org/abs/2312.11392) | [Project Page](https://scedit.github.io/) >
|
| 13 |
+
- NEFTune: [Noisy Embeddings Improve Instruction Finetuning](https://arxiv.org/abs/2310.05914)
|
| 14 |
+
- LongLoRA: [Efficient Fine-tuning of Long-Context Large Language Models](https://arxiv.org/abs/2309.12307)
|
| 15 |
+
- Adapter: [Parameter-Efficient Transfer Learning for NLP](http://arxiv.org/abs/1902.00751)
|
| 16 |
+
- Vision Prompt Tuning: [Visual Prompt Tuning](https://arxiv.org/abs/2203.12119)
|
| 17 |
+
- Side: [Side-Tuning: A Baseline for Network Adaptation via Additive Side Networks](https://arxiv.org/abs/1912.13503)
|
| 18 |
+
- Res-Tuning: [Res-Tuning: A Flexible and Efficient Tuning Paradigm via Unbinding Tuner from Backbone](https://arxiv.org/abs/2310.19859) < [arXiv](https://arxiv.org/abs/2310.19859) | [Project Page](https://res-tuning.github.io/) | [Usage](ResTuning.md) >
|
| 19 |
+
- [PEFT](https://github.com/huggingface/peft)提供的tuners, 如AdaLoRA、DoRA、Fourierft等
|
| 20 |
+
|
| 21 |
+
## 接口列表
|
| 22 |
+
|
| 23 |
+
### Swift类静态接口
|
| 24 |
+
|
| 25 |
+
- `Swift.prepare_model(model, config, **kwargs)`
|
| 26 |
+
- 接口作用:加载某个tuner到模型上。如果config是PeftConfig的子类,则使用Peft库的对应接口加载tuner;在使用SwiftConfig的情况下,本接口可以传入SwiftModel实例并重复调用,此时和config传入字典的效果相同。
|
| 27 |
+
- 本接口支持并行加载不同类型的多个tuners共同使用
|
| 28 |
+
- 参数:
|
| 29 |
+
- `model`: `torch.nn.Module`或`SwiftModel`的实例,被加载的模型
|
| 30 |
+
- `config`: `SwiftConfig`、`PeftConfig`的实例,或者一个自定义tuner名称对config的字典
|
| 31 |
+
- 返回值:`SwiftModel`或`PeftModel`的实例
|
| 32 |
+
- `Swift.merge_and_unload(model)`
|
| 33 |
+
- 接口作用:将LoRA weights合并回原模型,并将LoRA部分完全卸载
|
| 34 |
+
- 参数:
|
| 35 |
+
- model: `SwiftModel`或`PeftModel`的实例,已加载LoRA的模型实例
|
| 36 |
+
- 返回值:None
|
| 37 |
+
|
| 38 |
+
- `Swift.merge(model)`
|
| 39 |
+
|
| 40 |
+
- 接口作用:将LoRA weights合并回原模型,不卸载LoRA部分
|
| 41 |
+
|
| 42 |
+
- 参数:
|
| 43 |
+
- model: `SwiftModel`或`PeftModel`的实例,已加载LoRA的模型实例
|
| 44 |
+
|
| 45 |
+
- 返回值:None
|
| 46 |
+
|
| 47 |
+
- `Swift.unmerge(model)`
|
| 48 |
+
|
| 49 |
+
- 接口作用:将LoRA weights从原模型weights中拆分回LoRA结构
|
| 50 |
+
|
| 51 |
+
- 参数:
|
| 52 |
+
- model: `SwiftModel`或`PeftModel`的实例,已加载LoRA的模型实例
|
| 53 |
+
|
| 54 |
+
- 返回值:None
|
| 55 |
+
|
| 56 |
+
- `Swift.save_to_peft_format(ckpt_dir, output_dir)`
|
| 57 |
+
|
| 58 |
+
- 接口作用:将存储的LoRA checkpoint转换为Peft兼容的格式。主要改变有:
|
| 59 |
+
|
| 60 |
+
- `default`会从对应的`default`文件夹中拆分到output_dir根目录中
|
| 61 |
+
- weights中的`{tuner_name}.`字段会被移除,如`model.layer.0.self.in_proj.lora_A.default.weight`会变为`model.layer.0.self.in_proj.lora_A.weight`
|
| 62 |
+
- weights中的key会增加`base_model.model`前缀
|
| 63 |
+
|
| 64 |
+
- 注意:只有LoRA可以被转换,其他类型tuner由于Peft本身不支持,因此会报转换错误。此外,由于LoRAConfig中存在额外参数,如`dtype`,因此在这些参数有设定的情况下,不支持转换为Peft格式,此时可以手动删除adapter_config.json中的对应字段
|
| 65 |
+
|
| 66 |
+
- 参数:
|
| 67 |
+
|
| 68 |
+
- ckpt_dir:原weights目录
|
| 69 |
+
- output_dir:目标weights目录
|
| 70 |
+
|
| 71 |
+
- 返回值:None
|
| 72 |
+
|
| 73 |
+
- `Swift.from_pretrained(model, model_id, adapter_name, revision, **kwargs)`
|
| 74 |
+
- 接口作用:从存储的weights目录中加载tuner到模型上。如果adapter_name不传,则会将model_id目录下所有的tuners都加载起来。与`prepare_model`相同,本接口可以重复调用
|
| 75 |
+
- 参数:
|
| 76 |
+
- model:`torch.nn.Module`或`SwiftModel`的实例,被加载的模型
|
| 77 |
+
- model_id:`str`类型,待加载的tuner checkpoint, 可以是魔搭hub的id,或者训练产出的本地目录
|
| 78 |
+
- adapter_name:`str`或`List[str]`或`Dict[str, str]`类型或`None`,待加载tuner目录中的tuner名称,如果为`None`则加载所有名称的tuners,如果是`str`或`List[str]`则只加载某些具体的tuner,如果是`Dict`,则将`key`指代的tuner加载起来后换成`value`的名字
|
| 79 |
+
- revision: 如果model_id是魔搭的id,则revision可以指定对应版本号
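下面给出一个使用上述静态接口的最小Python示例(仅作示意:`ToyModel`为演示用的假想模块,实际使用时请替换为真实模型;各接口的行为以上文说明为准):

```python
# 最小示例:为toy模型加载LoRA,查看可训练参数,保存并重新加载tuner权重
import torch.nn as nn

from swift import LoraConfig, Swift


class ToyModel(nn.Module):

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(128, 128)
        self.head = nn.Linear(128, 10)

    def forward(self, x):
        return self.head(self.linear(x))


model = ToyModel()
lora_config = LoraConfig(r=8, lora_alpha=32, lora_dropout=0.05, target_modules=['linear'])
model = Swift.prepare_model(model, lora_config)  # 返回SwiftModel实例
print(model.get_trainable_parameters())  # 打印可训练参数信息
model.save_pretrained('output/toy_lora')  # 存储tuner权重

# 从本地目录将tuner重新加载到一个新的模型实例上
model2 = Swift.from_pretrained(ToyModel(), 'output/toy_lora')
```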
|
| 80 |
+
|
| 81 |
+
### SwiftModel接口
|
| 82 |
+
|
| 83 |
+
下面列出用户可能调用的接口列表,其他内部接口或不推荐使用的接口可以通过`make docs`命令查看API Doc文档。
|
| 84 |
+
|
| 85 |
+
- `SwiftModel.create_optimizer_param_groups(self, **defaults)`
|
| 86 |
+
- 接口作用:根据加载的tuners创建parameter groups,目前仅对`LoRA+`算法有作用
|
| 87 |
+
- 参数:
|
| 88 |
+
- defaults:`optimizer_groups`的默认参数,如`lr`和`weight_decay`
|
| 89 |
+
- 返回值:
|
| 90 |
+
- 创建的`optimizer_groups`
|
| 91 |
+
|
| 92 |
+
- `SwiftModel.add_weighted_adapter(self, ...)`
|
| 93 |
+
- 接口作用:将已有的LoRA tuners合并为一个
|
| 94 |
+
- 参数:
|
| 95 |
+
- 本接口是PeftModel.add_weighted_adapter的透传,参数可以参考:[add_weighted_adapter文档](https://huggingface.co/docs/peft/main/en/package_reference/lora#peft.LoraModel.add_weighted_adapter)
|
| 96 |
+
|
| 97 |
+
- `SwiftModel.save_pretrained(self, save_directory, safe_serialization, adapter_name)`
|
| 98 |
+
- 接口作用:存储tuner weights
|
| 99 |
+
- 参数:
|
| 100 |
+
- save_directory:存储目录
|
| 101 |
+
- safe_serialization: 是否使用safetensors格式存储,默认为False
|
| 102 |
+
- adapter_name:存储的adapter tuner,如果不传则默认存储所有的tuners
|
| 103 |
+
- `SwiftModel.set_active_adapters(self, adapter_names, offload=None)`
|
| 104 |
+
- 接口作用:设置当前激活的adapters,不在列表中的adapters会被失活
|
| 105 |
+
- 在`推理`时支持环境变量`USE_UNIQUE_THREAD=0/1`,默认值`1`,如果为`0`则set_active_adapters只对当前线程生效,此时默认使用本线程激活的tuners,不同线程tuners互不干扰
|
| 106 |
+
- 参数:
|
| 107 |
+
- adapter_names:激活的tuners
|
| 108 |
+
- offload:失活的adapters如何处理,默认为`None`代表留在显存中,同时支持`cpu`和`meta`,代表offload到cpu和meta设备中以减轻显存消耗,在`USE_UNIQUE_THREAD=0`时offload不要传值以免影响其他线程
|
| 109 |
+
- 返回值:None
|
| 110 |
+
- `SwiftModel.activate_adapter(self, adapter_name)`
|
| 111 |
+
- 接口作用:激活一个tuner
|
| 112 |
+
- 在`推理`时支持环境变量`USE_UNIQUE_THREAD=0/1`,默认值`1`,如果为`0`则activate_adapter只对当前线程生效,此时默认使用本线程激活的tuners,不同线程tuners互不干扰
|
| 113 |
+
- 参数:
|
| 114 |
+
- adapter_name:待激活的tuner名字
|
| 115 |
+
- 返回值:None
|
| 116 |
+
- `SwiftModel.deactivate_adapter(self, adapter_name, offload)`
|
| 117 |
+
- 接口作用:失活一个tuner
|
| 118 |
+
- 在`推理`时环境变量`USE_UNIQUE_THREAD=0`时不要调用本接口
|
| 119 |
+
- 参数:
|
| 120 |
+
- adapter_name:待失活的tuner名字
|
| 121 |
+
- offload:失活的adapters如何处理,默认为`None`代表留在显存中,同时支持`cpu`和`meta`,代表offload到cpu和meta设备中以减轻显存消耗
|
| 122 |
+
- 返回值:None
|
| 123 |
+
|
| 124 |
+
- `SwiftModel.get_trainable_parameters(self)`
|
| 125 |
+
|
| 126 |
+
- 接口作用:返回训练参数信息
|
| 127 |
+
|
| 128 |
+
- 参数:无
|
| 129 |
+
|
| 130 |
+
- 返回值:训练参数信息,格式如下:
|
| 131 |
+
```text
|
| 132 |
+
trainable params: 100M || all params: 1000M || trainable%: 10.00% || cuda memory: 10GiB.
|
| 133 |
+
```
|