update anthropic endpoint test #7083
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Pull-request end-to-end test workflow. | |
| name: pr_ete_test | |
| # Triggers: pull requests that touch any of the paths listed below, plus manual runs | |
| # started through workflow_dispatch. | |
| on: | |
| pull_request: | |
| paths: | |
| - ".github/workflows/pr_ete_test.yml" | |
| - "cmake/**" | |
| - "src/**" | |
| - "autotest/**" | |
| - "3rdparty/**" | |
| - "lmdeploy/**" | |
| - "requirements/**" | |
| - "requirements_cuda.txt" | |
| - "CMakeLists.txt" | |
| - "setup.py" | |
| workflow_dispatch: | |
| # One concurrency group per workflow and pull request number (the git ref is used when the | |
| # event carries no pull request number). A new run in a group cancels the run in progress. | |
| concurrency: | |
| group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} | |
| cancel-in-progress: true | |
| # Environment variables shared by every job in this workflow. | |
| env: | |
| # NOTE(review): HOST_PIP_CACHE_DIR and HOST_LOCALTIME are not referenced by any step visible | |
| # in this file, and the container volume list uses its own literal host paths — confirm | |
| # whether these two are still needed. | |
| HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache | |
| HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai | |
| # Runner opt-in flag; presumably allows actions to run on a Node.js version the runner | |
| # considers insecure/deprecated — confirm against the runner documentation. | |
| ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true | |
| # Puts the offline LLaVA package directory on Python's module search path. | |
| PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA | |
| jobs: | |
| # Single job: runs inside a container on a self-hosted runner labelled linux-a100-pr, | |
| # with a 120-minute limit for the whole job. | |
| pr_functions_test: | |
| runs-on: [self-hosted, linux-a100-pr] | |
| timeout-minutes: 120 | |
| env: | |
| # Per-run output directories, named from the pull request's head branch and the run id. | |
| # The branch name is contributor-controlled text. NOTE(review): head_ref is presumably | |
| # empty for workflow_dispatch runs, leaving a name of the form "_<run_id>" — confirm. | |
| REPORT_DIR: /nvme/qa_test_models/test-reports/${{ github.head_ref }}_${{ github.run_id }} | |
| SERVER_LOG: /nvme/qa_test_models/server_log/${{ github.head_ref }}_${{ github.run_id }} | |
| container: | |
| image: openmmlab/lmdeploy:dev-cu12.8 | |
| # All GPUs, host IPC namespace, root user, pip cache inside the container at | |
| # /root/.cache/pip. "--pull never" means the image must already be present on the host. | |
| options: --gpus all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never | |
| # Host paths mounted into the container: pip cache, packages, model/report storage, | |
| # three /mnt shares (same path inside and outside), and a read-only Asia/Shanghai localtime. | |
| volumes: | |
| - /nvme/share_data/github-actions/pip-cache:/root/.cache/pip | |
| - /nvme/share_data/github-actions/packages:/root/packages | |
| - /nvme/qa_test_models:/nvme/qa_test_models | |
| - /mnt/121:/mnt/121 | |
| - /mnt/104:/mnt/104 | |
| - /mnt/bigdisk:/mnt/bigdisk | |
| - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro | |
| steps: | |
| # Check out the repository with the actions/checkout action (default inputs). | |
| - name: Clone repository | |
| uses: actions/checkout@v6 | |
| # Install the CUDA runtime requirements (upgrading already-installed packages), the lite | |
| # and test requirements, and finally lmdeploy itself in editable mode from the checkout. | |
| - name: Install lmdeploy | |
| run: | | |
| python3 -m pip install --upgrade -r requirements/runtime_cuda.txt | |
| python3 -m pip install -r requirements/lite.txt | |
| python3 -m pip install -r requirements/test.txt | |
| python3 -m pip install -e . | |
| # Print the installed packages and lmdeploy's environment report, then create the per-run | |
| # report and server-log directories and record the start time in status.txt. | |
| - name: Check env | |
| run: | | |
| python3 -m pip list | |
| lmdeploy check_env | |
| # REPORT_DIR and SERVER_LOG are job-level environment variables that embed the pull | |
| # request's branch name. Read them as quoted shell variables instead of pasting their | |
| # text into the script, so an unusual branch name can never be parsed as shell syntax. | |
| mkdir -p -- "$REPORT_DIR" "$SERVER_LOG" | |
| echo "starttime=$(date +%s)" > "$REPORT_DIR/status.txt" | |
| # Functional tests selected by the pr_test marker: the gpu_num_2 cases first, then the | |
| # gpu_num_1 cases with "-n 2". "-x" stops each run at its first failure. | |
| - name: Test lmdeploy - func | |
| run: | | |
| # The Allure output directory is read from the quoted REPORT_DIR environment variable | |
| # (it embeds the PR branch name) rather than being pasted into the script text. | |
| # NOTE(review): --clean-alluredir may also remove the status.txt written by the | |
| # previous step, since it lives in the same directory — confirm. | |
| pytest autotest -m 'pr_test and gpu_num_2' -x --alluredir="$REPORT_DIR" --clean-alluredir | |
| pytest autotest -m 'pr_test and gpu_num_1' -n 2 -x --alluredir="$REPORT_DIR" | |
| # Serve Qwen3.5-35B-A3B with the pytorch backend on GPUs 6 and 7, wait until it reports | |
| # healthy, then run the chat-completions, generate and tool-call suites against it. | |
| - name: Test restful server - pytorch Qwen3.5-35B-A3B | |
| run: | | |
| # Server output goes to a per-run log file. SERVER_LOG and REPORT_DIR embed the PR branch | |
| # name, so they are read as quoted shell variables, never pasted into the script text. | |
| CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3.5-35B-A3B --tp 2 --backend pytorch --logprobs-mode raw_logprobs --enable-return-routed-experts --reasoning-parser qwen-qwq --tool-call-parser qwen3coder --speculative-algorithm qwen3_5_mtp --speculative-num-draft-tokens 4 --max-batch-size 256 --allow-terminate-by-client > "$SERVER_LOG/pytorch_Qwen3.5-35B-A3B_start_restful.log" 2>&1 & | |
| server_pid=$! | |
| echo "restful_pid=$server_pid" | |
| # Stop the server on every exit path: tests passed, a pytest run failed (the runner's | |
| # default shell is "bash -e", so the script ends right there), or the health check timed | |
| # out. Try the HTTP terminate endpoint first, then signal the launcher; both best-effort. | |
| # Assumes the server listens on 127.0.0.1:23333 (no port is passed above) — confirm. | |
| trap 'curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || kill "$server_pid" 2> /dev/null || true' EXIT | |
| # Poll /health every 5 seconds, at most 180 times (about 15 minutes). | |
| for i in $(seq 1 180) | |
| do | |
| sleep 5 | |
| echo "health check try $i" | |
| if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then | |
| pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3.5-35B-A3B and pytorch and not Qwen/Qwen3.5-35B-A3B-FP8' -m 'not not_pytorch and not internlm2_5 and not interns1 and pr_test' --alluredir="$REPORT_DIR" | |
| pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3.5-35B-A3B and pytorch and not Qwen/Qwen3.5-35B-A3B-FP8' -m 'not not_pytorch' --alluredir="$REPORT_DIR" | |
| pytest autotest/interface/restful/tool_parser/test_tool_call_basic.py -n 20 -k 'Qwen/Qwen3.5-35B-A3B and pytorch and not Qwen/Qwen3.5-35B-A3B-FP8' --alluredir="$REPORT_DIR" | |
| exit 0 | |
| fi | |
| done | |
| # The server never became healthy: print its log so the failure can be diagnosed. | |
| echo "health check fail" | |
| cat "$SERVER_LOG/pytorch_Qwen3.5-35B-A3B_start_restful.log" | |
| exit 1 | |
| # Serve Qwen3-32B with the turbomind backend on GPUs 6 and 7, wait until it reports | |
| # healthy, then run the chat-completions and generate suites against it. | |
| - name: Test restful server - turbomind Qwen3-32B | |
| run: | | |
| # Server output goes to a per-run log file. SERVER_LOG and REPORT_DIR embed the PR branch | |
| # name, so they are read as quoted shell variables, never pasted into the script text. | |
| CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-32B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client > "$SERVER_LOG/turbomind_Qwen3-32B_start_restful.log" 2>&1 & | |
| server_pid=$! | |
| echo "restful_pid=$server_pid" | |
| # Stop the server on every exit path: tests passed, a pytest run failed (the runner's | |
| # default shell is "bash -e", so the script ends right there), or the health check timed | |
| # out. Try the HTTP terminate endpoint first, then signal the launcher; both best-effort. | |
| # Assumes the server listens on 127.0.0.1:23333 (no port is passed above) — confirm. | |
| trap 'curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || kill "$server_pid" 2> /dev/null || true' EXIT | |
| # Poll /health every 5 seconds, at most 180 times (about 15 minutes). | |
| for i in $(seq 1 180) | |
| do | |
| sleep 5 | |
| echo "health check try $i" | |
| if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then | |
| pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-32B and turbomind' -m 'not not_turbomind and not internlm2_5 and not interns1 and pr_test' --alluredir="$REPORT_DIR" | |
| pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-32B and turbomind' -m 'not not_turbomind and not experts' --alluredir="$REPORT_DIR" | |
| exit 0 | |
| fi | |
| done | |
| # The server never became healthy: print its log so the failure can be diagnosed. | |
| echo "health check fail" | |
| cat "$SERVER_LOG/turbomind_Qwen3-32B_start_restful.log" | |
| exit 1 | |
| # Serve InternVL3-38B with the turbomind backend on GPUs 6 and 7, wait until it reports | |
| # healthy, then run the chat-completions and generate suites against it. | |
| - name: Test restful server - turbomind InternVL3-38B | |
| run: | | |
| # Server output goes to a per-run log file. SERVER_LOG and REPORT_DIR embed the PR branch | |
| # name, so they are read as quoted shell variables, never pasted into the script text. | |
| CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/OpenGVLab/InternVL3-38B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client --trust-remote-code > "$SERVER_LOG/turbomind_InternVL3-38B_start_restful.log" 2>&1 & | |
| server_pid=$! | |
| echo "restful_pid=$server_pid" | |
| # Stop the server on every exit path: tests passed, a pytest run failed (the runner's | |
| # default shell is "bash -e", so the script ends right there), or the health check timed | |
| # out. Try the HTTP terminate endpoint first, then signal the launcher; both best-effort. | |
| # Assumes the server listens on 127.0.0.1:23333 (no port is passed above) — confirm. | |
| trap 'curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || kill "$server_pid" 2> /dev/null || true' EXIT | |
| # Poll /health every 5 seconds, at most 180 times (about 15 minutes). | |
| for i in $(seq 1 180) | |
| do | |
| sleep 5 | |
| echo "health check try $i" | |
| if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then | |
| pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'OpenGVLab/InternVL3-38B and turbomind' -m 'not not_turbomind and not internlm2_5 and not interns1 and pr_test' --alluredir="$REPORT_DIR" | |
| pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'OpenGVLab/InternVL3-38B and turbomind' -m 'not not_turbomind and not experts' --alluredir="$REPORT_DIR" | |
| exit 0 | |
| fi | |
| done | |
| # The server never became healthy: print its log so the failure can be diagnosed. | |
| echo "health check fail" | |
| cat "$SERVER_LOG/turbomind_InternVL3-38B_start_restful.log" | |
| exit 1 | |
| # Serve Qwen3-30B-A3B with the turbomind backend on GPUs 6 and 7, wait until it reports | |
| # healthy, then run the chat-completions and generate suites against it. | |
| - name: Test restful server - turbomind Qwen3-30B-A3B | |
| run: | | |
| # Server output goes to a per-run log file. SERVER_LOG and REPORT_DIR embed the PR branch | |
| # name, so they are read as quoted shell variables, never pasted into the script text. | |
| CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-30B-A3B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client > "$SERVER_LOG/turbomind_Qwen3-30B-A3B_start_restful.log" 2>&1 & | |
| server_pid=$! | |
| echo "restful_pid=$server_pid" | |
| # Stop the server on every exit path: tests passed, a pytest run failed (the runner's | |
| # default shell is "bash -e", so the script ends right there), or the health check timed | |
| # out. Try the HTTP terminate endpoint first, then signal the launcher; both best-effort. | |
| # Assumes the server listens on 127.0.0.1:23333 (no port is passed above) — confirm. | |
| trap 'curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || kill "$server_pid" 2> /dev/null || true' EXIT | |
| # Poll /health every 5 seconds, at most 180 times (about 15 minutes). | |
| for i in $(seq 1 180) | |
| do | |
| sleep 5 | |
| echo "health check try $i" | |
| if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then | |
| pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-30B-A3B and turbomind' -m 'not not_turbomind and not internlm2_5 and not interns1 and pr_test' --alluredir="$REPORT_DIR" | |
| pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-30B-A3B and turbomind' -m 'not not_turbomind and not experts' --alluredir="$REPORT_DIR" | |
| exit 0 | |
| fi | |
| done | |
| # The server never became healthy: print its log so the failure can be diagnosed. | |
| echo "health check fail" | |
| cat "$SERVER_LOG/turbomind_Qwen3-30B-A3B_start_restful.log" | |
| exit 1 | |
| # Serve Qwen3-30B-A3B with the pytorch backend (routed-experts output enabled) on GPUs 6 | |
| # and 7, wait until it reports healthy, then run the chat-completions and generate suites. | |
| - name: Test restful server - pytorch Qwen3-30B-A3B | |
| run: | | |
| # Server output goes to a per-run log file. SERVER_LOG and REPORT_DIR embed the PR branch | |
| # name, so they are read as quoted shell variables, never pasted into the script text. | |
| CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-30B-A3B --tp 2 --backend pytorch --logprobs-mode raw_logprobs --enable-return-routed-experts --allow-terminate-by-client > "$SERVER_LOG/pytorch_Qwen3-30B-A3B_start_restful.log" 2>&1 & | |
| server_pid=$! | |
| echo "restful_pid=$server_pid" | |
| # Stop the server on every exit path: tests passed, a pytest run failed (the runner's | |
| # default shell is "bash -e", so the script ends right there), or the health check timed | |
| # out. Try the HTTP terminate endpoint first, then signal the launcher; both best-effort. | |
| # Assumes the server listens on 127.0.0.1:23333 (no port is passed above) — confirm. | |
| trap 'curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || kill "$server_pid" 2> /dev/null || true' EXIT | |
| # Poll /health every 5 seconds, at most 180 times (about 15 minutes). | |
| for i in $(seq 1 180) | |
| do | |
| sleep 5 | |
| echo "health check try $i" | |
| if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then | |
| pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-30B-A3B and pytorch' -m 'not not_pytorch and not internlm2_5 and not interns1 and pr_test' --alluredir="$REPORT_DIR" | |
| pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-30B-A3B and pytorch' -m 'not not_pytorch' --alluredir="$REPORT_DIR" | |
| exit 0 | |
| fi | |
| done | |
| # The server never became healthy: print its log so the failure can be diagnosed. | |
| echo "health check fail" | |
| cat "$SERVER_LOG/pytorch_Qwen3-30B-A3B_start_restful.log" | |
| exit 1 | |
| # Serve Qwen3-VL-30B-A3B-Instruct with the pytorch backend on GPUs 6 and 7, wait until it | |
| # reports healthy, then run the chat-completions and generate suites against it. | |
| - name: Test restful server - pytorch Qwen3-VL-30B-A3B-Instruct | |
| run: | | |
| # Server output goes to a per-run log file. SERVER_LOG and REPORT_DIR embed the PR branch | |
| # name, so they are read as quoted shell variables, never pasted into the script text. | |
| CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-VL-30B-A3B-Instruct --tp 2 --backend pytorch --logprobs-mode raw_logprobs --allow-terminate-by-client > "$SERVER_LOG/pytorch_Qwen3-VL-30B-A3B-Instruct_start_restful.log" 2>&1 & | |
| server_pid=$! | |
| echo "restful_pid=$server_pid" | |
| # Stop the server on every exit path: tests passed, a pytest run failed (the runner's | |
| # default shell is "bash -e", so the script ends right there), or the health check timed | |
| # out. Try the HTTP terminate endpoint first, then signal the launcher; both best-effort. | |
| # Assumes the server listens on 127.0.0.1:23333 (no port is passed above) — confirm. | |
| trap 'curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || kill "$server_pid" 2> /dev/null || true' EXIT | |
| # Poll /health every 5 seconds, at most 180 times (about 15 minutes). | |
| for i in $(seq 1 180) | |
| do | |
| sleep 5 | |
| echo "health check try $i" | |
| if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then | |
| pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-VL-30B-A3B-Instruct and pytorch' -m 'not not_pytorch and not internlm2_5 and not interns1 and pr_test' --alluredir="$REPORT_DIR" | |
| pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-VL-30B-A3B-Instruct and pytorch' -m 'not not_pytorch and not experts' --alluredir="$REPORT_DIR" | |
| exit 0 | |
| fi | |
| done | |
| # The server never became healthy: print its log so the failure can be diagnosed. | |
| echo "health check fail" | |
| cat "$SERVER_LOG/pytorch_Qwen3-VL-30B-A3B-Instruct_start_restful.log" | |
| exit 1 | |
| # Serve InternVL3_5-30B-A3B with the pytorch backend on GPUs 6 and 7, wait until it | |
| # reports healthy, then run the chat-completions and generate suites against it. | |
| - name: Test restful server - pytorch InternVL3_5-30B-A3B | |
| run: | | |
| # Server output goes to a per-run log file. SERVER_LOG and REPORT_DIR embed the PR branch | |
| # name, so they are read as quoted shell variables, never pasted into the script text. | |
| CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/OpenGVLab/InternVL3_5-30B-A3B --tp 2 --backend pytorch --logprobs-mode raw_logprobs --allow-terminate-by-client --trust-remote-code > "$SERVER_LOG/pytorch_InternVL3_5-30B-A3B_start_restful.log" 2>&1 & | |
| server_pid=$! | |
| echo "restful_pid=$server_pid" | |
| # Stop the server on every exit path: tests passed, a pytest run failed (the runner's | |
| # default shell is "bash -e", so the script ends right there), or the health check timed | |
| # out. Try the HTTP terminate endpoint first, then signal the launcher; both best-effort. | |
| # Assumes the server listens on 127.0.0.1:23333 (no port is passed above) — confirm. | |
| trap 'curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || kill "$server_pid" 2> /dev/null || true' EXIT | |
| # Poll /health every 5 seconds, at most 180 times (about 15 minutes). | |
| for i in $(seq 1 180) | |
| do | |
| sleep 5 | |
| echo "health check try $i" | |
| if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then | |
| pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'OpenGVLab/InternVL3_5-30B-A3B and pytorch' -m 'not not_pytorch and not internlm2_5 and not interns1 and pr_test' --alluredir="$REPORT_DIR" | |
| pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'OpenGVLab/InternVL3_5-30B-A3B and pytorch' -m 'not not_pytorch and not experts' --alluredir="$REPORT_DIR" | |
| exit 0 | |
| fi | |
| done | |
| # The server never became healthy: print its log so the failure can be diagnosed. | |
| echo "health check fail" | |
| cat "$SERVER_LOG/pytorch_InternVL3_5-30B-A3B_start_restful.log" | |
| exit 1 | |
| # Runs even when earlier steps failed: mark the run as finished in the report directory, | |
| # then delete and recreate the current working directory so the next job starts clean. | |
| - name: Clear workfile | |
| if: always() | |
| run: | | |
| # Best-effort: the report directory does not exist if the job failed before the | |
| # "Check env" step created it, and that must not prevent the cleanup below. | |
| # REPORT_DIR is read as a quoted shell variable because it embeds the PR branch name. | |
| echo "status=done" >> "$REPORT_DIR/status.txt" || true | |
| workdir=$(pwd) | |
| cd .. | |
| # The :? guard aborts the script instead of letting rm run on an empty path. | |
| rm -rf -- "${workdir:?}" | |
| mkdir -- "$workdir" | |
| # World-writable; presumably so later jobs running as a different user can reuse the | |
| # directory — confirm. | |
| chmod -R 777 -- "$workdir" | |