update anthropic endpoint test #7083

Workflow file for this run

.github/workflows/pr_ete_test.yml at f51a624

	# PR end-to-end test workflow.
	# Triggers on pull requests that touch the paths listed below, and on manual
	# dispatch from the Actions UI.
	name: pr_ete_test

	on:
	  pull_request:
	    paths:
	      - ".github/workflows/pr_ete_test.yml"
	      - "cmake/**"
	      - "src/**"
	      - "autotest/**"
	      - "3rdparty/**"
	      - "lmdeploy/**"
	      - "requirements/**"
	      - "requirements_cuda.txt"
	      - "CMakeLists.txt"
	      - "setup.py"
	  workflow_dispatch:

	# One active run per workflow and pull request; for runs without a pull
	# request number (e.g. manual dispatch) the group falls back to the git ref.
	# A newer run in the same group cancels the one still in progress.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
	  cancel-in-progress: true


	env:
	  # NOTE(review): the two HOST_* values are not referenced anywhere else in
	  # this workflow file; confirm whether they are still needed.
	  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
	  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
	  # Environment values are always strings; quoted so no YAML tool retypes it.
	  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: "true"
	  PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA


	jobs:
	  # Single job: installs lmdeploy inside the dev container, runs the pytest
	  # functional suites, then starts one api_server per model/backend pair and
	  # runs the restful interface tests against it.
	  pr_functions_test:
	    runs-on: [self-hosted, linux-a100-pr]
	    timeout-minutes: 120
	    env:
	      # Both paths embed github.head_ref, which is chosen by the PR author.
	      # Scripts below must read them as quoted shell variables ("$REPORT_DIR",
	      # "$SERVER_LOG") and never via ${{ env.* }}, which would paste the branch
	      # name into the script text before the shell parses it.
	      REPORT_DIR: /nvme/qa_test_models/test-reports/${{ github.head_ref }}_${{ github.run_id }}
	      SERVER_LOG: /nvme/qa_test_models/server_log/${{ github.head_ref }}_${{ github.run_id }}
	    container:
	      image: openmmlab/lmdeploy:dev-cu12.8
	      options: --gpus all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never
	      volumes:
	        - /nvme/share_data/github-actions/pip-cache:/root/.cache/pip
	        - /nvme/share_data/github-actions/packages:/root/packages
	        - /nvme/qa_test_models:/nvme/qa_test_models
	        - /mnt/121:/mnt/121
	        - /mnt/104:/mnt/104
	        - /mnt/bigdisk:/mnt/bigdisk
	        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
	    steps:
	      - name: Clone repository
	        uses: actions/checkout@v6
	      - name: Install lmdeploy
	        run: |
	          python3 -m pip install --upgrade -r requirements/runtime_cuda.txt
	          python3 -m pip install -r requirements/lite.txt
	          python3 -m pip install -r requirements/test.txt
	          python3 -m pip install -e .
	      - name: Check env
	        run: |
	          python3 -m pip list
	          lmdeploy check_env
	          mkdir -p "$REPORT_DIR"
	          mkdir -p "$SERVER_LOG"
	          echo "starttime=$(date +%s)" > "$REPORT_DIR/status.txt"
	      - name: Test lmdeploy - func
	        run: |
	          pytest autotest -m 'pr_test and gpu_num_2' -x --alluredir="$REPORT_DIR" --clean-alluredir
	          pytest autotest -m 'pr_test and gpu_num_1' -n 2 -x --alluredir="$REPORT_DIR"
	      # Every "Test restful server" step follows the same shape: start the
	      # server in the background with its output redirected to a log file, poll
	      # /health every 5 s for up to 180 tries, run the tests once it answers,
	      # then ask the server to stop through /terminate.
	      # NOTE(review): under the default errexit shell a failing pytest aborts
	      # the step before /terminate is requested - confirm that is acceptable.
	      - name: Test restful server - pytorch Qwen3.5-35B-A3B
	        run: |
	          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3.5-35B-A3B --tp 2 --backend pytorch --logprobs-mode raw_logprobs --enable-return-routed-experts --reasoning-parser qwen-qwq --tool-call-parser qwen3coder --speculative-algorithm qwen3_5_mtp --speculative-num-draft-tokens 4 --max-batch-size 256 --allow-terminate-by-client > "$SERVER_LOG/pytorch_Qwen3.5-35B-A3B_start_restful.log" 2>&1 &
	          echo "restful_pid=$!"
	          for i in $(seq 1 180)
	          do
	            sleep 5
	            echo "health check try $i"
	            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
	              pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3.5-35B-A3B and pytorch and not Qwen/Qwen3.5-35B-A3B-FP8' -m 'not not_pytorch and not internlm2_5 and not interns1 and pr_test' --alluredir="$REPORT_DIR"
	              pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3.5-35B-A3B and pytorch and not Qwen/Qwen3.5-35B-A3B-FP8' -m 'not not_pytorch' --alluredir="$REPORT_DIR"
	              pytest autotest/interface/restful/tool_parser/test_tool_call_basic.py -n 20 -k 'Qwen/Qwen3.5-35B-A3B and pytorch and not Qwen/Qwen3.5-35B-A3B-FP8' --alluredir="$REPORT_DIR"
	              curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
	              exit 0
	            fi
	          done

	          echo "health check fail"
	          # Best effort: the server never answered /health, so this request is
	          # expected to fail; it must not abort the script before the log dump.
	          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
	          cat "$SERVER_LOG/pytorch_Qwen3.5-35B-A3B_start_restful.log"
	          exit 1
	      - name: Test restful server - turbomind Qwen3-32B
	        run: |
	          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-32B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client > "$SERVER_LOG/turbomind_Qwen3-32B_start_restful.log" 2>&1 &
	          echo "restful_pid=$!"
	          for i in $(seq 1 180)
	          do
	            sleep 5
	            echo "health check try $i"
	            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
	              pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-32B and turbomind' -m 'not not_turbomind and not internlm2_5 and not interns1 and pr_test' --alluredir="$REPORT_DIR"
	              pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-32B and turbomind' -m 'not not_turbomind and not experts' --alluredir="$REPORT_DIR"
	              curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
	              exit 0
	            fi
	          done

	          echo "health check fail"
	          # Best effort: the server never answered /health, so this request is
	          # expected to fail; it must not abort the script before the log dump.
	          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
	          cat "$SERVER_LOG/turbomind_Qwen3-32B_start_restful.log"
	          exit 1
	      - name: Test restful server - turbomind InternVL3-38B
	        run: |
	          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/OpenGVLab/InternVL3-38B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client --trust-remote-code > "$SERVER_LOG/turbomind_InternVL3-38B_start_restful.log" 2>&1 &
	          echo "restful_pid=$!"
	          for i in $(seq 1 180)
	          do
	            sleep 5
	            echo "health check try $i"
	            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
	              pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'OpenGVLab/InternVL3-38B and turbomind' -m 'not not_turbomind and not internlm2_5 and not interns1 and pr_test' --alluredir="$REPORT_DIR"
	              pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'OpenGVLab/InternVL3-38B and turbomind' -m 'not not_turbomind and not experts' --alluredir="$REPORT_DIR"
	              curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
	              exit 0
	            fi
	          done

	          echo "health check fail"
	          # Best effort: the server never answered /health, so this request is
	          # expected to fail; it must not abort the script before the log dump.
	          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
	          cat "$SERVER_LOG/turbomind_InternVL3-38B_start_restful.log"
	          exit 1
	      - name: Test restful server - turbomind Qwen3-30B-A3B
	        run: |
	          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-30B-A3B --tp 2 --backend turbomind --logprobs-mode raw_logprobs --allow-terminate-by-client > "$SERVER_LOG/turbomind_Qwen3-30B-A3B_start_restful.log" 2>&1 &
	          echo "restful_pid=$!"
	          for i in $(seq 1 180)
	          do
	            sleep 5
	            echo "health check try $i"
	            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
	              pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-30B-A3B and turbomind' -m 'not not_turbomind and not internlm2_5 and not interns1 and pr_test' --alluredir="$REPORT_DIR"
	              pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-30B-A3B and turbomind' -m 'not not_turbomind and not experts' --alluredir="$REPORT_DIR"
	              curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
	              exit 0
	            fi
	          done

	          echo "health check fail"
	          # Best effort: the server never answered /health, so this request is
	          # expected to fail; it must not abort the script before the log dump.
	          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
	          cat "$SERVER_LOG/turbomind_Qwen3-30B-A3B_start_restful.log"
	          exit 1
	      - name: Test restful server - pytorch Qwen3-30B-A3B
	        run: |
	          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-30B-A3B --tp 2 --backend pytorch --logprobs-mode raw_logprobs --enable-return-routed-experts --allow-terminate-by-client > "$SERVER_LOG/pytorch_Qwen3-30B-A3B_start_restful.log" 2>&1 &
	          echo "restful_pid=$!"
	          for i in $(seq 1 180)
	          do
	            sleep 5
	            echo "health check try $i"
	            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
	              pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-30B-A3B and pytorch' -m 'not not_pytorch and not internlm2_5 and not interns1 and pr_test' --alluredir="$REPORT_DIR"
	              pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-30B-A3B and pytorch' -m 'not not_pytorch' --alluredir="$REPORT_DIR"
	              curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
	              exit 0
	            fi
	          done

	          echo "health check fail"
	          # Best effort: the server never answered /health, so this request is
	          # expected to fail; it must not abort the script before the log dump.
	          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
	          cat "$SERVER_LOG/pytorch_Qwen3-30B-A3B_start_restful.log"
	          exit 1
	      - name: Test restful server - pytorch Qwen3-VL-30B-A3B-Instruct
	        run: |
	          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/Qwen/Qwen3-VL-30B-A3B-Instruct --tp 2 --backend pytorch --logprobs-mode raw_logprobs --allow-terminate-by-client > "$SERVER_LOG/pytorch_Qwen3-VL-30B-A3B-Instruct_start_restful.log" 2>&1 &
	          echo "restful_pid=$!"
	          for i in $(seq 1 180)
	          do
	            sleep 5
	            echo "health check try $i"
	            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
	              pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'Qwen/Qwen3-VL-30B-A3B-Instruct and pytorch' -m 'not not_pytorch and not internlm2_5 and not interns1 and pr_test' --alluredir="$REPORT_DIR"
	              pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'Qwen/Qwen3-VL-30B-A3B-Instruct and pytorch' -m 'not not_pytorch and not experts' --alluredir="$REPORT_DIR"
	              curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
	              exit 0
	            fi
	          done

	          echo "health check fail"
	          # Best effort: the server never answered /health, so this request is
	          # expected to fail; it must not abort the script before the log dump.
	          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
	          cat "$SERVER_LOG/pytorch_Qwen3-VL-30B-A3B-Instruct_start_restful.log"
	          exit 1
	      - name: Test restful server - pytorch InternVL3_5-30B-A3B
	        run: |
	          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/OpenGVLab/InternVL3_5-30B-A3B --tp 2 --backend pytorch --logprobs-mode raw_logprobs --allow-terminate-by-client --trust-remote-code > "$SERVER_LOG/pytorch_InternVL3_5-30B-A3B_start_restful.log" 2>&1 &
	          echo "restful_pid=$!"
	          for i in $(seq 1 180)
	          do
	            sleep 5
	            echo "health check try $i"
	            if curl -f -s http://127.0.0.1:23333/health > /dev/null 2>&1; then
	              pytest autotest/interface/restful/test_restful_chat_completions_v1.py -n 20 -k 'OpenGVLab/InternVL3_5-30B-A3B and pytorch' -m 'not not_pytorch and not internlm2_5 and not interns1 and pr_test' --alluredir="$REPORT_DIR"
	              pytest autotest/interface/restful/test_restful_generate.py -n 20 -k 'OpenGVLab/InternVL3_5-30B-A3B and pytorch' -m 'not not_pytorch and not experts' --alluredir="$REPORT_DIR"
	              curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1
	              exit 0
	            fi
	          done

	          echo "health check fail"
	          # Best effort: the server never answered /health, so this request is
	          # expected to fail; it must not abort the script before the log dump.
	          curl -f -s http://127.0.0.1:23333/terminate > /dev/null 2>&1 || true
	          cat "$SERVER_LOG/pytorch_InternVL3_5-30B-A3B_start_restful.log"
	          exit 1
	      # Runs even when an earlier step failed: records completion in the report
	      # directory, then recreates the workspace directory empty and
	      # world-writable.
	      - name: Clear workfile
	        if: always()
	        run: |
	          # The report directory is normally created by the "Check env" step;
	          # create it here too so a failure before that step cannot abort the
	          # workspace cleanup below.
	          mkdir -p "$REPORT_DIR"
	          echo "status=done" >> "$REPORT_DIR/status.txt"
	          export workdir=$(pwd)
	          cd ..
	          rm -rf "$workdir"
	          mkdir "$workdir"
	          chmod -R 777 "$workdir"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

update anthropic endpoint test #7083

Workflow file

update anthropic endpoint test #7083

Uh oh!

Workflow file for this run