apt-get install git-lfs    # Debian/Ubuntu; on CentOS/RHEL use: yum install git-lfs
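git-lfs must be activated for the current user once before cloning, otherwise the clone only fetches small pointer files instead of the actual weights:
git lfs install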
cd /home/deepseek-ai/    # note: the docker run step below mounts /nvme/data/home and later loads the model from /nvme/data/home/deepseek-ai/, so make sure these two paths point to the same directory
git clone https://www.modelscope.cn/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B.git
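An optional sanity check after the clone finishes: the weights of a 32B model occupy tens of gigabytes, so a directory of only a few megabytes means the LFS objects were not downloaded. git lfs pull re-fetches anything still left as a pointer:
cd DeepSeek-R1-Distill-Qwen-32B
du -sh .         # should be tens of GB for a 32B model
git lfs pull     # re-fetch any objects that are still LFS pointers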
Add a host entry for the private Harbor registry (append to /etc/hosts):
120.220.95.189 zibo.harbor.iluvatar.com.cn
Then declare the registry as insecure in /etc/docker/daemon.json:
{
  "exec-opts": ["native.cgroupdriver=systemd"],
  "insecure-registries": ["zibo.harbor.iluvatar.com.cn:30000"]
}
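Docker only reads daemon.json at startup, so restart the daemon for the change to take effect:
systemctl daemon-reload
systemctl restart docker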
docker pull zibo.harbor.iluvatar.com.cn:30000/saas/bi100-3.2.1-x86-ubuntu20.04-py3.10-poc-llm-infer:v1.2.1
docker run --shm-size="32g" -it \
    -v /usr/src:/usr/src \
    -v /lib/modules:/lib/modules \
    -v /dev:/dev \
    -v /nvme/data/home:/nvme/data/home \
    -p 7000-7999:7000-7999 \
    --name=deepseek --privileged --cap-add=ALL --pid=host \
    zibo.harbor.iluvatar.com.cn:30000/saas/bi100-3.2.1-x86-ubuntu20.04-py3.10-poc-llm-infer:v1.2.1 \
    /bin/bash
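Before launching anything inside the container, it is worth confirming the BI100 cards are visible. ixsmi is Iluvatar's nvidia-smi-style utility; its presence in this image is an assumption:
ixsmi    # assumed to ship in the llm-infer image; should list the BI100 cards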
cd /root/llm-infer
rm -f fastchat_webui.sh
vi fastchat_webui.sh    # paste the script below
#!/bin/bash
model_path=""
vllm_param=""
port=7860
openai_port=7000 # OpenAI API port
# Usage information
usage() {
    echo "Usage: $0 [options], e.g.: bash ./fastchat_webui.sh --model /share/fshare/common/models/baichuan-inc/Baichuan2-7B-Chat --vllm_param \"-tp 1 --trust-remote-code --max-num-batched-tokens 2048 --max-model-len 2048\""
    echo "Options:"
    echo "  --model MODEL   [required] path to the model"
    echo "  --port          [optional] web app port, default 7860"
    echo "  --openai_port   [optional] OpenAI API port, default 7000"
    echo "  --vllm_param    [optional] extra arguments for the vLLM worker, e.g. '-tp 1 --trust-remote-code' (empty by default)"
    echo "  --help          show this help message and exit"
    exit 1
}
# Parse command-line arguments
while [[ "$#" -gt 0 ]]; do
    case $1 in
        --model)
            if [[ -n "$2" && ! "$2" == -* ]]; then
                model_path="$2"
                shift 2
            else
                echo "Error: --model requires an argument."
                usage
            fi
            ;;
        --vllm_param)
            if [[ -n "$2" ]]; then
                vllm_param="$2"
                shift 2
            else
                echo "Error: --vllm_param requires an argument."
                usage
            fi
            ;;
        --port)
            if [[ -n "$2" && ! "$2" == -* ]]; then
                port="$2"
                shift 2
            else
                echo "Error: --port requires an argument."
                usage
            fi
            ;;
        --openai_port)
            if [[ -n "$2" && ! "$2" == -* ]]; then
                openai_port="$2"
                shift 2
            else
                echo "Error: --openai_port requires an argument."
                usage
            fi
            ;;
        --help)
            usage
            ;;
        *)
            echo "Unknown argument: $1"
            usage
            ;;
    esac
done
if [[ -z "$model_path" ]]; then
    echo "Error: the --model argument is required."
    usage
fi
# Prepare the model data.
# Different models live in different directories under fshare, and their launch parameters may also differ.
# Service launch commands
controller_cmd="python3 -m fastchat.serve.controller --host 0.0.0.0 --port 21001"
web_app_cmd="python3 -m fastchat.serve.gradio_web_server --port $port --controller-url=http://0.0.0.0:21001"
worker_cmd="python3 -m fastchat.serve.vllm_worker --model-path $model_path --host=0.0.0.0 --port 21002 --worker-address=http://0.0.0.0:21002 --controller-address=http://0.0.0.0:21001 $vllm_param"
openai_api_cmd="python3 -m fastchat.serve.openai_api_server --host 0.0.0.0 --port $openai_port"
echo "controller_cmd=$controller_cmd"
echo "worker_cmd=$worker_cmd"
echo "web_app_cmd=$web_app_cmd"
echo "openai_api_cmd=$openai_api_cmd"
current_time=$(date '+%Y%m%d_%H%M%S')
# Log file
controller_log="controller_${current_time}.log"
echo -e "\n$(date +%Y-%m-%d:%H:%M:%S)===================loading model, please wait a few minutes======================\n"
# Start the FastChat controller
$controller_cmd > "$controller_log" 2>&1 &
sleep 5
echo -e "\n$(date +%Y-%m-%d:%H:%M:%S) controller started pid:$!\n"
# Start the FastChat worker; loading the model takes a while
$worker_cmd &
# PID of the background worker process
echo -e "\n$(date +%Y-%m-%d:%H:%M:%S) worker started pid:$!, waiting for model to load\n"
# Poll the controller log: the web service must not start until the worker has registered
while true; do
    if tail -n 100 "$controller_log" | grep -q "Register a new worker"; then
        echo -e "\nmodel load success\n"
        break
    else
        echo -e "\nsleeping 5s for model loading...\n"
        sleep 5
    fi
done
echo -e "\n$(date +%Y-%m-%d:%H:%M:%S)==============================Model loaded successfully============================\n"
# Start the web UI
$web_app_cmd &
# Start the OpenAI-compatible API server
$openai_api_cmd &
echo -e "\n$(date +%Y-%m-%d:%H:%M:%S) OpenAI API started on port $openai_port\n"
nohup bash fastchat_webui.sh \
    --model /nvme/data/home/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/ \
    --vllm_param "--trust-remote-code --gpu-memory-utilization 0.9 --max-num-batched-tokens 2048 --max-model-len 2048 -tp 4" \
    2>&1 | grep -v "think" > fastchat.log &
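Loading the 32B checkpoint takes several minutes. Progress and the listening ports can be watched with standard tools:
tail -f fastchat.log              # wait for the "Model loaded successfully" banner
ss -lntp | grep -E '7860|7000'    # web UI and OpenAI API should both be listening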
curl -X POST http://127.0.0.1:7000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_API_KEY" \
-d '{
"model": "DeepSeek-R1-Distill-Qwen-32B",
"messages": [
{"role": "user", "content": "介绍一下湖南省常德市"}
],
"temperature": 0.0,
"max_tokens": 2000
}'
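If the completion request fails, the OpenAI-compatible model listing is a quick way to confirm the worker registered with the API server; /v1/models is part of FastChat's openai_api_server:
curl http://127.0.0.1:7000/v1/models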