Common issues with the ChatGLM2-6B model

1. After P-Tuning, ChatGLM2-6B gives irrelevant answers at inference time

According to the ChatGLM-6B video on Bilibili (【官方教程】ChatGLM-6B 微调: P-Tuning, LoRA, Full parameter), the gist is that after fine-tuning on new data the model forgets what it learned before (catastrophic forgetting).

  • Solution
    The weights produced by P-Tuning are essentially the weights of model.transformer.prefix_encoder.
    The idea is to blend the old prefix_encoder weights with the newly P-Tuned weights, then load the mixed result back into model.transformer.prefix_encoder.
    In my tests, weighting the old weights at 0.2 and the new weights at 0.8 gave a model that still handles ordinary conversation (e.g. greetings) while retaining the abilities learned through P-Tuning.
    That said, this weighted-average mixing is rather ad hoc and is not guaranteed to work.
    Script: solution script (a minimal sketch of the idea follows below)
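
The original "solution script" link is not reproduced here. Below is a minimal sketch of the mixing idea, under a few assumptions: the checkpoint paths are placeholders, the 0.2/0.8 ratio is the one quoted above, and both checkpoints store the prefix encoder under keys beginning with transformer.prefix_encoder. (the same layout used by the deployment script in section 3).

```python
import torch
from transformers import AutoConfig, AutoModel

# Placeholder paths -- adjust to your own layout.
MODEL_PATH = "models/chatglm2-6b-int4"                        # base model
OLD_CKPT   = "models/old-prefix/pytorch_model.bin"            # "old" prefix_encoder weights (hypothetical path)
NEW_CKPT   = "models/gukai/checkpoint-500/pytorch_model.bin"  # P-Tuning output
OLD_W, NEW_W = 0.2, 0.8                                       # the ratio that worked in the author's test

def extract_prefix(path):
    """Load a checkpoint and keep only the prefix_encoder tensors, with the key prefix stripped."""
    prefix = "transformer.prefix_encoder."
    sd = torch.load(path, map_location="cpu")
    return {k[len(prefix):]: v for k, v in sd.items() if k.startswith(prefix)}

old_sd = extract_prefix(OLD_CKPT)
new_sd = extract_prefix(NEW_CKPT)

# Weighted average of the two prefix_encoder state dicts (keys and shapes must match).
mixed_sd = {k: OLD_W * old_sd[k].float() + NEW_W * new_sd[k].float() for k in new_sd}

# Inject the mixed weights back into model.transformer.prefix_encoder.
config = AutoConfig.from_pretrained(MODEL_PATH, trust_remote_code=True, pre_seq_len=128)
model = AutoModel.from_pretrained(MODEL_PATH, config=config, trust_remote_code=True)
model.transformer.prefix_encoder.load_state_dict(mixed_sd)

# Optionally save the mixed prefix weights in the same layout as a P-Tuning checkpoint.
torch.save({"transformer.prefix_encoder." + k: v for k, v in mixed_sd.items()},
           "mixed_prefix_pytorch_model.bin")
```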

2. ChatGLM2-6B P-Tuning

  • Multi-GPU training (bash); the expected format of the train/dev JSON files is sketched after this list
```bash
export PRE_SEQ_LEN=128
export LR=2e-2
export NUM_GPUS=1  # set this to the number of GPUs you want to use
# export WANDB_MODE=dryrun
export WANDB_DISABLED=true

torchrun --standalone --nnodes=1 --nproc-per-node=$NUM_GPUS ptuning/main.py \
    --do_train \
    --train_file data/AdvertiseGen/train.json \
    --validation_file data/AdvertiseGen/dev.json \
    --preprocessing_num_workers 4 \
    --prompt_column content \
    --response_column summary \
    --overwrite_cache \
    --model_name_or_path /home/geekplusa/ai/models/bigmodels/prepare_models/chatglm/chatglm2-6b-32k-int4 \
    --output_dir /home/geekplusa/ai/models/bigmodels/train_models/chatglm/chatglm2/adgen-chatglm2-6b-pt-$PRE_SEQ_LEN-$LR \
    --overwrite_output_dir \
    --max_source_length 64 \
    --max_target_length 256 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 16 \
    --predict_with_generate \
    --max_steps 2000 \
    --logging_steps 10 \
    --save_steps 500 \
    --learning_rate $LR \
    --pre_seq_len $PRE_SEQ_LEN \
    --quantization_bit 4
```
  • Multi-GPU training, run in the background with nohup
```bash
export PRE_SEQ_LEN=128
export LR=2e-2
export NUM_GPUS=4
# export WANDB_MODE=dryrun
export WANDB_DISABLED=true

nohup torchrun --standalone --nnodes=1 --nproc-per-node=$NUM_GPUS ptuning/main.py \
    --do_train \
    --train_file data/AdvertiseGen/train_min.json \
    --validation_file data/AdvertiseGen/dev_min.json \
    --preprocessing_num_workers 4 \
    --prompt_column content \
    --response_column summary \
    --overwrite_cache \
    --model_name_or_path models/chatglm2-6b-int4 \
    --output_dir models/adgen-chatglm2-6b-int4-pt-128-2e-e \
    --overwrite_output_dir \
    --max_source_length 64 \
    --max_target_length 256 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 16 \
    --predict_with_generate \
    --max_steps 100 \
    --logging_steps 10 \
    --save_steps 50 \
    --learning_rate $LR \
    --pre_seq_len $PRE_SEQ_LEN \
    --quantization_bit 4 > log 2>&1 &
```
  • Single-GPU training (note: max_steps and save_steps are set to 1 below, which only makes sense as a quick smoke test; raise them for a real run)
```bash
export WANDB_DISABLED=true
export PRE_SEQ_LEN=128
export LR=2e-2

CUDA_VISIBLE_DEVICES=0 python ptuning/main.py \
    --do_train \
    --train_file data/AdvertiseGen/train_min.json \
    --validation_file data/AdvertiseGen/val_min.json \
    --prompt_column content \
    --response_column summary \
    --overwrite_cache \
    --model_name_or_path /home/geekplusa/ai/models/bigmodels/prepare_models/chatglm/chatglm2-6b-int4 \
    --output_dir /home/geekplusa/ai/models/bigmodels/train_models/chatglm/chatglm2/adgen-chatglm2-6b-pt-医院1-$PRE_SEQ_LEN-$LR \
    --overwrite_output_dir \
    --max_source_length 64 \
    --max_target_length 1000 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 16 \
    --predict_with_generate \
    --max_steps 1 \
    --logging_steps 10 \
    --save_steps 1 \
    --learning_rate 2e-2 \
    --pre_seq_len 128 \
    --quantization_bit 4
```
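
For reference, ptuning/main.py reads the training and validation files as JSON Lines: one JSON object per line, with the prompt and the target response stored in the fields named by --prompt_column and --response_column (content and summary in the scripts above). A minimal, made-up illustration of the expected layout:

```python
import json

# One made-up record in the AdvertiseGen-style layout expected by ptuning/main.py:
# the "content" field is the prompt, the "summary" field is the target response.
sample = {"content": "类型#裙*颜色#蓝色*风格#清新",
          "summary": "这是一条蓝色的清新风格连衣裙。"}

# train.json / dev.json are JSON Lines files: one such object per line.
with open("data/AdvertiseGen/train_min.json", "a", encoding="utf-8") as f:
    f.write(json.dumps(sample, ensure_ascii=False) + "\n")
```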

3. ChatGLM2-6B deployment

This covers four scenarios: base model on a single GPU, base model on multiple GPUs, P-Tuned model on a single GPU, and P-Tuned model on multiple GPUs.
See the web_demo2.py script below (launch it with `streamlit run web_demo2.py`).

```python
from transformers import AutoModel, AutoTokenizer
from transformers import AutoConfig
import streamlit as st
import os


st.set_page_config(
    page_title="ChatGLM2-6b 演示",
    page_icon=":robot:",
    layout='wide'
)


# Base model on a single GPU
@st.cache_resource
def get_model_onegpu():
    tokenizer = AutoTokenizer.from_pretrained("models/chatglm2-6b-int4", trust_remote_code=True)
    model = AutoModel.from_pretrained("models/chatglm2-6b-int4", trust_remote_code=True).cuda()
    return tokenizer, model

# Base model sharded across multiple GPUs
@st.cache_resource
def get_model_mitugpu():
    tokenizer = AutoTokenizer.from_pretrained("models/chatglm2-6b-int4", trust_remote_code=True)
    from utils import load_model_on_gpus
    model = load_model_on_gpus("models/chatglm2-6b-int4", num_gpus=4)
    return tokenizer, model

# P-Tuned model on a single GPU: load the base model, then inject the trained
# prefix_encoder weights from the P-Tuning checkpoint
@st.cache_resource
def get_model_ptuning_onegpu():
    tokenizer = AutoTokenizer.from_pretrained("models/chatglm2-6b-int4", trust_remote_code=True)
    import torch
    CHECKPOINT_PATH = "models/gukai/checkpoint-500/"
    config = AutoConfig.from_pretrained("models/chatglm2-6b-int4", trust_remote_code=True, pre_seq_len=128)
    model = AutoModel.from_pretrained("models/chatglm2-6b-int4", config=config, trust_remote_code=True)
    prefix_state_dict = torch.load(os.path.join(CHECKPOINT_PATH, "pytorch_model.bin"))
    new_prefix_state_dict = {}
    for k, v in prefix_state_dict.items():
        if k.startswith("transformer.prefix_encoder."):
            new_prefix_state_dict[k[len("transformer.prefix_encoder."):]] = v
    model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)
    model = model.quantize(4)
    model = model.cuda()
    return tokenizer, model

# P-Tuned model sharded across multiple GPUs
@st.cache_resource
def get_model_ptuning_mutigpu():
    tokenizer = AutoTokenizer.from_pretrained("models/chatglm2-6b-int4", trust_remote_code=True)
    import torch
    from utils import load_model_on_gpus
    CHECKPOINT_PATH = "models/gukai/checkpoint-500/"
    prefix_state_dict = torch.load(os.path.join(CHECKPOINT_PATH, "pytorch_model.bin"))
    #prefix_state_dict = torch.load(os.path.join(CHECKPOINT_PATH, "pytorch_model.bin"), map_location=lambda storage, loc: storage.cuda(1))
    new_prefix_state_dict = {}
    for k, v in prefix_state_dict.items():
        if k.startswith("transformer.prefix_encoder."):
            new_prefix_state_dict[k[len("transformer.prefix_encoder."):]] = v
    model = load_model_on_gpus("models/chatglm2-6b-int4", num_gpus=4, pre_seq_len=128)
    model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)
    model.transformer.prefix_encoder.float()
    model = model.quantize(4)
    #model = model.cuda()
    return tokenizer, model


# Original web_demo-style loader: load a model directly from a checkpoint directory
@st.cache_resource
def get_model():
    tokenizer = AutoTokenizer.from_pretrained("models/gukai/checkpoint-500", trust_remote_code=True)
    model = AutoModel.from_pretrained("models/gukai/checkpoint-500", trust_remote_code=True).cuda()
    # Multi-GPU support: use the two lines below instead of the line above,
    # and set num_gpus to the number of GPUs you actually have
    # from utils import load_model_on_gpus
    # model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
    model = model.eval()
    return tokenizer, model


# Pick the loader that matches your deployment scenario
#tokenizer, model = get_model()

tokenizer, model = get_model_ptuning_mutigpu()


st.title("ChatGLM2-6B")

max_length = st.sidebar.slider(
    'max_length', 0, 32768, 8192, step=1
)
top_p = st.sidebar.slider(
    'top_p', 0.0, 1.0, 0.8, step=0.01
)
temperature = st.sidebar.slider(
    'temperature', 0.0, 1.0, 0.8, step=0.01
)

if 'history' not in st.session_state:
    st.session_state.history = []

if 'past_key_values' not in st.session_state:
    st.session_state.past_key_values = None

for i, (query, response) in enumerate(st.session_state.history):
    with st.chat_message(name="user", avatar="user"):
        st.markdown(query)
    with st.chat_message(name="assistant", avatar="assistant"):
        st.markdown(response)
with st.chat_message(name="user", avatar="user"):
    input_placeholder = st.empty()
with st.chat_message(name="assistant", avatar="assistant"):
    message_placeholder = st.empty()

prompt_text = st.text_area(label="用户命令输入",
                           height=100,
                           placeholder="请在这儿输入您的命令")

button = st.button("发送", key="predict")

if button:
    input_placeholder.markdown(prompt_text)
    history, past_key_values = st.session_state.history, st.session_state.past_key_values
    for response, history, past_key_values in model.stream_chat(tokenizer, prompt_text, history,
                                                                past_key_values=past_key_values,
                                                                max_length=max_length, top_p=top_p,
                                                                temperature=temperature,
                                                                return_past_key_values=True):
        message_placeholder.markdown(response)

    st.session_state.history = history
    st.session_state.past_key_values = past_key_values
```
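
One caveat for the two P-Tuning loaders above: the pre_seq_len passed at load time (128 here) must match the PRE_SEQ_LEN used during training, otherwise loading the prefix_encoder state dict fails with a shape mismatch. The same applies to the weight-mixing sketch in section 1.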

Reproduced from blog.csdn.net/qq122716072/article/details/132688425