chatglm 多轮对话限制显存

# Demo of ChatGLM chat patterns. NOTE(review): `model` and `tokenizer` are
# assumed to be a loaded ChatGLM model and its tokenizer defined elsewhere —
# confirm before running.
prompt = "用户输入"

'''==========   单轮对话   =========='''
# Single-turn chat: empty history, so no prior context is fed to the model.
response, history = model.chat(tokenizer, prompt, history=[])

'''==========   多轮对话不限制显存   =========='''
# Multi-turn chat with unbounded history: context (and GPU memory) grows
# with every turn.
response, history = model.chat(tokenizer, prompt, history=history)

'''==========   多轮对话限制显存   =========='''
# Evict the oldest turns, keeping only the three most recent (question, answer)
# pairs (excluding the current turn); len(history) is 4 after the call returns.
response, history = model.chat(tokenizer, prompt, history if len(history) <= 3 else history[-3:])

# Variant: keep the first turn plus the two most recent turns (excluding the
# current turn); len(history) is 4 after the call returns.
first_ans = ('','')  # placeholder for the first (question, answer) pair, set on the first call

def chat_behind(tokenizer, prompt, history=None):
    """Multi-turn chat that bounds memory by trimming the history.

    Once the history holds more than three turns, only the first recorded
    turn (``first_ans``) plus the two most recent turns are sent to the model.

    Args:
        tokenizer: the ChatGLM tokenizer (passed through to ``model.chat``).
        prompt: the current user input string.
        history: list of (question, answer) tuples from prior turns, or None
            to start a fresh conversation.

    Returns:
        ``(response, history)`` exactly as produced by ``model.chat``.
    """
    global first_ans
    if history is None:
        history = []
    # Trim to [first turn] + two most recent turns once history exceeds 3 entries.
    trimmed = history if len(history) <= 3 else [first_ans] + history[-2:]
    response, history = model.chat(tokenizer, prompt, trimmed)
    # Capture the very first (question, answer) pair exactly once.
    # Bug fix: the original condition `len(history) <= 1` indexed history[0]
    # even when the returned history was empty, raising IndexError.
    if len(history) == 1:
        first_ans = history[0]

    return response, history

response, history = chat_behind(tokenizer, prompt, history)

猜你喜欢

转载自blog.csdn.net/qq_42363032/article/details/130824002
今日推荐