다음 명령들을 복사하여 터미널에서 순서대로 실행하세요.
python3 -m venv myvenv
source myvenv/bin/activate
pip install modelscope
pip install transformers_stream_generator
pip install transformers
pip install tiktoken
pip install accelerate
pip install bitsandbytes
touch run.py
vi run.py
다음 코드를 복사하여 run.py에 붙여넣으세요.
import os
import platform
from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
model_id = 'qwen/Qwen-7B-Chat'
revision = 'v1.0.1'
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, trust_remote_code=True)
# use fp16
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", revision=revision,
trust_remote_code=True, fp16=True).eval()
model.generation_config = GenerationConfig.from_pretrained(model_id,
trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参
stop_stream = False
def clear_screen():
if platform.system() == "Windows":
os.system("cls")
else:
os.system("clear")
def print_history(history):
for pair in history:
print(f"\nUser:{pair[0]}\nQwen-7B:{pair[1]}")
def main():
history, response = [], ''
global stop_stream
clear_screen()
print("欢迎使用 Qwen-7B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序")
while True:
query = input("\nUser:")
if query.strip() == "stop":
break
if query.strip() == "clear":
history = []
clear_screen()
print("欢迎使用 Qwen-7B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序")
continue
for response in model.chat(tokenizer, query, history=history, stream=True):
if stop_stream:
stop_stream = False
break
else:
clear_screen()
print_history(history)
print(f"\nUser: {query}")
print("\nQwen-7B:", end="")
print(response)
history.append((query, response))
if __name__ == "__main__":
main()
저장하려면 ESC 키를 누른 뒤 :wq 를 입력하고 Enter를 눌러 저장하십시오.
그런 다음 스크립트를 실행하세요.
python run.py