ChatSpartacus / app.py
OzTianlu's picture
Update app.py
d8d0150 verified
Raw
History Blame Contribute Delete
1.51 kB
import spaces
import torch
from threading import Thread
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import gradio as gr
# Hub repository id used for BOTH the tokenizer and the model weights, so the
# two can never drift apart.
MODEL_ID = "NoesisLab/Spartacus-1B-Instruct"

# trust_remote_code=True lets the repository's own Python code run on load.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# NOTE(review): nothing visible here moves the model to an accelerator, so
# `model.device` is wherever from_pretrained placed it — confirm this is the
# intended placement for the GPU-decorated chat handler.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    tie_word_embeddings=False,  # Attempt to force-disable the weight-tying check.
)
@spaces.GPU
def respond(message, history):
    """Stream the assistant's reply to ``message`` given the prior ``history``.

    Args:
        message: The new user message.
        history: Earlier turns of the conversation. Each entry is either a
            dict with ``"role"`` and ``"content"`` keys, or a legacy
            ``(user, assistant)`` pair in which either side may be ``None``.

    Yields:
        The reply text accumulated so far; it grows with every decoded chunk
        the streamer produces.

    Raises:
        Exception: Any error raised by ``model.generate`` in the worker
            thread is re-raised here after the stream has been drained.
    """
    messages = [{"role": "system", "content": "You are Spartacus, a helpful assistant."}]
    for turn in history:
        if isinstance(turn, dict):
            messages.append({"role": turn["role"], "content": turn["content"]})
            continue
        # Pair-style history: one (user, assistant) tuple/list per exchange.
        user_text, assistant_text = turn
        if user_text is not None:
            messages.append({"role": "user", "content": user_text})
        if assistant_text is not None:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    # return_dict=False is spelled out so this call yields a bare tensor of
    # token ids regardless of the installed library's default return type.
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=False,
    ).to(model.device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # NOTE(review): no max_new_tokens is passed, so the reply length is
    # governed by the model's own generation config — confirm that is intended.
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        temperature=0.5,
        top_p=0.9,
        do_sample=True,
    )

    errors = []

    def _generate():
        """Run generation; on failure, record the error and unblock the reader."""
        try:
            model.generate(**generate_kwargs)
        except Exception as exc:
            errors.append(exc)
            # Without the end signal the consuming loop below would wait on
            # the streamer's queue forever.
            streamer.end()

    thread = Thread(target=_generate)
    thread.start()

    response = ""
    for token in streamer:
        response += token
        yield response

    thread.join()
    if errors:
        # Surface the worker's failure to the caller instead of losing it.
        raise errors[0]
# Chat UI: every submitted message is handed to `respond`, whose yielded
# strings are shown as the in-progress reply.
demo = gr.ChatInterface(
    respond,
    description="Chat with NoesisLab/Spartacus-1B-Instruct",
    title="Spartacus Chat",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()