language / app.py
hellokawei's picture
Update app.py
0a9bd20 verified
Raw
History Blame Contribute Delete
6.89 kB
import os
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, WhisperProcessor, WhisperForConditionalGeneration
from typing import List, Tuple # 新增:导入类型
# 方案 A:使用自定义环境变量名 "language"
hf_token = os.environ.get("language")
if not hf_token:
raise EnvironmentError("未找到名为 'language' 的环境变量,请在Space设置中添加")
# 方案 B:改用规范的 "HUGGINGFACE_HUB_TOKEN"(需同步修改Space环境变量)
# hf_token = os.environ.get("HUGGINGFACE_HUB_TOKEN")
# if not hf_token:
# raise EnvironmentError("未找到HUGGINGFACE_HUB_TOKEN环境变量,请在Space设置中添加")
# 模型配置 - 使用公开模型
MODELS = {
"Zephyr 7B Beta": {
"model_id": "HuggingFaceH4/zephyr-7b-beta",
"kwargs": {"torch_dtype": torch.float16}
},
"Falcon 7B Instruct": {
"model_id": "tiiuae/falcon-7b-instruct",
"kwargs": {"torch_dtype": torch.float16, "trust_remote_code": True}
}
}
# 加载模型
def load_model(model_name):
model_config = MODELS[model_name]
tokenizer = AutoTokenizer.from_pretrained(
model_config["model_id"],
use_auth_token=hf_token
)
model = AutoModelForCausalLM.from_pretrained(
model_config["model_id"],
use_auth_token=hf_token,
**model_config["kwargs"]
)
device = "cuda" if torch.cuda.is_available() else "cpu"
return model.to(device), tokenizer, device
# 初始化模型
loaded_models = {}
for model_name in MODELS:
loaded_models[model_name] = load_model(model_name)
# 构建对话提示词
def build_prompt(message: str, history: List[Tuple[str, str]], system_prompt: str, model_name: str) -> str:
if "Zephyr" in model_name:
prompt = f"系统提示: {system_prompt}\n"
for user_msg, assistant_msg in history:
prompt += f"用户: {user_msg}\n助手: {assistant_msg}\n"
prompt += f"用户: {message}\n助手:"
elif "Falcon" in model_name:
prompt = f"### System:\n{system_prompt}\n\n"
for user_msg, assistant_msg in history:
prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"
prompt += f"### User:\n{message}\n\n### Assistant:"
else:
prompt = f"[System] {system_prompt}\n"
for user_msg, assistant_msg in history:
prompt += f"[User] {user_msg}\n[Assistant] {assistant_msg}\n"
prompt += f"[User] {message}\n[Assistant]"
return prompt
# 模型推理函数
def generate_response(
message: str,
history: List[Tuple[str, str]],
system_prompt: str,
model_name: str,
max_new_tokens: int,
temperature: float,
top_p: float,
top_k: int
) -> str:
model, tokenizer, device = loaded_models[model_name]
full_prompt = build_prompt(message, history, system_prompt, model_name)
inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
generate_kwargs = {
"max_new_tokens": max_new_tokens,
"temperature": temperature,
"top_p": top_p,
"top_k": top_k,
"do_sample": True,
"eos_token_id": tokenizer.eos_token_id or tokenizer.unk_token_id,
"pad_token_id": tokenizer.pad_token_id or tokenizer.eos_token_id
}
with torch.no_grad():
output = model.generate(**inputs, **generate_kwargs)
response = tokenizer.decode(output[0], skip_special_tokens=True)
return response[len(full_prompt):].strip()
# 处理用户输入
def process_chat(
message: str,
history: List[Tuple[str, str]],
system_prompt: str,
model_name: str,
max_new_tokens: int,
temperature: float,
top_p: float,
top_k: int
) -> Tuple[List[Tuple[str, str]], List[Tuple[str, str]]]:
response = generate_response(message, history, system_prompt, model_name, max_new_tokens, temperature, top_p, top_k)
history.append((message, response))
return history, history
# 语音转文字功能
asr = None
if torch.cuda.is_available() or torch.backends.mps.is_available():
try:
processor = WhisperProcessor.from_pretrained("openai/whisper-base")
asr_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base").to("cuda" if torch.cuda.is_available() else "cpu")
asr = {"processor": processor, "model": asr_model}
except Exception as e:
print(f"语音模型加载失败: {e}")
asr = None
def transcribe(audio) -> str:
if asr is None:
return "语音识别模型未加载"
processor, model = asr["processor"], asr["model"]
input_features = processor(audio, return_tensors="pt").input_features.to(model.device)
predicted_ids = model.generate(input_features)
return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
# 构建Gradio界面
with gr.Blocks(title="无权限语言模型对话助手") as demo:
gr.Markdown("## 公开语言模型对话应用(无需访问权限)")
with gr.Row():
with gr.Column(scale=1):
message_input = gr.Textbox(label="输入消息")
system_prompt = gr.Textbox(
label="系统提示词",
value="你是一个 helpful、知识渊博的AI助手。",
)
model_choice = gr.Dropdown(
choices=list(MODELS.keys()),
value=list(MODELS.keys())[0],
label="选择语言模型"
)
with gr.Accordion("生成参数", open=False):
max_new_tokens = gr.Slider(minimum=1, maximum=2048, value=512, label="最大Token数")
temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="随机性")
top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p采样")
top_k = gr.Slider(minimum=1, maximum=100, value=50, label="Top-k采样")
use_voice = gr.Checkbox(label="使用语音输入")
audio_input = gr.Audio(type="filepath", label="语音输入")
send_btn = gr.Button("发送消息", variant="primary")
clear_btn = gr.Button("清空对话")
with gr.Column(scale=2):
chat_history = gr.Chatbot(label="对话历史")
# 语音输入处理
audio_input.change(
fn=lambda audio, use: transcribe(audio) if use else "",
inputs=[audio_input, use_voice],
outputs=message_input
)
# 发送消息
send_btn.click(
fn=process_chat,
inputs=[message_input, chat_history, system_prompt, model_choice, max_new_tokens, temperature, top_p, top_k],
outputs=[chat_history, chat_history]
)
# 清空对话
clear_btn.click(fn=lambda: None, outputs=chat_history)
# 启动应用
if __name__ == "__main__":
demo.launch(server_name="0.0.0.0", server_port=7860, share=True)