Instructions to use deepconradlabs/conrad-nit-5.1-8B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use deepconradlabs/conrad-nit-5.1-8B with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="deepconradlabs/conrad-nit-5.1-8B")

# Load model directly
from transformers import AutoTokenizer, AutoModelForMultimodalLM

tokenizer = AutoTokenizer.from_pretrained("deepconradlabs/conrad-nit-5.1-8B")
model = AutoModelForMultimodalLM.from_pretrained("deepconradlabs/conrad-nit-5.1-8B")

Notebooks
Google Colab
Kaggle
Local Apps Settings

vLLM

How to use deepconradlabs/conrad-nit-5.1-8B with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "deepconradlabs/conrad-nit-5.1-8B"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "deepconradlabs/conrad-nit-5.1-8B",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker

docker model run hf.co/deepconradlabs/conrad-nit-5.1-8B

SGLang

How to use deepconradlabs/conrad-nit-5.1-8B with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "deepconradlabs/conrad-nit-5.1-8B" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "deepconradlabs/conrad-nit-5.1-8B",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "deepconradlabs/conrad-nit-5.1-8B" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "deepconradlabs/conrad-nit-5.1-8B",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Docker Model Runner
How to use deepconradlabs/conrad-nit-5.1-8B with Docker Model Runner:
```
docker model run hf.co/deepconradlabs/conrad-nit-5.1-8B
```

conrad-nit-5.1-8B / app.py

deepconradlabs

Add Gradio space proxy

a537b22 6 days ago

raw

history blame contribute delete

4.39 kB

	import os
	from typing import Any

	import gradio as gr
	import requests


	# System instruction sent as the first message of every request
	# (see build_messages below).
	SYSTEM_PROMPT = (
	    "You are Conrad, a concise assistant built by Deep Conrad. "
	    "Respond naturally, clearly, and professionally. "
	    "Stay concise by default, and expand only when the user asks for depth. "
	    "If you are unsure, say so briefly and avoid inventing details. "
	    "Use official links and provided context when relevant. "
	    "Never expose internal errors or infrastructure details."
	)


	def build_messages(user_message: str, history: list[dict[str, str]]) -> list[dict[str, str]]:
	    """Assemble the chat-completion message list for one request.

	    The result always starts with the system prompt, followed by the usable
	    turns from ``history`` and finally the new user message.

	    Args:
	        user_message: Text the user just submitted; surrounding whitespace
	            is stripped. ``None`` is treated as an empty string.
	        history: Earlier turns as ``{"role": ..., "content": ...}`` dicts.
	            May be ``None`` or empty. Entries that are not dicts, whose role
	            is not ``user``/``assistant``, or whose content is not a
	            non-blank string are skipped instead of raising.

	    Returns:
	        A new list of ``{"role", "content"}`` dicts ready to send as the
	        ``messages`` field of a request.
	    """
	    messages: list[dict[str, str]] = [{"role": "system", "content": SYSTEM_PROMPT}]

	    for item in history or []:
	        if not isinstance(item, dict):
	            continue
	        role = item.get("role")
	        content = item.get("content")
	        # Only plain-text turns can be forwarded; anything else (missing
	        # fields, tuples, nested dicts) has no meaningful .strip().
	        if not isinstance(role, str) or not isinstance(content, str):
	            continue
	        role = role.strip().lower()
	        content = content.strip()
	        if role in {"user", "assistant"} and content:
	            messages.append({"role": role, "content": content})

	    messages.append({"role": "user", "content": (user_message or "").strip()})
	    return messages


	def extract_text(payload: Any) -> str:
	    """Pull the reply text out of an endpoint response of unknown shape.

	    Shapes are tried in this order:

	    1. A bare string.
	    2. A list, searched element by element with these same rules, e.g.
	       ``[{"generated_text": "..."}]``.
	    3. A dict with ``choices[0].message.content`` or ``choices[0].text``.
	    4. A dict with one of the flat keys ``generated_text``, ``response``,
	       ``content``, ``output`` or ``answer``.

	    Args:
	        payload: Decoded response body; any type is accepted.

	    Returns:
	        The first non-blank text found, stripped of surrounding whitespace,
	        or ``""`` when nothing usable is present.
	    """
	    if isinstance(payload, str):
	        return payload.strip()

	    if isinstance(payload, list):
	        # List-wrapped results: return the first element that yields text.
	        for entry in payload:
	            found = extract_text(entry)
	            if found:
	                return found
	        return ""

	    if not isinstance(payload, dict):
	        return ""

	    choices = payload.get("choices")
	    if isinstance(choices, list) and choices:
	        first_choice = choices[0]
	        if isinstance(first_choice, dict):
	            # Chat-style responses nest the text under message.content.
	            message = first_choice.get("message")
	            if isinstance(message, dict):
	                content = message.get("content")
	                if isinstance(content, str) and content.strip():
	                    return content.strip()
	            # Completion-style responses put it directly under text.
	            text = first_choice.get("text")
	            if isinstance(text, str) and text.strip():
	                return text.strip()

	    for key in ("generated_text", "response", "content", "output", "answer"):
	        value = payload.get(key)
	        if isinstance(value, str) and value.strip():
	            return value.strip()

	    return ""


	def call_endpoint(messages: list[dict[str, str]]) -> str:
	    """POST ``messages`` to the configured endpoint and return its reply text.

	    Configuration is read from the environment on every call:
	    ``CONRAD_ENDPOINT_URL`` (required), ``HF_TOKEN`` or, failing that,
	    ``CONRAD_ENDPOINT_TOKEN`` (optional bearer token), and
	    ``CONRAD_MODEL_NAME`` (optional model identifier).

	    Args:
	        messages: Chat messages to send as the request's ``messages`` field.

	    Returns:
	        A setup hint when no endpoint URL is configured. Otherwise the text
	        extracted from the JSON response, the raw body when the response is
	        not JSON, or ``str()`` of the decoded JSON when no text is found.

	    Raises:
	        requests.RequestException: On connection problems, timeouts, or a
	            non-success HTTP status.
	    """
	    url = os.getenv("CONRAD_ENDPOINT_URL", "").strip()
	    if not url:
	        return (
	            "Conrad is not connected to a production endpoint yet. "
	            "Set `CONRAD_ENDPOINT_URL` and `HF_TOKEN` to enable live chat."
	        )

	    # HF_TOKEN wins; CONRAD_ENDPOINT_TOKEN is only consulted when it is blank.
	    token = os.getenv("HF_TOKEN", "").strip() or os.getenv("CONRAD_ENDPOINT_TOKEN", "").strip()
	    request_headers = {"Content-Type": "application/json"}
	    if token:
	        request_headers["Authorization"] = f"Bearer {token}"

	    # NOTE(review): the default model name differs from the 8B repository
	    # this file is published under - confirm it is intended.
	    body = {
	        "model": os.getenv("CONRAD_MODEL_NAME", "conrad-nit-120b"),
	        "max_tokens": 512,
	        "messages": messages,
	    }

	    reply = requests.post(url, json=body, headers=request_headers, timeout=120)
	    reply.raise_for_status()

	    try:
	        decoded = reply.json()
	    except ValueError:
	        # Not JSON: hand back the body as-is, or a placeholder when blank.
	        return reply.text.strip() or "The endpoint returned an empty response."

	    # Fall back to the decoded payload's string form when no text is found.
	    return extract_text(decoded) or str(decoded)


	def chat(user_message: str, history: list[dict[str, str]]) -> str:
	    """Produce the assistant's reply to ``user_message``.

	    Args:
	        user_message: Raw text from the user; ``None`` and whitespace-only
	            input count as empty.
	        history: Earlier turns, passed through to ``build_messages``.

	    Returns:
	        ``""`` for empty input, the endpoint's reply on success, or a
	        troubleshooting message when the request raises
	        ``requests.RequestException``.
	    """
	    cleaned = (user_message or "").strip()
	    if not cleaned:
	        return ""

	    outgoing = build_messages(cleaned, history)

	    try:
	        reply = call_endpoint(outgoing)
	    except requests.RequestException as exc:
	        # NOTE(review): the exception text is shown to the end user and may
	        # contain request details - confirm that is acceptable.
	        return (
	            "Conrad could not reach the configured endpoint. "
	            "Check `CONRAD_ENDPOINT_URL` and `HF_TOKEN`.\n\n"
	            f"Details: {exc}"
	        )
	    return reply


	# Gradio UI: a heading, the chat transcript, and a message box whose submit
	# event drives one chat round-trip.
	with gr.Blocks(title="Conrad") as demo:
	    gr.Markdown(
	        "# Conrad\n"
	        "Chat through the production endpoint when configured. "
	        "This Space avoids the raw checkpoint demo path."
	    )
	    chatbot = gr.Chatbot(type="messages", height=520)
	    message = gr.Textbox(
	        placeholder="Ask Conrad something...",
	        label="Message",
	        lines=3,
	    )

	    def submit(user_message: str, history: list[dict[str, str]]):
	        """Handle a submitted message and return the new UI state.

	        Args:
	            user_message: Current contents of the message box.
	            history: Current transcript as role/content dicts; may be
	                ``None`` or empty.

	        Returns:
	            A ``(textbox_value, transcript)`` pair: an empty string to clear
	            the box, and the transcript with the new user/assistant turns
	            appended. A blank submission leaves the transcript unchanged.
	        """
	        history = list(history or [])
	        # chat() returns "" for blank input; appending that pair would add
	        # two empty entries to the transcript, so skip the round-trip.
	        if not (user_message or "").strip():
	            return "", history
	        assistant_reply = chat(user_message, history)
	        updated_history = history + [
	            {"role": "user", "content": user_message},
	            {"role": "assistant", "content": assistant_reply},
	        ]
	        return "", updated_history

	    message.submit(submit, inputs=[message, chatbot], outputs=[message, chatbot])


	# Start the app only when this file is run as a script, not on import.
	if __name__ == "__main__":
	    demo.queue().launch()