Text Generation
Transformers
Safetensors
English
gpt2
deepconrad
conrad
conrad-nit
reasoning
instruction-following
enterprise
ai-systems
long-context
artificial-intelligence
lora
tiny-gpt
text-generation-inference
Instructions to use deepconradlabs/conrad-nit-5.1-8B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use deepconradlabs/conrad-nit-5.1-8B with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="deepconradlabs/conrad-nit-5.1-8B")

# Load model directly
from transformers import AutoTokenizer, AutoModelForMultimodalLM

tokenizer = AutoTokenizer.from_pretrained("deepconradlabs/conrad-nit-5.1-8B")
model = AutoModelForMultimodalLM.from_pretrained("deepconradlabs/conrad-nit-5.1-8B")
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use deepconradlabs/conrad-nit-5.1-8B with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "deepconradlabs/conrad-nit-5.1-8B"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{ "model": "deepconradlabs/conrad-nit-5.1-8B", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'
Use Docker
docker model run hf.co/deepconradlabs/conrad-nit-5.1-8B
- SGLang
How to use deepconradlabs/conrad-nit-5.1-8B with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "deepconradlabs/conrad-nit-5.1-8B" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{ "model": "deepconradlabs/conrad-nit-5.1-8B", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
  --model-path "deepconradlabs/conrad-nit-5.1-8B" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{ "model": "deepconradlabs/conrad-nit-5.1-8B", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'
- Docker Model Runner
How to use deepconradlabs/conrad-nit-5.1-8B with Docker Model Runner:
docker model run hf.co/deepconradlabs/conrad-nit-5.1-8B
| import os | |
| from typing import Any | |
| import gradio as gr | |
| import requests | |
# System instruction placed at the start of every conversation sent to the
# endpoint. Sentences are kept one per entry and joined with single spaces.
SYSTEM_PROMPT = " ".join(
    [
        "You are Conrad, a concise assistant built by Deep Conrad.",
        "Respond naturally, clearly, and professionally.",
        "Stay concise by default, and expand only when the user asks for depth.",
        "If you are unsure, say so briefly and avoid inventing details.",
        "Use official links and provided context when relevant.",
        "Never expose internal errors or infrastructure details.",
    ]
)
def build_messages(user_message: str, history: list[dict[str, str]]) -> list[dict[str, str]]:
    """Assemble the message list that is sent to the endpoint.

    The result always begins with the system prompt and ends with the new
    user turn (whitespace-trimmed). Earlier turns from ``history`` are kept
    only when their role is ``user`` or ``assistant`` (case-insensitive) and
    their content is non-empty after trimming; empty entries are skipped.
    """
    allowed_roles = ("user", "assistant")
    transcript: list[dict[str, str]] = [{"role": "system", "content": SYSTEM_PROMPT}]

    for turn in history:
        if not turn:
            continue
        speaker = (turn.get("role") or "").strip().lower()
        body = (turn.get("content") or "").strip()
        if speaker not in allowed_roles or not body:
            continue
        transcript.append({"role": speaker, "content": body})

    transcript.append({"role": "user", "content": user_message.strip()})
    return transcript
def extract_text(payload: Any) -> str:
    """Pull the generated text out of a decoded endpoint response.

    Supported shapes, checked in this order:

    * a plain string, returned trimmed;
    * a list, such as the ``[{"generated_text": ...}]`` form, where the
      first string or dict entry that yields non-empty text wins;
    * a dict with ``choices[0].message.content`` or ``choices[0].text``;
    * a dict with one of the flat keys ``generated_text``, ``response``,
      ``content``, ``output`` or ``answer``.

    Returns:
        The trimmed text, or ``""`` when nothing usable is found.
    """
    if isinstance(payload, str):
        return payload.strip()

    if isinstance(payload, list):
        # Some text-generation backends wrap the result in a list. Only
        # str/dict entries are inspected, so nested lists cannot recurse
        # without bound.
        for entry in payload:
            if isinstance(entry, (str, dict)):
                found = extract_text(entry)
                if found:
                    return found
        return ""

    if not isinstance(payload, dict):
        return ""

    choices = payload.get("choices")
    if isinstance(choices, list) and choices:
        first_choice = choices[0]
        if isinstance(first_choice, dict):
            # Chat-completion shape takes priority over the completion shape.
            message = first_choice.get("message")
            if isinstance(message, dict):
                content = message.get("content")
                if isinstance(content, str) and content.strip():
                    return content.strip()
            text = first_choice.get("text")
            if isinstance(text, str) and text.strip():
                return text.strip()

    for key in ("generated_text", "response", "content", "output", "answer"):
        value = payload.get(key)
        if isinstance(value, str) and value.strip():
            return value.strip()

    return ""
def call_endpoint(messages: list[dict[str, str]]) -> str:
    """POST the conversation to the configured endpoint and return its reply.

    Configuration is read from the environment on every call:

    * ``CONRAD_ENDPOINT_URL``: target URL; when blank a setup hint is
      returned and no request is made.
    * ``HF_TOKEN``, falling back to ``CONRAD_ENDPOINT_TOKEN``: optional
      bearer token.
    * ``CONRAD_MODEL_NAME``: model name sent in the request body, defaulting
      to ``conrad-nit-120b``.

    Returns:
        The text found by ``extract_text``; the raw response body when it is
        not JSON (or a fixed notice if that body is blank); otherwise the
        ``str()`` of the decoded JSON.

    Exceptions raised by ``requests.post`` and ``raise_for_status`` are not
    handled here and propagate to the caller.
    """
    url = os.getenv("CONRAD_ENDPOINT_URL", "").strip()
    if not url:
        return (
            "Conrad is not connected to a production endpoint yet. "
            "Set `CONRAD_ENDPOINT_URL` and `HF_TOKEN` to enable live chat."
        )

    token = os.getenv("HF_TOKEN", "").strip()
    if not token:
        token = os.getenv("CONRAD_ENDPOINT_TOKEN", "").strip()

    request_headers = {"Content-Type": "application/json"}
    if token:
        request_headers["Authorization"] = f"Bearer {token}"

    body = {
        "model": os.getenv("CONRAD_MODEL_NAME", "conrad-nit-120b"),
        "max_tokens": 512,
        "messages": messages,
    }

    reply = requests.post(url, json=body, headers=request_headers, timeout=120)
    reply.raise_for_status()

    try:
        decoded = reply.json()
    except ValueError:
        # Body is not JSON: hand back the raw text instead.
        raw = reply.text.strip()
        return raw if raw else "The endpoint returned an empty response."

    extracted = extract_text(decoded)
    return extracted if extracted else str(decoded)
def chat(user_message: str, history: list[dict[str, str]]) -> str:
    """Produce the assistant reply for one user turn.

    A blank (or ``None``) message yields ``""`` without contacting the
    endpoint. A ``requests.RequestException`` raised while calling the
    endpoint is turned into a readable message that includes the exception
    text.
    """
    prompt = (user_message or "").strip()
    if prompt == "":
        return ""

    transcript = build_messages(prompt, history)
    try:
        reply = call_endpoint(transcript)
    except requests.RequestException as exc:
        reply = (
            "Conrad could not reach the configured endpoint. "
            "Check `CONRAD_ENDPOINT_URL` and `HF_TOKEN`.\n\n"
            f"Details: {exc}"
        )
    return reply
# Gradio UI: a single chat transcript plus a textbox whose submit event drives
# one request/response round trip through `chat`.
with gr.Blocks(title="Conrad") as demo:
    gr.Markdown(
        "# Conrad\n"
        "Chat through the production endpoint when configured. "
        "This Space avoids the raw checkpoint demo path."
    )
    chatbot = gr.Chatbot(type="messages", height=520)
    message = gr.Textbox(
        placeholder="Ask Conrad something...",
        label="Message",
        lines=3,
    )

    def submit(user_message: str, history: list[dict[str, str]]):
        """Handle a textbox submit.

        Returns a pair ``(textbox_value, chat_history)``: the textbox is
        always cleared, and the history gains the user turn and the
        assistant reply. A blank submission leaves the history unchanged
        rather than adding two empty turns.
        """
        history = list(history or [])
        if not (user_message or "").strip():
            # Nothing to send: `chat` would return "" and the transcript
            # would otherwise gain an empty user/assistant pair.
            return "", history
        assistant_reply = chat(user_message, history)
        updated_history = history + [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": assistant_reply},
        ]
        return "", updated_history

    message.submit(submit, inputs=[message, chatbot], outputs=[message, chatbot])


if __name__ == "__main__":
    demo.queue().launch()