Instructions to use mygitphase/guhan-105b with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use mygitphase/guhan-105b with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="mygitphase/guhan-105b", trust_remote_code=True)
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```
```python
# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("mygitphase/guhan-105b", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use mygitphase/guhan-105b with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "mygitphase/guhan-105b"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "mygitphase/guhan-105b",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/mygitphase/guhan-105b
- SGLang
How to use mygitphase/guhan-105b with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "mygitphase/guhan-105b" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "mygitphase/guhan-105b",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "mygitphase/guhan-105b" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "mygitphase/guhan-105b",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use mygitphase/guhan-105b with Docker Model Runner:
docker model run hf.co/mygitphase/guhan-105b
| #!/usr/bin/env python3 | |
| from __future__ import annotations | |
| import sys | |
| import subprocess | |
| from pathlib import Path | |
| from urllib.request import urlopen, Request | |
# Browser ("blob") URL of the Sarvam model implementation on the Hugging Face
# Hub; download_sarvam_py() rewrites it to the /raw/ form before fetching.
HF_BLOB_URL = "https://huggingface.co/sarvamai/sarvam-105b/blob/main/sarvam.py"

# Source lines inserted into vLLM's _TEXT_GENERATION_MODELS dict literal: one
# entry per architecture name, each mapping to (module name, class name).
NEW_LINES = [
    f' "{arch}": ("sarvam", "{arch}"),\n'
    for arch in ("SarvamMoEForCausalLM", "SarvamMLAForCausalLM")
]
def run(cmd: list[str]) -> None:
    """Echo a command, then execute it, failing loudly on a non-zero exit.

    Args:
        cmd: Program and arguments as an argv list (no shell is involved).

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    import shlex

    # shlex.join quotes arguments that contain spaces or shell metacharacters,
    # so the echoed line is unambiguous and can be pasted back into a shell.
    # flush=True keeps the echo ahead of the child's own output when stdout is
    # a pipe or file (block-buffered) rather than a terminal.
    print(f"+ {shlex.join(cmd)}", flush=True)
    subprocess.check_call(cmd)
def pip_install_vllm() -> None:
    """Install the pinned vLLM release (0.15.0) with pip."""
    # Run pip as a module of the current interpreter so the package lands in
    # the same environment this script is executing in.
    pip_command = [sys.executable, "-m", "pip"]
    pip_command += ["install", "vllm==0.15.0"]
    run(pip_command)
def find_vllm_dir() -> Path:
    """Locate the directory of the installed ``vllm`` package.

    Returns:
        The resolved (absolute, symlink-free) directory that contains
        ``vllm/__init__.py``.

    Raises:
        ImportError: If vLLM cannot be imported.
    """
    import importlib

    # vLLM may have been installed by a pip subprocess after this interpreter
    # started; drop stale finder caches so the new package is discoverable.
    importlib.invalidate_caches()

    import vllm  # type: ignore

    vllm_dir = Path(vllm.__file__).resolve().parent
    print(f"Detected vLLM package dir: {vllm_dir}")
    return vllm_dir
| def patch_text_generation_models(registry_path: Path) -> None: | |
| if not registry_path.exists(): | |
| raise FileNotFoundError(f"registry.py not found at: {registry_path}") | |
| text = registry_path.read_text(encoding="utf-8") | |
| lines = text.splitlines(keepends=True) | |
| # Idempotency: if both keys already present, do nothing | |
| if ( | |
| any('"SarvamMoEForCausalLM"' in l for l in lines) | |
| and any('"SarvamMLAForCausalLM"' in l for l in lines) | |
| ): | |
| print("registry.py already contains Sarvam entries. Skipping patch.") | |
| return | |
| # Find the start of the _TEXT_GENERATION_MODELS dict | |
| start_idx = None | |
| for i, line in enumerate(lines): | |
| if line.strip() == "_TEXT_GENERATION_MODELS = {": | |
| start_idx = i | |
| break | |
| if start_idx is None: | |
| raise RuntimeError( | |
| "Could not find '_TEXT_GENERATION_MODELS = {' in registry.py. " | |
| "vLLM version/layout may differ." | |
| ) | |
| # Find the matching closing brace for that dict using brace depth | |
| depth = 0 | |
| end_idx = None | |
| for j in range(start_idx, len(lines)): | |
| depth += lines[j].count("{") | |
| depth -= lines[j].count("}") | |
| if j > start_idx and depth == 0: | |
| end_idx = j | |
| break | |
| if end_idx is None: | |
| raise RuntimeError("Failed to find end of _TEXT_GENERATION_MODELS dict (brace matching).") | |
| # Insert new entries just before the closing brace line | |
| insert_at = end_idx | |
| lines[insert_at:insert_at] = NEW_LINES | |
| registry_path.write_text("".join(lines), encoding="utf-8") | |
| print(f"Patched _TEXT_GENERATION_MODELS in: {registry_path}") | |
def download_sarvam_py(dst: Path, timeout: float = 60.0) -> None:
    """Download ``sarvam.py`` from the Hugging Face Hub and write it to *dst*.

    Args:
        dst: Destination file path; missing parent directories are created.
        timeout: Seconds to wait on blocking network operations before
            giving up.

    Raises:
        OSError: On network failure, HTTP error, or timeout
            (``urllib.error.URLError`` is a subclass).
        RuntimeError: If the server returns an empty body.
    """
    # Use /raw/ to download file contents, not HTML
    raw_url = HF_BLOB_URL.replace("/blob/", "/raw/")
    print(f"Downloading sarvam.py from: {raw_url}")
    req = Request(raw_url, headers={"User-Agent": "vllm-hotpatch-script"})
    # Without a timeout, urlopen can block indefinitely on a stalled connection.
    with urlopen(req, timeout=timeout) as resp:
        data = resp.read()
    if not data:
        raise RuntimeError(f"Downloaded file is empty: {raw_url}")

    dst.parent.mkdir(parents=True, exist_ok=True)
    # Write to a sibling temp file and rename it into place, so an interrupted
    # write never leaves a truncated module at the destination.
    tmp_path = dst.with_name(dst.name + ".tmp")
    tmp_path.write_bytes(data)
    tmp_path.replace(dst)
    print(f"Wrote: {dst}")
def main() -> None:
    """Install vLLM, add the Sarvam model module, and register it with vLLM."""
    pip_install_vllm()
    vllm_dir = find_vllm_dir()

    models_dir = vllm_dir / "model_executor" / "models"
    registry_path = models_dir / "registry.py"
    sarvam_path = models_dir / "sarvam.py"

    # Fetch the module before editing registry.py: if the download fails, the
    # registry is left untouched instead of pointing at a module that is absent.
    download_sarvam_py(sarvam_path)
    patch_text_generation_models(registry_path)

    print("\nDone.")
    print(f"- Registry patched: {registry_path}")
    print(f"- Sarvam module installed: {sarvam_path}")


if __name__ == "__main__":
    main()