Instructions to use mygitphase/guhan-105b with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use mygitphase/guhan-105b with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="mygitphase/guhan-105b", trust_remote_code=True)
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained("mygitphase/guhan-105b", trust_remote_code=True, dtype="auto")

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use mygitphase/guhan-105b with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "mygitphase/guhan-105b"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "mygitphase/guhan-105b",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker

docker model run hf.co/mygitphase/guhan-105b

SGLang

How to use mygitphase/guhan-105b with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "mygitphase/guhan-105b" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "mygitphase/guhan-105b",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "mygitphase/guhan-105b" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "mygitphase/guhan-105b",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Docker Model Runner
How to use mygitphase/guhan-105b with Docker Model Runner:
```
docker model run hf.co/mygitphase/guhan-105b
```

guhan-105b / hotpatch_vllm.py

mygitphase

Duplicate from sarvamai/sarvam-105b

4c108e3 14 days ago

raw

history blame contribute delete

3.36 kB

	#!/usr/bin/env python3
	from __future__ import annotations

	import sys
	import subprocess
	from pathlib import Path
	from urllib.request import urlopen, Request


	# Hugging Face "blob" (web viewer) URL of the Sarvam model implementation.
	# download_sarvam_py() rewrites "/blob/" to "/raw/" before fetching it.
	HF_BLOB_URL = "https://huggingface.co/sarvamai/sarvam-105b/blob/main/sarvam.py"

	# Source lines spliced into vLLM's _TEXT_GENERATION_MODELS dict. Each item is
	# a complete line (leading space and trailing newline included) of the form
	# `"<Arch>": ("sarvam", "<Arch>"),`.
	NEW_LINES = [
	    f' "{arch}": ("sarvam", "{arch}"),\n'
	    for arch in ("SarvamMoEForCausalLM", "SarvamMLAForCausalLM")
	]


	def run(cmd: list[str]) -> None:
	    """Echo *cmd* to stdout, then execute it.

	    The command is printed as ``+ <arguments joined by single spaces>`` and
	    then run with ``subprocess.check_call``.

	    Args:
	        cmd: Program and arguments, one list item per argument.

	    Raises:
	        subprocess.CalledProcessError: If the command exits with a non-zero
	            status.
	    """
	    rendered = " ".join(cmd)
	    print("+ " + rendered)
	    subprocess.check_call(cmd)


	def pip_install_vllm() -> None:
	    """Install the pinned vLLM release (``vllm==0.15.0``) with pip.

	    pip is launched as ``<sys.executable> -m pip`` through ``run``, so the
	    command is echoed first and a failing install raises
	    ``subprocess.CalledProcessError``.
	    """
	    pip_command = [sys.executable, "-m", "pip"]
	    run(pip_command + ["install", "vllm==0.15.0"])


	def find_vllm_dir() -> Path:
	    """Return the directory of the importable ``vllm`` package.

	    Returns:
	        The resolved parent directory of ``vllm.__file__``.

	    Raises:
	        ImportError: If ``vllm`` cannot be imported.
	    """
	    import importlib

	    # vLLM may have been pip-installed by a subprocess after this interpreter
	    # started; drop stale finder caches so the import below can see it.
	    importlib.invalidate_caches()

	    import vllm  # type: ignore

	    vllm_dir = Path(vllm.__file__).resolve().parent
	    print(f"Detected vLLM package dir: {vllm_dir}")
	    return vllm_dir


	def patch_text_generation_models(
	    registry_path: Path,
	    new_lines: list[str] | None = None,
	) -> None:
	    """Insert model entries into the ``_TEXT_GENERATION_MODELS`` dict literal.

	    The file is edited as text: the dict is located by its opening line, its
	    closing brace is found by counting ``{`` and ``}`` characters line by
	    line, and the missing entries are inserted just before the closing-brace
	    line. The brace count is purely textual, so braces inside strings or
	    comments are counted too.

	    The patch is idempotent per entry: an entry whose key (the text before
	    its first ``:``) already appears anywhere in the file is not inserted
	    again, so a partially patched file only receives what is missing.

	    Args:
	        registry_path: Path to vLLM's ``registry.py``.
	        new_lines: Complete source lines to insert, each ending in a newline.
	            Defaults to the module-level ``NEW_LINES``.

	    Raises:
	        FileNotFoundError: If ``registry_path`` does not exist.
	        RuntimeError: If the dict's opening line or its matching closing
	            brace cannot be found.
	    """
	    if not registry_path.exists():
	        raise FileNotFoundError(f"registry.py not found at: {registry_path}")

	    entries = NEW_LINES if new_lines is None else new_lines
	    lines = registry_path.read_text(encoding="utf-8").splitlines(keepends=True)

	    def entry_key(entry: str) -> str:
	        # The quoted dict key, e.g. '"SarvamMoEForCausalLM"'. Fall back to the
	        # whole entry so an empty key never matches every line.
	        return entry.split(":", 1)[0].strip() or entry.strip()

	    # Idempotency: only entries whose key is not in the file yet get inserted.
	    missing = [
	        entry
	        for entry in entries
	        if not any(entry_key(entry) in line for line in lines)
	    ]
	    if not missing:
	        print("registry.py already contains Sarvam entries. Skipping patch.")
	        return

	    # Find the start of the _TEXT_GENERATION_MODELS dict
	    start_idx = next(
	        (
	            i
	            for i, line in enumerate(lines)
	            if line.strip() == "_TEXT_GENERATION_MODELS = {"
	        ),
	        None,
	    )
	    if start_idx is None:
	        raise RuntimeError(
	            "Could not find '_TEXT_GENERATION_MODELS = {' in registry.py. "
	            "vLLM version/layout may differ."
	        )

	    # Find the matching closing brace for that dict using brace depth
	    depth = 0
	    end_idx = None
	    for j in range(start_idx, len(lines)):
	        line = lines[j]
	        depth += line.count("{") - line.count("}")
	        if j > start_idx and depth == 0:
	            end_idx = j
	            break

	    if end_idx is None:
	        raise RuntimeError(
	            "Failed to find end of _TEXT_GENERATION_MODELS dict (brace matching)."
	        )

	    # Insert the missing entries just before the closing brace line
	    lines[end_idx:end_idx] = missing

	    registry_path.write_text("".join(lines), encoding="utf-8")
	    print(f"Patched _TEXT_GENERATION_MODELS in: {registry_path}")


	def download_sarvam_py(
	    dst: Path,
	    url: str | None = None,
	    *,
	    timeout: float = 60.0,
	) -> None:
	    """Download ``sarvam.py`` and write it to *dst*.

	    Any ``/blob/`` segment in the URL is rewritten to ``/raw/`` so the file
	    contents are fetched rather than the HTML viewer page. Missing parent
	    directories of *dst* are created.

	    NOTE(security): the payload is executable Python and is written as-is,
	    with no checksum or signature verification. Only point this at a source
	    you trust.

	    Args:
	        dst: Destination file path.
	        url: Source URL. Defaults to the module-level ``HF_BLOB_URL``.
	        timeout: Timeout in seconds passed to ``urlopen``.

	    Raises:
	        urllib.error.URLError: If the request fails or times out.
	        RuntimeError: If the response body is empty.
	    """
	    source_url = HF_BLOB_URL if url is None else url
	    # Use /raw/ to download file contents, not HTML
	    raw_url = source_url.replace("/blob/", "/raw/")
	    print(f"Downloading sarvam.py from: {raw_url}")

	    req = Request(raw_url, headers={"User-Agent": "vllm-hotpatch-script"})
	    # Bound the wait so a stalled connection fails instead of hanging forever.
	    with urlopen(req, timeout=timeout) as resp:
	        data = resp.read()

	    # An empty module would be written silently and only fail later, at
	    # model-load time, with a much less obvious error.
	    if not data:
	        raise RuntimeError(f"Downloaded file is empty: {raw_url}")

	    dst.parent.mkdir(parents=True, exist_ok=True)
	    dst.write_bytes(data)
	    print(f"Wrote: {dst}")


	def main() -> None:
	    """Install vLLM, register the Sarvam architectures, and fetch sarvam.py.

	    Steps, in order: pip-install vLLM, locate the installed package, patch
	    ``model_executor/models/registry.py``, download ``sarvam.py`` into the
	    same directory, then print a summary of the two paths.
	    """
	    pip_install_vllm()

	    models_dir = find_vllm_dir() / "model_executor" / "models"
	    registry_path = models_dir / "registry.py"
	    sarvam_path = models_dir / "sarvam.py"

	    patch_text_generation_models(registry_path)
	    download_sarvam_py(sarvam_path)

	    summary = (
	        "\nDone.",
	        f"- Registry patched: {registry_path}",
	        f"- Sarvam module installed: {sarvam_path}",
	    )
	    print("\n".join(summary))


	# Run the hotpatch only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()