# Ollama Modelfile for the smolcode fine-tuned 1.5B tool-caller.
#
# Build (on HAL, after pulling the merged model out of the Modal volume):
#   modal volume get smolcode-ft out/merged ./smolcode-merged
#   ollama create smolcode-coder-1.5b:tools -f finetune/Modelfile
#
# The tag `smolcode-coder-1.5b:tools` matches the `hal-smol` preset's tier 0
# (engine/config.py). Ollama imports the safetensors dir directly (no manual GGUF
# step). The TEMPLATE is Qwen2.5's tool-calling chat format — the SAME format the
# model was trained/eval'd on (finetune/qwen_template.py) — so served prompts match.
#
# NOTE(review): the paragraph above describes importing the merged safetensors
# directory (./smolcode-merged), but the FROM line below points at a pre-quantized
# GGUF file. Confirm which source is intended and that the referenced path exists
# where `ollama create` resolves it; update either FROM or this header to agree.
FROM ./smolcode-1.5b-q4_k_m.gguf

# Qwen2.5 tool-calling template (renders <tools> in the system turn and parses
# <tool_call> from the assistant). Verify with the curl test in serve_and_bench.md.
#
# Layout of the template:
#   * Chat path (.Messages set): optional system turn carrying the system prompt
#     and the <tools> catalogue, then one ChatML turn per message. Tool results
#     are replayed as a user turn wrapped in <tool_response>. The final assistant
#     turn is left open (no <|im_end|>) so the model continues it.
#   * Legacy path (.Prompt/.System only): plain system/user/assistant ChatML turns.
# Newlines and `{{-`/`-}}` trim markers inside the template are significant: they
# decide the exact bytes of the prompt. Do not re-indent or add comments inside
# the triple-quoted string — anything there is sent to the model verbatim.
# NOTE(review): the tag names below follow the published Qwen2.5 tool-calling
# format; confirm they are byte-identical to finetune/qwen_template.py.
TEMPLATE """{{- if .Messages }}
{{- if or .System .Tools }}<|im_start|>system
{{- if .System }}
{{ .System }}
{{- end }}
{{- if .Tools }}

# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{{- range .Tools }}
{"type": "function", "function": {{ .Function }}}
{{- end }}
</tools>

For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call>
{{- end }}<|im_end|>
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- if eq .Role "user" }}<|im_start|>user
{{ .Content }}<|im_end|>
{{ else if eq .Role "assistant" }}<|im_start|>assistant
{{ if .Content }}{{ .Content }}
{{- else if .ToolCalls }}<tool_call>
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{ end }}</tool_call>
{{- end }}{{ if not $last }}<|im_end|>
{{ end }}
{{- else if eq .Role "tool" }}<|im_start|>user
<tool_response>
{{ .Content }}
</tool_response><|im_end|>
{{ end }}
{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
{{ end }}
{{- end }}
{{- else }}
{{- if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ end }}{{ .Response }}{{ if .Response }}<|im_end|>{{ end }}"""

# Temperature 0 keeps decoding as deterministic as the runtime allows, so the
# emitted tool-call JSON is reproducible between runs.
PARAMETER temperature 0

# CRITICAL: repeat_penalty must be 1.0. The tool system prompt literally contains
# the <tool_call> token, so Ollama's default 1.1 penalty suppresses the model from
# emitting it — the exact bug that made eval show 0% native tool calls.
PARAMETER repeat_penalty 1.0

# Stop at the ChatML turn delimiters used by the template above so generation
# ends with the assistant turn instead of running on into a new turn.
PARAMETER stop "<|im_end|>"
PARAMETER stop "<|im_start|>"