seanpoyner's picture
v2: 100% native tool-call 1.5B + q4_k_m GGUF + Modelfile
dbb3c33 verified
Raw
History Blame Contribute Delete
2.65 kB
# Ollama Modelfile for the smolcode fine-tuned 1.5B tool-caller.
#
# Build (on HAL, after pulling the merged model out of the Modal volume):
# modal volume get smolcode-ft out/merged ./smolcode-merged
# ollama create smolcode-coder-1.5b:tools -f finetune/Modelfile
#
# The tag `smolcode-coder-1.5b:tools` matches the `hal-smol` preset's tier 0
# (engine/config.py). FROM below loads a pre-quantized q4_k_m GGUF, not the
# safetensors dir, so the merged model must first be converted/quantized to
# ./smolcode-1.5b-q4_k_m.gguf (next to this Modelfile) before `ollama create`.
# The TEMPLATE is Qwen2.5's tool-calling chat format — the SAME format the
# model was trained/eval'd on (finetune/qwen_template.py) — so served prompts match.
# Base weights: a GGUF file (q4_k_m quantization, per the filename). The path is
# relative — presumably resolved against this Modelfile's directory; confirm
# against the Ollama Modelfile docs if `ollama create` cannot find it.
FROM ./smolcode-1.5b-q4_k_m.gguf
# Qwen2.5 tool-calling chat template (turns delimited by <|im_start|>/<|im_end|>).
# Verify with the curl test in serve_and_bench.md.
#
# With .Messages set:
#   - A system turn is emitted when .System or .Tools is present; each tool is
#     listed as one JSON line inside <tools></tools>.
#   - user      -> a user turn.
#   - assistant -> .Content if non-empty, otherwise .ToolCalls rendered inside a
#     single <tool_call></tool_call> block (one JSON object per line).
#     NOTE(review): a turn with BOTH content and tool calls renders only the
#     content, and multiple calls share one <tool_call> wrapper — confirm both
#     match finetune/qwen_template.py.
#   - tool      -> a user turn wrapping the result in <tool_response>.
#   - A final assistant message is left without <|im_end|> (presumably so
#     generation continues it); otherwise a fresh assistant turn is opened.
# Without .Messages: the single-shot .System/.Prompt path.
#
# Everything between the triple quotes is prompt text: whitespace and newlines
# are significant, so do not reindent or add comments inside the TEMPLATE.
TEMPLATE """{{- if .Messages }}
{{- if or .System .Tools }}<|im_start|>system
{{- if .System }}
{{ .System }}
{{- end }}
{{- if .Tools }}
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{{- range .Tools }}
{"type": "function", "function": {{ .Function }}}
{{- end }}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call>
{{- end }}<|im_end|>
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- if eq .Role "user" }}<|im_start|>user
{{ .Content }}<|im_end|>
{{ else if eq .Role "assistant" }}<|im_start|>assistant
{{ if .Content }}{{ .Content }}
{{- else if .ToolCalls }}<tool_call>
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{ end }}</tool_call>
{{- end }}{{ if not $last }}<|im_end|>
{{ end }}
{{- else if eq .Role "tool" }}<|im_start|>user
<tool_response>
{{ .Content }}
</tool_response><|im_end|>
{{ end }}
{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
{{ end }}
{{- end }}
{{- else }}
{{- if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ end }}{{ .Response }}{{ if .Response }}<|im_end|>{{ end }}"""
# Temperature 0 — presumably chosen so tool-call output is as deterministic as
# possible (TODO confirm this matches the eval settings).
PARAMETER temperature 0
# CRITICAL: repeat_penalty must be 1.0. The tool system prompt literally contains
# the <tool_call> token, so Ollama's default 1.1 penalty suppresses the model from
# emitting it — the exact bug that made eval show 0% native tool calls.
PARAMETER repeat_penalty 1.0
# Stop generation at either turn delimiter used by the TEMPLATE above, so output
# ends with the assistant turn instead of running into a new one.
PARAMETER stop "<|im_end|>"
PARAMETER stop "<|im_start|>"