Instructions to use tangledgroup/tangled-alpha-0.11-core with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use tangledgroup/tangled-alpha-0.11-core with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="tangledgroup/tangled-alpha-0.11-core")
```

```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("tangledgroup/tangled-alpha-0.11-core", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use tangledgroup/tangled-alpha-0.11-core with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "tangledgroup/tangled-alpha-0.11-core"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "tangledgroup/tangled-alpha-0.11-core",
        "prompt": "Once upon a time,",
        "max_tokens": 512,
        "temperature": 0.5
    }'
```
Use Docker
docker model run hf.co/tangledgroup/tangled-alpha-0.11-core
- SGLang
How to use tangledgroup/tangled-alpha-0.11-core with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "tangledgroup/tangled-alpha-0.11-core" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "tangledgroup/tangled-alpha-0.11-core",
        "prompt": "Once upon a time,",
        "max_tokens": 512,
        "temperature": 0.5
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "tangledgroup/tangled-alpha-0.11-core" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "tangledgroup/tangled-alpha-0.11-core",
        "prompt": "Once upon a time,",
        "max_tokens": 512,
        "temperature": 0.5
    }'
```
- Docker Model Runner
How to use tangledgroup/tangled-alpha-0.11-core with Docker Model Runner:
docker model run hf.co/tangledgroup/tangled-alpha-0.11-core
import os
import shutil

from tokenizers import Tokenizer, normalizers, pre_tokenizers, processors, decoders
from tokenizers.models import BPE
from tokenizers.trainers import BpeTrainer
from transformers import PreTrainedTokenizerFast

from core_base_datasets import core_base_datasets
from core_instruct_datasets import core_instruct_datasets
from utils import batch_dataset_iterator
# Output directory for every tokenizer artifact this script writes.
# The path is relative, so it resolves against the current working directory.
tokenizer_path = '../tokenizer'

# Wipe any previous output so stale files from an earlier run cannot survive,
# then recreate the (now empty) directory.
if os.path.exists(tokenizer_path):
    shutil.rmtree(tokenizer_path)

os.makedirs(tokenizer_path, exist_ok=True)
#
# special_tokens
#
# The four named tokens below are also handed to PreTrainedTokenizerFast
# further down in this script as bos/eos/pad/unk.
bos_token = '<|endoftext|>'
eos_token = '<|im_end|>'
pad_token = '<|pad|>'
unk_token = '<|unk|>'

# Order matters: this list is passed to BpeTrainer(special_tokens=...), so the
# position of each entry here is expected to determine its id in the trained
# vocabulary (NOTE(review): confirm against the tokenizers trainer docs).
special_tokens = [
    bos_token,
    eos_token,
    pad_token,
    unk_token,
    # chat markup used by the chat template defined later in this script
    '<|im_start|>',
    '<|im_sep|>',
    # role names are registered as special tokens as well
    'system',
    'user',
    'assistant',
    # tool / tool-call / reasoning tags
    '<tools>',
    '</tools>',
    '<tool>',
    '</tool>',
    '<tool_call>',
    '</tool_call>',
    '<tool_response>',
    '</tool_response>',
    '<think>',
    '</think>',
]

# Pad the list with numbered placeholders until it holds exactly 64 entries.
# range() is evaluated once, before the first append, so growing the list
# inside the loop does not change the number of iterations.
for i in range(64 - len(special_tokens)):
    special_tokens.append(f'<|reserved_{i}|>')
#
# BPE Tokenizer
#
# Untrained BPE model with no unknown token and byte fallback enabled.
bpe = BPE(unk_token=None, byte_fallback=True)
tokenizer = Tokenizer(bpe)

# normalizer
# Explicitly no normalization step.
tokenizer.normalizer = None

# pre-tokenizer
tokenizer.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=True)

# post-processor
# NOTE(review): add_prefix_space is False for the pre-tokenizer above but True
# for the post-processor and decoder below — confirm this asymmetry is intended.
tokenizer.post_processor = processors.ByteLevel(add_prefix_space=True, trim_offsets=False, use_regex=True)

# decoder
tokenizer.decoder = decoders.ByteLevel(add_prefix_space=True, trim_offsets=True, use_regex=True)
#
# BPE Trainer
#
trainer = BpeTrainer(
    vocab_size=131072,  # 128 * 1024
    min_frequency=3,
    special_tokens=special_tokens,
    max_token_length=16,
)

# Train on the base and instruct dataset lists combined.
tokenizer_datasets = core_base_datasets + core_instruct_datasets

# One batch_dataset_iterator(...) per dataset entry; the outer generator is
# lazy, so each per-dataset iterator is only created when training reaches it.
tokenizer.train_from_iterator(
    (batch_dataset_iterator(n) for n in tokenizer_datasets),
    trainer,
)

# Persist the full tokenizer as a single JSON file, then let the trained model
# write its own files into the same directory.
tokenizer.save(os.path.join(tokenizer_path, 'tokenizer.json'))
tokenizer.model.save(tokenizer_path)
#
# PreTrainedTokenizerFast
#
# Jinja chat template. Each message is rendered as
#   <|im_start|>{role}<|im_sep|>{content}<|im_end|>
# and, when add_generation_prompt is set, the prompt ends with an opened
# assistant turn: <|im_start|>assistant<|im_sep|>
CHAT_TEMPLATE = (
    "{% for message in messages %}"
    "{{'<|im_start|>' + message['role'] + '<|im_sep|>' + message['content'] + '<|im_end|>'}}"
    "{% endfor %}"

    "{% if add_generation_prompt %}"
    "{{ '<|im_start|>assistant<|im_sep|>' }}"
    "{% endif %}"
)

# Wrap the trained tokenizer for use with transformers, attaching the chat
# template and the named special tokens.
fast_tokenizer = PreTrainedTokenizerFast(
    tokenizer_object=tokenizer,
    chat_template=CHAT_TEMPLATE,
    bos_token=bos_token,
    eos_token=eos_token,
    pad_token=pad_token,
    unk_token=unk_token,
    clean_up_tokenization_spaces=False,
)

# Write the transformers-format tokenizer files into the output directory.
fast_tokenizer.save_pretrained(tokenizer_path)