Reinforcement Learning
Transformers
English
post-training
distillation
agentic-coding
composer-2.5
cursor
kimi-k2
grpo
dapo
diloco
openenv
trl
verl
research
methodology
Instructions to use Codeseys/composer-replication-framework with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Codeseys/composer-replication-framework with Transformers:
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("Codeseys/composer-replication-framework", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
| % Composer 2.5 Replication Framework — BibTeX citation file | |
| % https://huggingface.co/Codeseys/composer-replication-framework | |
| % | |
| % Citation order: this work first, then the upstream sources you'd typically | |
| % cite alongside it (Cursor blog, OPSD, SDPO, Kimi K2.5). | |
| % This work: the framework's own citation entry (a repository page, hence @misc). | |
| % Proper nouns in the title are brace-protected so sentence-casing styles keep their capitals. | |
| @misc{composer-replication-framework-2026, | |
|   author       = {Codeseys}, | |
|   title        = {{Composer} 2.5 Replication Framework: Methodology and Integration Architecture for Open Replication of {Cursor's} Agentic Coding Recipe}, | |
|   year         = {2026}, | |
|   publisher    = {Hugging Face}, | |
|   howpublished = {\url{https://huggingface.co/Codeseys/composer-replication-framework}}, | |
|   note         = {Pre-experimental v0.0 release. Methodology, integration architecture across TRL/VeRL/OpenEnv, and economic-feasibility result for novel multi-teacher trace-replay channel. Empirical training validation in follow-up paper.} | |
| } | |
| % Cursor's announcement post for Composer 2.5. It is a blog post with no journal, | |
| % so it is typed @misc rather than @article (which requires a journal field). | |
| @misc{cursor2026composer25, | |
|   author = {{Cursor Team}}, | |
|   title  = {Introducing {Composer} 2.5}, | |
|   year   = {2026}, | |
|   url    = {https://cursor.com/blog/composer-2-5}, | |
|   note   = {Cursor blog. Cited in Section 2 of the framework's methodology paper.} | |
| } | |
| % OPSD paper (arXiv preprint). The arXiv identifier lives in eprint/archiveprefix | |
| % instead of being embedded in a journal field; title casing is left to the style. | |
| @misc{zhao2026opsd, | |
|   author        = {Zhao, Siyan and Xie, Zhihui and Liu, Mengchen and Huang, Jing and Pang, Guan and Chen, Feiyu and Grover, Aditya}, | |
|   title         = {Self-Distilled Reasoner: On-Policy Self-Distillation for Large Language Models}, | |
|   year          = {2026}, | |
|   eprint        = {2601.18734}, | |
|   archiveprefix = {arXiv}, | |
|   url           = {https://arxiv.org/abs/2601.18734}, | |
|   note          = {OPSD. MIT-licensed reference implementation at \url{https://github.com/siyan-zhao/OPSD}; the framework lifts \texttt{generalized\_jsd\_loss} from this codebase.} | |
| } | |
| % SDPO paper (arXiv preprint). The arXiv identifier lives in eprint/archiveprefix | |
| % instead of being embedded in a journal field; title casing is left to the style. | |
| % NOTE(review): "Kleine Buening" is entered as a compound surname -- confirm against the paper's author list. | |
| @misc{hubotter2026sdpo, | |
|   author        = {H{\"u}botter, Jonas and L{\"u}beck, Frederike and Behric, Lejs and Baumann, Anton and Bagatella, Marco and Marta, Daniel and Hakimi, Ido and Shenfeld, Idan and Kleine Buening, Thomas and Guestrin, Carlos and Krause, Andreas}, | |
|   title         = {Reinforcement Learning via Self-Distillation}, | |
|   year          = {2026}, | |
|   eprint        = {2601.20802}, | |
|   archiveprefix = {arXiv}, | |
|   url           = {https://arxiv.org/abs/2601.20802}, | |
|   note          = {SDPO. ICLR 2026 Scaling Post-training Workshop. Mathematically equivalent to Cursor's ``Targeted RL with Textual Feedback.''} | |
| } | |
| % Base-model reference. It is a model release with no journal, so it is typed @misc | |
| % rather than @article (which requires a journal field). | |
| % NOTE(review): the title names Kimi K2.5 but the url points at the Kimi-K2-Thinking | |
| % repository -- confirm which model page is intended and update the url if needed. | |
| @misc{moonshot2026kimi-k25, | |
|   author = {{Moonshot AI}}, | |
|   title  = {{Kimi} {K2.5}}, | |
|   year   = {2026}, | |
|   url    = {https://huggingface.co/moonshotai/Kimi-K2-Thinking}, | |
|   note   = {Open-source 1T-total / 32B-active MoE base model used by Cursor for Composer 2 / 2.5.} | |
| } | |