Sentence Similarity
sentence-transformers
baa-embedding-reranker
retrieval
embeddings
reranker
cross-encoder
rag
Instructions to use baa-ai/Merino-Pro-4bit with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- sentence-transformers
How to use baa-ai/Merino-Pro-4bit with sentence-transformers:
```python
from sentence_transformers import CrossEncoder

model = CrossEncoder("baa-ai/Merino-Pro-4bit")

query = "Which planet is known as the Red Planet?"
passages = [
    "Venus is often called Earth's twin because of its similar size and proximity.",
    "Mars, known for its reddish appearance, is often referred to as the Red Planet.",
    "Jupiter, the largest planet in our solar system, has a prominent red spot.",
    "Saturn, famous for its rings, is sometimes mistaken for the Red Planet."
]

scores = model.predict([(query, passage) for passage in passages])
print(scores)
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "quant": "q4", | |
| "group": 64, | |
| "scheme": "group-wise affine RTN (4-bit), fp16 compute", | |
| "protected_fp16": [ | |
| "classifier", | |
| "pooler", | |
| "embeddings", | |
| "LayerNorm" | |
| ], | |
| "emb_q4": [ | |
| "encoder.layer.0.attention.output.dense.weight", | |
| "encoder.layer.0.attention.self.key.weight", | |
| "encoder.layer.0.attention.self.query.weight", | |
| "encoder.layer.0.attention.self.value.weight", | |
| "encoder.layer.0.intermediate.dense.weight", | |
| "encoder.layer.0.output.dense.weight", | |
| "encoder.layer.1.attention.output.dense.weight", | |
| "encoder.layer.1.attention.self.key.weight", | |
| "encoder.layer.1.attention.self.query.weight", | |
| "encoder.layer.1.attention.self.value.weight", | |
| "encoder.layer.1.intermediate.dense.weight", | |
| "encoder.layer.1.output.dense.weight", | |
| "encoder.layer.10.attention.output.dense.weight", | |
| "encoder.layer.10.attention.self.key.weight", | |
| "encoder.layer.10.attention.self.query.weight", | |
| "encoder.layer.10.attention.self.value.weight", | |
| "encoder.layer.10.intermediate.dense.weight", | |
| "encoder.layer.10.output.dense.weight", | |
| "encoder.layer.11.attention.output.dense.weight", | |
| "encoder.layer.11.attention.self.key.weight", | |
| "encoder.layer.11.attention.self.query.weight", | |
| "encoder.layer.11.attention.self.value.weight", | |
| "encoder.layer.11.intermediate.dense.weight", | |
| "encoder.layer.11.output.dense.weight", | |
| "encoder.layer.12.attention.output.dense.weight", | |
| "encoder.layer.12.attention.self.key.weight", | |
| "encoder.layer.12.attention.self.query.weight", | |
| "encoder.layer.12.attention.self.value.weight", | |
| "encoder.layer.12.intermediate.dense.weight", | |
| "encoder.layer.12.output.dense.weight", | |
| "encoder.layer.13.attention.output.dense.weight", | |
| "encoder.layer.13.attention.self.key.weight", | |
| "encoder.layer.13.attention.self.query.weight", | |
| "encoder.layer.13.attention.self.value.weight", | |
| "encoder.layer.13.intermediate.dense.weight", | |
| "encoder.layer.13.output.dense.weight", | |
| "encoder.layer.14.attention.output.dense.weight", | |
| "encoder.layer.14.attention.self.key.weight", | |
| "encoder.layer.14.attention.self.query.weight", | |
| "encoder.layer.14.attention.self.value.weight", | |
| "encoder.layer.14.intermediate.dense.weight", | |
| "encoder.layer.14.output.dense.weight", | |
| "encoder.layer.15.attention.output.dense.weight", | |
| "encoder.layer.15.attention.self.key.weight", | |
| "encoder.layer.15.attention.self.query.weight", | |
| "encoder.layer.15.attention.self.value.weight", | |
| "encoder.layer.15.intermediate.dense.weight", | |
| "encoder.layer.15.output.dense.weight", | |
| "encoder.layer.16.attention.output.dense.weight", | |
| "encoder.layer.16.attention.self.key.weight", | |
| "encoder.layer.16.attention.self.query.weight", | |
| "encoder.layer.16.attention.self.value.weight", | |
| "encoder.layer.16.intermediate.dense.weight", | |
| "encoder.layer.16.output.dense.weight", | |
| "encoder.layer.17.attention.output.dense.weight", | |
| "encoder.layer.17.attention.self.key.weight", | |
| "encoder.layer.17.attention.self.query.weight", | |
| "encoder.layer.17.attention.self.value.weight", | |
| "encoder.layer.17.intermediate.dense.weight", | |
| "encoder.layer.17.output.dense.weight", | |
| "encoder.layer.18.attention.output.dense.weight", | |
| "encoder.layer.18.attention.self.key.weight", | |
| "encoder.layer.18.attention.self.query.weight", | |
| "encoder.layer.18.attention.self.value.weight", | |
| "encoder.layer.18.intermediate.dense.weight", | |
| "encoder.layer.18.output.dense.weight", | |
| "encoder.layer.19.attention.output.dense.weight", | |
| "encoder.layer.19.attention.self.key.weight", | |
| "encoder.layer.19.attention.self.query.weight", | |
| "encoder.layer.19.attention.self.value.weight", | |
| "encoder.layer.19.intermediate.dense.weight", | |
| "encoder.layer.19.output.dense.weight", | |
| "encoder.layer.2.attention.output.dense.weight", | |
| "encoder.layer.2.attention.self.key.weight", | |
| "encoder.layer.2.attention.self.query.weight", | |
| "encoder.layer.2.attention.self.value.weight", | |
| "encoder.layer.2.intermediate.dense.weight", | |
| "encoder.layer.2.output.dense.weight", | |
| "encoder.layer.20.attention.output.dense.weight", | |
| "encoder.layer.20.attention.self.key.weight", | |
| "encoder.layer.20.attention.self.query.weight", | |
| "encoder.layer.20.attention.self.value.weight", | |
| "encoder.layer.20.intermediate.dense.weight", | |
| "encoder.layer.20.output.dense.weight", | |
| "encoder.layer.21.attention.output.dense.weight", | |
| "encoder.layer.21.attention.self.key.weight", | |
| "encoder.layer.21.attention.self.query.weight", | |
| "encoder.layer.21.attention.self.value.weight", | |
| "encoder.layer.21.intermediate.dense.weight", | |
| "encoder.layer.21.output.dense.weight", | |
| "encoder.layer.22.attention.output.dense.weight", | |
| "encoder.layer.22.attention.self.key.weight", | |
| "encoder.layer.22.attention.self.query.weight", | |
| "encoder.layer.22.attention.self.value.weight", | |
| "encoder.layer.22.intermediate.dense.weight", | |
| "encoder.layer.22.output.dense.weight", | |
| "encoder.layer.23.attention.output.dense.weight", | |
| "encoder.layer.23.attention.self.key.weight", | |
| "encoder.layer.23.attention.self.query.weight", | |
| "encoder.layer.23.attention.self.value.weight", | |
| "encoder.layer.23.intermediate.dense.weight", | |
| "encoder.layer.23.output.dense.weight", | |
| "encoder.layer.3.attention.output.dense.weight", | |
| "encoder.layer.3.attention.self.key.weight", | |
| "encoder.layer.3.attention.self.query.weight", | |
| "encoder.layer.3.attention.self.value.weight", | |
| "encoder.layer.3.intermediate.dense.weight", | |
| "encoder.layer.3.output.dense.weight", | |
| "encoder.layer.4.attention.output.dense.weight", | |
| "encoder.layer.4.attention.self.key.weight", | |
| "encoder.layer.4.attention.self.query.weight", | |
| "encoder.layer.4.attention.self.value.weight", | |
| "encoder.layer.4.intermediate.dense.weight", | |
| "encoder.layer.4.output.dense.weight", | |
| "encoder.layer.5.attention.output.dense.weight", | |
| "encoder.layer.5.attention.self.key.weight", | |
| "encoder.layer.5.attention.self.query.weight", | |
| "encoder.layer.5.attention.self.value.weight", | |
| "encoder.layer.5.intermediate.dense.weight", | |
| "encoder.layer.5.output.dense.weight", | |
| "encoder.layer.6.attention.output.dense.weight", | |
| "encoder.layer.6.attention.self.key.weight", | |
| "encoder.layer.6.attention.self.query.weight", | |
| "encoder.layer.6.attention.self.value.weight", | |
| "encoder.layer.6.intermediate.dense.weight", | |
| "encoder.layer.6.output.dense.weight", | |
| "encoder.layer.7.attention.output.dense.weight", | |
| "encoder.layer.7.attention.self.key.weight", | |
| "encoder.layer.7.attention.self.query.weight", | |
| "encoder.layer.7.attention.self.value.weight", | |
| "encoder.layer.7.intermediate.dense.weight", | |
| "encoder.layer.7.output.dense.weight", | |
| "encoder.layer.8.attention.output.dense.weight", | |
| "encoder.layer.8.attention.self.key.weight", | |
| "encoder.layer.8.attention.self.query.weight", | |
| "encoder.layer.8.attention.self.value.weight", | |
| "encoder.layer.8.intermediate.dense.weight", | |
| "encoder.layer.8.output.dense.weight", | |
| "encoder.layer.9.attention.output.dense.weight", | |
| "encoder.layer.9.attention.self.key.weight", | |
| "encoder.layer.9.attention.self.query.weight", | |
| "encoder.layer.9.attention.self.value.weight", | |
| "encoder.layer.9.intermediate.dense.weight", | |
| "encoder.layer.9.output.dense.weight" | |
| ], | |
| "rr_q4": [ | |
| "roberta.encoder.layer.0.attention.output.dense.weight", | |
| "roberta.encoder.layer.0.attention.self.key.weight", | |
| "roberta.encoder.layer.0.attention.self.query.weight", | |
| "roberta.encoder.layer.0.attention.self.value.weight", | |
| "roberta.encoder.layer.0.intermediate.dense.weight", | |
| "roberta.encoder.layer.0.output.dense.weight", | |
| "roberta.encoder.layer.1.attention.output.dense.weight", | |
| "roberta.encoder.layer.1.attention.self.key.weight", | |
| "roberta.encoder.layer.1.attention.self.query.weight", | |
| "roberta.encoder.layer.1.attention.self.value.weight", | |
| "roberta.encoder.layer.1.intermediate.dense.weight", | |
| "roberta.encoder.layer.1.output.dense.weight", | |
| "roberta.encoder.layer.10.attention.output.dense.weight", | |
| "roberta.encoder.layer.10.attention.self.key.weight", | |
| "roberta.encoder.layer.10.attention.self.query.weight", | |
| "roberta.encoder.layer.10.attention.self.value.weight", | |
| "roberta.encoder.layer.10.intermediate.dense.weight", | |
| "roberta.encoder.layer.10.output.dense.weight", | |
| "roberta.encoder.layer.11.attention.output.dense.weight", | |
| "roberta.encoder.layer.11.attention.self.key.weight", | |
| "roberta.encoder.layer.11.attention.self.query.weight", | |
| "roberta.encoder.layer.11.attention.self.value.weight", | |
| "roberta.encoder.layer.11.intermediate.dense.weight", | |
| "roberta.encoder.layer.11.output.dense.weight", | |
| "roberta.encoder.layer.12.attention.output.dense.weight", | |
| "roberta.encoder.layer.12.attention.self.key.weight", | |
| "roberta.encoder.layer.12.attention.self.query.weight", | |
| "roberta.encoder.layer.12.attention.self.value.weight", | |
| "roberta.encoder.layer.12.intermediate.dense.weight", | |
| "roberta.encoder.layer.12.output.dense.weight", | |
| "roberta.encoder.layer.13.attention.output.dense.weight", | |
| "roberta.encoder.layer.13.attention.self.key.weight", | |
| "roberta.encoder.layer.13.attention.self.query.weight", | |
| "roberta.encoder.layer.13.attention.self.value.weight", | |
| "roberta.encoder.layer.13.intermediate.dense.weight", | |
| "roberta.encoder.layer.13.output.dense.weight", | |
| "roberta.encoder.layer.14.attention.output.dense.weight", | |
| "roberta.encoder.layer.14.attention.self.key.weight", | |
| "roberta.encoder.layer.14.attention.self.query.weight", | |
| "roberta.encoder.layer.14.attention.self.value.weight", | |
| "roberta.encoder.layer.14.intermediate.dense.weight", | |
| "roberta.encoder.layer.14.output.dense.weight", | |
| "roberta.encoder.layer.15.attention.output.dense.weight", | |
| "roberta.encoder.layer.15.attention.self.key.weight", | |
| "roberta.encoder.layer.15.attention.self.query.weight", | |
| "roberta.encoder.layer.15.attention.self.value.weight", | |
| "roberta.encoder.layer.15.intermediate.dense.weight", | |
| "roberta.encoder.layer.15.output.dense.weight", | |
| "roberta.encoder.layer.16.attention.output.dense.weight", | |
| "roberta.encoder.layer.16.attention.self.key.weight", | |
| "roberta.encoder.layer.16.attention.self.query.weight", | |
| "roberta.encoder.layer.16.attention.self.value.weight", | |
| "roberta.encoder.layer.16.intermediate.dense.weight", | |
| "roberta.encoder.layer.16.output.dense.weight", | |
| "roberta.encoder.layer.17.attention.output.dense.weight", | |
| "roberta.encoder.layer.17.attention.self.key.weight", | |
| "roberta.encoder.layer.17.attention.self.query.weight", | |
| "roberta.encoder.layer.17.attention.self.value.weight", | |
| "roberta.encoder.layer.17.intermediate.dense.weight", | |
| "roberta.encoder.layer.17.output.dense.weight", | |
| "roberta.encoder.layer.18.attention.output.dense.weight", | |
| "roberta.encoder.layer.18.attention.self.key.weight", | |
| "roberta.encoder.layer.18.attention.self.query.weight", | |
| "roberta.encoder.layer.18.attention.self.value.weight", | |
| "roberta.encoder.layer.18.intermediate.dense.weight", | |
| "roberta.encoder.layer.18.output.dense.weight", | |
| "roberta.encoder.layer.19.attention.output.dense.weight", | |
| "roberta.encoder.layer.19.attention.self.key.weight", | |
| "roberta.encoder.layer.19.attention.self.query.weight", | |
| "roberta.encoder.layer.19.attention.self.value.weight", | |
| "roberta.encoder.layer.19.intermediate.dense.weight", | |
| "roberta.encoder.layer.19.output.dense.weight", | |
| "roberta.encoder.layer.2.attention.output.dense.weight", | |
| "roberta.encoder.layer.2.attention.self.key.weight", | |
| "roberta.encoder.layer.2.attention.self.query.weight", | |
| "roberta.encoder.layer.2.attention.self.value.weight", | |
| "roberta.encoder.layer.2.intermediate.dense.weight", | |
| "roberta.encoder.layer.2.output.dense.weight", | |
| "roberta.encoder.layer.20.attention.output.dense.weight", | |
| "roberta.encoder.layer.20.attention.self.key.weight", | |
| "roberta.encoder.layer.20.attention.self.query.weight", | |
| "roberta.encoder.layer.20.attention.self.value.weight", | |
| "roberta.encoder.layer.20.intermediate.dense.weight", | |
| "roberta.encoder.layer.20.output.dense.weight", | |
| "roberta.encoder.layer.21.attention.output.dense.weight", | |
| "roberta.encoder.layer.21.attention.self.key.weight", | |
| "roberta.encoder.layer.21.attention.self.query.weight", | |
| "roberta.encoder.layer.21.attention.self.value.weight", | |
| "roberta.encoder.layer.21.intermediate.dense.weight", | |
| "roberta.encoder.layer.21.output.dense.weight", | |
| "roberta.encoder.layer.22.attention.output.dense.weight", | |
| "roberta.encoder.layer.22.attention.self.key.weight", | |
| "roberta.encoder.layer.22.attention.self.query.weight", | |
| "roberta.encoder.layer.22.attention.self.value.weight", | |
| "roberta.encoder.layer.22.intermediate.dense.weight", | |
| "roberta.encoder.layer.22.output.dense.weight", | |
| "roberta.encoder.layer.23.attention.output.dense.weight", | |
| "roberta.encoder.layer.23.attention.self.key.weight", | |
| "roberta.encoder.layer.23.attention.self.query.weight", | |
| "roberta.encoder.layer.23.attention.self.value.weight", | |
| "roberta.encoder.layer.23.intermediate.dense.weight", | |
| "roberta.encoder.layer.23.output.dense.weight", | |
| "roberta.encoder.layer.3.attention.output.dense.weight", | |
| "roberta.encoder.layer.3.attention.self.key.weight", | |
| "roberta.encoder.layer.3.attention.self.query.weight", | |
| "roberta.encoder.layer.3.attention.self.value.weight", | |
| "roberta.encoder.layer.3.intermediate.dense.weight", | |
| "roberta.encoder.layer.3.output.dense.weight", | |
| "roberta.encoder.layer.4.attention.output.dense.weight", | |
| "roberta.encoder.layer.4.attention.self.key.weight", | |
| "roberta.encoder.layer.4.attention.self.query.weight", | |
| "roberta.encoder.layer.4.attention.self.value.weight", | |
| "roberta.encoder.layer.4.intermediate.dense.weight", | |
| "roberta.encoder.layer.4.output.dense.weight", | |
| "roberta.encoder.layer.5.attention.output.dense.weight", | |
| "roberta.encoder.layer.5.attention.self.key.weight", | |
| "roberta.encoder.layer.5.attention.self.query.weight", | |
| "roberta.encoder.layer.5.attention.self.value.weight", | |
| "roberta.encoder.layer.5.intermediate.dense.weight", | |
| "roberta.encoder.layer.5.output.dense.weight", | |
| "roberta.encoder.layer.6.attention.output.dense.weight", | |
| "roberta.encoder.layer.6.attention.self.key.weight", | |
| "roberta.encoder.layer.6.attention.self.query.weight", | |
| "roberta.encoder.layer.6.attention.self.value.weight", | |
| "roberta.encoder.layer.6.intermediate.dense.weight", | |
| "roberta.encoder.layer.6.output.dense.weight", | |
| "roberta.encoder.layer.7.attention.output.dense.weight", | |
| "roberta.encoder.layer.7.attention.self.key.weight", | |
| "roberta.encoder.layer.7.attention.self.query.weight", | |
| "roberta.encoder.layer.7.attention.self.value.weight", | |
| "roberta.encoder.layer.7.intermediate.dense.weight", | |
| "roberta.encoder.layer.7.output.dense.weight", | |
| "roberta.encoder.layer.8.attention.output.dense.weight", | |
| "roberta.encoder.layer.8.attention.self.key.weight", | |
| "roberta.encoder.layer.8.attention.self.query.weight", | |
| "roberta.encoder.layer.8.attention.self.value.weight", | |
| "roberta.encoder.layer.8.intermediate.dense.weight", | |
| "roberta.encoder.layer.8.output.dense.weight", | |
| "roberta.encoder.layer.9.attention.output.dense.weight", | |
| "roberta.encoder.layer.9.attention.self.key.weight", | |
| "roberta.encoder.layer.9.attention.self.query.weight", | |
| "roberta.encoder.layer.9.attention.self.value.weight", | |
| "roberta.encoder.layer.9.intermediate.dense.weight", | |
| "roberta.encoder.layer.9.output.dense.weight" | |
| ] | |
| } |