SVSTR-Score / seqonly /examples /quickstart.py
khyeom's picture
Add sequence-only headline model (svspr_v14_seq, 11-feature) + inference package
90d0b4b verified
Raw
History Blame Contribute Delete
2.39 kB
"""SV-SPR quickstart: three ways to score SV calls."""
# The three entry points demonstrated below: `classify` (one SV),
# `score` (a whole VCF) and `SVSPR` (a model object reused across VCFs).
from svspr import classify, score, SVSPR

# Placeholder reference path handed to every scoring call in this script;
# replace it with a real local path before running.
# NOTE(review): presumably a GRCh38 reference FASTA, judging by the file
# name — confirm the expected format against the svspr documentation.
REF = '/path/to/GRCh38.fa'  # ← edit me
# ─────────────────────────────────────────────────────────────────────────────
# 1) Single SV — fastest demo, returns dict
# ─────────────────────────────────────────────────────────────────────────────
# Describe one call by its coordinates, type, length and supporting-read
# count; here svlen matches end - pos (1005000 - 1000000 = 5000).
result = classify(
    chrom='chr1', pos=1000000, end=1005000,
    svtype='DEL', svlen=5000, total_alt_support=15,
    ref_path=REF,
)
print('Single SV →', result)
# Example of the printed dict (exact score depends on the model/reference):
# {'CS': 0.69..., 'tier': 'moderate'}
# ─────────────────────────────────────────────────────────────────────────────
# 2) Whole VCF — returns DataFrame
# ─────────────────────────────────────────────────────────────────────────────
# Score every call in the sample VCF in one go.
df = score(vcf_path='examples/sample.vcf', ref_path=REF)

# Preview the identifying columns next to the score (CS) and its tier.
preview_cols = ['chrom', 'pos', 'svtype', 'svlen', 'CS', 'tier']
print(df[preview_cols].head())

# Filter: retain just the rows whose tier is 'high'.
is_high = df['tier'] == 'high'
high = df[is_high]
print(f'{len(high):,} of {len(df):,} calls passed high-confidence filter')
# ─────────────────────────────────────────────────────────────────────────────
# 3) Reuse model across many VCFs — cheaper than calling `score` repeatedly
# ─────────────────────────────────────────────────────────────────────────────
model = SVSPR()  # load once
for vcf in ['cohort_01.vcf', 'cohort_02.vcf', 'cohort_03.vcf']:
    out = model.predict_vcf(vcf, REF)
    # Strip only a trailing '.vcf' and always append the new suffix.
    # A plain str.replace would rewrite '.vcf' anywhere in the path and,
    # when the name has no '.vcf' at all, would leave the path unchanged so
    # that the results overwrite the input file.
    stem = vcf[:-len('.vcf')] if vcf.endswith('.vcf') else vcf
    out.to_csv(stem + '.scored.tsv', sep='\t', index=False)