"""SV-SPR quickstart: three ways to score SV calls.

1. ``classify`` -- score one structural variant described by keyword fields.
2. ``score``    -- score every record of a VCF file.
3. ``SVSPR``    -- build the model object once and reuse it across several VCFs.

Point ``REF`` at a local reference FASTA before running.
"""
from svspr import classify, score, SVSPR

REF = '/path/to/GRCh38.fa'   # ← edit me

# ─────────────────────────────────────────────────────────────────────────────
# 1) Single SV — fastest demo, returns dict
# ─────────────────────────────────────────────────────────────────────────────
# Fields describing one example deletion; forwarded to `classify` as keywords.
example_sv = {
    'chrom': 'chr1',
    'pos': 1_000_000,
    'end': 1_005_000,
    'svtype': 'DEL',
    'svlen': 5000,
    'total_alt_support': 15,
}
result = classify(**example_sv, ref_path=REF)
print('Single SV →', result)
# Example output: {'CS': 0.69..., 'tier': 'moderate'}

# ─────────────────────────────────────────────────────────────────────────────
# 2) Whole VCF — returns DataFrame
# ─────────────────────────────────────────────────────────────────────────────
df = score(vcf_path='examples/sample.vcf', ref_path=REF)

# Show the first rows, restricted to the columns of interest.
preview_cols = ['chrom', 'pos', 'svtype', 'svlen', 'CS', 'tier']
print(df[preview_cols].head())

# Filter: keep only the calls whose tier is 'high'.
is_high = df.tier == 'high'
high = df[is_high]
print(f'{len(high):,} of {len(df):,} calls passed high-confidence filter')

# ─────────────────────────────────────────────────────────────────────────────
# 3) Reuse model across many VCFs — cheaper than calling `score` repeatedly
# ─────────────────────────────────────────────────────────────────────────────
model = SVSPR()   # load once

cohort_vcfs = ('cohort_01.vcf', 'cohort_02.vcf', 'cohort_03.vcf')
for vcf in cohort_vcfs:
    out = model.predict_vcf(vcf, REF)
    # Write results next to the input: cohort_01.vcf -> cohort_01.scored.tsv
    scored_path = vcf.replace('.vcf', '.scored.tsv')
    out.to_csv(scored_path, sep='\t', index=False)