Add sequence-only headline model (svspr_v14_seq, 11-feature) + inference package

90d0b4b verified 10 days ago

2.39 kB

	"""SV-SPR quickstart: three ways to score SV calls."""
	from svspr import classify, score, SVSPR

	# Reference genome file handed to every scoring call in this script
	# (classify, score and SVSPR.predict_vcf). The value is a placeholder path.
	REF = '/path/to/GRCh38.fa' # ← edit me

	# ─────────────────────────────────────────────────────────────────────────────
	# 1) Single SV — fastest demo, returns dict
	# ─────────────────────────────────────────────────────────────────────────────
	# One example call, described as a mapping whose keys are exactly the
	# keyword arguments classify() is invoked with.
	single_sv = {
	    'chrom': 'chr1',
	    'pos': 1_000_000,
	    'end': 1_005_000,
	    'svtype': 'DEL',
	    'svlen': 5000,
	    'total_alt_support': 15,
	}
	result = classify(**single_sv, ref_path=REF)
	print('Single SV →', result)
	# Example output: {'CS': 0.69..., 'tier': 'moderate'}

	# ─────────────────────────────────────────────────────────────────────────────
	# 2) Whole VCF — returns DataFrame
	# ─────────────────────────────────────────────────────────────────────────────
	# Score every call in the example VCF in one go.
	df = score(vcf_path='examples/sample.vcf', ref_path=REF)

	# Show the first rows, restricted to the columns of interest.
	preview_cols = ['chrom', 'pos', 'svtype', 'svlen', 'CS', 'tier']
	print(df[preview_cols].head())

	# Keep only the rows whose tier is 'high' and report how many survived.
	is_high = df['tier'] == 'high'
	high = df[is_high]
	n_high, n_total = len(high), len(df)
	print(f'{n_high:,} of {n_total:,} calls passed high-confidence filter')

	# ─────────────────────────────────────────────────────────────────────────────
	# 3) Reuse model across many VCFs — cheaper than calling `score` repeatedly
	# ─────────────────────────────────────────────────────────────────────────────
	model = SVSPR()  # load once, then reuse the same instance for every file
	for vcf in ['cohort_01.vcf', 'cohort_02.vcf', 'cohort_03.vcf']:
	    out = model.predict_vcf(vcf, REF)
	    # Strip only a trailing '.vcf'. str.replace() would rewrite every
	    # '.vcf' occurrence in the path and, for a name without one, return
	    # the input path unchanged so that to_csv() overwrites the input file.
	    stem = vcf[:-len('.vcf')] if vcf.endswith('.vcf') else vcf
	    out.to_csv(f'{stem}.scored.tsv', sep='\t', index=False)