Spaces:
Sleeping
Sleeping
| import os | |
| import uuid | |
| import shutil | |
| import numpy as np | |
| import pandas as pd | |
| import soundfile as sf | |
| import torch | |
| import torchaudio | |
| import gradio as gr | |
| import threading | |
| from dotenv import load_dotenv | |
| from huggingface_hub import HfApi, create_repo, upload_file | |
| from transformers import WhisperForConditionalGeneration, WhisperProcessor | |
# ---------------------------------------------------------
# Environment configuration
# ---------------------------------------------------------
# Load environment variables first, then read the Hub credentials and the
# target dataset repo id; either value is None when its variable is unset.
load_dotenv()
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_REPO = os.environ.get("HF_DATASET_REPO")

# ---------------------------------------------------------
# Local staging area for saved samples
# ---------------------------------------------------------
LOCAL_DATASET_DIR = "hf_dataset"
LOCAL_METADATA = os.path.join(LOCAL_DATASET_DIR, "metadata.csv")
LOCAL_AUDIO_DIR = os.path.join(LOCAL_DATASET_DIR, "audio")

# Creating the audio sub-directory also creates the dataset root.
os.makedirs(LOCAL_AUDIO_DIR, exist_ok=True)
# ---------------------------------------------------------
# Hugging Face Hub client and dataset repository
# ---------------------------------------------------------
api = HfApi(token=HF_TOKEN)

# Make sure the dataset repo exists. Any failure is only reported on stdout
# so that the module keeps loading.
_repo_settings = {
    "repo_id": DATASET_REPO,
    "repo_type": "dataset",
    "exist_ok": True,
    "token": HF_TOKEN,
}
try:
    create_repo(**_repo_settings)
except Exception as err:
    print("Dataset repo check:", err)
# ---------------------------------------------------------
# Speech model, processor and compute device
# ---------------------------------------------------------
# Both the model weights and the processor come from the same checkpoint.
_MODEL_ID = "Kennethdot/kasanoma_whisper"

processor = WhisperProcessor.from_pretrained(_MODEL_ID)

# Use the GPU when one is visible to torch, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = WhisperForConditionalGeneration.from_pretrained(_MODEL_ID).to(device)
model.eval()  # inference mode
| # ========================================================= | |
| # TRANSCRIPTION FUNCTION | |
| # ========================================================= | |
def transcribe_audio(audio_path):
    """Transcribe an audio file with the module-level Whisper model.

    Args:
        audio_path: Filesystem path of the recording to transcribe, or
            ``None`` when no audio was supplied.

    Returns:
        A 3-tuple ``(transcription, transcription, audio_path)``: the decoded
        text twice (the UI binds it to the read-only and the editable
        textbox) followed by the input path. ``("", "", None)`` is returned
        when ``audio_path`` is ``None``. When the file cannot be read,
        resampled, or contains no samples, the first item is an
        ``"Error reading audio: ..."`` message and the rest are ``""`` and
        ``None``.
    """
    if audio_path is None:
        return "", "", None

    target_rate = 16000  # rate the waveform is converted to before feature extraction

    try:
        audio_data, sampling_rate = sf.read(audio_path)

        # Stereo -> mono: average across the channel axis.
        if audio_data.ndim > 1:
            audio_data = np.mean(audio_data, axis=1)
        audio_data = audio_data.astype(np.float32)

        # A file with zero samples cannot be normalized (np.max raises on an
        # empty array) or transcribed, so report it like any other read error.
        if audio_data.size == 0:
            return "Error reading audio: file contains no samples", "", None

        # Resample to the target rate when the file uses a different one.
        if sampling_rate != target_rate:
            audio_tensor = torch.tensor(audio_data, dtype=torch.float32)
            resampler = torchaudio.transforms.Resample(
                orig_freq=sampling_rate,
                new_freq=target_rate,
            )
            audio_data = resampler(audio_tensor).numpy()
            sampling_rate = target_rate
    except Exception as e:
        return f"Error reading audio: {e}", "", None

    # Peak-normalize; pure silence (peak 0) is left untouched to avoid 0/0.
    peak = np.max(np.abs(audio_data))
    if peak > 0:
        audio_data = audio_data / peak

    # Feature extraction
    input_features = processor.feature_extractor(
        audio_data,
        sampling_rate=sampling_rate,
        return_tensors="pt",
    ).input_features.to(device)

    # Inference (no gradients needed).
    # NOTE(review): language="yo" while save_sample tags rows "twi_en" --
    # presumably the fine-tuned checkpoint expects this language token;
    # confirm before changing it.
    with torch.no_grad():
        generated_ids = model.generate(
            input_features,
            task="transcribe",
            language="yo",
            temperature=0.0,
        )

    transcription = processor.batch_decode(
        generated_ids,
        skip_special_tokens=True,
    )[0].strip()

    return transcription, transcription, audio_path
| # ========================================================= | |
| # SAVE FUNCTION | |
| # ========================================================= | |
# Guards reads and writes of the local metadata CSV so concurrent saves
# cannot corrupt it. It is only ever held for fast local file I/O.
_csv_lock = threading.Lock()

# Serializes the metadata uploads of the background threads so an older CSV
# snapshot can never be pushed after a newer one.
_upload_lock = threading.Lock()


def _upload_in_background(saved_audio_path, relative_audio_path):
    """Upload one saved audio file, then the metadata CSV, to the dataset repo.

    Meant to be run off the UI thread (``save_sample`` starts it in a daemon
    thread). The CSV is copied into memory while ``_csv_lock`` is held and
    uploaded afterwards, so a slow network transfer never keeps
    ``save_sample`` waiting for that lock.

    Args:
        saved_audio_path: Local path of the audio file to upload.
        relative_audio_path: Destination path of that file inside the repo.

    Returns:
        None. Every failure is caught and printed; nothing is raised.
    """
    try:
        upload_file(
            path_or_fileobj=saved_audio_path,
            path_in_repo=relative_audio_path,
            repo_id=DATASET_REPO,
            repo_type="dataset",
            token=HF_TOKEN,
        )

        # One metadata upload at a time: whichever thread runs last takes its
        # snapshot last, so the repo always ends up with the newest CSV.
        with _upload_lock:
            with _csv_lock:
                with open(LOCAL_METADATA, "rb") as metadata_file:
                    metadata_snapshot = metadata_file.read()

            upload_file(
                path_or_fileobj=metadata_snapshot,
                path_in_repo="metadata.csv",
                repo_id=DATASET_REPO,
                repo_type="dataset",
                token=HF_TOKEN,
            )
    except Exception as e:
        print(f"[Background upload error] {e}")
def save_sample(audio_path, corrected_text):
    """Store a corrected sample locally and start its upload in the background.

    The audio file is copied into ``LOCAL_AUDIO_DIR`` under a fresh UUID
    name, one row is appended to ``LOCAL_METADATA`` while ``_csv_lock`` is
    held, and ``_upload_in_background`` is started in a daemon thread so the
    upload itself runs off the calling thread.

    Args:
        audio_path: Path of the audio file to keep. ``None`` makes the call
            a no-op.
        corrected_text: Transcription text stored next to the audio.

    Returns:
        Always ``None``. Failures are printed, not raised.
    """
    if audio_path is None:
        return

    try:
        unique_id = str(uuid.uuid4())

        # The file is copied byte for byte, so keep its real extension:
        # naming e.g. an uploaded MP3 "*.wav" would mislabel its format.
        # Fall back to ".wav" when the source path has no extension.
        extension = os.path.splitext(audio_path)[1].lower() or ".wav"
        file_name = f"{unique_id}{extension}"

        # Copy audio locally -- fast, no network.
        saved_audio_path = os.path.join(LOCAL_AUDIO_DIR, file_name)
        shutil.copy(audio_path, saved_audio_path)
        relative_audio_path = f"audio/{file_name}"

        new_row = pd.DataFrame([{
            "id": unique_id,
            "audio": relative_audio_path,
            "transcription": corrected_text,
            "language": "twi_en",
        }])

        # Append just the new row instead of re-reading and rewriting the
        # whole file: constant work per save, and earlier rows are never
        # passed through read_csv (which turns values such as "NA" or "None"
        # into NaN). The header is written only for a new or empty file.
        with _csv_lock:
            needs_header = (
                not os.path.exists(LOCAL_METADATA)
                or os.path.getsize(LOCAL_METADATA) == 0
            )
            new_row.to_csv(
                LOCAL_METADATA,
                mode="a",
                header=needs_header,
                index=False,
            )

        # Fire-and-forget: the Hugging Face upload happens in the background.
        threading.Thread(
            target=_upload_in_background,
            args=(saved_audio_path, relative_audio_path),
            daemon=True,
        ).start()
    except Exception as e:
        print(f"[Save error] {e}")
    return
| # ========================================================= | |
| # CUSTOM CSS (from the styled version) | |
| # ========================================================= | |
# Stylesheet passed to gr.Blocks(css=css) below. It styles the hero orb
# (.orb-*), title/subtitle/badge (.kasa-*), the audio widget, textboxes,
# the primary button, the round #save-btn button, the footer and the
# pulsing status dot.
# NOTE(review): the "ββ" runs inside the /* ... */ CSS comments look like
# mis-decoded decorative characters; they sit inside CSS comments only.
css = """
@import url('https://fonts.googleapis.com/css2?family=Sora:wght@300;400;600;700&family=DM+Mono:wght@400;500&display=swap');
:root {
--navy: #0d1b4b;
--blue: #1a6fd4;
--sky: #4db8f0;
--violet: #7b4fd4;
--lilac: #b57bee;
--white: #e8f0ff;
--card: rgba(255,255,255,0.07);
--border: rgba(180,160,255,0.2);
--radius: 16px;
}
/* ββ animated mesh background ββ */
body, .gradio-container {
background:
radial-gradient(ellipse at 15% 10%, #1a3fa8 0%, transparent 55%),
radial-gradient(ellipse at 85% 5%, #7b3fc4 0%, transparent 45%),
radial-gradient(ellipse at 50% 50%, #d0e8ff 0%, transparent 60%),
radial-gradient(ellipse at 80% 80%, #b57bee 0%, transparent 50%),
radial-gradient(ellipse at 10% 90%, #1a6fd4 0%, transparent 50%),
#0d1b4b !important;
font-family: 'Sora', sans-serif !important;
min-height: 100vh;
}
/* ββ slow drifting orbs ββ */
.gradio-container::before,
.gradio-container::after {
content: '';
position: fixed;
border-radius: 50%;
filter: blur(80px);
pointer-events: none;
z-index: 0;
}
.gradio-container::before {
width: 520px; height: 520px;
top: -120px; left: -100px;
background: radial-gradient(circle, rgba(74,130,230,0.45), transparent 70%);
animation: drift1 12s ease-in-out infinite alternate;
}
.gradio-container::after {
width: 480px; height: 480px;
bottom: -100px; right: -80px;
background: radial-gradient(circle, rgba(160,100,240,0.4), transparent 70%);
animation: drift2 15s ease-in-out infinite alternate;
}
@keyframes drift1 {
from { transform: translate(0, 0); }
to { transform: translate(60px, 80px); }
}
@keyframes drift2 {
from { transform: translate(0, 0); }
to { transform: translate(-50px, -70px); }
}
/* ββ hero ββ */
#hero {
text-align: center;
padding: 40px 24px 16px;
position: relative;
z-index: 1;
animation: fadeUp 0.8s ease both;
}
@keyframes fadeUp {
from { opacity: 0; transform: translateY(24px); }
to { opacity: 1; transform: translateY(0); }
}
/* ββ animated orb ββ */
.orb-wrap {
display: flex;
justify-content: center;
margin-bottom: 22px;
}
.orb-stage {
position: relative;
width: 110px;
height: 110px;
}
.orb-ring {
position: absolute;
border-radius: 50%;
border: 1.5px solid transparent;
inset: 0;
animation: orbSpin 6s linear infinite;
}
.orb-ring:nth-child(1) {
border-top-color: #4db8f0;
border-right-color: rgba(77,184,240,0.25);
animation-duration: 5s;
}
.orb-ring:nth-child(2) {
inset: 8px;
border-top-color: #b57bee;
border-left-color: rgba(181,123,238,0.25);
animation-direction: reverse;
animation-duration: 7s;
}
.orb-ring:nth-child(3) {
inset: 16px;
border-top-color: #1a6fd4;
border-bottom-color: rgba(26,111,212,0.25);
animation-duration: 9s;
}
@keyframes orbSpin { to { transform: rotate(360deg); } }
.orb-core {
position: absolute;
inset: 24px;
border-radius: 50%;
background: radial-gradient(circle at 35% 35%,
rgba(200,220,255,0.95),
rgba(100,150,240,0.8) 40%,
rgba(90,55,200,0.9) 75%,
rgba(25,15,75,1)
);
box-shadow:
0 0 22px 6px rgba(77,184,240,0.45),
0 0 50px 12px rgba(120,80,220,0.28),
inset 0 0 14px rgba(255,255,255,0.18);
animation: orbBreathe 3.5s ease-in-out infinite;
}
.orb-core::after {
content: '';
position: absolute;
top: 14%; left: 22%;
width: 28%; height: 20%;
border-radius: 50%;
background: rgba(255,255,255,0.5);
filter: blur(3px);
}
@keyframes orbBreathe {
0%, 100% {
transform: scale(1);
box-shadow: 0 0 22px 6px rgba(77,184,240,0.45), 0 0 50px 12px rgba(120,80,220,0.28), inset 0 0 14px rgba(255,255,255,0.18);
}
50% {
transform: scale(1.1);
box-shadow: 0 0 34px 12px rgba(77,184,240,0.65), 0 0 70px 20px rgba(120,80,220,0.42), inset 0 0 20px rgba(255,255,255,0.28);
}
}
.orb-waves {
position: absolute;
inset: 24px;
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
gap: 3px;
overflow: hidden;
}
.orb-wave-bar {
width: 3px;
border-radius: 99px;
background: rgba(255,255,255,0.75);
animation: waveBar 1.3s ease-in-out infinite;
}
.orb-wave-bar:nth-child(1) { height: 8px; animation-delay: 0s; }
.orb-wave-bar:nth-child(2) { height: 16px; animation-delay: 0.18s; }
.orb-wave-bar:nth-child(3) { height: 22px; animation-delay: 0.35s; }
.orb-wave-bar:nth-child(4) { height: 16px; animation-delay: 0.52s; }
.orb-wave-bar:nth-child(5) { height: 8px; animation-delay: 0.7s; }
@keyframes waveBar {
0%, 100% { transform: scaleY(0.3); opacity: 0.45; }
50% { transform: scaleY(1); opacity: 1; }
}
/* ββ title & subtitle ββ */
.kasa-title {
font-size: clamp(2.4rem, 7vw, 4rem);
font-weight: 700;
letter-spacing: -0.03em;
background: linear-gradient(120deg, var(--white) 0%, var(--sky) 40%, var(--lilac) 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
margin: 0 0 10px;
line-height: 1.1;
}
.kasa-sub {
font-size: 0.97rem;
font-weight: 300;
color: rgba(220,230,255,0.85);
max-width: 460px;
margin: 0 auto 14px;
line-height: 1.65;
}
.kasa-badge {
display: inline-flex;
align-items: center;
gap: 6px;
padding: 5px 16px;
border-radius: 999px;
border: 1px solid rgba(180,160,255,0.3);
background: rgba(120,100,220,0.12);
font-size: 0.7rem;
font-weight: 500;
letter-spacing: 0.13em;
text-transform: uppercase;
color: var(--lilac);
backdrop-filter: blur(8px);
}
/* ββ divider ββ */
.kasa-divider {
border: none;
border-top: 1px solid var(--border);
margin: 10px 0 28px;
position: relative;
z-index: 1;
}
/* ββ panel labels ββ */
.kasa-label {
font-size: 0.7rem;
font-weight: 600;
letter-spacing: 0.14em;
text-transform: uppercase;
color: #a8d4f8;
margin-bottom: 10px;
}
/* ββ Gradio field labels ββ */
label, .gr-form label, .svelte-1gfkn6j {
color: #c8d8f8 !important;
font-family: 'Sora', sans-serif !important;
font-size: 0.82rem !important;
font-weight: 500 !important;
}
/* ββ WHITE audio widget ββ */
.gr-audio,
[data-testid="audio"],
.gr-audio > div {
background: #ffffff !important;
border: 1.5px solid rgba(100,140,240,0.4) !important;
border-radius: 14px !important;
box-shadow: 0 4px 28px rgba(26,60,180,0.14) !important;
overflow: hidden !important;
}
[data-testid="audio"] button,
.gr-audio button {
color: #1a6fd4 !important;
background: transparent !important;
}
[data-testid="audio"] svg,
.gr-audio svg {
stroke: #1a6fd4 !important;
fill: none !important;
}
[data-testid="audio"] span,
[data-testid="audio"] .time,
[data-testid="audio"] .duration,
.gr-audio span {
color: #1a3fa8 !important;
font-family: 'DM Mono', monospace !important;
}
[data-testid="audio"] canvas,
.gr-audio canvas {
filter: hue-rotate(195deg) saturate(2) brightness(0.85) !important;
}
[data-testid="audio"] .tabs button,
.gr-audio .tabs button {
color: #1a6fd4 !important;
font-family: 'Sora', sans-serif !important;
font-weight: 600 !important;
}
[data-testid="audio"] .tabs button.selected,
.gr-audio .tabs button.selected {
border-bottom: 2px solid #1a6fd4 !important;
}
/* ββ WHITE textbox ββ */
textarea, .gr-textbox textarea {
background: #ffffff !important;
border: 1.5px solid rgba(100,140,240,0.4) !important;
border-radius: 12px !important;
color: #0d1b6e !important;
font-family: 'Sora', sans-serif !important;
font-size: 0.95rem !important;
font-weight: 400 !important;
padding: 14px !important;
box-shadow: 0 4px 24px rgba(26,60,180,0.1) !important;
transition: border-color 0.2s, box-shadow 0.2s !important;
line-height: 1.75 !important;
letter-spacing: 0.01em !important;
}
textarea::placeholder {
color: #7a9acc !important;
font-style: italic;
font-family: 'Sora', sans-serif !important;
}
textarea:focus {
border-color: #4db8f0 !important;
box-shadow: 0 0 0 3px rgba(77,184,240,0.2) !important;
outline: none !important;
}
.gr-textbox, [data-testid="textbox"] {
background: #ffffff !important;
border: 1.5px solid rgba(100,140,240,0.4) !important;
border-radius: 14px !important;
box-shadow: 0 4px 28px rgba(26,60,180,0.14) !important;
overflow: hidden !important;
}
/* ββ Edit hint text ββ */
.edit-hint {
font-size: 0.75rem;
font-weight: 300;
color: rgba(200,215,255,0.6);
margin-top: -6px;
margin-bottom: 8px;
font-style: italic;
letter-spacing: 0.02em;
}
/* ββ Buttons ββ */
.gr-button-primary, button.primary {
background: linear-gradient(135deg, #1a6fd4 0%, #7b4fd4 100%) !important;
border: none !important;
border-radius: 10px !important;
font-family: 'Sora', sans-serif !important;
font-weight: 600 !important;
font-size: 0.9rem !important;
padding: 12px 28px !important;
color: #ffffff !important;
transition: transform 0.15s, box-shadow 0.15s !important;
box-shadow: 0 4px 22px rgba(100,80,200,0.4) !important;
position: relative;
z-index: 1;
}
.gr-button-primary:hover, button.primary:hover {
transform: translateY(-2px) !important;
box-shadow: 0 8px 32px rgba(120,80,220,0.55) !important;
}
.gr-button-primary:active, button.primary:active {
transform: translateY(0) !important;
}
/* ββ Save checkmark button ββ */
#save-btn button {
width: 52px !important;
height: 52px !important;
min-width: 52px !important;
border-radius: 50% !important;
padding: 0 !important;
font-size: 1.5rem !important;
background: linear-gradient(135deg, #1a6fd4 0%, #7b4fd4 100%) !important;
border: none !important;
box-shadow: 0 4px 22px rgba(100,80,200,0.4) !important;
color: #fff !important;
transition: transform 0.15s, box-shadow 0.15s !important;
line-height: 1 !important;
}
#save-btn button:hover {
transform: scale(1.12) translateY(-2px) !important;
box-shadow: 0 8px 32px rgba(120,80,220,0.55) !important;
}
#save-btn button:active {
transform: scale(0.96) !important;
}
/* ββ footer ββ */
.kasa-footer {
text-align: center;
font-size: 0.72rem;
color: rgba(180,190,255,0.58);
padding: 24px 0 32px;
letter-spacing: 0.05em;
position: relative;
z-index: 1;
}
/* ββ pulse dot ββ */
.pulse-dot {
display: inline-block;
width: 9px; height: 9px;
border-radius: 50%;
background: #ffffff;
box-shadow: 0 0 6px 2px rgba(255,255,255,0.6);
animation: pulse 2s ease-in-out infinite;
}
@keyframes pulse {
0%, 100% { opacity: 1; transform: scale(1); }
50% { opacity: 0.3; transform: scale(0.65); }
}
"""
| # ========================================================= | |
| # UI | |
| # ========================================================= | |
# Build the Gradio interface: a hero banner, an audio-input column, a
# transcription/correction column, a footer, and the two button handlers.
# NOTE(review): several user-facing strings below contain "β" / "β¦", which
# look like mis-decoded symbols (arrow, check mark, dash, ellipsis) -- confirm
# the intended characters against the original file before changing them.
with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
    # -- Hero --
    gr.HTML("""
<div id="hero">
<div class="orb-wrap">
<div class="orb-stage">
<div class="orb-ring"></div>
<div class="orb-ring"></div>
<div class="orb-ring"></div>
<div class="orb-core"></div>
<div class="orb-waves">
<div class="orb-wave-bar"></div>
<div class="orb-wave-bar"></div>
<div class="orb-wave-bar"></div>
<div class="orb-wave-bar"></div>
<div class="orb-wave-bar"></div>
</div>
</div>
</div>
<div class="kasa-title">Kasanoma ASR</div>
<div class="kasa-sub">
Automatic speech recognition for natural EnglishβTwi
code-switched conversations. Speak the way you actually speak.
</div>
<span class="kasa-badge"><span class="pulse-dot"></span>Live Transcription</span>
</div>
""")
    # -- Main columns --
    with gr.Row(equal_height=True):
        # Left column: record/upload audio and trigger transcription.
        with gr.Column(scale=1):
            gr.HTML('<div class="kasa-label">Audio Input</div>')
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="Record or upload audio"
            )
            transcribe_btn = gr.Button("Transcribe β", variant="primary")
        # Right column: read-only model output plus an editable correction.
        with gr.Column(scale=1):
            gr.HTML('<div class="kasa-label">Transcription</div>')
            model_output = gr.Textbox(
                label="",
                placeholder="Transcription will appear hereβ¦",
                lines=3,
                interactive=False
            )
            gr.HTML('<div class="kasa-label" style="margin-top:14px;">Edit</div>')
            gr.HTML('<div class="edit-hint">Type what was actually said β fix any errors the model made, then hit β to save.</div>')
            with gr.Row(equal_height=True):
                corrected_output = gr.Textbox(
                    label="",
                    placeholder="Correct the transcription if neededβ¦",
                    lines=3,
                    scale=9
                )
                # elem_id="save-btn" is the hook for the #save-btn CSS rules.
                save_btn = gr.Button("β", elem_id="save-btn", scale=1)
    # Holds the audio path returned by transcribe_audio so that save_sample
    # receives the same file that was transcribed.
    hidden_audio_path = gr.State()
    # -- Footer --
    gr.HTML("""
<div class="kasa-footer">
Kasanoma · English–Twi Code-Switching ASR · Project Kasa 2026
</div>
""")
    # -- Event handlers --
    # Transcribe fills both textboxes and stores the audio path in the state.
    transcribe_btn.click(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=[model_output, corrected_output, hidden_audio_path]
    )
    # Save has no outputs: save_sample returns None and updates nothing.
    save_btn.click(
        fn=save_sample,
        inputs=[hidden_audio_path, corrected_output]
    )
| # ========================================================= | |
| # LAUNCH | |
| # ========================================================= | |
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    # share=True requests a public share link from Gradio.
    demo.launch(share=True)