| |
| |
| |
|
|
| """Gradio Space that runs the fixed notebook workflow on bundled CSV files.""" |
|
|
| from pathlib import Path |
| import shutil |
| import warnings |
| import gradio as gr |
| import matplotlib.pyplot as plt |
| import pandas as pd |
| import seaborn as sns |
| import statsmodels.api as sm |
| from itertools import product |
| from zipfile import ZipFile |
|
|
# Bundled input CSVs; the relative names are resolved against the working directory.
DATA_REVIEWS = "synthetic_book_reviews.csv"
DATA_SALES = "synthetic_sales_data.csv"

# Output tree: artifacts/figures holds the charts, artifacts/tables the CSV exports.
ART_DIR = Path("artifacts")
FIG_DIR = ART_DIR.joinpath("figures")
TAB_DIR = ART_DIR.joinpath("tables")
|
|
|
|
def ensure_dirs():
    """Make sure the figure and table output folders exist.

    Missing parent folders are created too, and folders that already exist
    are left as they are.
    """
    for folder in (FIG_DIR, TAB_DIR):
        folder.mkdir(parents=True, exist_ok=True)
|
|
|
|
def load_data():
    """Load the two bundled datasets and check the columns the pipeline needs.

    Returns:
        A ``(reviews, sales)`` tuple of DataFrames read from ``DATA_REVIEWS``
        and ``DATA_SALES``.

    Raises:
        ValueError: If either file lacks a required column. The message lists
            the missing names in sorted order.
    """
    reviews = pd.read_csv(DATA_REVIEWS)
    sales = pd.read_csv(DATA_SALES)
    checks = (
        # sentiment_label is grouped on by save_sentiment_chart and
        # save_decision_table, so its absence is reported here instead of
        # surfacing later as a bare KeyError.
        (
            "review",
            reviews,
            {"title", "review_text", "rating", "popularity_score", "sentiment_label"},
        ),
        ("sales", sales, {"title", "month", "units_sold"}),
    )
    for label, frame, required in checks:
        missing = required.difference(frame.columns)
        if missing:
            raise ValueError(f"Missing {label} columns: {sorted(missing)}")
    return reviews, sales
|
|
|
|
def build_sample_titles(reviews, per_score=5):
    """Pick up to ``per_score`` titles from each popularity score.

    Scores are visited in ascending order. Within a score, titles keep their
    order of first appearance in ``reviews``. Rows with a missing score or a
    missing title are ignored. A title that occurs under several scores is
    returned once per score.

    Args:
        reviews: DataFrame with ``title`` and ``popularity_score`` columns.
        per_score: Maximum number of titles taken per score. Defaults to 5.
            Must be non-negative.

    Returns:
        A flat list of the selected titles.

    Raises:
        ValueError: If ``per_score`` is negative.
    """
    if per_score < 0:
        # A negative slice bound would silently mean "all but the last k".
        raise ValueError(f"per_score must be non-negative, got {per_score}")
    scores = reviews["popularity_score"]
    sampled_titles = []
    for score in sorted(scores.dropna().unique()):
        titles = reviews.loc[scores == score, "title"].dropna().unique().tolist()
        sampled_titles.extend(titles[:per_score])
    return sampled_titles
|
|
|
|
def save_sales_trend_chart(sampled_sales, sampled_books, sampled_titles):
    """Plot units sold per month for each sampled title and save the figure.

    Every title gets one line, coloured by the popularity score on its first
    row in ``sampled_books``. Titles with no row there, or with a score that
    has no mapped colour, are drawn in gray.

    Args:
        sampled_sales: DataFrame with ``title``, ``month`` and ``units_sold``.
        sampled_books: DataFrame with ``title`` and ``popularity_score``.
        sampled_titles: Iterable of titles to draw, in legend order.

    Returns:
        Path of the saved PNG, as a string.
    """
    popularity_colors = {1: "darkred", 2: "orangered", 3: "gold", 4: "mediumseagreen", 5: "royalblue"}
    # Build the title -> score lookup once instead of filtering sampled_books
    # for every title; drop_duplicates keeps the first row per title.
    first_rows = sampled_books.drop_duplicates("title")
    score_by_title = dict(zip(first_rows["title"], first_rows["popularity_score"]))
    out = FIG_DIR / "sales_trends_sampled_titles.png"
    fig, ax = plt.subplots(figsize=(14, 6))
    try:
        for title in sampled_titles:
            # Sort by month so the line follows time even if the rows are unordered.
            subset = sampled_sales[sampled_sales["title"] == title].sort_values("month")
            ax.plot(subset["month"], subset["units_sold"], label=title,
                    color=popularity_colors.get(score_by_title.get(title), "gray"))
        ax.set_title("Sales Trends Over Time")
        ax.set_xlabel("Month")
        ax.set_ylabel("Units Sold")
        ax.tick_params(axis="x", rotation=45)
        ax.grid(True, alpha=0.3)
        ax.legend(loc="center left", bbox_to_anchor=(1, 0.5), fontsize="small")
        fig.tight_layout()
        fig.savefig(out, dpi=150, bbox_inches="tight")
    finally:
        # pyplot keeps every open figure referenced globally; close it even
        # when plotting or saving fails so repeated runs do not pile up figures.
        plt.close(fig)
    return str(out)
|
|
|
|
def save_sentiment_chart(sampled_reviews):
    """Save the stacked sentiment chart and the counts table behind it.

    Reviews are grouped by a "<rating>★ | <title>" label and by
    ``sentiment_label``. The counts are written to
    ``sentiment_counts_sampled.csv`` in the tables folder and drawn as a
    stacked horizontal bar chart.

    Args:
        sampled_reviews: DataFrame with ``rating``, ``title`` and
            ``sentiment_label`` columns. The caller's frame is not modified.

    Returns:
        Path of the saved PNG, as a string.
    """
    labelled = sampled_reviews.copy()
    labelled["grouped_title"] = labelled["rating"].astype(str) + "★ | " + labelled["title"]
    counts = labelled.groupby(["grouped_title", "sentiment_label"]).size().unstack(fill_value=0)
    # Fix the column set and order so a sentiment class with no reviews still appears.
    counts = counts.reindex(columns=["negative", "neutral", "positive"], fill_value=0)
    counts.reset_index().to_csv(TAB_DIR / "sentiment_counts_sampled.csv", index=False)
    out = FIG_DIR / "sentiment_distribution_sampled_titles.png"
    fig, ax = plt.subplots(figsize=(12, 12))
    try:
        counts.plot.barh(stacked=True, ax=ax, color={"negative": "royalblue", "neutral": "lightgray", "positive": "crimson"})
        ax.set_title("Sentiment Distribution in Reviews")
        ax.set_xlabel("Number of Reviews")
        ax.set_ylabel("Book Title")
        ax.grid(axis="x", linestyle="--", alpha=0.4)
        fig.tight_layout()
        fig.savefig(out, dpi=150, bbox_inches="tight")
    finally:
        # pyplot keeps every open figure referenced globally; close it even
        # when plotting or saving fails so repeated runs do not pile up figures.
        plt.close(fig)
    return str(out)
|
|
|
|
def pricing_action(row, *, high_units=120, low_units=60,
                   min_positive=0.6, min_negative=0.4):
    """Map one title's sales and sentiment figures to a pricing action.

    With the default thresholds:
      * "increase price" when ``avg_units_sold`` >= 120 and ``positive_ratio`` >= 0.6
      * "decrease price" when ``avg_units_sold`` <= 60 and ``negative_ratio`` >= 0.4
      * "keep price" otherwise

    Args:
        row: Mapping-like record (dict or pandas Series) with
            ``avg_units_sold`` and, optionally, ``positive_ratio`` and
            ``negative_ratio``. A missing ratio counts as 0.
        high_units: Minimum average units for a price increase.
        low_units: Maximum average units for a price decrease.
        min_positive: Minimum positive-review share for a price increase.
        min_negative: Minimum negative-review share for a price decrease.

    Returns:
        One of "increase price", "decrease price" or "keep price".
    """
    units = row["avg_units_sold"]
    if units >= high_units and row.get("positive_ratio", 0) >= min_positive:
        return "increase price"
    if units <= low_units and row.get("negative_ratio", 0) >= min_negative:
        return "decrease price"
    return "keep price"
|
|
|
|
def save_decision_table(reviews, sales):
    """Build the per-title pricing decision table and write it to CSV.

    Average units sold per title (from ``sales``) are left-joined with the
    positive and negative review shares per title (from ``reviews``). Titles
    with sales but no reviews get ratios of 0. Each row is then labelled by
    ``pricing_action``. The result is written to ``pricing_decisions.csv``
    in the tables folder.

    Args:
        reviews: DataFrame with ``title`` and ``sentiment_label`` columns.
        sales: DataFrame with ``title`` and ``units_sold`` columns.

    Returns:
        The decision DataFrame, sorted by title with a fresh index.
    """
    mean_units = sales.groupby("title", as_index=False)["units_sold"].mean()
    mean_units = mean_units.rename(columns={"units_sold": "avg_units_sold"})

    label_counts = reviews.groupby(["title", "sentiment_label"]).size()
    label_counts = label_counts.unstack(fill_value=0)
    label_counts["total"] = label_counts.sum(axis=1)
    # .get falls back to 0 when a sentiment class never occurs in the data.
    label_counts["positive_ratio"] = label_counts.get("positive", 0) / label_counts["total"]
    label_counts["negative_ratio"] = label_counts.get("negative", 0) / label_counts["total"]

    merged = mean_units.merge(label_counts, on="title", how="left").fillna(0)
    merged["pricing_action"] = merged.apply(pricing_action, axis=1)

    wanted = ["title", "avg_units_sold", "positive_ratio", "negative_ratio", "pricing_action"]
    result = merged[wanted].sort_values("title").reset_index(drop=True)
    result.to_csv(TAB_DIR / "pricing_decisions.csv", index=False)
    return result
|
|
|
|
def save_dashboard_export(sales):
    """Write total units sold per month to ``df_dashboard.csv`` in the tables folder.

    Args:
        sales: DataFrame with ``month`` and ``units_sold`` columns.
    """
    monthly = sales.groupby("month", as_index=False).agg(total_units_sold=("units_sold", "sum"))
    monthly = monthly.sort_values("month")
    monthly.to_csv(TAB_DIR / "df_dashboard.csv", index=False)
|
|
|
|
def bundle_exports():
    """Collect every generated figure and table into a single zip archive.

    Entries are stored under their path relative to the artifacts folder
    (for example ``figures/<name>``), and the archive is rewritten on each call.

    Returns:
        Path of ``exports.zip``, as a string.
    """
    archive = ART_DIR / "exports.zip"
    with ZipFile(archive, "w") as bundle:
        for folder in (FIG_DIR, TAB_DIR):
            for item in folder.glob("*"):
                bundle.write(item, arcname=item.relative_to(ART_DIR))
    return str(archive)
|
|
|
|
def run_analysis():
    """Run the whole pipeline and return the outputs shown in the UI.

    Steps: create the output folders, load both datasets, parse the sales
    ``month`` column as datetimes, sample titles per popularity score, render
    the two charts, build the pricing table, write the dashboard export, and
    zip everything.

    Returns:
        A tuple ``(sales_chart_path, sentiment_chart_path, decision_df,
        zip_path)``.
    """
    ensure_dirs()
    reviews, sales = load_data()
    sales["month"] = pd.to_datetime(sales["month"])

    titles = build_sample_titles(reviews)
    in_sample = reviews["title"].isin(titles)
    sales_subset = sales[sales["title"].isin(titles)].copy()
    # Two independent copies of the same review rows: one feeds the sentiment
    # chart, the other supplies popularity scores to the sales chart.
    review_subset = reviews[in_sample].copy()
    book_subset = reviews[in_sample].copy()

    trend_png = save_sales_trend_chart(sales_subset, book_subset, titles)
    sentiment_png = save_sentiment_chart(review_subset)
    decisions = save_decision_table(reviews, sales)
    save_dashboard_export(sales)
    # Zip last so the archive contains everything written above.
    archive = bundle_exports()
    return trend_png, sentiment_png, decisions, archive
|
|
|
|
# Page layout: a heading, a trigger button, then the four outputs of run_analysis.
with gr.Blocks() as demo:
    gr.Markdown("# Book Analytics Dashboard")
    gr.Markdown("Runs the fixed notebook workflow on the bundled review and sales datasets.")
    run_btn = gr.Button("Run analysis")
    sales_chart = gr.Image(label="Sales trends")
    sentiment_chart = gr.Image(label="Sentiment distribution")
    decision_table = gr.Dataframe(label="Pricing decisions")
    exports = gr.File(label="Download all exports")
    # The outputs list must stay in the same order as the tuple run_analysis returns.
    run_btn.click(
        fn=run_analysis,
        inputs=None,
        outputs=[sales_chart, sentiment_chart, decision_table, exports],
    )


# Start the server only when the file is executed directly.
if __name__ == "__main__":
    demo.launch()
|
|