# Hugging Face Space (status when captured: Sleeping)
| import gradio as gr | |
| import pandas as pd | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
| from transformers import pipeline | |
| import tempfile | |
| import os | |
| # Load and clean Titanic dataset | |
def load_and_clean_data(source=None):
    """Load the Titanic passenger table and apply basic cleaning.

    Args:
        source: Anything ``pandas.read_csv`` accepts (URL, local path, or
            file-like object). When omitted, the Data Science Dojo copy of
            the Titanic CSV on GitHub is downloaded.

    Returns:
        A DataFrame in which missing ``Age`` values are replaced by the
        column median, missing ``Embarked`` values by the most frequent
        port, and the ``Cabin``, ``Ticket`` and ``Name`` columns are removed.

    Raises:
        KeyError: If an expected column is absent from the CSV.
    """
    if source is None:
        source = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
    df = pd.read_csv(source)

    # Assign the filled columns back instead of calling
    # ``df[col].fillna(..., inplace=True)``: that chained in-place form is
    # deprecated in pandas 2.x and does not modify ``df`` under Copy-on-Write.
    df['Age'] = df['Age'].fillna(df['Age'].median())

    # ``mode()`` is empty when every Embarked value is missing; in that case
    # there is nothing sensible to fill with, so leave the column untouched.
    embarked_mode = df['Embarked'].mode()
    if not embarked_mode.empty:
        df['Embarked'] = df['Embarked'].fillna(embarked_mode[0])

    return df.drop(columns=['Cabin', 'Ticket', 'Name'])
| # Generate EDA plots | |
def generate_plot_images(df):
    """Render each EDA plot to a temporary PNG file and return the paths.

    Args:
        df: DataFrame handed unchanged to every plotting function.

    Returns:
        A list of PNG file paths, one per plot, in the order
        ``plot_survival_count`` then ``plot_survival_by_gender``. The files
        are created with ``delete=False``, so they stay on disk after this
        function returns and the caller is responsible for removing them.
    """
    temp_files = []
    for plot_func in (plot_survival_count, plot_survival_by_gender):
        fig, ax = plt.subplots()
        try:
            plot_func(df, ax)
            # Only the reserved path is needed. Close the handle before
            # saving so no file descriptor leaks and so the file is not
            # still open when matplotlib writes to it (an open handle
            # blocks the write on Windows).
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                png_path = tmp.name
            fig.savefig(png_path)
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(fig)
        temp_files.append(png_path)
    return temp_files
def plot_survival_count(df, ax):
    """Draw a count plot of the ``Survived`` column of *df* onto *ax*."""
    sns.countplot(data=df, x='Survived', ax=ax)
def plot_survival_by_gender(df, ax):
    """Draw a count plot of ``Sex``, split by ``Survived``, onto *ax*."""
    sns.countplot(data=df, x='Sex', hue='Survived', ax=ax)
| # Summarizer pipeline | |
# Built once at import time; summarize_eda() reuses this module-level object.
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
def summarize_eda(df):
    """Return a short model-generated summary of the cleaned dataset.

    A fixed description of the cleaning steps is filled in with the row
    count and the most frequent ``Embarked`` value of *df*, then passed to
    the module-level ``summarizer``.

    Args:
        df: DataFrame with an ``Embarked`` column.

    Returns:
        The summary text, or a string starting with
        ``"Summarization failed: "`` if any step raises.
    """
    try:
        row_count = len(df)
        top_port = df['Embarked'].mode()[0]
        text = (
            "\n"
            f"Titanic dataset has {row_count} rows. Missing values in Age and Embarked were filled.\n"
            "Columns Cabin, Ticket, and Name were dropped. Females had higher survival rates.\n"
            f"Most passengers embarked from {top_port}.\n"
        )
        outputs = summarizer(text, max_length=100, min_length=30, do_sample=False)
        return outputs[0]['summary_text']
    except Exception as exc:
        # Failures are reported as text so the UI still has something to show.
        return f"Summarization failed: {exc!s}"
| # Markdown fallback logic | |
def safe_markdown(df):
    """Render the first rows of *df* as Markdown, or as plain text on failure.

    Args:
        df: DataFrame to preview.

    Returns:
        ``df.head().to_markdown()`` when that call succeeds, otherwise
        ``df.head().to_string()``.
    """
    preview = df.head()
    try:
        rendered = preview.to_markdown()
    except Exception:
        # Any failure of the Markdown renderer falls back to the plain table.
        rendered = preview.to_string()
    return rendered
| # Main app logic | |
def run_titanic_eda():
    """Run the full pipeline and return the four values the UI displays.

    Returns:
        A 4-tuple ``(table, first_plot, second_plot, summary)``: the data
        preview text, the two plot image paths, and the summary text. If any
        step raises, the tuple is
        ``("Data load failed", None, None, "Error: <message>")`` instead.
    """
    try:
        cleaned = load_and_clean_data()
        image_paths = generate_plot_images(cleaned)
        llm_summary = summarize_eda(cleaned)
        table = safe_markdown(cleaned)
        return table, image_paths[0], image_paths[1], llm_summary
    except Exception as exc:
        # Keep the output shape stable so the interface can still render.
        return "Data load failed", None, None, f"Error: {exc!s}"
| # Gradio interface | |
# Single-page interface: no inputs, and four outputs matching the 4-tuple
# returned by run_titanic_eda (table text, two images, summary text).
demo = gr.Interface(
    fn=run_titanic_eda,
    inputs=[],
    outputs=[
        gr.Markdown(label="Cleaned Data Sample"),
        gr.Image(label="Survival Count"),
        gr.Image(label="Survival by Gender"),
        gr.Textbox(label="LLM Summary of EDA"),
    ],
    # The title and description previously contained mis-decoded characters
    # ("π’", "β"); the intended ship emoji and em dash are restored here.
    title="🚢 Titanic EDA + LLM Summary",
    description="Cleaned dataset, EDA plots, and LLM-generated summary — all in one page.",
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()